package com.yeshi.buwan.util;

import com.yeshi.buwan.domain.VideoInfo;
import net.sf.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.yeshi.utils.HttpUtil;

import javax.persistence.Entity;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers that query Douban's search page, subject detail pages, the mobile JSON API and
 * the celebrities page, and convert the scraped HTML into simple value objects.
 */
// NOTE(review): @Entity on a class that only has static helpers looks unintended; it is kept here
// so that annotation scanning sees the class exactly as before — confirm and remove.
@Entity
public class DouBanUtil {

    /** Search endpoint prefix; the {@code q=<keyword>} parameter is appended to it. */
    public static String baseUrl = "https://www.douban.com/search?cat=1002&";

    /** Upper bound on how many search results {@link #startSearch(String)} resolves into details. */
    public static int maxCount = 1;

    /** Desktop user agent sent with every HTML page request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36";

    /** Timeout, in milliseconds, for search and detail page requests. */
    private static final int PAGE_TIMEOUT_MILLIS = 20000;

    /** Detail page of a subject; same host and path prefix as the celebrities page used below. */
    private static final String DETAIL_URL_FORMAT = "https://movie.douban.com/subject/%s/";

    /** Matches a {@code yyyy-M-d} style date inside the release-date text. */
    private static final Pattern RELEASE_DATE =
            Pattern.compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}");

    /** Separator between the rows of the {@code #info} block in its serialized HTML. */
    private static final String INFO_ROW_SEPARATOR = "<br>";

    /** Closing tag that ends the label of an {@code #info} row; the value follows it. */
    private static final String LABEL_END = "</span>";

    // NOTE(review): reconstructed from a mis-encoded literal in the previous revision; presumably
    // the "country/region of production" label of the #info block — confirm against a live page.
    private static final String AREA_LABEL = "制片国家";

    /**
     * Searches Douban for a keyword and loads the detail page of the leading results.
     *
     * <p>At most {@link #maxCount} results are resolved. A result that cannot be parsed or whose
     * detail page cannot be loaded is reported via its stack trace and skipped, so one bad entry
     * does not abort the whole search.
     *
     * @param st the search keyword
     * @return the videos that could be resolved; empty when the page contains no result list
     * @throws IOException if the search page itself cannot be fetched
     */
    public static List<VideoInfo> startSearch(String st) throws IOException {
        List<VideoInfo> list = new ArrayList<>();
        Element resultList = findResultList(fetchSearchPage(st));
        if (resultList == null) {
            // No hits: the page has no result list at all.
            return list;
        }
        Elements els = resultList.getElementsByClass("result");
        int count = Math.min(maxCount, els.size());
        for (int i = 0; i < count; i++) {
            try {
                SearchResultItem resultItem = parseSearchResultItem(els.get(i));
                if (resultItem.getId() == null) {
                    // Without a Douban id there is no detail page to load.
                    continue;
                }
                list.add(loadVideoInfo(resultItem));
            } catch (Exception e) {
                // Best effort: keep going with the remaining results.
                e.printStackTrace();
            }
        }
        return list;
    }

    /** Fetches the search result page for a keyword, sending browser-like request headers. */
    private static Document fetchSearchPage(String keyword) throws IOException {
        return Jsoup
                .connect(baseUrl + "q=" + URLEncoder.encode(keyword, "UTF-8"))
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                .header("Host", "www.douban.com")
                .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"")
                .header("sec-ch-ua-mobile", "?0")
                .header("sec-ch-ua-platform", "\"Windows\"")
                .header("Sec-Fetch-Dest", "document")
                .header("Sec-Fetch-Mode", "navigate")
                .header("Sec-Fetch-Site", "none")
                .header("Sec-Fetch-User", "?1")
                .header("Upgrade-Insecure-Requests", "1")
                .userAgent(USER_AGENT)
                .timeout(PAGE_TIMEOUT_MILLIS)
                .get();
    }

    /** Returns the first {@code result-list} element of the search page, or null when absent. */
    private static Element findResultList(Document doc) {
        Element content = doc.getElementById("content");
        if (content == null) {
            return null;
        }
        Element searchResult = content.getElementsByClass("search-result").first();
        if (searchResult == null) {
            return null;
        }
        return searchResult.getElementsByClass("result-list").first();
    }

    /** Loads the detail page of one search result and maps its fields onto a {@link VideoInfo}. */
    private static VideoInfo loadVideoInfo(SearchResultItem resultItem) throws IOException {
        String url = String.format(DETAIL_URL_FORMAT, resultItem.getId());
        LogUtil.i(url);
        Document detailDoc = Jsoup
                .connect(url)
                .timeout(PAGE_TIMEOUT_MILLIS)
                .userAgent(USER_AGENT)
                .get();

        Element detail = detailDoc.getElementById("content");
        if (detail == null) {
            throw new IOException("Douban detail page has no #content element: " + url);
        }

        // The poster is mandatory: a page without one is rejected and the caller skips the item.
        Element mainPic = detail.getElementById("mainpic");
        Element poster = mainPic == null ? null : mainPic.getElementsByTag("img").first();
        if (poster == null) {
            throw new IOException("Douban detail page has no poster image: " + url);
        }
        String picture = poster.attr("src").replace("/spst/", "/lpst/");

        String actor = joinTexts(findActorLinks(detail));
        String beizhu = joinTexts(detail.getElementsByAttributeValue("property", "v:genre"));

        // Runtime is taken from the "content" attribute; "0" is the fallback.
        String duration = "0";
        Element runtime = detail.getElementsByAttributeValue("property", "v:runtime").first();
        if (runtime != null && !runtime.attr("content").isEmpty()) {
            duration = runtime.attr("content");
        }

        String introduction = firstText(detailDoc, "property", "v:summary");
        String score = firstText(detailDoc, "property", "v:average");
        String data = firstText(detailDoc, "property", "v:initialReleaseDate");

        String director = joinTexts(detail.getElementsByAttributeValue("rel", "v:directedBy"));
        if (director.endsWith(" ")) {
            director = director.substring(0, director.length() - 1);
        }

        String year = "";
        String month = "0";
        String day = "0";
        Matcher matcher = RELEASE_DATE.matcher(data);
        if (matcher.find()) {
            String[] dateParts = matcher.group(0).split("-");
            year = dateParts[0];
            month = dateParts[1];
            day = dateParts[2];
        }

        String area = parseArea(detailDoc.getElementById("info"));

        VideoInfo info = new VideoInfo();
        info.setName(resultItem.getName());
        info.setBeizhu(beizhu);
        info.setDuration(duration);
        info.setIntroduction(introduction);
        info.setMainActor(actor);
        info.setPicture(picture);
        info.setScore(score);
        info.setYear(year);
        info.setDay(day);
        info.setMonth(month);
        info.setDirector(director);
        info.setArea(area);
        return info;
    }

    /** Returns the links inside the first {@code actor}/{@code attrs} block, or an empty set. */
    private static Elements findActorLinks(Element detail) {
        Element actorRow = detail.getElementsByAttributeValue("class", "actor").first();
        if (actorRow == null) {
            return new Elements();
        }
        Element attrs = actorRow.getElementsByAttributeValue("class", "attrs").first();
        if (attrs == null) {
            return new Elements();
        }
        Element span = attrs.getElementsByTag("span").first();
        if (span == null) {
            return new Elements();
        }
        return span.getElementsByTag("a");
    }

    /** Concatenates the text of every element, each followed by a single space. */
    private static String joinTexts(Elements elements) {
        StringBuilder joined = new StringBuilder();
        for (Element element : elements) {
            joined.append(element.text()).append(' ');
        }
        return joined.toString();
    }

    /** Returns the text of the first element whose attribute has the given value, or "". */
    private static String firstText(Element root, String attribute, String value) {
        Element match = root.getElementsByAttributeValue(attribute, value).first();
        return match == null ? "" : match.text();
    }

    /** Extracts the value that follows the area label in the {@code #info} block, or "". */
    private static String parseArea(Element vinfo) {
        if (vinfo == null) {
            return "";
        }
        for (String row : vinfo.toString().split(INFO_ROW_SEPARATOR)) {
            if (!row.contains(AREA_LABEL)) {
                continue;
            }
            // Only the first matching row is considered.
            int labelEnd = row.indexOf(LABEL_END);
            if (labelEnd < 0) {
                return "";
            }
            String area = row.substring(labelEnd + LABEL_END.length()).replace("\"", "").trim();
            LogUtil.i(area);
            return area;
        }
        return "";
    }

    /**
     * Parses one entry of the search result list.
     *
     * <p>The entry's first link carries the real subject address in its {@code url} query
     * parameter; the first purely numeric path segment of that address is used as the id. The
     * {@code subject-cast} text is split on {@code /}: a numeric part becomes the year (the last
     * one wins) and every other part longer than one character is kept as a cast entry.
     *
     * @param ele a {@code result} element of the search page
     * @return the parsed item; its id and year are null when no numeric part was found
     * @throws Exception if the element lacks the expected link, title or cast markup
     */
    public static SearchResultItem parseSearchResultItem(Element ele) throws Exception {
        String url = ele.getElementsByTag("a").get(0).attr("href");
        Map<String, String> params = HttpUtil.getPramsFromUrl(url);
        String encodedTarget = params == null ? null : params.get("url");
        if (encodedTarget == null) {
            throw new IllegalArgumentException("Search result link has no 'url' parameter: " + url);
        }
        String subUrl = URLDecoder.decode(encodedTarget, "UTF-8");

        SearchResultItem resultItem = new SearchResultItem();

        String id = null;
        for (String segment : subUrl.split("/")) {
            if (NumberUtil.isNumeric(segment)) {
                id = segment;
                break;
            }
        }
        resultItem.setId(id);

        // Title text.
        Element title = ele.getElementsByClass("title").get(0);
        resultItem.setName(title.getElementsByTag("a").text().trim());

        // Year and representative cast/crew names.
        String cast = title.getElementsByClass("subject-cast").get(0).text().trim();
        String year = null;
        List<String> subCastList = new ArrayList<>();
        for (String part : cast.split("/")) {
            String trimmed = part.trim();
            if (NumberUtil.isNumeric(trimmed)) {
                year = trimmed;
            } else if (trimmed.length() > 1) {
                subCastList.add(trimmed);
            }
        }
        resultItem.setYear(year);
        resultItem.setSubCastList(subCastList);
        return resultItem;
    }

    /**
     * Saves the inner HTML of the search result list for a keyword to a file, encoded as UTF-8.
     *
     * @param name the search keyword
     * @param path the destination handed to {@code FileUtil.saveAsFile}
     * @throws Exception if the page cannot be fetched, contains no result list, or saving fails
     */
    public static void saveSearch(String name, String path) throws Exception {
        Element resultList = findResultList(fetchSearchPage(name));
        if (resultList == null) {
            throw new IOException("No result list found on Douban search page for: " + name);
        }
        byte[] bs = resultList.html().getBytes(StandardCharsets.UTF_8);
        FileUtil.saveAsFile(new ByteArrayInputStream(bs), path);
    }

    /**
     * Builds a {@link DouBanStar} from one {@code li} entry of the celebrities page.
     *
     * <p>The names are only filled in when the link title contains a space: the part before the
     * first space is stored as the Chinese name and the remainder as the English name. The id is
     * the last path segment of the link, and avatars whose address contains "default" are not
     * stored.
     */
    private static DouBanStar getStarInfo(Element item) {
        DouBanStar star = new DouBanStar();

        String avatarStyle = item.getElementsByClass("avatar").get(0).attr("style");
        String avatar = avatarStyle.replace("background-image: url(", "").replace(")", "").trim();

        Element link = item.getElementsByTag("a").get(0);
        String name = link.attr("title");
        if (name.contains(" ")) {
            String chineseName = name.split(" ")[0].trim();
            String englishName = name.substring(chineseName.length()).trim();
            star.setChineseName(chineseName);
            star.setEnglishName(englishName);
        }

        String href = link.attr("href");
        if (href.endsWith("/")) {
            href = href.substring(0, href.length() - 1);
        }
        String[] segments = href.split("/");
        String id = segments[segments.length - 1];

        if (!avatar.contains("default")) {
            star.setAvatar(avatar);
        }
        star.setId(id);
        star.setUrl(href);
        return star;
    }

    /** Converts every {@code li} below the given element into a {@link DouBanStar}. */
    private static List<DouBanStar> getStarList(Element root) {
        List<DouBanStar> list = new ArrayList<>();
        for (Element item : root.getElementsByTag("li")) {
            DouBanStar star = getStarInfo(item);
            if (star != null) {
                list.add(star);
            }
        }
        return list;
    }

    /**
     * Requests the mobile JSON description of a TV subject.
     *
     * @param id the Douban subject id
     * @return whatever {@code HttpUtil.get} returns for the request
     */
    public static String getDetail(String id) {
        String url = String.format("https://m.douban.com/rexxar/api/v2/tv/%s?ck=&for_mobile=1", id);
        Map<String, String> headers = new HashMap<>();
        headers.put("Accept", "application/json");
        headers.put("Referer", String.format("https://m.douban.com/movie/subject/%s/", id));
        headers.put("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"");
        headers.put("sec-ch-ua-mobile", "?1");
        headers.put("sec-ch-ua-platform", "Android");
        headers.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Mobile Safari/537.36");
        return HttpUtil.get(url, new HashMap<>(), headers);
    }

    /**
     * Fetches the JSON description of a subject, re-serializes it through {@link JSONObject} and
     * writes it as UTF-8 to {@code <id>.json} below a fixed local directory.
     *
     * @param id the Douban subject id
     * @throws Exception if the response is not a JSON object or the file cannot be written
     */
    public static void saveDetail(String id) throws Exception {
        JSONObject object = JSONObject.fromObject(getDetail(id));
        byte[] bs = object.toString().getBytes(StandardCharsets.UTF_8);
        // NOTE(review): the target directory is hard-coded to a Windows drive path.
        FileUtil.saveAsFile(new ByteArrayInputStream(bs), "F:\\豆瓣影视信息\\" + id + ".json");
    }

    /**
     * Loads the directors and actors listed on a subject's celebrities page.
     *
     * <p>Each {@code list-wrapper} section is classified by the text of its {@code h2} heading;
     * sections without a heading, or with a heading naming neither group, are ignored.
     *
     * @param movieId the Douban subject id
     * @return the collected people; a list stays null when its section is absent from the page
     * @throws IOException if the page cannot be fetched
     */
    public static Celebrities getMovieStars(String movieId) throws IOException {
        String url = String.format("https://movie.douban.com/subject/%s/celebrities", movieId);
        Document doc = Jsoup.connect(url).timeout(60000).userAgent(USER_AGENT).get();

        Celebrities celebrities = new Celebrities();
        Element root = doc.getElementById("celebrities");
        if (root == null) {
            return celebrities;
        }
        for (Element section : root.getElementsByClass("list-wrapper")) {
            Element heading = section.getElementsByTag("h2").first();
            if (heading == null) {
                continue;
            }
            String type = heading.ownText();
            if (type.contains("导演")) {
                celebrities.setDirectors(getStarList(section));
            } else if (type.contains("演员")) {
                celebrities.setStars(getStarList(section));
            }
        }
        return celebrities;
    }

    /** Directors and actors scraped from a subject's celebrities page. */
    static class Celebrities {
        private List<DouBanStar> directors;
        private List<DouBanStar> stars;

        public List<DouBanStar> getDirectors() {
            return directors;
        }

        public void setDirectors(List<DouBanStar> directors) {
            this.directors = directors;
        }

        public List<DouBanStar> getStars() {
            return stars;
        }

        public void setStars(List<DouBanStar> stars) {
            this.stars = stars;
        }
    }

    /** One person entry of the celebrities page. */
    static class DouBanStar {
        private String id;
        private String chineseName;
        private String englishName;
        private String avatar;
        private String url;

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public String getChineseName() {
            return chineseName;
        }

        public void setChineseName(String chineseName) {
            this.chineseName = chineseName;
        }

        public String getEnglishName() {
            return englishName;
        }

        public void setEnglishName(String englishName) {
            this.englishName = englishName;
        }

        public String getAvatar() {
            return avatar;
        }

        public void setAvatar(String avatar) {
            this.avatar = avatar;
        }

        public String getUrl() {
            return url;
        }

        public void setUrl(String url) {
            this.url = url;
        }
    }

    /** One parsed entry of the search result list. */
    static class SearchResultItem {
        private String id;
        private String name;
        private String year;
        private List<String> subCastList;

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public String getYear() {
            return year;
        }

        public void setYear(String year) {
            this.year = year;
        }

        public List<String> getSubCastList() {
            return subCastList;
        }

        public void setSubCastList(List<String> subCastList) {
            this.subCastList = subCastList;
        }
    }

    /** Manual entry point for ad-hoc experiments; intentionally performs no work. */
    public static void main(String[] args) throws Exception {
        // Example: DouBanUtil.saveDetail("35400242");
    }
}