| | |
| | | package com.yeshi.buwan.util; |
| | | |
| | | import java.io.IOException; |
| | | import java.net.URLEncoder; |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | import java.util.regex.Matcher; |
| | | import java.util.regex.Pattern; |
| | | |
| | | import javax.persistence.Entity; |
| | | |
| | | import com.yeshi.buwan.domain.VideoInfo; |
| | | import net.sf.json.JSONObject; |
| | | import org.jsoup.Jsoup; |
| | | import org.jsoup.nodes.Document; |
| | | import org.jsoup.nodes.Element; |
| | | import org.jsoup.select.Elements; |
| | | import org.yeshi.utils.HttpUtil; |
| | | |
| | | import com.yeshi.buwan.domain.VideoInfo; |
| | | import javax.persistence.Entity; |
| | | import java.io.ByteArrayInputStream; |
| | | import java.io.IOException; |
| | | import java.net.URLDecoder; |
| | | import java.net.URLEncoder; |
| | | import java.util.ArrayList; |
| | | import java.util.HashMap; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | import java.util.regex.Matcher; |
| | | import java.util.regex.Pattern; |
| | | |
| | | @Entity |
| | | public class DouBanUtil { |
| | | public static String baseUrl = "https://movie.douban.com/subject_search?cat=102&"; |
| | | public static int maxCount = 1; |
| | | public static String baseUrl = "https://www.douban.com/search?cat=1002&"; |
| | | public static int maxCount = 1; |
| | | |
| | | public static List<VideoInfo> startSearch(String st) throws IOException { |
| | | Document doc = Jsoup |
| | | .connect( |
| | | baseUrl + "search_text=" |
| | | + URLEncoder.encode(st, "UTF-8")) |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .timeout(20000).get(); |
| | | Element el = doc.getElementById("content"); |
| | | Elements els = el.getElementsByTag("table"); |
| | | List<VideoInfo> list = new ArrayList<VideoInfo>(); |
| | | int count = els.size() >= maxCount ? maxCount : els.size(); |
| | | /** |
| | | * @title: |
| | | * @description: 搜索 |
| | | * @author Administrator |
| | | * @date 2021/9/22 17:46 |
| | | */ |
| | | public static List<VideoInfo> startSearch(String st) throws IOException { |
| | | Document doc = Jsoup |
| | | .connect( |
| | | baseUrl + "q=" |
| | | + URLEncoder.encode(st, "UTF-8")) |
| | | .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9") |
| | | .header("Host", "www.douban.com") |
| | | .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"") |
| | | .header("sec-ch-ua-mobile", "?0") |
| | | .header("sec-ch-ua-platform", "\"Windows\"") |
| | | .header("Sec-Fetch-Dest", "document") |
| | | .header("Sec-Fetch-Mode", "navigate") |
| | | .header("Sec-Fetch-Site", "none") |
| | | .header("Sec-Fetch-User", "?1") |
| | | .header("Upgrade-Insecure-Requests", "1") |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .timeout(20000).get(); |
| | | Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0); |
| | | Elements els = el.getElementsByClass("result"); |
| | | List<VideoInfo> list = new ArrayList<>(); |
| | | int count = els.size() >= maxCount ? maxCount : els.size(); |
| | | |
| | | for (int i = 0; i < count; i++) { |
| | | try { |
| | | String url = els.get(i).getElementsByTag("a").get(0) |
| | | .attr("href"); |
| | | LogUtil.i(url); |
| | | Document detailDoc = Jsoup |
| | | .connect(url) |
| | | .timeout(20000) |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .get(); |
| | | Element detail = detailDoc.getElementById("content"); |
| | | Element vinfo = detailDoc.getElementById("info"); |
| | | for (int i = 0; i < count; i++) { |
| | | try { |
| | | //获取豆瓣ID |
| | | SearchResultItem resultItem = parseSearchResultItem(els.get(i)); |
| | | String url = ""; |
| | | |
| | | String name = detail.getElementsByTag("h1").get(0) |
| | | .getElementsByTag("span").get(0).text(); |
| | | LogUtil.i(url); |
| | | Document detailDoc = Jsoup |
| | | .connect(url) |
| | | .timeout(20000) |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .get(); |
| | | Element detail = detailDoc.getElementById("content"); |
| | | Element vinfo = detailDoc.getElementById("info"); |
| | | |
| | | String year = ""; |
| | | String picture = detail.getElementById("mainpic") |
| | | .getElementsByTag("img").get(0).attr("src") |
| | | .replace("/spst/", "/lpst/"); |
| | | String actor = ""; |
| | | try { |
| | | Elements actors = detail |
| | | .getElementsByAttributeValue("class", "actor") |
| | | .get(0) |
| | | .getElementsByAttributeValue("class", "attrs") |
| | | .get(0).getElementsByTag("span").get(0) |
| | | .getElementsByTag("a"); |
| | | |
| | | for (Element w : actors) { |
| | | actor += w.text() + " "; |
| | | } |
| | | } catch (Exception e) { |
| | | // e.printStackTrace(); |
| | | } |
| | | Elements beiz = detail.getElementsByAttributeValue("property", |
| | | "v:genre"); |
| | | String beizhu = ""; |
| | | for (Element e : beiz) { |
| | | beizhu += e.text() + " "; |
| | | } |
| | | String year = ""; |
| | | String picture = detail.getElementById("mainpic") |
| | | .getElementsByTag("img").get(0).attr("src") |
| | | .replace("/spst/", "/lpst/"); |
| | | String actor = ""; |
| | | try { |
| | | Elements actors = detail |
| | | .getElementsByAttributeValue("class", "actor") |
| | | .get(0) |
| | | .getElementsByAttributeValue("class", "attrs") |
| | | .get(0).getElementsByTag("span").get(0) |
| | | .getElementsByTag("a"); |
| | | |
| | | String duration = "0"; |
| | | try { |
| | | detail.getElementsByAttributeValue("property", "v:runtime") |
| | | .get(0).attr("content"); |
| | | } catch (Exception e) { |
| | | for (Element w : actors) { |
| | | actor += w.text() + " "; |
| | | } |
| | | } catch (Exception e) { |
| | | // e.printStackTrace(); |
| | | } |
| | | Elements beiz = detail.getElementsByAttributeValue("property", |
| | | "v:genre"); |
| | | String beizhu = ""; |
| | | for (Element e : beiz) { |
| | | beizhu += e.text() + " "; |
| | | } |
| | | |
| | | } |
| | | String introduction = ""; |
| | | try { |
| | | introduction = detailDoc |
| | | .getElementsByAttributeValue("property", |
| | | "v:summary").get(0).text(); |
| | | } catch (Exception e) { |
| | | String duration = "0"; |
| | | try { |
| | | detail.getElementsByAttributeValue("property", "v:runtime") |
| | | .get(0).attr("content"); |
| | | } catch (Exception e) { |
| | | |
| | | } |
| | | String score = detailDoc |
| | | .getElementsByAttributeValue("property", "v:average") |
| | | .get(0).text(); |
| | | } |
| | | String introduction = ""; |
| | | try { |
| | | introduction = detailDoc |
| | | .getElementsByAttributeValue("property", |
| | | "v:summary").get(0).text(); |
| | | } catch (Exception e) { |
| | | |
| | | String data = ""; |
| | | try { |
| | | data = detailDoc |
| | | .getElementsByAttributeValue("property", |
| | | "v:initialReleaseDate").get(0).text(); |
| | | } catch (Exception e) { |
| | | } |
| | | String score = detailDoc |
| | | .getElementsByAttributeValue("property", "v:average") |
| | | .get(0).text(); |
| | | |
| | | } |
| | | try { |
| | | score = detail |
| | | .getElementsByAttributeValue("property", |
| | | "v:average").get(0).text(); |
| | | } catch (Exception e) { |
| | | String data = ""; |
| | | try { |
| | | data = detailDoc |
| | | .getElementsByAttributeValue("property", |
| | | "v:initialReleaseDate").get(0).text(); |
| | | } catch (Exception e) { |
| | | |
| | | } |
| | | String director = ""; |
| | | try { |
| | | Elements directors = detail.getElementsByAttributeValue( |
| | | "rel", "v:directedBy"); |
| | | for (int n = 0; n < directors.size(); n++) { |
| | | director += directors.get(n).text() + " "; |
| | | } |
| | | } |
| | | try { |
| | | score = detail |
| | | .getElementsByAttributeValue("property", |
| | | "v:average").get(0).text(); |
| | | } catch (Exception e) { |
| | | |
| | | } catch (Exception e) { |
| | | } |
| | | String director = ""; |
| | | try { |
| | | Elements directors = detail.getElementsByAttributeValue( |
| | | "rel", "v:directedBy"); |
| | | for (int n = 0; n < directors.size(); n++) { |
| | | director += directors.get(n).text() + " "; |
| | | } |
| | | |
| | | } |
| | | } catch (Exception e) { |
| | | |
| | | if (director.endsWith(" ")) |
| | | director = director.substring(0, director.length() - 1); |
| | | Pattern pattern = Pattern |
| | | .compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}"); |
| | | Matcher matcher = pattern.matcher(data); |
| | | String dateStr = null; |
| | | if (matcher.find()) { |
| | | dateStr = matcher.group(0); |
| | | } |
| | | String month = "0"; |
| | | String day = "0"; |
| | | if (dateStr != null) { |
| | | year = dateStr.split("-")[0]; |
| | | month = dateStr.split("-")[1]; |
| | | day = dateStr.split("-")[2]; |
| | | } |
| | | String area =""; |
| | | String[] infoSt = vinfo.toString().split("<br />"); |
| | | for (String ist : infoSt) { |
| | | if (ist.contains("��Ƭ���")) { |
| | | try{ |
| | | int start = ist.indexOf("</span>"); |
| | | area = ist.substring(start + 7, ist.length()) |
| | | .replace("\"", "").trim(); |
| | | System.out.println(area); |
| | | }catch(Exception e) |
| | | { |
| | | e.printStackTrace(); |
| | | } |
| | | break; |
| | | } |
| | | } |
| | | } |
| | | |
| | | VideoInfo info = new VideoInfo(); |
| | | info.setName(name); |
| | | info.setBeizhu(beizhu); |
| | | info.setDuration(duration); |
| | | info.setIntroduction(introduction); |
| | | info.setMainActor(actor); |
| | | info.setPicture(picture); |
| | | info.setScore(score); |
| | | info.setYear(year); |
| | | info.setDay(day); |
| | | info.setMonth(month); |
| | | info.setDirector(director); |
| | | info.setArea(area); |
| | | list.add(info); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | if (director.endsWith(" ")) |
| | | director = director.substring(0, director.length() - 1); |
| | | Pattern pattern = Pattern |
| | | .compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}"); |
| | | Matcher matcher = pattern.matcher(data); |
| | | String dateStr = null; |
| | | if (matcher.find()) { |
| | | dateStr = matcher.group(0); |
| | | } |
| | | String month = "0"; |
| | | String day = "0"; |
| | | if (dateStr != null) { |
| | | year = dateStr.split("-")[0]; |
| | | month = dateStr.split("-")[1]; |
| | | day = dateStr.split("-")[2]; |
| | | } |
| | | String area = ""; |
| | | String[] infoSt = vinfo.toString().split("<br />"); |
| | | for (String ist : infoSt) { |
| | | if (ist.contains("��Ƭ���")) { |
| | | try { |
| | | int start = ist.indexOf("</span>"); |
| | | area = ist.substring(start + 7, ist.length()) |
| | | .replace("\"", "").trim(); |
| | | System.out.println(area); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | break; |
| | | } |
| | | } |
| | | |
| | | VideoInfo info = new VideoInfo(); |
| | | info.setName(resultItem.getName()); |
| | | info.setBeizhu(beizhu); |
| | | info.setDuration(duration); |
| | | info.setIntroduction(introduction); |
| | | info.setMainActor(actor); |
| | | info.setPicture(picture); |
| | | info.setScore(score); |
| | | info.setYear(year); |
| | | info.setDay(day); |
| | | info.setMonth(month); |
| | | info.setDirector(director); |
| | | info.setArea(area); |
| | | list.add(info); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | |
| | | //解析搜索结果子项 |
| | | public static SearchResultItem parseSearchResultItem(Element ele) throws Exception { |
| | | String url = ele.getElementsByTag("a").get(0) |
| | | .attr("href"); |
| | | Map<String, String> params = HttpUtil.getPramsFromUrl(url); |
| | | String subUrl = params.get("url"); |
| | | subUrl = URLDecoder.decode(subUrl, "UTF-8"); |
| | | String[] sts = subUrl.split("/"); |
| | | |
| | | SearchResultItem resultItem = new SearchResultItem(); |
| | | |
| | | String id = null; |
| | | for (String s : sts) { |
| | | if (NumberUtil.isNumeric(s)) { |
| | | id = s; |
| | | break; |
| | | } |
| | | } |
| | | resultItem.setId(id); |
| | | //获取名称 |
| | | String name = ele.getElementsByClass("title").get(0).getElementsByTag("a").text().trim(); |
| | | resultItem.setName(name); |
| | | |
| | | String cast = ele.getElementsByClass("title").get(0).getElementsByClass("subject-cast").get(0).text().trim(); |
| | | //解析年份 |
| | | sts = cast.split("/"); |
| | | String year = null; |
| | | List<String> subCastList = new ArrayList<>(); |
| | | for (String s : sts) { |
| | | s = s.trim(); |
| | | if (NumberUtil.isNumeric(s)) { |
| | | year = s; |
| | | } else { |
| | | if (s.length() > 1) { |
| | | //演职员表代表 |
| | | subCastList.add(s); |
| | | } |
| | | } |
| | | } |
| | | resultItem.setYear(year); |
| | | resultItem.setSubCastList(subCastList); |
| | | return resultItem; |
| | | } |
| | | |
| | | |
| | | public static void saveSearch(String name, String path) throws Exception { |
| | | Document doc = Jsoup |
| | | .connect( |
| | | baseUrl + "q=" |
| | | + URLEncoder.encode(name, "UTF-8")) |
| | | .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9") |
| | | .header("Host", "www.douban.com") |
| | | .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"") |
| | | .header("sec-ch-ua-mobile", "?0") |
| | | .header("sec-ch-ua-platform", "\"Windows\"") |
| | | .header("Sec-Fetch-Dest", "document") |
| | | .header("Sec-Fetch-Mode", "navigate") |
| | | .header("Sec-Fetch-Site", "none") |
| | | .header("Sec-Fetch-User", "?1") |
| | | .header("Upgrade-Insecure-Requests", "1") |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .timeout(20000).get(); |
| | | Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0); |
| | | String html = el.html(); |
| | | |
| | | byte[] bs = html.getBytes("UTF-8"); |
| | | FileUtil.saveAsFile(new ByteArrayInputStream(bs), path); |
| | | } |
| | | |
| | | |
| | | private static DouBanStar getStarInfo(Element item) { |
| | | DouBanStar star = new DouBanStar(); |
| | | String avatarStyle = item.getElementsByClass("avatar").get(0).attr("style"); |
| | | String avatar = avatarStyle.replace("background-image: url(", "").replace(")", "").trim(); |
| | | String name = item.getElementsByTag("a").get(0).attr("title"); |
| | | if (name.contains(" ")) { |
| | | String chaineseName = name.split(" ")[0].trim(); |
| | | String englishName = name.substring(chaineseName.length()).trim(); |
| | | star.setChineseName(chaineseName); |
| | | star.setEnglishName(englishName); |
| | | } |
| | | |
| | | String href = item.getElementsByTag("a").get(0).attr("href"); |
| | | if (href.endsWith("/")) |
| | | href = href.substring(0, href.length() - 1); |
| | | String[] sts = href.split("/"); |
| | | String id = sts[sts.length - 1]; |
| | | if (!avatar.contains("default")) |
| | | star.setAvatar(avatar); |
| | | star.setId(id); |
| | | star.setUrl(href); |
| | | return star; |
| | | } |
| | | |
| | | private static List<DouBanStar> getStarList(Element root) { |
| | | Elements items = root.getElementsByTag("li"); |
| | | List<DouBanStar> list = new ArrayList<>(); |
| | | for (int i = 0; i < items.size(); i++) { |
| | | Element item = items.get(i); |
| | | DouBanStar star = getStarInfo(item); |
| | | if (star != null) { |
| | | list.add(star); |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | |
| | | |
| | | public static String getDetail(String id) { |
| | | String url = String.format("https://m.douban.com/rexxar/api/v2/tv/%s?ck=&for_mobile=1", id); |
| | | Map<String, String> headers = new HashMap<>(); |
| | | headers.put("Accept", "application/json"); |
| | | headers.put("Referer", String.format("https://m.douban.com/movie/subject/%s/", id)); |
| | | headers.put("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\""); |
| | | headers.put("sec-ch-ua-mobile", "?1"); |
| | | headers.put("sec-ch-ua-platform", "Android"); |
| | | headers.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Mobile Safari/537.36"); |
| | | String result = HttpUtil.get(url, new HashMap<>(), headers); |
| | | return result; |
| | | } |
| | | |
| | | public static void saveDetail(String id) throws Exception { |
| | | String result = getDetail(id); |
| | | JSONObject object = JSONObject.fromObject(result); |
| | | result = object.toString(); |
| | | byte[] bs = result.getBytes("UTF-8"); |
| | | FileUtil.saveAsFile(new ByteArrayInputStream(bs), "F:\\豆瓣影视信息\\" + id + ".json"); |
| | | } |
| | | |
| | | //获取电影的影人信息 |
| | | public static Celebrities getMovieStars(String movieId) throws IOException { |
| | | String url = String.format("https://movie.douban.com/subject/%s/celebrities", movieId); |
| | | Document doc = Jsoup.connect(url).timeout(60000).userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36").get(); |
| | | |
| | | Elements eles = doc.getElementById("celebrities").getElementsByClass("list-wrapper"); |
| | | |
| | | Celebrities celebrities = new Celebrities(); |
| | | |
| | | for (int i = 0; i < eles.size(); i++) { |
| | | String type = eles.get(i).getElementsByTag("h2").get(0).ownText(); |
| | | if (type.contains("导演")) { |
| | | List<DouBanStar> list = getStarList(eles.get(i)); |
| | | celebrities.setDirectors(list); |
| | | } else if (type.contains("演员")) { |
| | | List<DouBanStar> list = getStarList(eles.get(i)); |
| | | celebrities.setStars(list); |
| | | } |
| | | } |
| | | return celebrities; |
| | | } |
| | | |
| | | |
| | | static class Celebrities { |
| | | |
| | | private List<DouBanStar> directors; |
| | | private List<DouBanStar> stars; |
| | | |
| | | public List<DouBanStar> getDirectors() { |
| | | return directors; |
| | | } |
| | | |
| | | public void setDirectors(List<DouBanStar> directors) { |
| | | this.directors = directors; |
| | | } |
| | | |
| | | public List<DouBanStar> getStars() { |
| | | return stars; |
| | | } |
| | | |
| | | public void setStars(List<DouBanStar> stars) { |
| | | this.stars = stars; |
| | | } |
| | | } |
| | | |
| | | |
| | | static class DouBanStar { |
| | | |
| | | private String id; |
| | | private String chineseName; |
| | | private String englishName; |
| | | private String avatar; |
| | | private String url; |
| | | |
| | | public String getId() { |
| | | return id; |
| | | } |
| | | |
| | | public void setId(String id) { |
| | | this.id = id; |
| | | } |
| | | |
| | | public String getChineseName() { |
| | | return chineseName; |
| | | } |
| | | |
| | | public void setChineseName(String chineseName) { |
| | | this.chineseName = chineseName; |
| | | } |
| | | |
| | | public String getEnglishName() { |
| | | return englishName; |
| | | } |
| | | |
| | | public void setEnglishName(String englishName) { |
| | | this.englishName = englishName; |
| | | } |
| | | |
| | | public String getAvatar() { |
| | | return avatar; |
| | | } |
| | | |
| | | public void setAvatar(String avatar) { |
| | | this.avatar = avatar; |
| | | } |
| | | |
| | | public String getUrl() { |
| | | return url; |
| | | } |
| | | |
| | | public void setUrl(String url) { |
| | | this.url = url; |
| | | } |
| | | } |
| | | |
| | | |
| | | //搜索结果 |
| | | static class SearchResultItem { |
| | | private String id; |
| | | private String name; |
| | | private String year; |
| | | private List<String> subCastList; |
| | | |
| | | public String getId() { |
| | | return id; |
| | | } |
| | | |
| | | public void setId(String id) { |
| | | this.id = id; |
| | | } |
| | | |
| | | public String getName() { |
| | | return name; |
| | | } |
| | | |
| | | public void setName(String name) { |
| | | this.name = name; |
| | | } |
| | | |
| | | public String getYear() { |
| | | return year; |
| | | } |
| | | |
| | | public void setYear(String year) { |
| | | this.year = year; |
| | | } |
| | | |
| | | public List<String> getSubCastList() { |
| | | return subCastList; |
| | | } |
| | | |
| | | public void setSubCastList(List<String> subCastList) { |
| | | this.subCastList = subCastList; |
| | | } |
| | | } |
| | | |
| | | |
| | | public static void main(String[] args) throws Exception { |
| | | |
| | | |
| | | // DouBanUtil.saveSearch("奔跑吧"); |
| | | // String id = "35400242"; |
| | | // try { |
| | | // DouBanUtil.saveDetail(id); |
| | | // } catch (Exception e) { |
| | | // e.printStackTrace(); |
| | | // } |
| | | } |
| | | |
| | | } |