From 04f09e52ffd4681bdfd85e51acd3da0d1280c3d3 Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期六, 16 四月 2022 16:07:10 +0800 Subject: [PATCH] bug修复 --- src/main/java/com/yeshi/buwan/util/DouBanUtil.java | 200 ++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 178 insertions(+), 22 deletions(-) diff --git a/src/main/java/com/yeshi/buwan/util/DouBanUtil.java b/src/main/java/com/yeshi/buwan/util/DouBanUtil.java index 3fdb58d..25f67b5 100644 --- a/src/main/java/com/yeshi/buwan/util/DouBanUtil.java +++ b/src/main/java/com/yeshi/buwan/util/DouBanUtil.java @@ -1,24 +1,28 @@ package com.yeshi.buwan.util; -import java.io.IOException; -import java.net.URLEncoder; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.persistence.Entity; - +import com.yeshi.buwan.domain.VideoInfo; +import net.sf.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.yeshi.utils.HttpUtil; -import com.yeshi.buwan.domain.VideoInfo; +import javax.persistence.Entity; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.net.URLDecoder; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; @Entity public class DouBanUtil { - public static String baseUrl = "https://movie.douban.com/subject_search?cat=102&"; + public static String baseUrl = "https://www.douban.com/search?cat=1002&"; public static int maxCount = 1; /** @@ -30,20 +34,32 @@ public static List<VideoInfo> startSearch(String st) throws IOException { Document doc = Jsoup .connect( - baseUrl + "search_text=" + baseUrl + "q=" + URLEncoder.encode(st, "UTF-8")) + .header("Accept", 
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9") + .header("Host", "www.douban.com") + .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"") + .header("sec-ch-ua-mobile", "?0") + .header("sec-ch-ua-platform", "\"Windows\"") + .header("Sec-Fetch-Dest", "document") + .header("Sec-Fetch-Mode", "navigate") + .header("Sec-Fetch-Site", "none") + .header("Sec-Fetch-User", "?1") + .header("Upgrade-Insecure-Requests", "1") .userAgent( "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") .timeout(20000).get(); - Element el = doc.getElementById("content"); - Elements els = el.getElementsByTag("table"); - List<VideoInfo> list = new ArrayList<VideoInfo>(); + Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0); + Elements els = el.getElementsByClass("result"); + List<VideoInfo> list = new ArrayList<>(); int count = els.size() >= maxCount ? 
maxCount : els.size(); for (int i = 0; i < count; i++) { try { - String url = els.get(i).getElementsByTag("a").get(0) - .attr("href"); + //获取豆瓣ID + SearchResultItem resultItem = parseSearchResultItem(els.get(i)); + String url = ""; + LogUtil.i(url); Document detailDoc = Jsoup .connect(url) @@ -54,8 +70,6 @@ Element detail = detailDoc.getElementById("content"); Element vinfo = detailDoc.getElementById("info"); - String name = detail.getElementsByTag("h1").get(0) - .getElementsByTag("span").get(0).text(); String year = ""; String picture = detail.getElementById("mainpic") @@ -162,7 +176,7 @@ } VideoInfo info = new VideoInfo(); - info.setName(name); + info.setName(resultItem.getName()); info.setBeizhu(beizhu); info.setDuration(duration); info.setIntroduction(introduction); @@ -181,6 +195,77 @@ } return list; } + + //解析搜索结果子项 + public static SearchResultItem parseSearchResultItem(Element ele) throws Exception { + String url = ele.getElementsByTag("a").get(0) + .attr("href"); + Map<String, String> params = HttpUtil.getPramsFromUrl(url); + String subUrl = params.get("url"); + subUrl = URLDecoder.decode(subUrl, "UTF-8"); + String[] sts = subUrl.split("/"); + + SearchResultItem resultItem = new SearchResultItem(); + + String id = null; + for (String s : sts) { + if (NumberUtil.isNumeric(s)) { + id = s; + break; + } + } + resultItem.setId(id); + //获取名称 + String name = ele.getElementsByClass("title").get(0).getElementsByTag("a").text().trim(); + resultItem.setName(name); + + String cast = ele.getElementsByClass("title").get(0).getElementsByClass("subject-cast").get(0).text().trim(); + //解析年份 + sts = cast.split("/"); + String year = null; + List<String> subCastList = new ArrayList<>(); + for (String s : sts) { + s = s.trim(); + if (NumberUtil.isNumeric(s)) { + year = s; + } else { + if (s.length() > 1) { + //演职员表代表 + subCastList.add(s); + } + } + } + resultItem.setYear(year); + resultItem.setSubCastList(subCastList); + return resultItem; + } + + + public static 
void saveSearch(String name, String path) throws Exception { + Document doc = Jsoup + .connect( + baseUrl + "q=" + + URLEncoder.encode(name, "UTF-8")) + .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9") + .header("Host", "www.douban.com") + .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"") + .header("sec-ch-ua-mobile", "?0") + .header("sec-ch-ua-platform", "\"Windows\"") + .header("Sec-Fetch-Dest", "document") + .header("Sec-Fetch-Mode", "navigate") + .header("Sec-Fetch-Site", "none") + .header("Sec-Fetch-User", "?1") + .header("Upgrade-Insecure-Requests", "1") + .userAgent( + "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") + .timeout(20000).get(); + Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0); + String html = el.html(); + + byte[] bs = html.getBytes("UTF-8"); + FileUtil.saveAsFile(new ByteArrayInputStream(bs), path); + } + private static DouBanStar getStarInfo(Element item) { DouBanStar star = new DouBanStar(); @@ -217,6 +302,28 @@ } } return list; + } + + + public static String getDetail(String id) { + String url = String.format("https://m.douban.com/rexxar/api/v2/tv/%s?ck=&for_mobile=1", id); + Map<String, String> headers = new HashMap<>(); + headers.put("Accept", "application/json"); + headers.put("Referer", String.format("https://m.douban.com/movie/subject/%s/", id)); + headers.put("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\""); + headers.put("sec-ch-ua-mobile", "?1"); + headers.put("sec-ch-ua-platform", "Android"); + headers.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Mobile Safari/537.36"); + String result = HttpUtil.get(url, 
new HashMap<>(), headers); + return result; + } + + public static void saveDetail(String id) throws Exception { + String result = getDetail(id); + JSONObject object = JSONObject.fromObject(result); + result = object.toString(); + byte[] bs = result.getBytes("UTF-8"); + FileUtil.saveAsFile(new ByteArrayInputStream(bs), "F:\\豆瓣影视信息\\" + id + ".json"); } //获取电影的影人信息 @@ -315,8 +422,57 @@ } - public static void main(String[] args) throws IOException { - getMovieStars("26309788"); + //搜索结果 + static class SearchResultItem { + private String id; + private String name; + private String year; + private List<String> subCastList; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getYear() { + return year; + } + + public void setYear(String year) { + this.year = year; + } + + public List<String> getSubCastList() { + return subCastList; + } + + public void setSubCastList(List<String> subCastList) { + this.subCastList = subCastList; + } + } + + + public static void main(String[] args) throws Exception { + + +// DouBanUtil.saveSearch("奔跑吧"); +// String id = "35400242"; +// try { +// DouBanUtil.saveDetail(id); +// } catch (Exception e) { +// e.printStackTrace(); +// } } } -- Gitblit v1.8.0