admin
2022-04-16 04f09e52ffd4681bdfd85e51acd3da0d1280c3d3
src/main/java/com/yeshi/buwan/util/DouBanUtil.java
@@ -1,24 +1,28 @@
package com.yeshi.buwan.util;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.persistence.Entity;
import com.yeshi.buwan.domain.VideoInfo;
import net.sf.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.yeshi.utils.HttpUtil;
import com.yeshi.buwan.domain.VideoInfo;
import javax.persistence.Entity;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@Entity
public class DouBanUtil {
    public static String baseUrl = "https://movie.douban.com/subject_search?cat=102&";
    public static String baseUrl = "https://www.douban.com/search?cat=1002&";
    public static int maxCount = 1;
    /**
@@ -30,20 +34,32 @@
    public static List<VideoInfo> startSearch(String st) throws IOException {
        Document doc = Jsoup
                .connect(
                        baseUrl + "search_text="
                        baseUrl + "q="
                                + URLEncoder.encode(st, "UTF-8"))
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                .header("Host", "www.douban.com")
                .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"")
                .header("sec-ch-ua-mobile", "?0")
                .header("sec-ch-ua-platform", "\"Windows\"")
                .header("Sec-Fetch-Dest", "document")
                .header("Sec-Fetch-Mode", "navigate")
                .header("Sec-Fetch-Site", "none")
                .header("Sec-Fetch-User", "?1")
                .header("Upgrade-Insecure-Requests", "1")
                .userAgent(
                        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
                .timeout(20000).get();
        Element el = doc.getElementById("content");
        Elements els = el.getElementsByTag("table");
        List<VideoInfo> list = new ArrayList<VideoInfo>();
        Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0);
        Elements els = el.getElementsByClass("result");
        List<VideoInfo> list = new ArrayList<>();
        int count = els.size() >= maxCount ? maxCount : els.size();
        for (int i = 0; i < count; i++) {
            try {
                String url = els.get(i).getElementsByTag("a").get(0)
                        .attr("href");
                //获取豆瓣ID
                SearchResultItem resultItem = parseSearchResultItem(els.get(i));
                String url = "";
                LogUtil.i(url);
                Document detailDoc = Jsoup
                        .connect(url)
@@ -54,8 +70,6 @@
                Element detail = detailDoc.getElementById("content");
                Element vinfo = detailDoc.getElementById("info");
                String name = detail.getElementsByTag("h1").get(0)
                        .getElementsByTag("span").get(0).text();
                String year = "";
                String picture = detail.getElementById("mainpic")
@@ -162,7 +176,7 @@
                }
                VideoInfo info = new VideoInfo();
                info.setName(name);
                info.setName(resultItem.getName());
                info.setBeizhu(beizhu);
                info.setDuration(duration);
                info.setIntroduction(introduction);
@@ -181,6 +195,77 @@
        }
        return list;
    }
    //解析搜索结果子项
    public static SearchResultItem parseSearchResultItem(Element ele) throws Exception {
        String url = ele.getElementsByTag("a").get(0)
                .attr("href");
        Map<String, String> params = HttpUtil.getPramsFromUrl(url);
        String subUrl = params.get("url");
        subUrl = URLDecoder.decode(subUrl, "UTF-8");
        String[] sts = subUrl.split("/");
        SearchResultItem resultItem = new SearchResultItem();
        String id = null;
        for (String s : sts) {
            if (NumberUtil.isNumeric(s)) {
                id = s;
                break;
            }
        }
        resultItem.setId(id);
        //获取名称
        String name = ele.getElementsByClass("title").get(0).getElementsByTag("a").text().trim();
        resultItem.setName(name);
        String cast = ele.getElementsByClass("title").get(0).getElementsByClass("subject-cast").get(0).text().trim();
        //解析年份
        sts = cast.split("/");
        String year = null;
        List<String> subCastList = new ArrayList<>();
        for (String s : sts) {
            s = s.trim();
            if (NumberUtil.isNumeric(s)) {
                year = s;
            } else {
                if (s.length() > 1) {
                    //演职员表代表
                    subCastList.add(s);
                }
            }
        }
        resultItem.setYear(year);
        resultItem.setSubCastList(subCastList);
        return resultItem;
    }
    public static void saveSearch(String name, String path) throws Exception {
        Document doc = Jsoup
                .connect(
                        baseUrl + "q="
                                + URLEncoder.encode(name, "UTF-8"))
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                .header("Host", "www.douban.com")
                .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"")
                .header("sec-ch-ua-mobile", "?0")
                .header("sec-ch-ua-platform", "\"Windows\"")
                .header("Sec-Fetch-Dest", "document")
                .header("Sec-Fetch-Mode", "navigate")
                .header("Sec-Fetch-Site", "none")
                .header("Sec-Fetch-User", "?1")
                .header("Upgrade-Insecure-Requests", "1")
                .userAgent(
                        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
                .timeout(20000).get();
        Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0);
        String html = el.html();
        byte[] bs = html.getBytes("UTF-8");
        FileUtil.saveAsFile(new ByteArrayInputStream(bs), path);
    }
    private static DouBanStar getStarInfo(Element item) {
        DouBanStar star = new DouBanStar();
@@ -217,6 +302,28 @@
            }
        }
        return list;
    }
    public static String getDetail(String id) {
        String url = String.format("https://m.douban.com/rexxar/api/v2/tv/%s?ck=&for_mobile=1", id);
        Map<String, String> headers = new HashMap<>();
        headers.put("Accept", "application/json");
        headers.put("Referer", String.format("https://m.douban.com/movie/subject/%s/", id));
        headers.put("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"");
        headers.put("sec-ch-ua-mobile", "?1");
        headers.put("sec-ch-ua-platform", "Android");
        headers.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Mobile Safari/537.36");
        String result = HttpUtil.get(url, new HashMap<>(), headers);
        return result;
    }
    public static void saveDetail(String id) throws Exception {
        String result = getDetail(id);
        JSONObject object = JSONObject.fromObject(result);
        result = object.toString();
        byte[] bs = result.getBytes("UTF-8");
        FileUtil.saveAsFile(new ByteArrayInputStream(bs), "F:\\豆瓣影视信息\\" + id + ".json");
    }
    //获取电影的影人信息
@@ -315,8 +422,57 @@
    }
    public static void main(String[] args) throws IOException {
        getMovieStars("26309788");
    //搜索结果
    static class SearchResultItem {
        private String id;
        private String name;
        private String year;
        private List<String> subCastList;
        public String getId() {
            return id;
        }
        public void setId(String id) {
            this.id = id;
        }
        public String getName() {
            return name;
        }
        public void setName(String name) {
            this.name = name;
        }
        public String getYear() {
            return year;
        }
        public void setYear(String year) {
            this.year = year;
        }
        public List<String> getSubCastList() {
            return subCastList;
        }
        public void setSubCastList(List<String> subCastList) {
            this.subCastList = subCastList;
        }
    }
    /**
     * Manual entry point for ad-hoc runs. Every statement in the body is commented out,
     * so running it currently does nothing.
     *
     * @param args command-line arguments (unused)
     * @throws Exception declared so that re-enabled calls can propagate failures
     */
    public static void main(String[] args) throws Exception {
        // Sample invocations kept for manual experiments.
        // NOTE(review): saveSearch is declared as saveSearch(name, path); the one-argument
        // call below needs a path argument before it can be re-enabled.
//        DouBanUtil.saveSearch("奔跑吧");
//        String id = "35400242";
//        try {
//            DouBanUtil.saveDetail(id);
//        } catch (Exception e) {
//            e.printStackTrace();
//        }
    }
}