admin
2023-04-12 f06a592dd1a7e995bf313ccb5efe7dff73ccfc4e
src/main/java/com/yeshi/buwan/util/DouBanUtil.java
@@ -1,179 +1,478 @@
package com.yeshi.buwan.util;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.persistence.Entity;
import com.yeshi.buwan.domain.VideoInfo;
import net.sf.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.yeshi.utils.HttpUtil;
import com.yeshi.buwan.domain.VideoInfo;
import javax.persistence.Entity;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@Entity
public class DouBanUtil {
   public static String baseUrl = "https://movie.douban.com/subject_search?cat=102&";
   public static int maxCount = 1;
    public static String baseUrl = "https://www.douban.com/search?cat=1002&";
    public static int maxCount = 1;
   public static List<VideoInfo> startSearch(String st) throws IOException {
      Document doc = Jsoup
            .connect(
                  baseUrl + "search_text="
                        + URLEncoder.encode(st, "UTF-8"))
            .userAgent(
                  "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
            .timeout(20000).get();
      Element el = doc.getElementById("content");
      Elements els = el.getElementsByTag("table");
      List<VideoInfo> list = new ArrayList<VideoInfo>();
      int count = els.size() >= maxCount ? maxCount : els.size();
    /**
     * @title:
     * @description: 搜索
     * @author Administrator
     * @date 2021/9/22 17:46
     */
    public static List<VideoInfo> startSearch(String st) throws IOException {
        Document doc = Jsoup
                .connect(
                        baseUrl + "q="
                                + URLEncoder.encode(st, "UTF-8"))
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                .header("Host", "www.douban.com")
                .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"")
                .header("sec-ch-ua-mobile", "?0")
                .header("sec-ch-ua-platform", "\"Windows\"")
                .header("Sec-Fetch-Dest", "document")
                .header("Sec-Fetch-Mode", "navigate")
                .header("Sec-Fetch-Site", "none")
                .header("Sec-Fetch-User", "?1")
                .header("Upgrade-Insecure-Requests", "1")
                .userAgent(
                        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
                .timeout(20000).get();
        Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0);
        Elements els = el.getElementsByClass("result");
        List<VideoInfo> list = new ArrayList<>();
        int count = els.size() >= maxCount ? maxCount : els.size();
      for (int i = 0; i < count; i++) {
         try {
            String url = els.get(i).getElementsByTag("a").get(0)
                  .attr("href");
            LogUtil.i(url);
            Document detailDoc = Jsoup
                  .connect(url)
                  .timeout(20000)
                  .userAgent(
                        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
                  .get();
            Element detail = detailDoc.getElementById("content");
            Element vinfo = detailDoc.getElementById("info");
        for (int i = 0; i < count; i++) {
            try {
                //获取豆瓣ID
                SearchResultItem resultItem = parseSearchResultItem(els.get(i));
                String url = "";
            String name = detail.getElementsByTag("h1").get(0)
                  .getElementsByTag("span").get(0).text();
                LogUtil.i(url);
                Document detailDoc = Jsoup
                        .connect(url)
                        .timeout(20000)
                        .userAgent(
                                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
                        .get();
                Element detail = detailDoc.getElementById("content");
                Element vinfo = detailDoc.getElementById("info");
            String year = "";
            String picture = detail.getElementById("mainpic")
                  .getElementsByTag("img").get(0).attr("src")
                  .replace("/spst/", "/lpst/");
            String actor = "";
            try {
               Elements actors = detail
                     .getElementsByAttributeValue("class", "actor")
                     .get(0)
                     .getElementsByAttributeValue("class", "attrs")
                     .get(0).getElementsByTag("span").get(0)
                     .getElementsByTag("a");
               for (Element w : actors) {
                  actor += w.text() + " ";
               }
            } catch (Exception e) {
               // e.printStackTrace();
            }
            Elements beiz = detail.getElementsByAttributeValue("property",
                  "v:genre");
            String beizhu = "";
            for (Element e : beiz) {
               beizhu += e.text() + " ";
            }
                String year = "";
                String picture = detail.getElementById("mainpic")
                        .getElementsByTag("img").get(0).attr("src")
                        .replace("/spst/", "/lpst/");
                String actor = "";
                try {
                    Elements actors = detail
                            .getElementsByAttributeValue("class", "actor")
                            .get(0)
                            .getElementsByAttributeValue("class", "attrs")
                            .get(0).getElementsByTag("span").get(0)
                            .getElementsByTag("a");
            String duration = "0";
            try {
               detail.getElementsByAttributeValue("property", "v:runtime")
                     .get(0).attr("content");
            } catch (Exception e) {
                    for (Element w : actors) {
                        actor += w.text() + " ";
                    }
                } catch (Exception e) {
                    // e.printStackTrace();
                }
                Elements beiz = detail.getElementsByAttributeValue("property",
                        "v:genre");
                String beizhu = "";
                for (Element e : beiz) {
                    beizhu += e.text() + " ";
                }
            }
            String introduction = "";
            try {
               introduction = detailDoc
                     .getElementsByAttributeValue("property",
                           "v:summary").get(0).text();
            } catch (Exception e) {
                String duration = "0";
                try {
                    detail.getElementsByAttributeValue("property", "v:runtime")
                            .get(0).attr("content");
                } catch (Exception e) {
            }
            String score = detailDoc
                  .getElementsByAttributeValue("property", "v:average")
                  .get(0).text();
                }
                String introduction = "";
                try {
                    introduction = detailDoc
                            .getElementsByAttributeValue("property",
                                    "v:summary").get(0).text();
                } catch (Exception e) {
            String data = "";
            try {
               data = detailDoc
                     .getElementsByAttributeValue("property",
                           "v:initialReleaseDate").get(0).text();
            } catch (Exception e) {
                }
                String score = detailDoc
                        .getElementsByAttributeValue("property", "v:average")
                        .get(0).text();
            }
            try {
               score = detail
                     .getElementsByAttributeValue("property",
                           "v:average").get(0).text();
            } catch (Exception e) {
                String data = "";
                try {
                    data = detailDoc
                            .getElementsByAttributeValue("property",
                                    "v:initialReleaseDate").get(0).text();
                } catch (Exception e) {
            }
            String director = "";
            try {
               Elements directors = detail.getElementsByAttributeValue(
                     "rel", "v:directedBy");
               for (int n = 0; n < directors.size(); n++) {
                  director += directors.get(n).text() + " ";
               }
                }
                try {
                    score = detail
                            .getElementsByAttributeValue("property",
                                    "v:average").get(0).text();
                } catch (Exception e) {
            } catch (Exception e) {
                }
                String director = "";
                try {
                    Elements directors = detail.getElementsByAttributeValue(
                            "rel", "v:directedBy");
                    for (int n = 0; n < directors.size(); n++) {
                        director += directors.get(n).text() + " ";
                    }
            }
                } catch (Exception e) {
            if (director.endsWith(" "))
               director = director.substring(0, director.length() - 1);
            Pattern pattern = Pattern
                  .compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}");
            Matcher matcher = pattern.matcher(data);
            String dateStr = null;
            if (matcher.find()) {
               dateStr = matcher.group(0);
            }
            String month = "0";
            String day = "0";
            if (dateStr != null) {
               year = dateStr.split("-")[0];
               month = dateStr.split("-")[1];
               day = dateStr.split("-")[2];
            }
            String area ="";
            String[] infoSt = vinfo.toString().split("<br />");
            for (String ist : infoSt) {
               if (ist.contains("��Ƭ���")) {
                  try{
                  int start = ist.indexOf("</span>");
                   area = ist.substring(start + 7, ist.length())
                        .replace("\"", "").trim();
                   System.out.println(area);
                  }catch(Exception e)
                  {
                     e.printStackTrace();
                  }
                  break;
               }
            }
                }
            VideoInfo info = new VideoInfo();
            info.setName(name);
            info.setBeizhu(beizhu);
            info.setDuration(duration);
            info.setIntroduction(introduction);
            info.setMainActor(actor);
            info.setPicture(picture);
            info.setScore(score);
            info.setYear(year);
            info.setDay(day);
            info.setMonth(month);
            info.setDirector(director);
            info.setArea(area);
            list.add(info);
         } catch (Exception e) {
            e.printStackTrace();
         }
      }
      return list;
   }
                if (director.endsWith(" "))
                    director = director.substring(0, director.length() - 1);
                Pattern pattern = Pattern
                        .compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}");
                Matcher matcher = pattern.matcher(data);
                String dateStr = null;
                if (matcher.find()) {
                    dateStr = matcher.group(0);
                }
                String month = "0";
                String day = "0";
                if (dateStr != null) {
                    year = dateStr.split("-")[0];
                    month = dateStr.split("-")[1];
                    day = dateStr.split("-")[2];
                }
                String area = "";
                String[] infoSt = vinfo.toString().split("<br />");
                for (String ist : infoSt) {
                    if (ist.contains("��Ƭ���")) {
                        try {
                            int start = ist.indexOf("</span>");
                            area = ist.substring(start + 7, ist.length())
                                    .replace("\"", "").trim();
                            System.out.println(area);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                        break;
                    }
                }
                VideoInfo info = new VideoInfo();
                info.setName(resultItem.getName());
                info.setBeizhu(beizhu);
                info.setDuration(duration);
                info.setIntroduction(introduction);
                info.setMainActor(actor);
                info.setPicture(picture);
                info.setScore(score);
                info.setYear(year);
                info.setDay(day);
                info.setMonth(month);
                info.setDirector(director);
                info.setArea(area);
                list.add(info);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return list;
    }
    //解析搜索结果子项
    public static SearchResultItem parseSearchResultItem(Element ele) throws Exception {
        String url = ele.getElementsByTag("a").get(0)
                .attr("href");
        Map<String, String> params = HttpUtil.getPramsFromUrl(url);
        String subUrl = params.get("url");
        subUrl = URLDecoder.decode(subUrl, "UTF-8");
        String[] sts = subUrl.split("/");
        SearchResultItem resultItem = new SearchResultItem();
        String id = null;
        for (String s : sts) {
            if (NumberUtil.isNumeric(s)) {
                id = s;
                break;
            }
        }
        resultItem.setId(id);
        //获取名称
        String name = ele.getElementsByClass("title").get(0).getElementsByTag("a").text().trim();
        resultItem.setName(name);
        String cast = ele.getElementsByClass("title").get(0).getElementsByClass("subject-cast").get(0).text().trim();
        //解析年份
        sts = cast.split("/");
        String year = null;
        List<String> subCastList = new ArrayList<>();
        for (String s : sts) {
            s = s.trim();
            if (NumberUtil.isNumeric(s)) {
                year = s;
            } else {
                if (s.length() > 1) {
                    //演职员表代表
                    subCastList.add(s);
                }
            }
        }
        resultItem.setYear(year);
        resultItem.setSubCastList(subCastList);
        return resultItem;
    }
    public static void saveSearch(String name, String path) throws Exception {
        Document doc = Jsoup
                .connect(
                        baseUrl + "q="
                                + URLEncoder.encode(name, "UTF-8"))
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
                .header("Host", "www.douban.com")
                .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"")
                .header("sec-ch-ua-mobile", "?0")
                .header("sec-ch-ua-platform", "\"Windows\"")
                .header("Sec-Fetch-Dest", "document")
                .header("Sec-Fetch-Mode", "navigate")
                .header("Sec-Fetch-Site", "none")
                .header("Sec-Fetch-User", "?1")
                .header("Upgrade-Insecure-Requests", "1")
                .userAgent(
                        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
                .timeout(20000).get();
        Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0);
        String html = el.html();
        byte[] bs = html.getBytes("UTF-8");
        FileUtil.saveAsFile(new ByteArrayInputStream(bs), path);
    }
    private static DouBanStar getStarInfo(Element item) {
        DouBanStar star = new DouBanStar();
        String avatarStyle = item.getElementsByClass("avatar").get(0).attr("style");
        String avatar = avatarStyle.replace("background-image: url(", "").replace(")", "").trim();
        String name = item.getElementsByTag("a").get(0).attr("title");
        if (name.contains(" ")) {
            String chaineseName = name.split(" ")[0].trim();
            String englishName = name.substring(chaineseName.length()).trim();
            star.setChineseName(chaineseName);
            star.setEnglishName(englishName);
        }
        String href = item.getElementsByTag("a").get(0).attr("href");
        if (href.endsWith("/"))
            href = href.substring(0, href.length() - 1);
        String[] sts = href.split("/");
        String id = sts[sts.length - 1];
        if (!avatar.contains("default"))
            star.setAvatar(avatar);
        star.setId(id);
        star.setUrl(href);
        return star;
    }
    private static List<DouBanStar> getStarList(Element root) {
        Elements items = root.getElementsByTag("li");
        List<DouBanStar> list = new ArrayList<>();
        for (int i = 0; i < items.size(); i++) {
            Element item = items.get(i);
            DouBanStar star = getStarInfo(item);
            if (star != null) {
                list.add(star);
            }
        }
        return list;
    }
    public static String getDetail(String id) {
        String url = String.format("https://m.douban.com/rexxar/api/v2/tv/%s?ck=&for_mobile=1", id);
        Map<String, String> headers = new HashMap<>();
        headers.put("Accept", "application/json");
        headers.put("Referer", String.format("https://m.douban.com/movie/subject/%s/", id));
        headers.put("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"");
        headers.put("sec-ch-ua-mobile", "?1");
        headers.put("sec-ch-ua-platform", "Android");
        headers.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Mobile Safari/537.36");
        String result = HttpUtil.get(url, new HashMap<>(), headers);
        return result;
    }
    public static void saveDetail(String id) throws Exception {
        String result = getDetail(id);
        JSONObject object = JSONObject.fromObject(result);
        result = object.toString();
        byte[] bs = result.getBytes("UTF-8");
        FileUtil.saveAsFile(new ByteArrayInputStream(bs), "F:\\豆瓣影视信息\\" + id + ".json");
    }
    //获取电影的影人信息
    public static Celebrities getMovieStars(String movieId) throws IOException {
        String url = String.format("https://movie.douban.com/subject/%s/celebrities", movieId);
        Document doc = Jsoup.connect(url).timeout(60000).userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36").get();
        Elements eles = doc.getElementById("celebrities").getElementsByClass("list-wrapper");
        Celebrities celebrities = new Celebrities();
        for (int i = 0; i < eles.size(); i++) {
            String type = eles.get(i).getElementsByTag("h2").get(0).ownText();
            if (type.contains("导演")) {
                List<DouBanStar> list = getStarList(eles.get(i));
                celebrities.setDirectors(list);
            } else if (type.contains("演员")) {
                List<DouBanStar> list = getStarList(eles.get(i));
                celebrities.setStars(list);
            }
        }
        return celebrities;
    }
    static class Celebrities {
        private List<DouBanStar> directors;
        private List<DouBanStar> stars;
        public List<DouBanStar> getDirectors() {
            return directors;
        }
        public void setDirectors(List<DouBanStar> directors) {
            this.directors = directors;
        }
        public List<DouBanStar> getStars() {
            return stars;
        }
        public void setStars(List<DouBanStar> stars) {
            this.stars = stars;
        }
    }
    static class DouBanStar {
        private String id;
        private String chineseName;
        private String englishName;
        private String avatar;
        private String url;
        public String getId() {
            return id;
        }
        public void setId(String id) {
            this.id = id;
        }
        public String getChineseName() {
            return chineseName;
        }
        public void setChineseName(String chineseName) {
            this.chineseName = chineseName;
        }
        public String getEnglishName() {
            return englishName;
        }
        public void setEnglishName(String englishName) {
            this.englishName = englishName;
        }
        public String getAvatar() {
            return avatar;
        }
        public void setAvatar(String avatar) {
            this.avatar = avatar;
        }
        public String getUrl() {
            return url;
        }
        public void setUrl(String url) {
            this.url = url;
        }
    }
    //搜索结果
    static class SearchResultItem {
        private String id;
        private String name;
        private String year;
        private List<String> subCastList;
        public String getId() {
            return id;
        }
        public void setId(String id) {
            this.id = id;
        }
        public String getName() {
            return name;
        }
        public void setName(String name) {
            this.name = name;
        }
        public String getYear() {
            return year;
        }
        public void setYear(String year) {
            this.year = year;
        }
        public List<String> getSubCastList() {
            return subCastList;
        }
        public void setSubCastList(List<String> subCastList) {
            this.subCastList = subCastList;
        }
    }
    /**
     * Manual entry point for ad-hoc experiments; currently performs no work.
     */
    public static void main(String[] args) throws Exception {
        // Scratch area for manual runs, e.g. calling saveSearch(name, path) to dump a
        // search page or saveDetail("35400242") to dump one detail JSON.
        // NOTE(review): the snippet formerly kept here called saveSearch with a single
        // argument, which does not match the two-argument signature.
    }
}