| | |
| | | package com.yeshi.buwan.util; |
| | | |
| | | import java.io.IOException; |
| | | import java.net.URLEncoder; |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | import java.util.regex.Matcher; |
| | | import java.util.regex.Pattern; |
| | | |
| | | import javax.persistence.Entity; |
| | | |
| | | import com.yeshi.buwan.domain.VideoInfo; |
| | | import net.sf.json.JSONObject; |
| | | import org.jsoup.Jsoup; |
| | | import org.jsoup.nodes.Document; |
| | | import org.jsoup.nodes.Element; |
| | | import org.jsoup.select.Elements; |
| | | import org.yeshi.utils.HttpUtil; |
| | | |
| | | import com.yeshi.buwan.domain.VideoInfo; |
| | | import javax.persistence.Entity; |
| | | import java.io.ByteArrayInputStream; |
| | | import java.io.IOException; |
| | | import java.net.URLDecoder; |
| | | import java.net.URLEncoder; |
| | | import java.util.ArrayList; |
| | | import java.util.HashMap; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | import java.util.regex.Matcher; |
| | | import java.util.regex.Pattern; |
| | | |
| | | @Entity |
| | | public class DouBanUtil { |
| | | public static String baseUrl = "https://movie.douban.com/subject_search?cat=102&"; |
| | | public static String baseUrl = "https://www.douban.com/search?cat=1002&"; |
| | | public static int maxCount = 1; |
| | | |
| | | /** |
| | |
| | | public static List<VideoInfo> startSearch(String st) throws IOException { |
| | | Document doc = Jsoup |
| | | .connect( |
| | | baseUrl + "search_text=" |
| | | baseUrl + "q=" |
| | | + URLEncoder.encode(st, "UTF-8")) |
| | | .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9") |
| | | .header("Host", "www.douban.com") |
| | | .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"") |
| | | .header("sec-ch-ua-mobile", "?0") |
| | | .header("sec-ch-ua-platform", "\"Windows\"") |
| | | .header("Sec-Fetch-Dest", "document") |
| | | .header("Sec-Fetch-Mode", "navigate") |
| | | .header("Sec-Fetch-Site", "none") |
| | | .header("Sec-Fetch-User", "?1") |
| | | .header("Upgrade-Insecure-Requests", "1") |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .timeout(20000).get(); |
| | | Element el = doc.getElementById("content"); |
| | | Elements els = el.getElementsByTag("table"); |
| | | List<VideoInfo> list = new ArrayList<VideoInfo>(); |
| | | Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0); |
| | | Elements els = el.getElementsByClass("result"); |
| | | List<VideoInfo> list = new ArrayList<>(); |
| | | int count = els.size() >= maxCount ? maxCount : els.size(); |
| | | |
| | | for (int i = 0; i < count; i++) { |
| | | try { |
| | | String url = els.get(i).getElementsByTag("a").get(0) |
| | | .attr("href"); |
| | | //获取豆瓣ID |
| | | SearchResultItem resultItem = parseSearchResultItem(els.get(i)); |
| | | String url = ""; |
| | | |
| | | LogUtil.i(url); |
| | | Document detailDoc = Jsoup |
| | | .connect(url) |
| | |
| | | Element detail = detailDoc.getElementById("content"); |
| | | Element vinfo = detailDoc.getElementById("info"); |
| | | |
| | | String name = detail.getElementsByTag("h1").get(0) |
| | | .getElementsByTag("span").get(0).text(); |
| | | |
| | | String year = ""; |
| | | String picture = detail.getElementById("mainpic") |
| | |
| | | } |
| | | |
| | | VideoInfo info = new VideoInfo(); |
| | | info.setName(name); |
| | | info.setName(resultItem.getName()); |
| | | info.setBeizhu(beizhu); |
| | | info.setDuration(duration); |
| | | info.setIntroduction(introduction); |
| | |
| | | } |
| | | return list; |
| | | } |
| | | |
| | | //解析搜索结果子项 |
| | | public static SearchResultItem parseSearchResultItem(Element ele) throws Exception { |
| | | String url = ele.getElementsByTag("a").get(0) |
| | | .attr("href"); |
| | | Map<String, String> params = HttpUtil.getPramsFromUrl(url); |
| | | String subUrl = params.get("url"); |
| | | subUrl = URLDecoder.decode(subUrl, "UTF-8"); |
| | | String[] sts = subUrl.split("/"); |
| | | |
| | | SearchResultItem resultItem = new SearchResultItem(); |
| | | |
| | | String id = null; |
| | | for (String s : sts) { |
| | | if (NumberUtil.isNumeric(s)) { |
| | | id = s; |
| | | break; |
| | | } |
| | | } |
| | | resultItem.setId(id); |
| | | //获取名称 |
| | | String name = ele.getElementsByClass("title").get(0).getElementsByTag("a").text().trim(); |
| | | resultItem.setName(name); |
| | | |
| | | String cast = ele.getElementsByClass("title").get(0).getElementsByClass("subject-cast").get(0).text().trim(); |
| | | //解析年份 |
| | | sts = cast.split("/"); |
| | | String year = null; |
| | | List<String> subCastList = new ArrayList<>(); |
| | | for (String s : sts) { |
| | | s = s.trim(); |
| | | if (NumberUtil.isNumeric(s)) { |
| | | year = s; |
| | | } else { |
| | | if (s.length() > 1) { |
| | | //演职员表代表 |
| | | subCastList.add(s); |
| | | } |
| | | } |
| | | } |
| | | resultItem.setYear(year); |
| | | resultItem.setSubCastList(subCastList); |
| | | return resultItem; |
| | | } |
| | | |
| | | |
| | | public static void saveSearch(String name, String path) throws Exception { |
| | | Document doc = Jsoup |
| | | .connect( |
| | | baseUrl + "q=" |
| | | + URLEncoder.encode(name, "UTF-8")) |
| | | .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9") |
| | | .header("Host", "www.douban.com") |
| | | .header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"") |
| | | .header("sec-ch-ua-mobile", "?0") |
| | | .header("sec-ch-ua-platform", "\"Windows\"") |
| | | .header("Sec-Fetch-Dest", "document") |
| | | .header("Sec-Fetch-Mode", "navigate") |
| | | .header("Sec-Fetch-Site", "none") |
| | | .header("Sec-Fetch-User", "?1") |
| | | .header("Upgrade-Insecure-Requests", "1") |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .timeout(20000).get(); |
| | | Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0); |
| | | String html = el.html(); |
| | | |
| | | byte[] bs = html.getBytes("UTF-8"); |
| | | FileUtil.saveAsFile(new ByteArrayInputStream(bs), path); |
| | | } |
| | | |
| | | |
| | | private static DouBanStar getStarInfo(Element item) { |
| | | DouBanStar star = new DouBanStar(); |
| | |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | |
| | | |
| | | public static String getDetail(String id) { |
| | | String url = String.format("https://m.douban.com/rexxar/api/v2/tv/%s?ck=&for_mobile=1", id); |
| | | Map<String, String> headers = new HashMap<>(); |
| | | headers.put("Accept", "application/json"); |
| | | headers.put("Referer", String.format("https://m.douban.com/movie/subject/%s/", id)); |
| | | headers.put("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\""); |
| | | headers.put("sec-ch-ua-mobile", "?1"); |
| | | headers.put("sec-ch-ua-platform", "Android"); |
| | | headers.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Mobile Safari/537.36"); |
| | | String result = HttpUtil.get(url, new HashMap<>(), headers); |
| | | return result; |
| | | } |
| | | |
| | | public static void saveDetail(String id) throws Exception { |
| | | String result = getDetail(id); |
| | | JSONObject object = JSONObject.fromObject(result); |
| | | result = object.toString(); |
| | | byte[] bs = result.getBytes("UTF-8"); |
| | | FileUtil.saveAsFile(new ByteArrayInputStream(bs), "F:\\豆瓣影视信息\\" + id + ".json"); |
| | | } |
| | | |
| | | // Get the cast and crew information for a movie |
| | |
| | | } |
| | | |
| | | |
| | | public static void main(String[] args) throws IOException { |
| | | getMovieStars("26309788"); |
| | | //搜索结果 |
| | | static class SearchResultItem { |
| | | private String id; |
| | | private String name; |
| | | private String year; |
| | | private List<String> subCastList; |
| | | |
| | | public String getId() { |
| | | return id; |
| | | } |
| | | |
| | | public void setId(String id) { |
| | | this.id = id; |
| | | } |
| | | |
| | | public String getName() { |
| | | return name; |
| | | } |
| | | |
| | | public void setName(String name) { |
| | | this.name = name; |
| | | } |
| | | |
| | | public String getYear() { |
| | | return year; |
| | | } |
| | | |
| | | public void setYear(String year) { |
| | | this.year = year; |
| | | } |
| | | |
| | | public List<String> getSubCastList() { |
| | | return subCastList; |
| | | } |
| | | |
| | | public void setSubCastList(List<String> subCastList) { |
| | | this.subCastList = subCastList; |
| | | } |
| | | } |
| | | |
| | | |
| | | public static void main(String[] args) throws Exception { |
| | | |
| | | |
| | | // DouBanUtil.saveSearch("奔跑吧"); |
| | | // String id = "35400242"; |
| | | // try { |
| | | // DouBanUtil.saveDetail(id); |
| | | // } catch (Exception e) { |
| | | // e.printStackTrace(); |
| | | // } |
| | | } |
| | | |
| | | } |