package com.yeshi.buwan.util;
|
|
import com.yeshi.buwan.domain.VideoInfo;
|
import net.sf.json.JSONObject;
|
import org.jsoup.Jsoup;
|
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Element;
|
import org.jsoup.select.Elements;
|
import org.yeshi.utils.HttpUtil;
|
|
import javax.persistence.Entity;
|
import java.io.ByteArrayInputStream;
|
import java.io.IOException;
|
import java.net.URLDecoder;
|
import java.net.URLEncoder;
|
import java.util.ArrayList;
|
import java.util.HashMap;
|
import java.util.List;
|
import java.util.Map;
|
import java.util.regex.Matcher;
|
import java.util.regex.Pattern;
|
|
@Entity
|
public class DouBanUtil {
|
/**
 * Prefix of the Douban search URL; callers in this class append {@code "q=" + <url-encoded keyword>}.
 * The query already carries {@code cat=1002}.
 * NOTE(review): 1002 is presumably Douban's movie/TV category - confirm.
 */
public static String baseUrl = "https://www.douban.com/search?cat=1002&";
|
/** Upper bound on how many search results {@code startSearch} resolves into detail pages. */
public static int maxCount = 1;
|
|
/**
|
* @title:
|
* @description: 搜索
|
* @author Administrator
|
* @date 2021/9/22 17:46
|
*/
|
public static List<VideoInfo> startSearch(String st) throws IOException {
|
Document doc = Jsoup
|
.connect(
|
baseUrl + "q="
|
+ URLEncoder.encode(st, "UTF-8"))
|
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
|
.header("Host", "www.douban.com")
|
.header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"")
|
.header("sec-ch-ua-mobile", "?0")
|
.header("sec-ch-ua-platform", "\"Windows\"")
|
.header("Sec-Fetch-Dest", "document")
|
.header("Sec-Fetch-Mode", "navigate")
|
.header("Sec-Fetch-Site", "none")
|
.header("Sec-Fetch-User", "?1")
|
.header("Upgrade-Insecure-Requests", "1")
|
.userAgent(
|
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
|
.timeout(20000).get();
|
Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0);
|
Elements els = el.getElementsByClass("result");
|
List<VideoInfo> list = new ArrayList<>();
|
int count = els.size() >= maxCount ? maxCount : els.size();
|
|
for (int i = 0; i < count; i++) {
|
try {
|
//获取豆瓣ID
|
SearchResultItem resultItem = parseSearchResultItem(els.get(i));
|
String url = "";
|
|
LogUtil.i(url);
|
Document detailDoc = Jsoup
|
.connect(url)
|
.timeout(20000)
|
.userAgent(
|
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
|
.get();
|
Element detail = detailDoc.getElementById("content");
|
Element vinfo = detailDoc.getElementById("info");
|
|
|
String year = "";
|
String picture = detail.getElementById("mainpic")
|
.getElementsByTag("img").get(0).attr("src")
|
.replace("/spst/", "/lpst/");
|
String actor = "";
|
try {
|
Elements actors = detail
|
.getElementsByAttributeValue("class", "actor")
|
.get(0)
|
.getElementsByAttributeValue("class", "attrs")
|
.get(0).getElementsByTag("span").get(0)
|
.getElementsByTag("a");
|
|
for (Element w : actors) {
|
actor += w.text() + " ";
|
}
|
} catch (Exception e) {
|
// e.printStackTrace();
|
}
|
Elements beiz = detail.getElementsByAttributeValue("property",
|
"v:genre");
|
String beizhu = "";
|
for (Element e : beiz) {
|
beizhu += e.text() + " ";
|
}
|
|
String duration = "0";
|
try {
|
detail.getElementsByAttributeValue("property", "v:runtime")
|
.get(0).attr("content");
|
} catch (Exception e) {
|
|
}
|
String introduction = "";
|
try {
|
introduction = detailDoc
|
.getElementsByAttributeValue("property",
|
"v:summary").get(0).text();
|
} catch (Exception e) {
|
|
}
|
String score = detailDoc
|
.getElementsByAttributeValue("property", "v:average")
|
.get(0).text();
|
|
String data = "";
|
try {
|
data = detailDoc
|
.getElementsByAttributeValue("property",
|
"v:initialReleaseDate").get(0).text();
|
} catch (Exception e) {
|
|
}
|
try {
|
score = detail
|
.getElementsByAttributeValue("property",
|
"v:average").get(0).text();
|
} catch (Exception e) {
|
|
}
|
String director = "";
|
try {
|
Elements directors = detail.getElementsByAttributeValue(
|
"rel", "v:directedBy");
|
for (int n = 0; n < directors.size(); n++) {
|
director += directors.get(n).text() + " ";
|
}
|
|
} catch (Exception e) {
|
|
}
|
|
if (director.endsWith(" "))
|
director = director.substring(0, director.length() - 1);
|
Pattern pattern = Pattern
|
.compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}");
|
Matcher matcher = pattern.matcher(data);
|
String dateStr = null;
|
if (matcher.find()) {
|
dateStr = matcher.group(0);
|
}
|
String month = "0";
|
String day = "0";
|
if (dateStr != null) {
|
year = dateStr.split("-")[0];
|
month = dateStr.split("-")[1];
|
day = dateStr.split("-")[2];
|
}
|
String area = "";
|
String[] infoSt = vinfo.toString().split("<br />");
|
for (String ist : infoSt) {
|
if (ist.contains("��Ƭ���")) {
|
try {
|
int start = ist.indexOf("</span>");
|
area = ist.substring(start + 7, ist.length())
|
.replace("\"", "").trim();
|
System.out.println(area);
|
} catch (Exception e) {
|
e.printStackTrace();
|
}
|
break;
|
}
|
}
|
|
VideoInfo info = new VideoInfo();
|
info.setName(resultItem.getName());
|
info.setBeizhu(beizhu);
|
info.setDuration(duration);
|
info.setIntroduction(introduction);
|
info.setMainActor(actor);
|
info.setPicture(picture);
|
info.setScore(score);
|
info.setYear(year);
|
info.setDay(day);
|
info.setMonth(month);
|
info.setDirector(director);
|
info.setArea(area);
|
list.add(info);
|
} catch (Exception e) {
|
e.printStackTrace();
|
}
|
}
|
return list;
|
}
|
|
//解析搜索结果子项
|
public static SearchResultItem parseSearchResultItem(Element ele) throws Exception {
|
String url = ele.getElementsByTag("a").get(0)
|
.attr("href");
|
Map<String, String> params = HttpUtil.getPramsFromUrl(url);
|
String subUrl = params.get("url");
|
subUrl = URLDecoder.decode(subUrl, "UTF-8");
|
String[] sts = subUrl.split("/");
|
|
SearchResultItem resultItem = new SearchResultItem();
|
|
String id = null;
|
for (String s : sts) {
|
if (NumberUtil.isNumeric(s)) {
|
id = s;
|
break;
|
}
|
}
|
resultItem.setId(id);
|
//获取名称
|
String name = ele.getElementsByClass("title").get(0).getElementsByTag("a").text().trim();
|
resultItem.setName(name);
|
|
String cast = ele.getElementsByClass("title").get(0).getElementsByClass("subject-cast").get(0).text().trim();
|
//解析年份
|
sts = cast.split("/");
|
String year = null;
|
List<String> subCastList = new ArrayList<>();
|
for (String s : sts) {
|
s = s.trim();
|
if (NumberUtil.isNumeric(s)) {
|
year = s;
|
} else {
|
if (s.length() > 1) {
|
//演职员表代表
|
subCastList.add(s);
|
}
|
}
|
}
|
resultItem.setYear(year);
|
resultItem.setSubCastList(subCastList);
|
return resultItem;
|
}
|
|
|
public static void saveSearch(String name, String path) throws Exception {
|
Document doc = Jsoup
|
.connect(
|
baseUrl + "q="
|
+ URLEncoder.encode(name, "UTF-8"))
|
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
|
.header("Host", "www.douban.com")
|
.header("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"")
|
.header("sec-ch-ua-mobile", "?0")
|
.header("sec-ch-ua-platform", "\"Windows\"")
|
.header("Sec-Fetch-Dest", "document")
|
.header("Sec-Fetch-Mode", "navigate")
|
.header("Sec-Fetch-Site", "none")
|
.header("Sec-Fetch-User", "?1")
|
.header("Upgrade-Insecure-Requests", "1")
|
.userAgent(
|
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36")
|
.timeout(20000).get();
|
Element el = doc.getElementById("content").getElementsByClass("search-result").get(0).getElementsByClass("result-list").get(0);
|
String html = el.html();
|
|
byte[] bs = html.getBytes("UTF-8");
|
FileUtil.saveAsFile(new ByteArrayInputStream(bs), path);
|
}
|
|
|
private static DouBanStar getStarInfo(Element item) {
|
DouBanStar star = new DouBanStar();
|
String avatarStyle = item.getElementsByClass("avatar").get(0).attr("style");
|
String avatar = avatarStyle.replace("background-image: url(", "").replace(")", "").trim();
|
String name = item.getElementsByTag("a").get(0).attr("title");
|
if (name.contains(" ")) {
|
String chaineseName = name.split(" ")[0].trim();
|
String englishName = name.substring(chaineseName.length()).trim();
|
star.setChineseName(chaineseName);
|
star.setEnglishName(englishName);
|
}
|
|
String href = item.getElementsByTag("a").get(0).attr("href");
|
if (href.endsWith("/"))
|
href = href.substring(0, href.length() - 1);
|
String[] sts = href.split("/");
|
String id = sts[sts.length - 1];
|
if (!avatar.contains("default"))
|
star.setAvatar(avatar);
|
star.setId(id);
|
star.setUrl(href);
|
return star;
|
}
|
|
private static List<DouBanStar> getStarList(Element root) {
|
Elements items = root.getElementsByTag("li");
|
List<DouBanStar> list = new ArrayList<>();
|
for (int i = 0; i < items.size(); i++) {
|
Element item = items.get(i);
|
DouBanStar star = getStarInfo(item);
|
if (star != null) {
|
list.add(star);
|
}
|
}
|
return list;
|
}
|
|
|
public static String getDetail(String id) {
|
String url = String.format("https://m.douban.com/rexxar/api/v2/tv/%s?ck=&for_mobile=1", id);
|
Map<String, String> headers = new HashMap<>();
|
headers.put("Accept", "application/json");
|
headers.put("Referer", String.format("https://m.douban.com/movie/subject/%s/", id));
|
headers.put("sec-ch-ua", "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\", \"Google Chrome\";v=\"99\"");
|
headers.put("sec-ch-ua-mobile", "?1");
|
headers.put("sec-ch-ua-platform", "Android");
|
headers.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Mobile Safari/537.36");
|
String result = HttpUtil.get(url, new HashMap<>(), headers);
|
return result;
|
}
|
|
public static void saveDetail(String id) throws Exception {
|
String result = getDetail(id);
|
JSONObject object = JSONObject.fromObject(result);
|
result = object.toString();
|
byte[] bs = result.getBytes("UTF-8");
|
FileUtil.saveAsFile(new ByteArrayInputStream(bs), "F:\\豆瓣影视信息\\" + id + ".json");
|
}
|
|
//获取电影的影人信息
|
public static Celebrities getMovieStars(String movieId) throws IOException {
|
String url = String.format("https://movie.douban.com/subject/%s/celebrities", movieId);
|
Document doc = Jsoup.connect(url).timeout(60000).userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36").get();
|
|
Elements eles = doc.getElementById("celebrities").getElementsByClass("list-wrapper");
|
|
Celebrities celebrities = new Celebrities();
|
|
for (int i = 0; i < eles.size(); i++) {
|
String type = eles.get(i).getElementsByTag("h2").get(0).ownText();
|
if (type.contains("导演")) {
|
List<DouBanStar> list = getStarList(eles.get(i));
|
celebrities.setDirectors(list);
|
} else if (type.contains("演员")) {
|
List<DouBanStar> list = getStarList(eles.get(i));
|
celebrities.setStars(list);
|
}
|
}
|
return celebrities;
|
}
|
|
|
static class Celebrities {
|
|
private List<DouBanStar> directors;
|
private List<DouBanStar> stars;
|
|
public List<DouBanStar> getDirectors() {
|
return directors;
|
}
|
|
public void setDirectors(List<DouBanStar> directors) {
|
this.directors = directors;
|
}
|
|
public List<DouBanStar> getStars() {
|
return stars;
|
}
|
|
public void setStars(List<DouBanStar> stars) {
|
this.stars = stars;
|
}
|
}
|
|
|
static class DouBanStar {
|
|
private String id;
|
private String chineseName;
|
private String englishName;
|
private String avatar;
|
private String url;
|
|
public String getId() {
|
return id;
|
}
|
|
public void setId(String id) {
|
this.id = id;
|
}
|
|
public String getChineseName() {
|
return chineseName;
|
}
|
|
public void setChineseName(String chineseName) {
|
this.chineseName = chineseName;
|
}
|
|
public String getEnglishName() {
|
return englishName;
|
}
|
|
public void setEnglishName(String englishName) {
|
this.englishName = englishName;
|
}
|
|
public String getAvatar() {
|
return avatar;
|
}
|
|
public void setAvatar(String avatar) {
|
this.avatar = avatar;
|
}
|
|
public String getUrl() {
|
return url;
|
}
|
|
public void setUrl(String url) {
|
this.url = url;
|
}
|
}
|
|
|
//搜索结果
|
static class SearchResultItem {
|
private String id;
|
private String name;
|
private String year;
|
private List<String> subCastList;
|
|
public String getId() {
|
return id;
|
}
|
|
public void setId(String id) {
|
this.id = id;
|
}
|
|
public String getName() {
|
return name;
|
}
|
|
public void setName(String name) {
|
this.name = name;
|
}
|
|
public String getYear() {
|
return year;
|
}
|
|
public void setYear(String year) {
|
this.year = year;
|
}
|
|
public List<String> getSubCastList() {
|
return subCastList;
|
}
|
|
public void setSubCastList(List<String> subCastList) {
|
this.subCastList = subCastList;
|
}
|
}
|
|
|
public static void main(String[] args) throws Exception {
|
|
|
// DouBanUtil.saveSearch("奔跑吧");
|
// String id = "35400242";
|
// try {
|
// DouBanUtil.saveDetail(id);
|
// } catch (Exception e) {
|
// e.printStackTrace();
|
// }
|
}
|
|
}
|