| | |
| | | |
| | | @Entity |
| | | public class DouBanUtil { |
| | | public static String baseUrl = "https://movie.douban.com/subject_search?cat=102&"; |
| | | public static int maxCount = 1; |
| | | public static String baseUrl = "https://movie.douban.com/subject_search?cat=102&"; |
| | | public static int maxCount = 1; |
| | | |
| | | public static List<VideoInfo> startSearch(String st) throws IOException { |
| | | Document doc = Jsoup |
| | | .connect( |
| | | baseUrl + "search_text=" |
| | | + URLEncoder.encode(st, "UTF-8")) |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .timeout(20000).get(); |
| | | Element el = doc.getElementById("content"); |
| | | Elements els = el.getElementsByTag("table"); |
| | | List<VideoInfo> list = new ArrayList<VideoInfo>(); |
| | | int count = els.size() >= maxCount ? maxCount : els.size(); |
| | | /** |
| | | * @title: |
| | | * @description: 搜索 |
| | | * @author Administrator |
| | | * @date 2021/9/22 17:46 |
| | | */ |
| | | public static List<VideoInfo> startSearch(String st) throws IOException { |
| | | Document doc = Jsoup |
| | | .connect( |
| | | baseUrl + "search_text=" |
| | | + URLEncoder.encode(st, "UTF-8")) |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .timeout(20000).get(); |
| | | Element el = doc.getElementById("content"); |
| | | Elements els = el.getElementsByTag("table"); |
| | | List<VideoInfo> list = new ArrayList<VideoInfo>(); |
| | | int count = els.size() >= maxCount ? maxCount : els.size(); |
| | | |
| | | for (int i = 0; i < count; i++) { |
| | | try { |
| | | String url = els.get(i).getElementsByTag("a").get(0) |
| | | .attr("href"); |
| | | LogUtil.i(url); |
| | | Document detailDoc = Jsoup |
| | | .connect(url) |
| | | .timeout(20000) |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .get(); |
| | | Element detail = detailDoc.getElementById("content"); |
| | | Element vinfo = detailDoc.getElementById("info"); |
| | | for (int i = 0; i < count; i++) { |
| | | try { |
| | | String url = els.get(i).getElementsByTag("a").get(0) |
| | | .attr("href"); |
| | | LogUtil.i(url); |
| | | Document detailDoc = Jsoup |
| | | .connect(url) |
| | | .timeout(20000) |
| | | .userAgent( |
| | | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") |
| | | .get(); |
| | | Element detail = detailDoc.getElementById("content"); |
| | | Element vinfo = detailDoc.getElementById("info"); |
| | | |
| | | String name = detail.getElementsByTag("h1").get(0) |
| | | .getElementsByTag("span").get(0).text(); |
| | | String name = detail.getElementsByTag("h1").get(0) |
| | | .getElementsByTag("span").get(0).text(); |
| | | |
| | | String year = ""; |
| | | String picture = detail.getElementById("mainpic") |
| | | .getElementsByTag("img").get(0).attr("src") |
| | | .replace("/spst/", "/lpst/"); |
| | | String actor = ""; |
| | | try { |
| | | Elements actors = detail |
| | | .getElementsByAttributeValue("class", "actor") |
| | | .get(0) |
| | | .getElementsByAttributeValue("class", "attrs") |
| | | .get(0).getElementsByTag("span").get(0) |
| | | .getElementsByTag("a"); |
| | | String year = ""; |
| | | String picture = detail.getElementById("mainpic") |
| | | .getElementsByTag("img").get(0).attr("src") |
| | | .replace("/spst/", "/lpst/"); |
| | | String actor = ""; |
| | | try { |
| | | Elements actors = detail |
| | | .getElementsByAttributeValue("class", "actor") |
| | | .get(0) |
| | | .getElementsByAttributeValue("class", "attrs") |
| | | .get(0).getElementsByTag("span").get(0) |
| | | .getElementsByTag("a"); |
| | | |
| | | for (Element w : actors) { |
| | | actor += w.text() + " "; |
| | | } |
| | | } catch (Exception e) { |
| | | // e.printStackTrace(); |
| | | } |
| | | Elements beiz = detail.getElementsByAttributeValue("property", |
| | | "v:genre"); |
| | | String beizhu = ""; |
| | | for (Element e : beiz) { |
| | | beizhu += e.text() + " "; |
| | | } |
| | | for (Element w : actors) { |
| | | actor += w.text() + " "; |
| | | } |
| | | } catch (Exception e) { |
| | | // e.printStackTrace(); |
| | | } |
| | | Elements beiz = detail.getElementsByAttributeValue("property", |
| | | "v:genre"); |
| | | String beizhu = ""; |
| | | for (Element e : beiz) { |
| | | beizhu += e.text() + " "; |
| | | } |
| | | |
| | | String duration = "0"; |
| | | try { |
| | | detail.getElementsByAttributeValue("property", "v:runtime") |
| | | .get(0).attr("content"); |
| | | } catch (Exception e) { |
| | | String duration = "0"; |
| | | try { |
| | | detail.getElementsByAttributeValue("property", "v:runtime") |
| | | .get(0).attr("content"); |
| | | } catch (Exception e) { |
| | | |
| | | } |
| | | String introduction = ""; |
| | | try { |
| | | introduction = detailDoc |
| | | .getElementsByAttributeValue("property", |
| | | "v:summary").get(0).text(); |
| | | } catch (Exception e) { |
| | | } |
| | | String introduction = ""; |
| | | try { |
| | | introduction = detailDoc |
| | | .getElementsByAttributeValue("property", |
| | | "v:summary").get(0).text(); |
| | | } catch (Exception e) { |
| | | |
| | | } |
| | | String score = detailDoc |
| | | .getElementsByAttributeValue("property", "v:average") |
| | | .get(0).text(); |
| | | } |
| | | String score = detailDoc |
| | | .getElementsByAttributeValue("property", "v:average") |
| | | .get(0).text(); |
| | | |
| | | String data = ""; |
| | | try { |
| | | data = detailDoc |
| | | .getElementsByAttributeValue("property", |
| | | "v:initialReleaseDate").get(0).text(); |
| | | } catch (Exception e) { |
| | | String data = ""; |
| | | try { |
| | | data = detailDoc |
| | | .getElementsByAttributeValue("property", |
| | | "v:initialReleaseDate").get(0).text(); |
| | | } catch (Exception e) { |
| | | |
| | | } |
| | | try { |
| | | score = detail |
| | | .getElementsByAttributeValue("property", |
| | | "v:average").get(0).text(); |
| | | } catch (Exception e) { |
| | | } |
| | | try { |
| | | score = detail |
| | | .getElementsByAttributeValue("property", |
| | | "v:average").get(0).text(); |
| | | } catch (Exception e) { |
| | | |
| | | } |
| | | String director = ""; |
| | | try { |
| | | Elements directors = detail.getElementsByAttributeValue( |
| | | "rel", "v:directedBy"); |
| | | for (int n = 0; n < directors.size(); n++) { |
| | | director += directors.get(n).text() + " "; |
| | | } |
| | | } |
| | | String director = ""; |
| | | try { |
| | | Elements directors = detail.getElementsByAttributeValue( |
| | | "rel", "v:directedBy"); |
| | | for (int n = 0; n < directors.size(); n++) { |
| | | director += directors.get(n).text() + " "; |
| | | } |
| | | |
| | | } catch (Exception e) { |
| | | } catch (Exception e) { |
| | | |
| | | } |
| | | } |
| | | |
| | | if (director.endsWith(" ")) |
| | | director = director.substring(0, director.length() - 1); |
| | | Pattern pattern = Pattern |
| | | .compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}"); |
| | | Matcher matcher = pattern.matcher(data); |
| | | String dateStr = null; |
| | | if (matcher.find()) { |
| | | dateStr = matcher.group(0); |
| | | } |
| | | String month = "0"; |
| | | String day = "0"; |
| | | if (dateStr != null) { |
| | | year = dateStr.split("-")[0]; |
| | | month = dateStr.split("-")[1]; |
| | | day = dateStr.split("-")[2]; |
| | | } |
| | | String area =""; |
| | | String[] infoSt = vinfo.toString().split("<br />"); |
| | | for (String ist : infoSt) { |
| | | if (ist.contains("��Ƭ���")) { |
| | | try{ |
| | | int start = ist.indexOf("</span>"); |
| | | area = ist.substring(start + 7, ist.length()) |
| | | .replace("\"", "").trim(); |
| | | System.out.println(area); |
| | | }catch(Exception e) |
| | | { |
| | | e.printStackTrace(); |
| | | } |
| | | break; |
| | | } |
| | | } |
| | | if (director.endsWith(" ")) |
| | | director = director.substring(0, director.length() - 1); |
| | | Pattern pattern = Pattern |
| | | .compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}"); |
| | | Matcher matcher = pattern.matcher(data); |
| | | String dateStr = null; |
| | | if (matcher.find()) { |
| | | dateStr = matcher.group(0); |
| | | } |
| | | String month = "0"; |
| | | String day = "0"; |
| | | if (dateStr != null) { |
| | | year = dateStr.split("-")[0]; |
| | | month = dateStr.split("-")[1]; |
| | | day = dateStr.split("-")[2]; |
| | | } |
| | | String area = ""; |
| | | String[] infoSt = vinfo.toString().split("<br />"); |
| | | for (String ist : infoSt) { |
| | | if (ist.contains("��Ƭ���")) { |
| | | try { |
| | | int start = ist.indexOf("</span>"); |
| | | area = ist.substring(start + 7, ist.length()) |
| | | .replace("\"", "").trim(); |
| | | System.out.println(area); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | break; |
| | | } |
| | | } |
| | | |
| | | VideoInfo info = new VideoInfo(); |
| | | info.setName(name); |
| | | info.setBeizhu(beizhu); |
| | | info.setDuration(duration); |
| | | info.setIntroduction(introduction); |
| | | info.setMainActor(actor); |
| | | info.setPicture(picture); |
| | | info.setScore(score); |
| | | info.setYear(year); |
| | | info.setDay(day); |
| | | info.setMonth(month); |
| | | info.setDirector(director); |
| | | info.setArea(area); |
| | | list.add(info); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | VideoInfo info = new VideoInfo(); |
| | | info.setName(name); |
| | | info.setBeizhu(beizhu); |
| | | info.setDuration(duration); |
| | | info.setIntroduction(introduction); |
| | | info.setMainActor(actor); |
| | | info.setPicture(picture); |
| | | info.setScore(score); |
| | | info.setYear(year); |
| | | info.setDay(day); |
| | | info.setMonth(month); |
| | | info.setDirector(director); |
| | | info.setArea(area); |
| | | list.add(info); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | |
| | | private static DouBanStar getStarInfo(Element item) { |
| | | DouBanStar star = new DouBanStar(); |
| | | String avatarStyle = item.getElementsByClass("avatar").get(0).attr("style"); |
| | | String avatar = avatarStyle.replace("background-image: url(", "").replace(")", "").trim(); |
| | | String name = item.getElementsByTag("a").get(0).attr("title"); |
| | | if (name.contains(" ")) { |
| | | String chaineseName = name.split(" ")[0].trim(); |
| | | String englishName = name.substring(chaineseName.length()).trim(); |
| | | star.setChineseName(chaineseName); |
| | | star.setEnglishName(englishName); |
| | | } |
| | | |
| | | String href = item.getElementsByTag("a").get(0).attr("href"); |
| | | if (href.endsWith("/")) |
| | | href = href.substring(0, href.length() - 1); |
| | | String[] sts = href.split("/"); |
| | | String id = sts[sts.length - 1]; |
| | | if (!avatar.contains("default")) |
| | | star.setAvatar(avatar); |
| | | star.setId(id); |
| | | star.setUrl(href); |
| | | return star; |
| | | } |
| | | |
| | | private static List<DouBanStar> getStarList(Element root) { |
| | | Elements items = root.getElementsByTag("li"); |
| | | List<DouBanStar> list = new ArrayList<>(); |
| | | for (int i = 0; i < items.size(); i++) { |
| | | Element item = items.get(i); |
| | | DouBanStar star = getStarInfo(item); |
| | | if (star != null) { |
| | | list.add(star); |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | |
| | | //获取电影的影人信息 |
| | | public static Celebrities getMovieStars(String movieId) throws IOException { |
| | | String url = String.format("https://movie.douban.com/subject/%s/celebrities", movieId); |
| | | Document doc = Jsoup.connect(url).timeout(60000).userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36").get(); |
| | | |
| | | Elements eles = doc.getElementById("celebrities").getElementsByClass("list-wrapper"); |
| | | |
| | | Celebrities celebrities = new Celebrities(); |
| | | |
| | | for (int i = 0; i < eles.size(); i++) { |
| | | String type = eles.get(i).getElementsByTag("h2").get(0).ownText(); |
| | | if (type.contains("导演")) { |
| | | List<DouBanStar> list = getStarList(eles.get(i)); |
| | | celebrities.setDirectors(list); |
| | | } else if (type.contains("演员")) { |
| | | List<DouBanStar> list = getStarList(eles.get(i)); |
| | | celebrities.setStars(list); |
| | | } |
| | | } |
| | | return celebrities; |
| | | } |
| | | |
| | | |
| | | static class Celebrities { |
| | | |
| | | private List<DouBanStar> directors; |
| | | private List<DouBanStar> stars; |
| | | |
| | | public List<DouBanStar> getDirectors() { |
| | | return directors; |
| | | } |
| | | |
| | | public void setDirectors(List<DouBanStar> directors) { |
| | | this.directors = directors; |
| | | } |
| | | |
| | | public List<DouBanStar> getStars() { |
| | | return stars; |
| | | } |
| | | |
| | | public void setStars(List<DouBanStar> stars) { |
| | | this.stars = stars; |
| | | } |
| | | } |
| | | |
| | | |
| | | static class DouBanStar { |
| | | |
| | | private String id; |
| | | private String chineseName; |
| | | private String englishName; |
| | | private String avatar; |
| | | private String url; |
| | | |
| | | public String getId() { |
| | | return id; |
| | | } |
| | | |
| | | public void setId(String id) { |
| | | this.id = id; |
| | | } |
| | | |
| | | public String getChineseName() { |
| | | return chineseName; |
| | | } |
| | | |
| | | public void setChineseName(String chineseName) { |
| | | this.chineseName = chineseName; |
| | | } |
| | | |
| | | public String getEnglishName() { |
| | | return englishName; |
| | | } |
| | | |
| | | public void setEnglishName(String englishName) { |
| | | this.englishName = englishName; |
| | | } |
| | | |
| | | public String getAvatar() { |
| | | return avatar; |
| | | } |
| | | |
| | | public void setAvatar(String avatar) { |
| | | this.avatar = avatar; |
| | | } |
| | | |
| | | public String getUrl() { |
| | | return url; |
| | | } |
| | | |
| | | public void setUrl(String url) { |
| | | this.url = url; |
| | | } |
| | | } |
| | | |
| | | |
| | | public static void main(String[] args) throws IOException { |
| | | getMovieStars("26309788"); |
| | | } |
| | | |
| | | } |