package com.newvideo.util; import java.io.IOException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.persistence.Entity; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.newvideo.domain.VideoInfo; @Entity public class DouBanUtil { public static String baseUrl = "https://movie.douban.com/subject_search?cat=102&"; public static int maxCount = 1; public static List startSearch(String st) throws IOException { Document doc = Jsoup .connect( baseUrl + "search_text=" + URLEncoder.encode(st, "UTF-8")) .userAgent( "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") .timeout(20000).get(); Element el = doc.getElementById("content"); Elements els = el.getElementsByTag("table"); List list = new ArrayList(); int count = els.size() >= maxCount ? maxCount : els.size(); for (int i = 0; i < count; i++) { try { String url = els.get(i).getElementsByTag("a").get(0) .attr("href"); LogUtil.i(url); Document detailDoc = Jsoup .connect(url) .timeout(20000) .userAgent( "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.81 Safari/537.36") .get(); Element detail = detailDoc.getElementById("content"); Element vinfo = detailDoc.getElementById("info"); String name = detail.getElementsByTag("h1").get(0) .getElementsByTag("span").get(0).text(); String year = ""; String picture = detail.getElementById("mainpic") .getElementsByTag("img").get(0).attr("src") .replace("/spst/", "/lpst/"); String actor = ""; try { Elements actors = detail .getElementsByAttributeValue("class", "actor") .get(0) .getElementsByAttributeValue("class", "attrs") .get(0).getElementsByTag("span").get(0) .getElementsByTag("a"); for (Element w : actors) { actor += w.text() + " "; } } catch (Exception e) { // e.printStackTrace(); } Elements beiz = detail.getElementsByAttributeValue("property", "v:genre"); String beizhu = ""; for (Element e : beiz) { beizhu += e.text() + " "; } String duration = "0"; try { detail.getElementsByAttributeValue("property", "v:runtime") .get(0).attr("content"); } catch (Exception e) { } String introduction = ""; try { introduction = detailDoc .getElementsByAttributeValue("property", "v:summary").get(0).text(); } catch (Exception e) { } String score = detailDoc .getElementsByAttributeValue("property", "v:average") .get(0).text(); String data = ""; try { data = detailDoc .getElementsByAttributeValue("property", "v:initialReleaseDate").get(0).text(); } catch (Exception e) { } try { score = detail .getElementsByAttributeValue("property", "v:average").get(0).text(); } catch (Exception e) { } String director = ""; try { Elements directors = detail.getElementsByAttributeValue( "rel", "v:directedBy"); for (int n = 0; n < directors.size(); n++) { director += directors.get(n).text() + " "; } } catch (Exception e) { } if (director.endsWith(" ")) director = director.substring(0, director.length() - 1); Pattern pattern = Pattern .compile("[0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}"); Matcher matcher = pattern.matcher(data); String dateStr = null; if (matcher.find()) { dateStr = matcher.group(0); } String month = "0"; String day = "0"; if (dateStr != null) { year = dateStr.split("-")[0]; month = dateStr.split("-")[1]; day = dateStr.split("-")[2]; } String area =""; String[] infoSt = vinfo.toString().split("
"); for (String ist : infoSt) { if (ist.contains("��Ƭ���")) { try{ int start = ist.indexOf(""); area = ist.substring(start + 7, ist.length()) .replace("\"", "").trim(); System.out.println(area); }catch(Exception e) { e.printStackTrace(); } break; } } VideoInfo info = new VideoInfo(); info.setName(name); info.setBeizhu(beizhu); info.setDuration(duration); info.setIntroduction(introduction); info.setMainActor(actor); info.setPicture(picture); info.setScore(score); info.setYear(year); info.setDay(day); info.setMonth(month); info.setDirector(director); info.setArea(area); list.add(info); } catch (Exception e) { e.printStackTrace(); } } return list; } }