package com.yeshi.buwan.util.video.web; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; import javax.script.Invocable; import javax.script.ScriptEngine; import javax.script.ScriptEngineManager; import javax.script.ScriptException; import java.lang.reflect.Type; import java.util.List; public class YouKuWebUtil { public static void main(String[] args) throws Exception { List list = parseCategoryList("https://www.youku.com/category/show/c_97_s_6.html?theme=dark"); System.out.println(list); } /** * 解析专辑列表 * * @param url */ public static List parseCategoryList(String url) throws Exception { Document document = Jsoup.connect(url).userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36").timeout(60000).get(); Elements els = document.getElementsByTag("script"); for (int i = 0; i < els.size(); i++) { String value = els.get(i).html(); if (value.contains("window.__INITIAL_DATA__")) { System.out.println(value); return parseVideoList(value); } } return null; } private static List parseVideoList(String dtaa) throws ScriptException, NoSuchMethodException { String script = "var _window={};" + dtaa.replace("window.", "_window.").replace("document.", "_document."); script += ";function getData(){return JSON.stringify(_window.__INITIAL_DATA__.categoryVideos);}"; ScriptEngineManager manager = new ScriptEngineManager(); ScriptEngine jsEngine = manager.getEngineByName("javascript"); try { jsEngine.eval(script); } catch (ScriptException e) { e.printStackTrace(); } Gson gson = new Gson(); if (jsEngine instanceof Invocable) { Invocable in = (Invocable) jsEngine; Object data = in.invokeFunction("getData"); Type type = new TypeToken>() { }.getType(); List list = gson.fromJson(data.toString(), type); if (list != null) { for (YouKuCoverInfo info : list) { info.setShowId(parseShowIdFromVideoLink(info.getVideoLink())); } } return list; } return null; } private static String parseShowIdFromVideoLink(String videoLink) { if (videoLink.indexOf("/id_") > -1) { return videoLink.substring(videoLink.indexOf("/id_"), videoLink.indexOf(".htm")).replace("/id_", "").trim(); } return null; } public static class YouKuCoverInfo { private String summaryType; private String access; private String type; private String showThumb; private String img; private String summary; private String title; private String subTitle; private String rightTagText; private String videoId; private String videoLink; private String showId; public String getShowId() { return showId; } public void setShowId(String showId) { this.showId = showId; } public String getSummaryType() { return summaryType; } public void setSummaryType(String summaryType) { this.summaryType = summaryType; } public String getAccess() { return access; } public void setAccess(String access) { this.access = access; } public String getType() { return type; } public void setType(String type) { this.type = type; } public String getShowThumb() { return showThumb; } public void setShowThumb(String showThumb) { this.showThumb = showThumb; } public String getImg() { return img; } public void setImg(String img) { this.img = img; } public String getSummary() { return summary; } public void setSummary(String summary) { this.summary = summary; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getSubTitle() { return subTitle; } public void setSubTitle(String subTitle) { this.subTitle = subTitle; } public String getRightTagText() { return rightTagText; } public void setRightTagText(String rightTagText) { this.rightTagText = rightTagText; } public String getVideoId() { return videoId; } public void setVideoId(String videoId) { this.videoId = videoId; } public String getVideoLink() { return videoLink; } public void setVideoLink(String videoLink) { this.videoLink = videoLink; } } }