From f537abe9f3646c739beaf15076246a2f71a347e9 Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期四, 20 二月 2025 16:57:55 +0800 Subject: [PATCH] IOS广告增加区域屏蔽 --- src/main/java/com/yeshi/buwan/videos/hanmi/HanmiApiUtil.java | 266 +++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 206 insertions(+), 60 deletions(-) diff --git a/src/main/java/com/yeshi/buwan/videos/hanmi/HanmiApiUtil.java b/src/main/java/com/yeshi/buwan/videos/hanmi/HanmiApiUtil.java index 6c080d8..d24b862 100644 --- a/src/main/java/com/yeshi/buwan/videos/hanmi/HanmiApiUtil.java +++ b/src/main/java/com/yeshi/buwan/videos/hanmi/HanmiApiUtil.java @@ -3,16 +3,17 @@ import com.yeshi.buwan.util.StringUtil; import com.yeshi.buwan.videos.hanmi.entity.HanmiShow; import com.yeshi.buwan.videos.hanmi.entity.HanmiShowEpisode; +import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.io.UnsupportedEncodingException; +import java.net.URI; +import java.net.URLEncoder; +import java.util.*; public class HanmiApiUtil { @@ -22,99 +23,208 @@ return parseShowDetail(show); } + private static Map<String, String> getHeaders() { + Map<String, String> headers = new HashMap<>(); + headers.put("sec-fetch-dest", "document"); + headers.put("sec-fetch-mode", "navigate"); + headers.put("sec-fetch-site", "same-origin"); + headers.put("sec-fetch-user", "?1"); + headers.put("upgrade-insecure-requests", "1"); + return headers; + } + + private static Document getDoc(String link, Map<String, String> headres) throws IOException { + Connection connection = Jsoup.connect(link).timeout(60000); + if (headres != null) + for (Iterator<String> its = headres.keySet().iterator(); its.hasNext(); ) { + String key = its.next(); + connection = connection.header(key, headres.get(key)); + } + Document doc = connection + .userAgent("Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1").get(); + return doc; + } + public static HanmiShow parseShowDetail(HanmiShow show) throws Exception { - if (show.getUrl() == null || !show.getUrl().startsWith("https://www.hmtv.me/show/")) { + if (show.getUrl() == null || !show.getUrl().startsWith("https://www.wztaichuan.com/vod/detail")) { throw new Exception("閾炬帴涓嶅悎娉�"); } - Document doc = Jsoup.connect(show.getUrl()).timeout(60000).referrer("https://www.hmtv.me/hanju").userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36").get(); - Element root = doc.getElementsByClass("video-content").get(0); - Element titleItem = root.getElementsByClass("article-title").get(0); + URI uri = URI.create(show.getUrl()); - //鏍囬 - String title = titleItem.getElementsByClass("item-title").get(0).ownText(); + Document doc = getDoc(show.getUrl(), getHeaders()); - String year = titleItem.getElementsByClass("item-year").get(0).ownText(); + Element root = doc.getElementsByClass("stui-content__thumb").get(0).parent(); + String picture = doc.getElementsByClass("stui-content__thumb").get(0).getElementsByTag("img").get(0).attr("data-original"); //鑺傜洰淇℃伅 - Element videoBox = root.getElementsByClass("video_box").get(0); + Element videoInfo = root.getElementsByClass("stui-content__detail").get(0); - String picture = videoBox.getElementsByClass("video_img").get(0).getElementsByTag("img").attr("src"); + Element titleItem = videoInfo.getElementsByClass("title").get(0); - Element videoInfo = videoBox.getElementsByClass("video_info").get(0); - String videoInfoStr = videoInfo.html(); - String[] sts = videoInfoStr.split("<br>"); - Map<String, String> infos = new HashMap<>(); - for (String st : sts) { - Document d = Jsoup.parse(st); - String value = d.text(); - if (value.indexOf(":") > -1) - infos.put(value.substring(0, value.indexOf(":")).trim(), value.substring(value.indexOf(":") + 1).trim()); + //鏍囬 + String title = null; + try { + title = titleItem.ownText(); + } catch (IndexOutOfBoundsException e) { } + String score = null; + try { + score = titleItem.getElementsByClass("score").get(0).ownText(); + } catch (IndexOutOfBoundsException e) { + } + show.setScore(score); + + Map<String, String> infos = new HashMap<>(); + Elements datas = videoInfo.getElementsByClass("data"); + for(int i=0;i<datas.size();i++){ + Elements data_items = datas.get(i).getElementsByClass("text-muted"); + for(int j=0; j<data_items.size(); j++){ + String key = data_items.get(j).ownText().trim(); + String value = null; + switch (key){ + case "涓绘紨锛�": + List<String> actors=new ArrayList<>(); + Elements temps = data_items.get(j).parent().getElementsByTag("a"); + for(Iterator<Element> its = temps.iterator(); its.hasNext();) + { + actors.add( its.next().ownText()); + } + value = StringUtil.join(actors, ","); + break; + default: + if(data_items.get(j).nextElementSibling()!=null) { + value = data_items.get(j).nextElementSibling().ownText(); + } else{ + value = data_items.get(j).parent().ownText(); + } + } + infos.put(key,value); + } + } + String desc = videoInfo.getElementsByClass("desc").get(0).ownText(); //鍓ч泦鍒楄〃 - Element eposide = root.getElementsByClass("video_list_li").get(0); + Element eposide = doc.getElementsByClass("playlist").get(0).getElementsByClass("stui-content__playlist").get(0); Elements eposides = eposide.getElementsByTag("a"); List<HanmiShowEpisode> episodeList = new ArrayList<>(); - for (int i = 0; i < eposides.size(); i++) { - String href = eposides.get(i).attr("href"); - String tag = eposides.get(i).ownText(); + + + //鐢靛奖 + if (show.getType() != null && show.getType().contains("褰�")) { + int index = 0; + String href = eposides.get(index).attr("href"); HanmiShowEpisode ep = new HanmiShowEpisode(); - ep.setOrderBy(i + 1); - ep.setPlayUrl("https://www.hmtv.me" + href); - ep.setTag(tag); + ep.setOrderBy(0); + ep.setPlayUrl(String.format("%s://%s%s",uri.getScheme(),uri.getHost(),href)); + ep.setTag(show.getTitle() != null ? show.getTitle() : title); episodeList.add(ep); + } else { + for (int i = 0; i < eposides.size(); i++) { + String href = eposides.get(i).attr("href"); + String tag = eposides.get(i).ownText(); + HanmiShowEpisode ep = new HanmiShowEpisode(); + ep.setOrderBy(i + 1); + ep.setPlayUrl(String.format("%s://%s%s",uri.getScheme(),uri.getHost(),href)); + ep.setTag(tag); + episodeList.add(ep); + } } - - //绠�浠� - String desc = root.getElementsByClass("jianjie").get(0).text(); - if (show.getTitle() == null) show.setTitle(title.split(" ")[0]); - show.setYear(year.replace("(", "").replace(")", "")); - show.setPicture(picture); - if (infos.get("涓绘紨") != null) - show.setActors(infos.get("涓绘紨").replace("/", ",")); - if (infos.get("瀵兼紨") != null) - show.setDirector(infos.get("瀵兼紨")); - if (infos.get("绫诲瀷") != null) - show.setCategorys(infos.get("绫诲瀷")); - if (infos.get("鍥藉/鍦板尯") != null) - show.setArea(infos.get("鍥藉/鍦板尯")); - if (infos.get("棣栨挱") != null) - show.setRelaseDate(infos.get("棣栨挱").substring(0, infos.get("棣栨挱").indexOf("(") > -1 ? infos.get("棣栨挱").indexOf("(") : infos.get("棣栨挱").length())); - show.setId(show.getUrl().replace("https://www.hmtv.me/show/", "").trim()); + show.setPicture(picture); + if (infos.get("涓绘紨锛�") != null) + show.setActors(infos.get("涓绘紨锛�"). + replace("/", ",")); + if (infos.get("瀵兼紨锛�") != null) + show.setDirector(infos.get("瀵兼紨锛�")); + if (infos.get("绫诲瀷锛�") != null) + show.setCategorys(infos.get("绫诲瀷锛�")); + if (infos.get("鍦板尯锛�") != null) + show.setArea(infos.get("鍦板尯锛�")); + show.setDesc(desc); + show.setYear(infos.get("骞翠唤锛�")); + if (show.getYear() != null && show.getRelaseDate() == null) { + show.setRelaseDate(show.getYear() + "-01-01"); + } + show.setId(show.getUrl().split("/id/")[1].split("/")[0].split("\\.")[0].trim()); show.setEpisodeList(episodeList); show.setUrl(show.getUrl()); - show.setDesc(desc.trim()); return show; } - public static List<HanmiShow> parseList(String listUrl) throws IOException { + Map<String, String> headers = new HashMap<>(); + headers.put("sec-fetch-dest", "document"); + headers.put("sec-fetch-mode", "navigate"); + headers.put("sec-fetch-site", "same-origin"); + headers.put("sec-fetch-user", "?1"); + headers.put("upgrade-insecure-requests", "1"); + + List<HanmiShow> list = new ArrayList<>(); - Document doc = Jsoup.connect(listUrl).timeout(60000).referrer("https://www.hmtv.me/hanju").userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36").get(); - Element root = doc.getElementsByClass("m-movies").get(0); - Elements items = root.getElementsByClass("u-movie"); + Document doc = getDoc(listUrl, headers); + Elements es = doc.getElementsByClass("stui-pannel_bd"); + Element root = null; + for(int i=0;i<es.size();i++){ + if( es.get(i).select("ul.stui-vodlist").size()>0){ + root = es.get(i).select("ul.stui-vodlist").get(0); + break; + } + } + Elements items = root.getElementsByTag("li"); for (int i = 0; i < items.size(); i++) { Element item = items.get(i); HanmiShow show = new HanmiShow(); - show.setUrl(item.getElementsByTag("a").get(0).attr("href")); - show.setTag(item.getElementsByClass("zhuangtai").get(0).text()); - String score = item.getElementsByClass("pingfen").get(0).text(); - if (score != null) { - score = score.replace("鍒�", ""); - show.setScore(score); + String url = item.getElementsByTag("a").get(0).attr("href"); + if(!url.startsWith("http")){ + URI uri = URI.create(listUrl); + url=String.format("%s://%s%s",uri.getScheme(),uri.getHost(),url); } - show.setTitle(item.getElementsByTag("h2").get(0).getElementsByTag("a").get(0).ownText()); + show.setUrl(url); + show.setTag(item.getElementsByClass("pic-text").get(0).text()); + show.setTitle(item.getElementsByClass("stui-vodlist__detail").get(0).getElementsByTag("a").get(0).ownText()); + show.setId(show.getUrl().split("/")[show.getUrl().split("/").length-1].split("\\.")[0].trim()); list.add(show); } - return list; } + + + /** + * 鏍规嵁閾炬帴鑾峰彇鍓ч泦 + * + * @param playUrl + * @return + */ + public static List<HanmiShowEpisode> getShowEpisodesFromPlayUrl(String playUrl) throws IOException { + URI uri = URI.create(playUrl); + + Document doc = getDoc(playUrl, getHeaders()); + + Element root = doc.getElementsByClass("stui-content__thumb").get(0).parent(); + + String picture = doc.getElementsByClass("stui-content__thumb").get(0).getElementsByTag("img").get(0).attr("data-original"); + + //鍓ч泦鍒楄〃 + Element eposide = doc.getElementsByClass("playlist").get(0).getElementsByClass("stui-content__playlist").get(0); + Elements eposides = eposide.getElementsByTag("a"); + List<HanmiShowEpisode> episodeList = new ArrayList<>(); + for (int i = 0; i < eposides.size(); i++) { + String href = eposides.get(i).attr("href"); + String tag = eposides.get(i).ownText(); + HanmiShowEpisode ep = new HanmiShowEpisode(); + ep.setOrderBy(i + 1); + ep.setPlayUrl(String.format("%s://%s%s",uri.getScheme(),uri.getHost(),href)); + ep.setTag(tag); + episodeList.add(ep); + } + return episodeList; + } + public static List<HanmiShow> parseDetailList(List<HanmiShow> showList) throws Exception { List<HanmiShow> list = new ArrayList<>(); @@ -128,8 +238,44 @@ return list; } + + public static void parseDY() throws UnsupportedEncodingException { + for(int p=1;p<20;p++) { + String url = String.format("https://www.wztaichuan.com/vod/show/area/%s/id/1/page/%d.html", URLEncoder.encode("闊╁浗","UTF-8"), p); + try { + List<HanmiShow> showList = parseList(url); + showList = parseDetailList(showList); + for(HanmiShow show:showList){ + show.setTag("璇勫垎锛�"+show.getScore()); + show.setCategorys("鐢靛奖"); + } + System.out.println("鐢靛奖:"+p +"-"+showList.size()); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + + + + public static void main(String[] args) throws Exception { - parseDetailList(parseList("https://www.hmtv.me/hanju")); +// List<HanmiShowEpisode> list = getShowEpisodesFromPlayUrl("https://www.hmtv.me/vplay/MTExNS0xLTA=.html"); +// System.out.println(list); +// List<HanmiShow> showList = HanmiApiUtil.parseDetailList(HanmiApiUtil.parseList("https://www.hanjutv.me/hanju/page/" + 50)); +// System.out.printf(showList.toString()); + +// URI uri = URI.create("https://www.hanjutv.me/s/1579"); +// System.out.println(uri.getScheme()); +// parseList("https://www.wztaichuan.com/vod/type/id/5/page/1.html"); + parseDY(); + +// +// System.out.println(uri.getHost()); +// HanmiShow show=new HanmiShow(); +// show.setUrl("https://www.wztaichuan.com/vod/detail/id/10195.html"); +// show = parseShowDetail(show); +// System.out.println(show); } } -- Gitblit v1.8.0