From ab35ac8b769b2d9816dffb33a64f2c6f7bd5dd6e Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期四, 05 九月 2024 17:05:55 +0800 Subject: [PATCH] 风行网页版爬虫 --- src/main/java/com/yeshi/buwan/util/rank/IqiyiRankUtil.java | 68 +++++++++++++++++++++++++--------- 1 files changed, 50 insertions(+), 18 deletions(-) diff --git a/src/main/java/com/yeshi/buwan/util/rank/IqiyiRankUtil.java b/src/main/java/com/yeshi/buwan/util/rank/IqiyiRankUtil.java index 9418375..7fb53d4 100644 --- a/src/main/java/com/yeshi/buwan/util/rank/IqiyiRankUtil.java +++ b/src/main/java/com/yeshi/buwan/util/rank/IqiyiRankUtil.java @@ -1,37 +1,69 @@ package com.yeshi.buwan.util.rank; +import com.yeshi.buwan.util.HttpUtil; +import net.sf.json.JSONArray; +import net.sf.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; public class IqiyiRankUtil { public static Map<String, List<String>> getRank(int count) throws IOException { - Document doc = Jsoup.connect("https://www.iqiyi.com/ranks/hotsearch").timeout(20000).get(); - Element root = doc.getElementsByClass("qy-reso-card-list").get(0); - Elements items = root.getElementsByClass("qy-reso-card-item"); + Document doc = Jsoup.connect("https://www.iqiyi.com/ranks1PCW/home").timeout(20000).get(); + Element root = doc.getElementsByClass("gc__page").get(0); + Elements items = root.children(); Map<String, List<String>> map = new HashMap<>(); for (int i = 0; i < items.size(); i++) { - Element item = items.get(i); - String title = item.getElementsByClass("head-txt").get(0).text(); - List<String> list = new ArrayList<>(); - Elements names = item.getElementsByClass("sum-li"); - for (int j = 0; j < names.size(); j++) { - Element name = names.get(j); - String itemTitle = name.getElementsByTag("a").attr("title"); - list.add(itemTitle); - if (list.size() >= count) - break; + Element item = root.child(i); + if (item.hasClass("gc__tl1")) { + List<String> totalRanks = new ArrayList<>(); + Elements cols = item.getElementsByClass("gc__grid").get(0).getElementsByClass("gc__col"); + for (Iterator<Element> its = cols.iterator(); its.hasNext(); ) { + Element col = its.next(); + Elements names = col.getElementsByTag("a"); + for (Iterator<Element> its1 = names.iterator(); its1.hasNext(); ) { + Element videoItem = its1.next(); + String name = videoItem.getElementsByClass("rvi__tit1").attr("title"); + totalRanks.add(name); + } + } + if(totalRanks.size()>count){ + totalRanks = totalRanks.subList(0,10); + } + map.put("鎬绘", totalRanks); } - map.put(title, list); + } + String url = "https://mesh.if.iqiyi.com/portal/pcw/rankList/comRankList?v=1&device=00d26e3cdde103b885d820f1545bd66a&auth=&uid=&ip=202.108.14.240&refresh=0&server=false"; + String result = HttpUtil.get(url); + JSONObject resultJSON = JSONObject.fromObject(result); + if (resultJSON.optInt("code") == 0) { + JSONArray typesItem = resultJSON.optJSONObject("data").optJSONArray("items"); + for (int i = 0; i < typesItem.size(); i++) { + String type = typesItem.optJSONObject(i).optString("name"); + if(type.contains("鏇村")){ + continue; + } + List<String> tempList = new ArrayList<>(); + JSONArray cards = typesItem.optJSONObject(i).optJSONArray("cards"); + if (cards != null && cards.size() > 0) { + cards = cards.optJSONObject(0).optJSONArray("contents"); + for (int c = 0; c < cards.size(); c++) { + String title = cards.optJSONObject(c).optString("title"); + tempList.add(title); + } + if(tempList.size()>count){ + tempList = tempList.subList(0,10); + } + map.put(type.replace("姒�",""),tempList); + } + + } } return map; -- Gitblit v1.8.0