From ab35ac8b769b2d9816dffb33a64f2c6f7bd5dd6e Mon Sep 17 00:00:00 2001
From: admin <weikou2014>
Date: 星期四, 05 九月 2024 17:05:55 +0800
Subject: [PATCH] 风行网页版爬虫

---
 src/main/java/com/yeshi/buwan/util/video/web/TencentWebUtil.java |  170 +++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 100 insertions(+), 70 deletions(-)

diff --git a/src/main/java/com/yeshi/buwan/util/video/web/TencentWebUtil.java b/src/main/java/com/yeshi/buwan/util/video/web/TencentWebUtil.java
index 1f7f87f..865ae67 100644
--- a/src/main/java/com/yeshi/buwan/util/video/web/TencentWebUtil.java
+++ b/src/main/java/com/yeshi/buwan/util/video/web/TencentWebUtil.java
@@ -1,10 +1,6 @@
 package com.yeshi.buwan.util.video.web;
 
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-import org.yeshi.utils.HttpUtil;
+import net.sf.json.JSONObject;
 import org.yeshi.utils.StringUtil;
 
 import java.io.UnsupportedEncodingException;
@@ -13,12 +9,29 @@
 
 public class TencentWebUtil {
 
+    public static final Map<String, Integer> CHANNEL_ID_MAP = new HashMap<>(); // channel name -> channel id
+    static {
+        CHANNEL_ID_MAP.put("电影", 100173);   // movies
+        CHANNEL_ID_MAP.put("电视剧", 100113); // TV series
+        CHANNEL_ID_MAP.put("动漫", 100119);   // anime
+    }
+
     public static class TencentWebVideoInfo {
         private String playUrl;
         private String id;
         private String title;
         private String picture;
         private String duration;
+        private String tag;
+        private String epsodePubtime;
+
+        public String getEpsodePubtime() {
+            return epsodePubtime;
+        }
+
+        public void setEpsodePubtime(String epsodePubtime) {
+            this.epsodePubtime = epsodePubtime;
+        }
 
         public String getPlayUrl() {
             return playUrl;
@@ -59,6 +72,14 @@
         public void setDuration(String duration) {
             this.duration = duration;
         }
+
+        public String getTag() {
+            return tag;
+        }
+
+        public void setTag(String tag) {
+            this.tag = tag;
+        }
     }
 
 
@@ -83,75 +104,57 @@
     /**
      * 获取短视频列表
      *
-     * @param params
-     * @param page
+     * @param channelId channel to list: 100173=电影 (movies), 100113=电视剧 (TV series), 100119=动漫 (anime)
+     * @param pageIndex page number written into the request's page_index and page fields
+     * @param areaId    area id appended to filter_params as "&iarea=..."; null leaves the filter as "sort=75"
      * @return
      * @throws Exception
      */
-    public static List<TencentWebVideoInfo> getVideoList(Map<String, String> params, int page) throws Exception {
-        if (params == null)
-            throw new Exception("参数为空");
-        int pageSize = 30;
-        params.put("append", "1");
-        params.put("listpage", page + "");
-        params.put("offset", (page - 1) * pageSize + "");
-        params.put("pagesize", pageSize + "");
-
-        String url = "https://v.qq.com/x/bu/pagesheet/list";
+    public static List<TencentWebVideoInfo> getVideoList(int channelId, int pageIndex, Integer areaId) throws Exception {
+        List<TencentWebVideoInfo> videoList = new ArrayList<>();
         Map<String, String> headers = new HashMap<>();
-        headers.put("referer", "https://v.qq.com/channel/ent");
-        headers.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36");
-        String result = HttpUtil.get(url, params, headers);
-        Document document = Jsoup.parse(result);
-        Elements els = document.getElementsByClass("list_item");
-        return parseVideoList(els);
-    }
-
-
-    public static List<TencentWebVideoInfo> getVideoList(String url) throws Exception {
-        Map<String, String> headers = new HashMap<>();
-        headers.put("referer", "https://v.qq.com/channel/ent");
-        headers.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.164 Safari/537.36");
-        String result = HttpUtil.get(url, new HashMap<>(), headers);
-        Document document = Jsoup.parse(result);
-        Elements els = document.getElementsByClass("list_item");
-        return parseVideoList(els);
-    }
-
-    private static List<TencentWebVideoInfo> parseVideoList(Elements els) throws UnsupportedEncodingException {
-        List<TencentWebVideoInfo> list = new ArrayList<>();
-        for (int i = 0; i < els.size(); i++) {
-            Element ele = els.get(i);
-            String href = ele.getElementsByTag("a").get(0).attr("href");
-            String id = ele.getElementsByTag("a").get(0).attr("data-float");
-            String title = ele.getElementsByTag("a").get(0).attr("title");
-            title = new String(title.getBytes("ISO-8859-1"), "UTF-8");
-            String picture = ele.getElementsByTag("img").get(0).attr("src");
-            picture = picture.startsWith("http") ? picture : "https:" + picture;
-            String duration = null;
-            try {
-                duration = ele.getElementsByClass("figure_caption").get(0).ownText();
-                duration = duration.trim();
-                for (int j = 0; j < duration.length(); j++) {
-                    char ca = duration.charAt(j);
-                    if (!(ca >= 48 && ca < 59)) {
-                        duration = null;
-                        break;
-                    }
-                }
-                System.out.println(duration);
-            } catch (Exception e) {
-            }
-            TencentWebVideoInfo videoInfo = new TencentWebVideoInfo();
-            videoInfo.setDuration(duration);
-            videoInfo.setId(id);
-            videoInfo.setPicture(picture);
-            videoInfo.setPlayUrl(href);
-            videoInfo.setTitle(title);
-            list.add(videoInfo);
+        headers.put("Content-Type", "application/json; charset=utf-8");
+        headers.put("Referer", "https://v.qq.com/");
+        // Fixed request template; its paging, channel and (optionally) area fields are overwritten below.
+        String text =
+                "{\"page_context\":{\"page_index\":\"1\"},\"page_params\":{\"page_id\":\"channel_list_second_page\",\"page_type\":\"operation\",\"channel_id\":\"100173\",\"filter_params\":\"sort=75\",\"page\":\"1\",\"new_mark_label_enabled\":\"1\"},\"page_bypass_params\":{\"params\":{\"page_id\":\"channel_list_second_page\",\"page_type\":\"operation\",\"channel_id\":\"100173\",\"filter_params\":\"sort=75\",\"page\":\"1\",\"caller_id\":\"3000010\",\"platform_id\":\"2\",\"data_mode\":\"default\",\"user_mode\":\"default\"},\"scene\":\"operation\",\"abtest_bypass_id\":\"77fef11ab0ccd4ee\"}}";
+        JSONObject params = JSONObject.fromObject(text);
+        JSONObject pageParams = params.optJSONObject("page_params");
+        JSONObject bypassParams = params.optJSONObject("page_bypass_params").optJSONObject("params");
+        params.optJSONObject("page_context").put("page_index", pageIndex + "");
+        pageParams.put("channel_id", channelId + "");
+        pageParams.put("page", pageIndex + "");
+        bypassParams.put("page", pageIndex + "");
+        bypassParams.put("channel_id", channelId + "");
+        if (areaId != null) {
+            pageParams.put("filter_params", pageParams.get("filter_params") + "&iarea=" + areaId);
         }
-        return list;
+        String result = com.yeshi.buwan.util.HttpUtil.post("https://pbaccess.video.qq.com/trpc.vector_layout.page_view.PageService/getPage?video_appid=3000010", params.toString(), headers);
+        com.alibaba.fastjson.JSONObject resultJson = com.alibaba.fastjson.JSONObject.parseObject(result);
+        // Anything but ret == 0 with a CardList yields an empty list; null results/fields no longer NPE.
+        Integer ret = resultJson == null ? null : resultJson.getInteger("ret");
+        com.alibaba.fastjson.JSONObject data = (ret == null || ret != 0) ? null : resultJson.getJSONObject("data");
+        com.alibaba.fastjson.JSONArray cardList = data == null ? null : data.getJSONArray("CardList");
+        for (int i = 0; cardList != null && i < cardList.size(); i++) {
+            com.alibaba.fastjson.JSONObject card = cardList.getJSONObject(i);
+            if (card == null || !"channel_list_poster".equalsIgnoreCase(card.getString("type"))) {
+                continue;
+            }
+            // Only the first poster card is read; its children_list.list.cards entries are the videos.
+            com.alibaba.fastjson.JSONArray posters = card.getJSONObject("children_list").getJSONObject("list").getJSONArray("cards");
+            for (int j = 0; j < posters.size(); j++) {
+                com.alibaba.fastjson.JSONObject poster = posters.getJSONObject(j);
+                if ("channel_list_poster".equalsIgnoreCase(poster.getString("type"))) {
+                    videoList.add(parseListItem(poster.getJSONObject("params")));
+                }
+            }
+            break;
+        }
+        return videoList;
     }
+
+
+
 
     public static String getApiUrl(String webUrl, int page) {
         Map<String, String> params = parseParams(webUrl);
@@ -174,9 +177,36 @@
         return url + "?" + StringUtil.concat(paramsList, "&");
     }
 
+    /** Builds a video entry from a poster card's "params" object; a missing "type" or "uni_imgtag" no longer throws. */
+    private static TencentWebVideoInfo parseListItem(com.alibaba.fastjson.JSONObject item) {
+        TencentWebVideoInfo videoInfo = new TencentWebVideoInfo();
+        videoInfo.setId(item.getString("cid"));
+        videoInfo.setPicture(item.getString("new_pic_vt"));
+        videoInfo.setPlayUrl(String.format("https://v.qq.com/x/cover/%s.html", videoInfo.getId()));
+        videoInfo.setTitle(item.getString("title"));
+        videoInfo.setDuration("");
+        videoInfo.setEpsodePubtime(item.getString("epsode_pubtime"));
+        Integer type = item.getInteger("type"); // kept boxed: an absent "type" must not NPE on unboxing
+        String imgTagText = item.getString("uni_imgtag");
+        if (type == null || type != 1) {
+            videoInfo.setTag(item.getString("timelong"));
+        } else {
+            videoInfo.setTag("9.0分"); // default, kept unless an image tag with id 28 is found below
+            JSONObject imgTag = (imgTagText == null || imgTagText.trim().isEmpty()) ? new JSONObject() : JSONObject.fromObject(imgTagText);
+            for (Object key : imgTag.keySet()) {
+                JSONObject imgTagItem = imgTag.optJSONObject(key.toString());
+                if (imgTagItem != null && imgTagItem.optInt("id") == 28) {
+                    videoInfo.setTag(imgTagItem.optString("text"));
+                }
+            }
+        }
+        return videoInfo;
+    }
 
     public static void main(String[] args) throws Exception {
-        List<TencentWebVideoInfo> videoInfos = getVideoList(parseParams("https://v.qq.com/channel/ent?_all=1&channel=ent&iarea=2&itype=-1&listpage=1&sort=40"), 1);
-        System.out.println(videoInfos);
+        // Manual check: requests page index 0 of channel 100173 with area filter 100028 and prints the result count.
+        List<TencentWebVideoInfo> list =  getVideoList(100173,0, 100028);
+        System.out.println(list.size());
+
     }
 }

--
Gitblit v1.8.0