From ab35ac8b769b2d9816dffb33a64f2c6f7bd5dd6e Mon Sep 17 00:00:00 2001
From: admin <weikou2014>
Date: 星期四, 05 九月 2024 17:05:55 +0800
Subject: [PATCH] 风行网页版爬虫

---
 src/test/java/com/hxh/spring/test/video/HanmiTest.java |  148 +++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 125 insertions(+), 23 deletions(-)

diff --git a/src/test/java/com/hxh/spring/test/video/HanmiTest.java b/src/test/java/com/hxh/spring/test/video/HanmiTest.java
index 4bd841d..37d4d4b 100644
--- a/src/test/java/com/hxh/spring/test/video/HanmiTest.java
+++ b/src/test/java/com/hxh/spring/test/video/HanmiTest.java
@@ -1,14 +1,27 @@
 package com.hxh.spring.test.video;
 
 
+import com.yeshi.buwan.dao.VideoInfoDao;
 import com.yeshi.buwan.dao.juhe.hanmi.HanmiShowDao;
 import com.yeshi.buwan.dao.juhe.hanmi.HanmiShowEpisodeDao;
+import com.yeshi.buwan.dao.juhe.hanmi.VideoHanmiMapDao;
+import com.yeshi.buwan.dao.juhe.youku.InternetSearchVideoDao;
+import com.yeshi.buwan.domain.ResourceVideo;
+import com.yeshi.buwan.domain.video.InternetSearchVideo;
+import com.yeshi.buwan.service.imp.ResourceVideoService;
+import com.yeshi.buwan.service.imp.VideoInfoService;
+import com.yeshi.buwan.service.imp.VideoService;
 import com.yeshi.buwan.service.inter.juhe.HanmiService;
+import com.yeshi.buwan.service.inter.juhe.InternetSearchVideoService;
 import com.yeshi.buwan.videos.hanmi.HanmiApiUtil;
 import com.yeshi.buwan.videos.hanmi.entity.HanmiShow;
 import com.yeshi.buwan.videos.hanmi.entity.HanmiShowEpisode;
+import com.yeshi.buwan.videos.hanmi.entity.VideoHanmiMap;
 import org.junit.Test;
 import org.junit.runner.RunWith;
+import org.springframework.data.mongodb.core.query.Criteria;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.data.mongodb.core.query.Update;
 import org.springframework.test.context.ContextConfiguration;
 import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
 import org.springframework.test.context.web.WebAppConfiguration;
@@ -33,28 +46,69 @@
     @Resource
     private HanmiShowDao hanmiShowDao;
 
+    // List-page URL templates; updateShow substitutes the page number for the "{页码}" placeholder.
+    final String MOVIE_URL = "https://www.wztaichuan.com/vod/show/area/%E9%9F%A9%E5%9B%BD/id/1/page/{页码}.html";
+    final String SHOW_URL = "https://www.wztaichuan.com/vod/show/area/%E9%9F%A9%E5%9B%BD/id/3/page/{页码}.html";
+    final String TV_URL = "https://www.wztaichuan.com/vod/type/id/5/page/{页码}.html";
+
+    /**
+     * Fetches one list page, parses every show on it, labels each show by list kind and saves it.
+     * @param listUrl URL template containing the "{页码}" page placeholder
+     * @param page    page number substituted for the placeholder
+     * @return the shows parsed from that page
+     */
+    private List<HanmiShow> updateShow(String listUrl, int page) throws Exception {
+        // The kind depends only on the template, so resolve it once; null means an unknown list and leaves the type untouched.
+        String type = listUrl.contains("/id/1/") ? "电影" : listUrl.contains("/id/3/") ? "综艺" : listUrl.contains("/id/5/") ? "电视剧" : null;
+        List<HanmiShow> showList = HanmiApiUtil.parseDetailList(HanmiApiUtil.parseList(listUrl.replace("{页码}", String.valueOf(page))));
+        for (HanmiShow show : showList) {
+            if (type != null) show.setType(type);
+            hanmiService.saveShow(show);
+        }
+        return showList;
+    }
+    /** Crawls up to 19 list pages for each of MOVIE_URL, SHOW_URL and TV_URL through updateShow. */
+    @Test
+    public void updateLatest() throws Exception {
+        String[] urls = {MOVIE_URL, SHOW_URL, TV_URL};
+        for (String url : urls) {
+            for (int page = 1; page < 20; page++) {
+                // println, not printf: the text is data rather than a format string, and each marker needs its own line.
+                System.out.println(page + "==========================");
+                List<HanmiShow> showList = updateShow(url, page);
+                // A short page ends this list. NOTE(review): 10 is presumably the site's page size — confirm.
+                if (showList.size() < 10) {
+                    break;
+                }
+            }
+        }
+    }
+
+    /** Crawls list pages 1..29 of SHOW_URL through updateShow, stopping at the first page that yields fewer than 10 shows. */
     @Test
     public void addHanJu() throws Exception {
-        for (int i = 1; i < 3; i++) {
-            List<HanmiShow> showList = HanmiApiUtil.parseDetailList(HanmiApiUtil.parseList("https://www.hmtv.me/hanju/page/" + i));
-            for (HanmiShow show : showList) {
-                hanmiService.saveShow(show);
-//                hanmiService.deleteByShowId(show.getId());
+        for (int i = 1; i < 30; i++) {
+            List<HanmiShow> showList =   updateShow(SHOW_URL,i); // NOTE(review): name suggests dramas, but SHOW_URL is the /id/3/ list and TV_URL is /id/5/ — confirm which was intended
+            if (showList.size() < 10) {
+                break;
             }
         }
     }
 
     @Test
     public void addToVideo() throws IOException {
-
-        for (int i = 1; i < 3; i++) {
-            List<HanmiShow> showList = HanmiApiUtil.parseList("https://www.hmtv.me/hanju/page/" + i);
-            Collections.reverse(showList);
-            for (HanmiShow show : showList) {
-                show.setId(show.getUrl().replace("/show/", "").trim());
-                show = hanmiService.getShowDetail(show.getId());
-                if (show != null)
-                    hanmiService.addToVideoInfo(show);
+        // Walks all stored shows in pages of 100 (page numbers 1..totalPage) and adds each one to internet search.
+        int pageSize = 100;
+        int totalPage = (int) ((hanmiService.countAll() + pageSize - 1) / pageSize);
+        for (int page = 1; page <= totalPage; page++) {
+            for (HanmiShow show : hanmiService.listAll(page, pageSize)) {
+                try {
+                    HanmiShow detail = hanmiService.getShowDetail(show.getId());
+                    // Skip shows whose detail lookup yields null instead of handing null to addToInternetSearch.
+                    if (detail != null) hanmiService.addToInternetSearch(detail);
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
             }
         }
     }
@@ -74,29 +128,77 @@
             show = hanmiService.getShowDetail(show.getId());
             show.setType("闊╁墽");
             if (show != null)
-                hanmiService.addToVideoInfo(show);
+                hanmiService.addToInternetSearch(show);
         }
 
     }
 
+    @Resource
+    private VideoHanmiMapDao videoHanmiMapDao;
+
+    @Resource
+    private InternetSearchVideoDao internetSearchVideoDao;
+
+    @Resource
+    private ResourceVideoService resourceVideoService;
+
+    @Resource
+    private VideoInfoDao videoInfoDao;
+
     @Test
     public void update() {
-        List<HanmiShow> list = hanmiShowDao.listAll(0, 100);
-        for (HanmiShow show : list) {
-            List<HanmiShowEpisode> epList = hanmiShowEpisodeDao.listByShowId(show.getId(), 0, 1);
-            if (epList != null && epList.size() > 0) {
+        // Re-parses stored shows (at most 100 batches of 100) and refreshes each one's cover, tag and episodes.
+        for (int i = 0; i < 100; i++) {
+            List<HanmiShow> list = hanmiShowDao.listAll(i * 100, 100); // presumably (offset, count) — confirm
+            if (list == null || list.isEmpty()) break; // no more data: stop instead of issuing all 100 queries
+            for (HanmiShow show : list) {
                 try {
-                    List<HanmiShowEpisode> episodeList = HanmiApiUtil.getShowEpisodesFromPlayUrl(epList.get(0).getPlayUrl());
-                    if (episodeList != null)
-                        for (HanmiShowEpisode episode : episodeList) {
+                    show = HanmiApiUtil.parseShowDetail(show);
+                    if (show != null && show.getEpisodeList() != null) {
+                        // Refresh the cover picture of the mapped search video, when one exists.
+                        VideoHanmiMap map = videoHanmiMapDao.selectByShowId(show.getId());
+                        if (map != null) {
+                            InternetSearchVideo video = internetSearchVideoDao.get(map.getVideoId());
+                            if (video != null) {
+                                InternetSearchVideo coverPatch = new InternetSearchVideo();
+                                coverPatch.setId(video.getId());
+                                coverPatch.setVpicture(show.getPicture());
+                                internetSearchVideoDao.updateSelective(coverPatch);
+                            }
+                        }
+                        // Store the freshly parsed tag on the show document.
+                        Query showQuery = new Query(Criteria.where("_id").is(show.getId()));
+                        Update tagUpdate = new Update();
+                        tagUpdate.set("tag", show.getTag());
+                        hanmiShowDao.update(showQuery, tagUpdate);
+                        // Delete the previously stored episodes before saving the newly parsed ones.
+                        Query episodeQuery = new Query(Criteria.where("showId").is(show.getId()));
+                        hanmiShowEpisodeDao.delete(episodeQuery);
+                        for (HanmiShowEpisode episode : show.getEpisodeList()) {
                             episode.setShowId(show.getId());
                             episode.setId(HanmiShowEpisode.createId(episode.getShowId(), episode.getTag()));
                             episode.setCreateTime(new Date());
                             hanmiShowEpisodeDao.save(episode);
                         }
-                } catch (IOException e) {
+                    }
+                } catch (Exception e) {
                     e.printStackTrace();
                 }
+            }
+        }
+    }
+
+    /** Clears Hanmi data: unlinks the entries returned by listResourceVideo(28L, 1, 100) and deletes videos left with no other resource. NOTE(review): only that single call's results are processed — confirm one page is enough. */
+    @Test
+    public void deleteFromVideo() {
+        List<ResourceVideo> hanmiLinks = resourceVideoService.listResourceVideo(28L, 1, 100);
+        for (ResourceVideo link : hanmiLinks) {
+            // Fetched BEFORE unlinking, so a size below 2 means this link was the video's only resource.
+            List<ResourceVideo> siblings = resourceVideoService.getResourceList(link.getVideo().getId());
+            resourceVideoService.delete(link.getVideo().getId(), link.getResource().getId());
+            if (siblings.size() < 2) {
+                // Delete the video itself.
+                videoInfoDao.delete(link.getVideo());
             }
         }
     }

--
Gitblit v1.8.0