From ab35ac8b769b2d9816dffb33a64f2c6f7bd5dd6e Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期四, 05 九月 2024 17:05:55 +0800 Subject: [PATCH] 风行网页版爬虫 --- src/test/java/com/hxh/spring/test/video/HanmiTest.java | 148 +++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 125 insertions(+), 23 deletions(-) diff --git a/src/test/java/com/hxh/spring/test/video/HanmiTest.java b/src/test/java/com/hxh/spring/test/video/HanmiTest.java index 4bd841d..37d4d4b 100644 --- a/src/test/java/com/hxh/spring/test/video/HanmiTest.java +++ b/src/test/java/com/hxh/spring/test/video/HanmiTest.java @@ -1,14 +1,27 @@ package com.hxh.spring.test.video; +import com.yeshi.buwan.dao.VideoInfoDao; import com.yeshi.buwan.dao.juhe.hanmi.HanmiShowDao; import com.yeshi.buwan.dao.juhe.hanmi.HanmiShowEpisodeDao; +import com.yeshi.buwan.dao.juhe.hanmi.VideoHanmiMapDao; +import com.yeshi.buwan.dao.juhe.youku.InternetSearchVideoDao; +import com.yeshi.buwan.domain.ResourceVideo; +import com.yeshi.buwan.domain.video.InternetSearchVideo; +import com.yeshi.buwan.service.imp.ResourceVideoService; +import com.yeshi.buwan.service.imp.VideoInfoService; +import com.yeshi.buwan.service.imp.VideoService; import com.yeshi.buwan.service.inter.juhe.HanmiService; +import com.yeshi.buwan.service.inter.juhe.InternetSearchVideoService; import com.yeshi.buwan.videos.hanmi.HanmiApiUtil; import com.yeshi.buwan.videos.hanmi.entity.HanmiShow; import com.yeshi.buwan.videos.hanmi.entity.HanmiShowEpisode; +import com.yeshi.buwan.videos.hanmi.entity.VideoHanmiMap; import org.junit.Test; import org.junit.runner.RunWith; +import org.springframework.data.mongodb.core.query.Criteria; +import org.springframework.data.mongodb.core.query.Query; +import org.springframework.data.mongodb.core.query.Update; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import org.springframework.test.context.web.WebAppConfiguration; @@ -33,28 +46,69 @@ @Resource private HanmiShowDao hanmiShowDao; + final String MOVIE_URL = "https://www.wztaichuan.com/vod/show/area/%E9%9F%A9%E5%9B%BD/id/1/page/{椤电爜}.html"; + final String SHOW_URL = "https://www.wztaichuan.com/vod/show/area/%E9%9F%A9%E5%9B%BD/id/3/page/{椤电爜}.html"; + final String TV_URL = "https://www.wztaichuan.com/vod/type/id/5/page/{椤电爜}.html"; + + private List<HanmiShow> updateShow(String listUrl,int page) throws Exception { + List<HanmiShow> showList = HanmiApiUtil.parseDetailList(HanmiApiUtil.parseList(listUrl.replace("{椤电爜}",page+""))); + for (HanmiShow show : showList) { + if(listUrl.contains("/id/1/")){ + show.setType("鐢靛奖"); + }else if(listUrl.contains("/id/3/")){ + show.setType("缁艰壓"); + }else if(listUrl.contains("/id/5/")){ + show.setType("鐢佃鍓�"); + } + hanmiService.saveShow(show); + } + return showList; + } + + + + @Test + public void updateLatest() throws Exception { + String[] urls =new String[]{MOVIE_URL,SHOW_URL,TV_URL}; + for(String url: urls) { + for (int i = 1; i < 20; i++) { + System.out.printf(i + "=========================="); + // 鐢靛奖 https://www.wztaichuan.com/vod/show/area/%E9%9F%A9%E5%9B%BD/id/1/page/2.html + // 缁艰壓 https://www.wztaichuan.com/vod/show/area/%E9%9F%A9%E5%9B%BD/id/3/page/2.html + // 闊╁墽 https://www.wztaichuan.com/vod/type/id/5/page/2.html + List<HanmiShow> showList = updateShow(url,i); + if (showList.size() < 10) { + break; + } + } + } + } + + @Test public void addHanJu() throws Exception { - for (int i = 1; i < 3; i++) { - List<HanmiShow> showList = HanmiApiUtil.parseDetailList(HanmiApiUtil.parseList("https://www.hmtv.me/hanju/page/" + i)); - for (HanmiShow show : showList) { - hanmiService.saveShow(show); -// hanmiService.deleteByShowId(show.getId()); + for (int i = 1; i < 30; i++) { + List<HanmiShow> showList = updateShow(SHOW_URL,i); + if (showList.size() < 10) { + break; } } } @Test public void addToVideo() throws IOException { - - for (int i = 1; i < 3; i++) { - List<HanmiShow> showList = HanmiApiUtil.parseList("https://www.hmtv.me/hanju/page/" + i); - Collections.reverse(showList); - for (HanmiShow show : showList) { - show.setId(show.getUrl().replace("/show/", "").trim()); - show = hanmiService.getShowDetail(show.getId()); - if (show != null) - hanmiService.addToVideoInfo(show); + long count = hanmiService.countAll(); + int pageSize = 100; + int totalPage=(int) (count%pageSize ==0?count/pageSize:count/pageSize+1); + for(int i=0;i<totalPage;i++) { + List<HanmiShow> list = hanmiService.listAll(i + 1, pageSize); + for(HanmiShow show:list){ + try { + show = hanmiService.getShowDetail(show.getId()); + hanmiService.addToInternetSearch(show); + } catch (Exception e) { + e.printStackTrace(); + } } } } @@ -74,29 +128,77 @@ show = hanmiService.getShowDetail(show.getId()); show.setType("闊╁墽"); if (show != null) - hanmiService.addToVideoInfo(show); + hanmiService.addToInternetSearch(show); } } + @Resource + private VideoHanmiMapDao videoHanmiMapDao; + + @Resource + private InternetSearchVideoDao internetSearchVideoDao; + + @Resource + private ResourceVideoService resourceVideoService; + + @Resource + private VideoInfoDao videoInfoDao; + @Test public void update() { - List<HanmiShow> list = hanmiShowDao.listAll(0, 100); - for (HanmiShow show : list) { - List<HanmiShowEpisode> epList = hanmiShowEpisodeDao.listByShowId(show.getId(), 0, 1); - if (epList != null && epList.size() > 0) { + for (int i = 0; i < 100; i++) { + List<HanmiShow> list = hanmiShowDao.listAll(i*100, 100); + for (HanmiShow show : list) { try { - List<HanmiShowEpisode> episodeList = HanmiApiUtil.getShowEpisodesFromPlayUrl(epList.get(0).getPlayUrl()); - if (episodeList != null) - for (HanmiShowEpisode episode : episodeList) { + show = HanmiApiUtil.parseShowDetail(show); + if (show != null && show.getEpisodeList() != null) { + // 鏇存柊灏侀潰鍥剧墖 + VideoHanmiMap map = videoHanmiMapDao.selectByShowId(show.getId()); + if(map!=null){ + InternetSearchVideo video = internetSearchVideoDao.get(map.getVideoId()); + if(video!=null){ + InternetSearchVideo update = new InternetSearchVideo(); + update.setId(video.getId()); + update.setVpicture(show.getPicture()); + internetSearchVideoDao.updateSelective(update); + } + } + Query query = new Query(); + query.addCriteria(new Criteria().where("_id").is(show.getId())); + Update update = new Update(); + update.set("tag", show.getTag()); + hanmiShowDao.update(query, update); + query = new Query(); + query.addCriteria(new Criteria().where("showId").is(show.getId())); + hanmiShowEpisodeDao.delete(query); + // 鍒犻櫎鍘熸潵鐨勬暟鎹� + for (HanmiShowEpisode episode : show.getEpisodeList()) { episode.setShowId(show.getId()); episode.setId(HanmiShowEpisode.createId(episode.getShowId(), episode.getTag())); episode.setCreateTime(new Date()); hanmiShowEpisodeDao.save(episode); } - } catch (IOException e) { + } + } catch (Exception e) { e.printStackTrace(); } + + } + } + } + + + @Test + public void deleteFromVideo(){ + // 娓呴櫎闊╄糠鏁版嵁 + List<ResourceVideo> resourceVideos = resourceVideoService.listResourceVideo(28L,1,100 ); + for(ResourceVideo rv:resourceVideos){ + List<ResourceVideo> resourceList = resourceVideoService.getResourceList(rv.getVideo().getId()); + resourceVideoService.delete(rv.getVideo().getId(), rv.getResource().getId()); + if(resourceList.size()<2){ + // 鍒犻櫎瑙嗛 + videoInfoDao.delete(rv.getVideo()); } } } -- Gitblit v1.8.0