From ab35ac8b769b2d9816dffb33a64f2c6f7bd5dd6e Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期四, 05 九月 2024 17:05:55 +0800 Subject: [PATCH] 风行网页版爬虫 --- src/main/java/com/yeshi/buwan/service/imp/juhe/TencentVideoServiceImpl.java | 109 +++++++++++++++++++++++++++++++++++++++++++----------- 1 files changed, 86 insertions(+), 23 deletions(-) diff --git a/src/main/java/com/yeshi/buwan/service/imp/juhe/TencentVideoServiceImpl.java b/src/main/java/com/yeshi/buwan/service/imp/juhe/TencentVideoServiceImpl.java index 57bdb07..cfeabd4 100644 --- a/src/main/java/com/yeshi/buwan/service/imp/juhe/TencentVideoServiceImpl.java +++ b/src/main/java/com/yeshi/buwan/service/imp/juhe/TencentVideoServiceImpl.java @@ -7,20 +7,23 @@ import com.yeshi.buwan.domain.video.InternetSearchVideo; import com.yeshi.buwan.service.inter.juhe.InternetSearchVideoService; import com.yeshi.buwan.service.inter.juhe.TencentVideoService; +import com.yeshi.buwan.util.TimeUtil; +import com.yeshi.buwan.util.video.web.TencentWebUtil; +import com.yeshi.buwan.videos.tencent.TencentVideoApiUtil; import com.yeshi.buwan.videos.tencent.TencentVideoUtil; import com.yeshi.buwan.videos.tencent.entity.TencentCoverInfo; import com.yeshi.buwan.videos.tencent.entity.TencentCoverVideo; import com.yeshi.buwan.videos.tencent.entity.TencentSearchVideoMap; import com.yeshi.buwan.util.factory.InternetSearchVideoFactory; +import com.yeshi.buwan.videos.tencent.vo.TencentCoverInfoVO; import org.springframework.data.domain.Sort; import org.springframework.data.mongodb.core.query.Criteria; import org.springframework.data.mongodb.core.query.Query; +import org.springframework.data.mongodb.core.query.Update; import org.springframework.stereotype.Service; import javax.annotation.Resource; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; +import java.util.*; @Service public class TencentVideoServiceImpl implements TencentVideoService { @@ -37,20 +40,6 @@ @Override public void save(TencentCoverInfo 
detail) throws Exception { - List<TencentCoverVideo> videoList = new ArrayList<>(); - for (TencentCoverInfo.VipIdsBean idsBean : detail.getVip_ids()) { - TencentCoverVideo video = new TencentCoverVideo(); - video.setCoverId(detail.getCover_id()); - video.setPosition(idsBean.getP()); - video.setStage(idsBean.getP() + 1); - video.setVideoId(idsBean.getV()); - video.setLink(String.format("https://v.qq.com/x/cover/%s/%s.html", detail.getCover_id(), idsBean.getV())); - if (idsBean.getF() == 2 || idsBean.getF() == 7) - videoList.add(video); - } - detail.setVideoList(videoList); - - if (detail.getUpdateTime() == null) detail.setUpdateTime(new Date()); tencentCoverInfoDao.save(detail); @@ -59,7 +48,6 @@ if (video.getUpdateTime() == null) { video.setUpdateTime(new Date()); } - tencentCoverVideoDao.save(video); } } @@ -71,7 +59,7 @@ if (detail != null) { query.addCriteria(Criteria.where("coverId").is(coverId)); } - query.with(new Sort(new Sort.Order(Sort.Direction.ASC, "show_videoseq"))); + query.with(new Sort(new Sort.Order(Sort.Direction.ASC, "position"))); List<TencentCoverVideo> videoList = tencentCoverVideoDao.findList(query); detail.setVideoList(videoList); return detail; @@ -79,7 +67,10 @@ @Override public TencentCoverInfo getSimpleCoverDetail(String coverId) { - TencentCoverInfo detail = tencentCoverInfoDao.get(coverId); + Query query=new Query(); + query.addCriteria(Criteria.where("_id").is(coverId)); + + TencentCoverInfo detail = tencentCoverInfoDao.findOne(query); return detail; } @@ -135,17 +126,89 @@ } @Override - public void addToInternetSearch(TencentCoverInfo showDetail) throws Exception { - save(showDetail); + public void addToInternetSearch(TencentCoverInfo showDetail, boolean saveCoverInfo) throws Exception { + if(saveCoverInfo) { + save(showDetail); + } InternetSearchVideo video = InternetSearchVideoFactory.create(showDetail); if (internetSearchVideoService.save(video) == null) { return; } TencentSearchVideoMap map = new TencentSearchVideoMap(); - 
map.setCoverId(showDetail.getId()); + map.setCoverId(showDetail.getCover_id()); map.setVideoId(video.getId()); map.setCreateTime(new Date()); tencentSearchVideoMapDao.save(map); } + /** + * @author hxh + * @description 从全网搜移除 + * @date 18:27 2024/8/16 + * @param: coverId + * @return void + **/ + private void removeFromInternetSearch(String coverId){ + Query query=new Query(); + query.addCriteria(Criteria.where("coverId").is(coverId)); + List<TencentSearchVideoMap> mapList = tencentSearchVideoMapDao.findList(query); + // 删除map + for(TencentSearchVideoMap map:mapList){ + tencentSearchVideoMapDao.deleteByPrimaryKey(map.getVideoId()); + internetSearchVideoService.removeResourceId( map.getVideoId(), TencentVideoUtil.RESOURCE_ID); + } + } + + @Override + public void clearOfflineCovers() { + // 拉取长期没有更新的专辑 + Query query=new Query(); + query.addCriteria(Criteria.where("updateTime").lte(new Date(System.currentTimeMillis()-1000*60*60*24L*5))); + query.with(new Sort(new Sort.Order(Sort.Direction.ASC, "updateTime"))); + query.skip(0); + query.limit(100); + List<TencentCoverInfo> list = tencentCoverInfoDao.findList(query); + for(TencentCoverInfo coverInfo:list){ + try { + boolean onLine = TencentVideoApiUtil.isOnLine(coverInfo.getVideoList().get(0).getLink()); + if(onLine){ + Query updateQuery=new Query(); + updateQuery.addCriteria(Criteria.where("_id").is(coverInfo.getCover_id())); + Update update=new Update(); + update.set("updateTime",new Date()); + tencentCoverInfoDao.update(updateQuery, update); + }else{ + // 删除数据 + Query updateQuery=new Query(); + updateQuery.addCriteria(Criteria.where("_id").is(coverInfo.getCover_id())); + tencentCoverInfoDao.delete(updateQuery); + } + Thread.sleep(1000); + } catch (Exception e) { + e.printStackTrace(); + } + } + // 删除全网搜里面此来源的视频已经被删除 + query = new Query(); + long count = tencentSearchVideoMapDao.count(query); + int pageSize = 100; + int page =(int) (count%pageSize==0?count/pageSize:count/pageSize+1); + Set<String> 
deletedCoverIds = new HashSet<>(); + for(int i=0;i<page;i++){ + query.skip(i*pageSize); + query.limit(pageSize); + List<TencentSearchVideoMap> mapList = tencentSearchVideoMapDao.findList(query); + for(TencentSearchVideoMap map:mapList){ + Query cquery=new Query(); + cquery.addCriteria(Criteria.where("_id").is(map.getCoverId())); + if(tencentCoverInfoDao.findOne(cquery)==null){ + deletedCoverIds.add(map.getCoverId()); + removeFromInternetSearch(map.getCoverId()); + tencentSearchVideoMapDao.deleteByPrimaryKey(map.getVideoId()); + } + } + } + System.out.println(deletedCoverIds); + } + } -- Gitblit v1.8.0