From 4603536999de113969dd7164f3a04dcde9083480 Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期一, 31 三月 2025 13:19:42 +0800 Subject: [PATCH] 初始化导入 --- comment.py | 135 ++++++++++++++++++++++++++++++++------------- 1 files changed, 96 insertions(+), 39 deletions(-) diff --git a/comment.py b/comment.py index c0e93a2..cdbd18c 100644 --- a/comment.py +++ b/comment.py @@ -1,4 +1,3 @@ -import re import threading import time @@ -9,7 +8,9 @@ from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as EC +import comment_manager import setting +from video_manager import VideoManger class CommentManager: @@ -50,20 +51,19 @@ # 绛夊埌宸︿晶鑿滃崟鍑虹幇鍚庢墠鑳芥墽琛屽悗缁搷浣� wait = WebDriverWait(self.driver, 100) # 鏈�澶氱瓑寰�10绉� element = wait.until(EC.visibility_of_element_located((By.ID, "side-bar"))) - threading.Thread(target=lambda: self.click_like(self.driver), daemon=True).start() + threading.Thread(target=lambda: self.process_videos(self.driver), daemon=True).start() - def __process_like(self, comment_content, comment_element): + def __process_like(self, video_name, video_date, comment_element): """ 澶勭悊鍗曟潯璇勮鐨勮禐 :param comment_content: :param comment_element: :return: """ - comment_pattern = re.compile(r'[\u4e00-\u9fa5]+') - comment_content = re.sub("<img.*?>", "", comment_content) - if not comment_pattern.search(comment_content): - # 绗﹀悎鏍囧噯鐨勮瘎璁� + user_name, comment_content, comment_time = self.__parse_comment_info(comment_element) + if not comment_manager.is_need_click_like(video_name, video_date, user_name, comment_content): return + comment_actions = comment_element.find_element(By.CLASS_NAME, "action-list").find_elements(By.CLASS_NAME, "action-item") classes: str = comment_actions[0].find_element(By.TAG_NAME, "svg").get_attribute("class") @@ -72,50 +72,73 @@ return like = comment_element.find_element(By.CLASS_NAME, "like-action") like.click() - self.driver.implicitly_wait(2) + # self.driver.implicitly_wait(2) + time.sleep(2) - def __process_reply(self, comment_content, comment_element): + def __parse_comment_info(self, comment_element): """ - 澶勭悊鍗曟潯璇勮鐨勫洖澶� - :param comment_content: + 瑙f瀽璇勮淇℃伅 :param comment_element: - :return: + :return:(璇勮浜�,璇勮鍐呭,璇勮鏃堕棿) + """ + user_name = comment_element.find_element(By.CLASS_NAME, "comment-user-name").get_attribute("innerHTML") + comment_time = comment_element.find_element(By.CLASS_NAME, "comment-time").get_attribute("innerHTML") + comment_content = comment_element.find_element(By.CLASS_NAME, "comment-content").get_attribute("innerHTML") + return user_name, comment_content, comment_time + + def __parse_video_info(self, video_element): + """ + 瑙f瀽瑙嗛淇℃伅 + :param comment_element: + :return:(瑙嗛鍚嶇О,瑙嗛鏃堕棿,璇勮鏁伴噺) + """ + video_time = video_element.find_element(By.CLASS_NAME, "feed-time").get_attribute("innerHTML") + comment_count = video_element.find_element(By.CLASS_NAME, "feed-comment-total").find_element(By.XPATH, + "./span[2]").get_attribute( + "innerHTML") + video_name = video_element.find_element(By.CLASS_NAME, "comment-content").get_attribute("innerHTML") + return video_name, video_time, comment_count + + def __process_reply(self, video_name, video_date, comment_element): + """ + 澶勭悊鍗曟潯璇勮鍥炲 + :param video_name: + :param video_date: + :param comment_element: + :return: 鏄惁闇�瑕佺偣璧� """ def get_reply_comment(nick_name, content): if not setting.is_reply_comment(): return None # 鍐呭鏄惁绗﹀悎鏍囧噯 - for t in self.comment_templates: - if re.match(t[0], content): - # 婊¤冻鍐呭 - return t[1].replace("[鏄电О]", nick_name) + return comment_manager.get_replay_content(video_name, video_date, nick_name, content) - comment_pattern = re.compile(r'[\u4e00-\u9fa5]+') - comment_content = re.sub("<img.*?>", "", comment_content) - if not comment_pattern.search(comment_content): - # 绗﹀悎鏍囧噯鐨勮瘎璁� - return + comment_user_name, comment_content, comment_time = self.__parse_comment_info(comment_element) + try: comment_reply_list = comment_element.find_element(By.CLASS_NAME, "comment-reply-list") if comment_reply_list.find_element(By.XPATH, ".//div[normalize-space()='浣滆��']"): print("宸茬粡鍥炲銆傘�傘�傘��") - return + return False except: print("杩樻病鍥炲銆傘�傘�傘��") + + replay_content = get_reply_comment(comment_user_name, comment_element) + if not replay_content: + # 涓嶉渶瑕佽瘎璁� + print("涓嶉渶瑕佸洖澶嶃�傘�傘�傘��") + return True + + # 闇�瑕佸洖澶� comment_actions = comment_element.find_element(By.CLASS_NAME, "action-list").find_elements(By.CLASS_NAME, "action-item") # 鐐瑰嚮璇勮 comment_actions[1].click() wait = WebDriverWait(self.driver, 5) # 鏈�澶氱瓑寰�10绉� - element = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "comment-create-content"))) - # TODO 鏄电О - replay_content = get_reply_comment("鏄电О", comment_element) - if not replay_content: - # 涓嶉渶瑕佽瘎璁� - return - + wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "comment-create-content"))) + time.sleep(1) self.driver.find_element(By.CLASS_NAME, "comment-create-content").find_element(By.TAG_NAME, "textarea").send_keys(replay_content) @@ -123,8 +146,16 @@ self.driver.find_element(By.CLASS_NAME, "comment-create-content").find_element(By.XPATH, "div[3]/div[2]").click() time.sleep(2) + return False - def __click_like_all(self, driver, start_index=0): + def __process_comments(self, driver, start_index=0, video_info=None): + """ + + :param driver: + :param start_index: + :param video_info:(瑙嗛鍐呭, 瑙嗛鏃ユ湡, 璇勮娆℃暟) + :return: + """ scroll_list = driver.find_element(By.CLASS_NAME, "feed-comment__wrp") loadmore = scroll_list.find_element(By.CLASS_NAME, "loadmore__dot") @@ -137,34 +168,60 @@ for index in range(start_index, len(comments)): comment = comments[index] # 璇勮鍐呭 - comment_content = comment.find_element(By.CLASS_NAME, "comment-content").get_attribute("innerHTML") + user_name, comment_content, comment_time = self.__parse_comment_info(comment) if not comment_content: continue print("=======璇勮鍐呭锛�", comment_content) - self.__process_reply(comment_content, comment) - self.__process_like(comment_content, comment) + need_like = self.__process_reply(video_info[0], video_info[1], comment) + if need_like: + print("闇�瑕佺偣璧�") + self.__process_like(video_info[0], video_info[1], comment) + else: + print("涓嶉渶瑕佺偣璧�") # 寰�涓嬫粦鍔� if has_more: driver.execute_script( "var __comment_scroll = document.getElementsByClassName('feed-comment__wrp')[0]; __comment_scroll.scrollTop = __comment_scroll.scrollHeight;") print("寰�涓嬫粴鍔�") time.sleep(3) - self.__click_like_all(driver, start_index=len(comments)) + self.__process_comments(driver, start_index=len(comments)) else: - print("娌℃湁鏇村浜�") + print("娌℃湁鏇村璇勮浜�") - def click_like(self, driver: webdriver.Chrome): + def process_videos(self, driver: webdriver.Chrome, start_index=0): wait = WebDriverWait(driver, 100) # 鏈�澶氱瓑寰�10绉� - element = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "feeds-container"))) + scroll_list = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "feeds-container"))) time.sleep(1) # 鏌ユ壘瑙嗛鍒楄〃 videos_root = driver.find_element(By.CLASS_NAME, "feeds-container") videos = videos_root.find_elements(By.CLASS_NAME, "comment-feed-wrap") - for video in videos: + for i in range(start_index, len(videos)): # 閫夋嫨瑙嗛 + video = videos[i] + # 瑙f瀽瑙嗛鍐呭 + video_name, video_time, comment_count = self.__parse_video_info(video) + # 鍒ゆ柇鏄惁瑕佺偣鍑昏繘鍘� + if not VideoManger().is_need_click(video_name, video_time, comment_count): + continue video.click() driver.implicitly_wait(2) - self.__click_like_all(driver) + self.__process_comments(driver, video_info=(video_name, video_time, comment_count)) + # 5s澶勭悊涓�涓棰� + time.sleep(5) + + loadmore = scroll_list.find_element(By.CLASS_NAME, "loadmore__dot") + # 鑾峰彇鐖舵帶浠� + loadmore = loadmore.find_element(By.XPATH, "./..") + has_more = loadmore.value_of_css_property("display") == "none" + if has_more: + driver.execute_script( + "var __video_scroll = document.getElementsByClassName('feeds-container')[0]; __video_scroll.scrollTop = __video_scroll.scrollHeight;") + print("寰�涓嬫粴鍔�") + time.sleep(3) + self.process_videos(driver, start_index=len(videos)) + else: + print("娌℃湁鏇村瑙嗛浜�") + VideoManger().add_video_infos([self.__parse_video_info(v) for v in videos]) def close(self): if self.driver: -- Gitblit v1.8.0