From 91b7ec2b67d74e4d2e41c857232414feb3cb7bfd Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期三, 02 四月 2025 18:32:10 +0800 Subject: [PATCH] 功能完善 --- comment.py | 239 ++++++++++++++++++++++++++++++++++++++++++++--------------- 1 files changed, 177 insertions(+), 62 deletions(-) diff --git a/comment.py b/comment.py index cc8a11c..78365b8 100644 --- a/comment.py +++ b/comment.py @@ -1,4 +1,6 @@ -import re +import json +import os +import random import threading import time @@ -8,6 +10,11 @@ from selenium.webdriver.common.by import By from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as EC + +import comment_manager +import constant +import setting +from video_manager import VideoManger class CommentManager: @@ -20,47 +27,72 @@ # driver = webdriver.Chrome(options=options) # 鍙﹀涓�绉嶆柟寮� # 璋锋瓕娴忚鍣ㄤ綅缃� - chrome_location = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe' + chrome_location = setting.get_chrome_path() # 璋锋瓕娴忚鍣ㄩ┍鍔ㄥ湴鍧� - chromedriver_path = r'chromedriver.exe' + chromedriver_path = setting.get_chromedriver_path() self.options.binary_location = chrome_location # 鎸囧畾chrome鐨勮矾寰� self.service = Service(chromedriver_path) # 鑾峰彇姝e垯琛ㄨ揪寮� comment_template_str = "" # 灏嗕腑鏂囨浛鎹负姝e垯琛ㄨ揪寮忔敮鎸佺殑unicode缂栫爜 comment_template_str = comment_template_str.encode('unicode_escape').decode("utf-8") - self.comment_templates = [(x.split("#")[0], x.split("#")[1]) if x.find("#")>=0 for x in comment_template_str.split("\n")] + self.comment_templates = [(x.split("#")[0], x.split("#")[1]) for x in comment_template_str.split("\n") if + x.find("#") >= 0] self.driver = None + self.break_excute = False # 涓柇鎵ц + + def break_excute(self): + """ + 涓柇鎵ц + :return: + """ + print("=====涓柇鎵ц=====") + self.break_excute = True def __init(self): + self.break_excute = False if not self.driver: self.driver = webdriver.Chrome(service=self.service, options=self.options) + self.driver.get( + "https://channels.weixin.qq.com/platform/comment?isImageMode=0") + else: + self.driver.refresh() def start_process_comment(self): """ 寮�濮嬪鐞嗚瘎璁� :return: """ - self.__init() - self.driver.get( - "https://channels.weixin.qq.com/platform/comment?isImageMode=0") - # 绛夊埌宸︿晶鑿滃崟鍑虹幇鍚庢墠鑳芥墽琛屽悗缁搷浣� - wait = WebDriverWait(self.driver, 100) # 鏈�澶氱瓑寰�10绉� - element = wait.until(EC.visibility_of_element_located((By.ID, "side-bar"))) - threading.Thread(target=lambda: self.click_like(self.driver), daemon=True).start() + while True: + try: + self.__start_excute_task() + except: + pass + finally: + time.sleep(constant.REFRESH_TIME_SPACE) - def __process_like(self,comment_content, comment_element): + def __start_excute_task(self): + """ + 寮�濮嬫墽琛屼换鍔� + :return: + """ + self.__init() + # 绛夊埌宸︿晶鑿滃崟鍑虹幇鍚庢墠鑳芥墽琛屽悗缁搷浣� + wait = WebDriverWait(self.driver, 1000000) # 鏈�澶氱瓑寰�10绉� + element = wait.until(EC.visibility_of_element_located((By.ID, "side-bar"))) + self.process_videos(self.driver) + + def __process_like(self, video_name, video_date, comment_element): """ 澶勭悊鍗曟潯璇勮鐨勮禐 :param comment_content: :param comment_element: :return: """ - comment_pattern = re.compile(r'[\u4e00-\u9fa5]+') - comment_content = re.sub("<img.*?>", "", comment_content) - if not comment_pattern.search(comment_content): - # 绗﹀悎鏍囧噯鐨勮瘎璁� + user_name, comment_content, comment_time = self.__parse_comment_info(comment_element) + if not comment_manager.is_need_click_like(video_name, video_date, user_name, comment_content): return + comment_actions = comment_element.find_element(By.CLASS_NAME, "action-list").find_elements(By.CLASS_NAME, "action-item") classes: str = comment_actions[0].find_element(By.TAG_NAME, "svg").get_attribute("class") @@ -69,97 +101,168 @@ return like = comment_element.find_element(By.CLASS_NAME, "like-action") like.click() - self.driver.implicitly_wait(2) + # self.driver.implicitly_wait(2) + time.sleep(2) - def __process_reply(self, comment_content, comment_element): + def __parse_comment_info(self, comment_element): """ - 澶勭悊鍗曟潯璇勮鐨勫洖澶� - :param comment_content: + 瑙f瀽璇勮淇℃伅 :param comment_element: - :return: + :return:(璇勮浜�,璇勮鍐呭,璇勮鏃堕棿) """ - def get_reply_comment(nick_name, content): - # 鍐呭鏄惁绗﹀悎鏍囧噯 - for t in self.comment_templates: - if re.match(t[0], content): - # 婊¤冻鍐呭 - retrun t[1].replace("[鏄电О]",nick_name) + user_name = comment_element.find_element(By.CLASS_NAME, "comment-user-name").get_attribute("innerHTML") + comment_time = comment_element.find_element(By.CLASS_NAME, "comment-time").get_attribute("innerHTML") + try: + comment_content = comment_element.find_element(By.CLASS_NAME, "comment-content").get_attribute("innerHTML") + except: + comment_content = '' + return user_name, comment_content, comment_time - comment_pattern = re.compile(r'[\u4e00-\u9fa5]+') - comment_content = re.sub("<img.*?>", "", comment_content) - if not comment_pattern.search(comment_content): - # 绗﹀悎鏍囧噯鐨勮瘎璁� - return + def __parse_video_info(self, video_element): + """ + 瑙f瀽瑙嗛淇℃伅 + :param comment_element: + :return:(瑙嗛鍚嶇О,瑙嗛鏃堕棿,璇勮鏁伴噺) + """ + video_time = video_element.find_element(By.CLASS_NAME, "feed-time").get_attribute("innerHTML") + comment_count = video_element.find_element(By.CLASS_NAME, "feed-comment-total").find_element(By.XPATH, + "./span[2]").get_attribute( + "innerHTML") + video_name = video_element.find_element(By.CLASS_NAME, "feed-title").get_attribute("innerHTML") + video_title = video_name + if video_title.find("#") >= 0: + video_title = video_title[:video_title.find("#")] + return video_name, video_time, comment_count, video_title + + def __process_reply(self, video_info, comment_element): + """ + 澶勭悊鍗曟潯璇勮鍥炲 + :param video_name:(video_name, video_date, comment_count, video_title) + :param comment_element: + :return: 鏄惁闇�瑕佺偣璧� + """ + + def get_reply_comment(nick_name, content): + if not setting.is_reply_comment(): + return None + # 鍐呭鏄惁绗﹀悎鏍囧噯 + return comment_manager.get_replay_content(video_info, nick_name, content) + + comment_user_name, comment_content, comment_time = self.__parse_comment_info(comment_element) + try: comment_reply_list = comment_element.find_element(By.CLASS_NAME, "comment-reply-list") if comment_reply_list.find_element(By.XPATH, ".//div[normalize-space()='浣滆��']"): print("宸茬粡鍥炲銆傘�傘�傘��") - return + return False except: print("杩樻病鍥炲銆傘�傘�傘��") - comment_actions = comment_element.find_element(By.CLASS_NAME, "action-list").find_elements(By.CLASS_NAME, - "action-item") - # 鐐瑰嚮璇勮 - comment_actions[1].click() - wait = WebDriverWait(self.driver, 5) # 鏈�澶氱瓑寰�10绉� - element = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "comment-create-content"))) - - replay_content = get_reply_comment(, comment_element) + replay_content = get_reply_comment(comment_user_name, comment_content) if not replay_content: # 涓嶉渶瑕佽瘎璁� - return + print("涓嶉渶瑕佸洖澶嶃�傘�傘�傘��") + return True + # 闇�瑕佸洖澶� + + comment_actions = comment_element.find_element(By.CLASS_NAME, "action-list").find_elements(By.CLASS_NAME, + "action-item") + # 鐐瑰嚮璇勮 + comment_actions[1].click() + wait = WebDriverWait(self.driver, 10) # 鏈�澶氱瓑寰�10绉� + wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "comment-create-content"))) + time.sleep(1) self.driver.find_element(By.CLASS_NAME, - "comment-create-content").find_element(By.TAG_NAME, - "textarea").send_keys(replay_content) + "comment-create-content").find_element(By.TAG_NAME, + "textarea").send_keys(replay_content) self.driver.implicitly_wait(1) self.driver.find_element(By.CLASS_NAME, - "comment-create-content").find_element(By.XPATH, "div[3]/div[2]").click() + "comment-create-content").find_element(By.XPATH, "div[3]/div[2]").click() time.sleep(2) + time.sleep(random.randint(constant.COMMENT_REPLY_SPACE_TIME_MIN, constant.COMMENT_REPLY_SPACE_TIME_MAX)) + return False + def __process_comments(self, driver, start_index=0, video_info=None): + """ - def __click_like_all(self, driver, start_index=0): + :param driver: + :param start_index: + :param video_info:(瑙嗛鍐呭, 瑙嗛鏃ユ湡, 璇勮娆℃暟, 瑙嗛鏍囬) + :return: + """ scroll_list = driver.find_element(By.CLASS_NAME, "feed-comment__wrp") - - loadmore = scroll_list.find_element(By.CLASS_NAME, "loadmore__dot") - # 鑾峰彇鐖舵帶浠� - loadmore = loadmore.find_element(By.XPATH, "./..") - has_more = loadmore.value_of_css_property("display") == "none" + try: + loadmore = scroll_list.find_element(By.CLASS_NAME, "loadmore__dot") + # 鑾峰彇鐖舵帶浠� + loadmore = loadmore.find_element(By.XPATH, "./..") + has_more = loadmore.value_of_css_property("display") == "none" + except: + has_more = False comments = scroll_list.find_elements(By.XPATH, "div[2]/div/div/div[contains(@class,'comment-item')]") print("璇勮鏉℃暟", len(comments)) for index in range(start_index, len(comments)): + if self.break_excute: + return comment = comments[index] # 璇勮鍐呭 - comment_content = comment.find_element(By.CLASS_NAME, "comment-content").get_attribute("innerHTML") + user_name, comment_content, comment_time = self.__parse_comment_info(comment) if not comment_content: - continue + comment_content = '' print("=======璇勮鍐呭锛�", comment_content) - self.__process_reply(comment_content, comment) - self.__process_like(comment_content, comment) + need_like = self.__process_reply(video_info, comment) + if need_like: + print("闇�瑕佺偣璧�") + self.__process_like(video_info[0], video_info[1], comment) + else: + print("涓嶉渶瑕佺偣璧�") # 寰�涓嬫粦鍔� if has_more: driver.execute_script( "var __comment_scroll = document.getElementsByClassName('feed-comment__wrp')[0]; __comment_scroll.scrollTop = __comment_scroll.scrollHeight;") print("寰�涓嬫粴鍔�") time.sleep(3) - self.__click_like_all(driver, start_index=len(comments)) + self.__process_comments(driver, start_index=len(comments), video_info=video_info) else: - print("娌℃湁鏇村浜�") + print("娌℃湁鏇村璇勮浜�, 璇勮鏁伴噺锛�", len(comments)) - def click_like(self, driver: webdriver.Chrome): + def process_videos(self, driver: webdriver.Chrome, start_index=0): wait = WebDriverWait(driver, 100) # 鏈�澶氱瓑寰�10绉� - element = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "feeds-container"))) + scroll_list = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "feeds-container"))) time.sleep(1) # 鏌ユ壘瑙嗛鍒楄〃 videos_root = driver.find_element(By.CLASS_NAME, "feeds-container") videos = videos_root.find_elements(By.CLASS_NAME, "comment-feed-wrap") - for video in videos: + for i in range(start_index, len(videos)): + if self.break_excute: + return # 閫夋嫨瑙嗛 + video = videos[i] + # 瑙f瀽瑙嗛鍐呭 + video_info = self.__parse_video_info(video) + # 鍒ゆ柇鏄惁瑕佺偣鍑昏繘鍘� + if not VideoManger().is_need_click(video_info): + continue video.click() driver.implicitly_wait(2) - self.__click_like_all(driver) + self.__process_comments(driver, video_info=video_info) + # 澶勭悊涓�涓棰� + time.sleep(constant.VIDEO_CLICK_SPACE_TIME) + + loadmore = scroll_list.find_element(By.CLASS_NAME, "loadmore__dot") + # 鑾峰彇鐖舵帶浠� + loadmore = loadmore.find_element(By.XPATH, "./..") + has_more = loadmore.value_of_css_property("display") == "none" + if has_more: + driver.execute_script( + "var __video_scroll = document.getElementsByClassName('feeds-container')[0]; __video_scroll.scrollTop = __video_scroll.scrollHeight;") + print("寰�涓嬫粴鍔�") + time.sleep(3) + self.process_videos(driver, start_index=len(videos)) + else: + print("娌℃湁鏇村瑙嗛浜�") + VideoManger().add_video_infos([self.__parse_video_info(v) for v in videos]) def close(self): if self.driver: @@ -168,10 +271,22 @@ except: pass + def __save_cookie(self): + cookies = self.driver.get_cookies() + with open("datas/cookies.json", "w") as f: + json.dump(cookies, f) + + def __fill_cookie(self): + if os.path.exists("datas/cookies.json"): + with open("datas/cookies.json", "r") as f: + cookies = json.load(f) + for cookie in cookies: + self.driver.add_cookie(cookie) + if __name__ == "__main__": # CommentManager().start_process_comment() - s ="浣犲ソ" + s = "浣犲ソ" print(s.encode('unicode_escape').decode("utf-8")) # if __name__ == "__main__": -- Gitblit v1.8.0