diff --git a/AutoSearchAndDownload.py b/AutoSearchAndDownload.py index 8a4bc29..2fa3f09 100644 --- a/AutoSearchAndDownload.py +++ b/AutoSearchAndDownload.py @@ -7,7 +7,7 @@ from bs4 import BeautifulSoup import cv2 from skimage.measure import compare_ssim - +from selenium.common.exceptions import TimeoutException class AutoSearchAndDownload: def main(self, Dir='F:\\pic\\test\\'): @@ -69,8 +69,17 @@ def Auto(): if n != 'log': # log文件用来判断有没有下载失败的影片,所以不需要加入搜索 try: - - driver.get(url + n) + driver.set_page_load_timeout(10) + while True: + try: + driver.get(url + n) + break + except TimeoutException: + print("加载超时,启动F5刷新,等待5秒") + pyautogui.click(x=509, y=33) + pyautogui.hotkey('f5') + driver.get(url + n) + time.sleep(5) Size = [x.text for x in driver.find_elements_by_xpath("//*[@class='col-sm-2 col-lg-1 hidden-xs text-right size']")] # 获取搜索到的磁链的大小 content = driver.page_source.encode('utf-8') @@ -80,7 +89,17 @@ def Auto(): print('%s 一共有 %d 个种子'%(n, len(Size))) print("%s 的尺寸最大为 %s" % (n, Size[MaxVideoSizeIndex(Size)])) time.sleep(2) - driver.get(href[MaxVideoSizeIndex(Size)]) + driver.set_page_load_timeout(10) + while True: + try: + driver.get(href[MaxVideoSizeIndex(Size)]) + break + except TimeoutException: + print("加载超时,启动F5刷新,等待5秒") + pyautogui.click(x=509, y=33) + pyautogui.hotkey('f5') + driver.get(href[MaxVideoSizeIndex(Size)]) + time.sleep(5) # 进入影片尺寸最大的影片磁链页 pyperclip.copy([x.text for x in driver.find_elements_by_xpath("//*[@class='magnet-link hidden-xs']")][0]) # 复制磁链 diff --git a/AutoSearchAndDownload_Thunder.py b/AutoSearchAndDownload_Thunder.py index 51ede32..02b433f 100644 --- a/AutoSearchAndDownload_Thunder.py +++ b/AutoSearchAndDownload_Thunder.py @@ -9,7 +9,7 @@ from bs4 import BeautifulSoup import cv2 from skimage.measure import compare_ssim - +from selenium.common.exceptions import TimeoutException class AutoSearchAndDownload_Thunder: def main(self, Dir='F:\\pic\\test\\'): @@ -126,7 +126,17 @@ def Auto(): if n != 'log': # log文件用来判断有没有下载失败的影片,所以不需要加入搜索 try: - driver.get(url + n) + driver.set_page_load_timeout(10) + while True: + try: + driver.get(url + n) + break + except TimeoutException: + print("加载超时,启动F5刷新,等待5秒") + pyautogui.click(x=509, y=33) + pyautogui.hotkey('f5') + driver.get(url + n) + time.sleep(5) Size = [x.text for x in driver.find_elements_by_xpath( "//*[@class='col-sm-2 col-lg-1 hidden-xs text-right size']")] # 获取搜索到的磁链的大小 @@ -138,7 +148,17 @@ def Auto(): print('%s 一共有 %d 个种子' % (n, len(Size))) print("%s 的尺寸最大为 %s" % (n, Size[MaxVideoSizeIndex(Size)])) time.sleep(2) - driver.get(href[MaxVideoSizeIndex(Size)]) + driver.set_page_load_timeout(10) + while True: + try: + driver.get(href[MaxVideoSizeIndex(Size)]) + break + except TimeoutException: + print("加载超时,启动F5刷新,等待5秒") + pyautogui.click(x=509, y=33) + pyautogui.hotkey('f5') + driver.get(href[MaxVideoSizeIndex(Size)]) + time.sleep(5) # 进入影片尺寸最大的影片磁链页 pyperclip.copy([x.text for x in driver.find_elements_by_xpath( "//*[@class='magnet-link hidden-xs']")][0]) diff --git a/Crawl_141jav.py b/Crawl_141jav.py index 0d6be52..4f166a1 100644 --- a/Crawl_141jav.py +++ b/Crawl_141jav.py @@ -7,6 +7,7 @@ import pyperclip from lxml import etree import datetime +from selenium.common.exceptions import TimeoutException class Crawl_141jav: def main(self, Dir='F:\\pic\\141jav\\', startTime= datetime.date.today()): @@ -44,19 +45,42 @@ def scrapy(): f.close() for date in timeList: try: - driver.get(url+date) + driver.set_page_load_timeout(10) + while True: + try: + driver.get(url+date) + break + except TimeoutException: + print("加载超时,启动F5刷新,等待5秒") + pyautogui.click(x=509, y=33) + pyautogui.hotkey('f5') + driver.get(url+date) + time.sleep(5) if not os.path.exists(custom_path): os.mkdir(custom_path) if not os.path.exists(custom_path+date.replace('/','-')+'\\'): os.mkdir(custom_path+date.replace('/', '-')+'\\') + videoNumber = 0 for page in range(100): try: - driver.get(url+date+'?page='+str(page+1)) + driver.set_page_load_timeout(10) + while True: + try: + driver.get(url+date+'?page='+str(page+1)) + break + except TimeoutException: + print("加载超时,启动F5刷新,等待5秒") + pyautogui.click(x=509, y=33) + pyautogui.hotkey('f5') + driver.get(url+date+'?page='+str(page+1)) + time.sleep(5) content = driver.page_source.encode('utf-8') html = etree.HTML(content) soup = BeautifulSoup(content, 'lxml') href = [x.attrib['src'] for x in html.xpath("//img[@class='image']")] + videoNumber += len(href) if len(href) == 0: + print("%s 共 %d 部片!" % (date, videoNumber)) break name = [x.text.replace("\n", "") for x in html.xpath( "//h5[@class='title is-4 is-spaced']/a")] @@ -74,11 +98,24 @@ def scrapy(): pyautogui.hotkey('ctrlleft', 'V') time.sleep(1) pyautogui.press('enter') - with open(custom_path + 'history.txt', 'a+') as f: - f.writelines(name[i]) - f.writelines('\n') - f.close() - print("%s 下载完成!" % (name[i])) + time.sleep(1) + while True: + filelist = os.listdir(custom_path+date.replace('/', '-')+'\\') + if name[i] + '.jpg' in filelist: + with open(custom_path + 'history.txt', 'a+') as f: + f.writelines(name[i]) + f.writelines('\n') + f.close() + print("%s 下载完成!" % (name[i])) + break + else: + print("等待响应") + time.sleep(2) + pyautogui.hotkey('ctrlleft', 'V') # 粘贴 + time.sleep(1) + pyautogui.press('enter') # 确认 + time.sleep(1) + time.sleep(2) driver_info.quit() except: print("%s 共 %d 页结束!" % (date, page+1)) diff --git a/Crawl_51luxu.py b/Crawl_51luxu.py index 4cd8fc5..0e64019 100644 --- a/Crawl_51luxu.py +++ b/Crawl_51luxu.py @@ -3,6 +3,7 @@ from bs4 import BeautifulSoup import pyautogui from selenium.webdriver.support.ui import WebDriverWait +from selenium.common.exceptions import TimeoutException import pyperclip import os @@ -96,13 +97,26 @@ def scrapy(driver): pyautogui.hotkey('ctrlleft', 'V') # 粘贴 time.sleep(1) pyautogui.press('enter') # 确认 - time.sleep(0.2) - with open(custom_path + 'history.txt', 'a+') as f: - f.writelines(title) - f.writelines('\n') - f.close() + time.sleep(1) + while True: + filelist = os.listdir(custom_path) + if title + '.jpg' in filelist: + with open(custom_path + 'history.txt', 'a+') as f: + f.writelines(title) + f.writelines('\n') + f.close() + print("%s 下载完成!" % (title)) + break + else: + print("等待响应") + time.sleep(2) + pyautogui.hotkey('ctrlleft', 'V') # 粘贴 + time.sleep(1) + pyautogui.press('enter') # 确认 + time.sleep(1) # 在txt中加入当前下载的图片名字 print("%s 下载完成!"%(title)) + time.sleep(2) driver1.quit() print("第 %d 页爬完"%(page)) button = "//*[@class='next page-numbers']" #翻页按钮 diff --git a/Crawl_fc2.py b/Crawl_fc2.py index be0d11c..9a23e64 100644 --- a/Crawl_fc2.py +++ b/Crawl_fc2.py @@ -7,7 +7,7 @@ from lxml import etree import os from bs4 import BeautifulSoup - +from selenium.common.exceptions import TimeoutException # 主要功能就是访问fc2所有影片按时间更新的详情页,然后挨个下载具体影片的视频截图 class Crawl_fc2: def main(self, Dir='F:\\pic\\fc2\\', page=1): @@ -46,13 +46,33 @@ def scrapy(driver): continue # 前文提到的判断是否下过,如果是,后面就不用进行了 # 进入相应链接的详情页 - driver_info.get(url_prefix + name[i] + '.html') + driver_info.set_page_load_timeout(10) + while True: + try: + driver_info.get(url_prefix + name[i] + '.html') + break + except TimeoutException: + print("加载超时,启动F5刷新,等待5秒") + pyautogui.click(x=509, y=33) + pyautogui.hotkey('f5') + driver_info.get(url_prefix + name[i] + '.html') + time.sleep(5) # 进入各个影片的详情页,因为链接就是名字加html,所以直接保存番号就行了 content = driver_info.page_source.encode('utf-8') soup = BeautifulSoup(content, 'lxml') href = url_prefix[:-5] + ''.join(re.findall(r'href="/uploadfile/.*?"',str(soup))).split("\"")[1].split("\"")[0] # 进入详情页后,获取视频截图的链接 - driver_info.get(href) # 进入视频截图页面 + driver_info.set_page_load_timeout(10) + while True: + try: + driver_info.get(href) # 进入视频截图页面 + break + except TimeoutException: + print("加载超时,启动F5刷新,等待5秒") + pyautogui.click(x=509, y=33) + pyautogui.hotkey('f5') + driver_info.get(href) + time.sleep(5) wait = WebDriverWait(driver_info, 10) # 等待浏览器相应,删除也可以 pyautogui.rightClick(x=500, y=500) # 右击图片,位置可根据自己的屏幕调整 pyautogui.typewrite(['V']) # 另存为的快捷键为 V @@ -61,13 +81,25 @@ def scrapy(driver): pyautogui.hotkey('ctrlleft', 'V') # 粘贴 time.sleep(1) pyautogui.press('enter') # 确认 - with open(custom_path + 'history.txt', 'a+') as f: - f.writelines(name[i]) - f.writelines('\n') - f.close() + time.sleep(1) + while True: + filelist = os.listdir(custom_path) + if name[i] + '.jpg' in filelist: + with open(custom_path + 'history.txt', 'a+') as f: + f.writelines(name[i]) + f.writelines('\n') + f.close() + print("%s 下载完成!" % (name[i])) + break + else: + print("等待响应") + time.sleep(2) + pyautogui.hotkey('ctrlleft', 'V') # 粘贴 + time.sleep(1) + pyautogui.press('enter') # 确认 + time.sleep(1) # 在txt中加入当前下载的图片名字 - print("%s 下载完成!" % (name[i])) - time.sleep(0.2) + time.sleep(2) driver_info.quit() print("第 %d 页爬完" % (page + 1)) button = "//*[@href='https://fc2club.com/index.php?m=content&c=index&a=lists&catid=12&page=" + str(page + 2) + "']" #翻页按钮 diff --git a/__pycache__/AutoSearchAndDownload.cpython-37.pyc b/__pycache__/AutoSearchAndDownload.cpython-37.pyc index 53da240..d97b640 100644 Binary files a/__pycache__/AutoSearchAndDownload.cpython-37.pyc and b/__pycache__/AutoSearchAndDownload.cpython-37.pyc differ diff --git a/__pycache__/AutoSearchAndDownload_Thunder.cpython-37.pyc b/__pycache__/AutoSearchAndDownload_Thunder.cpython-37.pyc index c8c5cc8..c9691c3 100644 Binary files a/__pycache__/AutoSearchAndDownload_Thunder.cpython-37.pyc and b/__pycache__/AutoSearchAndDownload_Thunder.cpython-37.pyc differ diff --git a/__pycache__/Crawl_141jav.cpython-37.pyc b/__pycache__/Crawl_141jav.cpython-37.pyc index 72aacce..ec8e1b2 100644 Binary files a/__pycache__/Crawl_141jav.cpython-37.pyc and b/__pycache__/Crawl_141jav.cpython-37.pyc differ diff --git a/__pycache__/Crawl_51luxu.cpython-37.pyc b/__pycache__/Crawl_51luxu.cpython-37.pyc index f971a0a..7d82b4c 100644 Binary files a/__pycache__/Crawl_51luxu.cpython-37.pyc and b/__pycache__/Crawl_51luxu.cpython-37.pyc differ diff --git a/__pycache__/Crawl_fc2.cpython-37.pyc b/__pycache__/Crawl_fc2.cpython-37.pyc index 3638dbb..193b4ae 100644 Binary files a/__pycache__/Crawl_fc2.cpython-37.pyc and b/__pycache__/Crawl_fc2.cpython-37.pyc differ diff --git a/__pycache__/pyautogui.cpython-37.pyc b/__pycache__/pyautogui.cpython-37.pyc new file mode 100644 index 0000000..28e3155 Binary files /dev/null and b/__pycache__/pyautogui.cpython-37.pyc differ