💄 2020年4月11日17:56:59

Commit 6973857 · ExcaliburEX · Apr 11, 2020
1 parent: 107b1a9
Show file tree

Hide file tree

Showing 11 changed files with 150 additions and 28 deletions.
diff --git a/AutoSearchAndDownload.py b/AutoSearchAndDownload.py
@@ -7,7 +7,7 @@
 from bs4 import BeautifulSoup
 import cv2
 from skimage.measure import compare_ssim
-
+from selenium.common.exceptions import TimeoutException
 
 class AutoSearchAndDownload:
     def main(self, Dir='F:\\pic\\test\\'):
@@ -69,8 +69,17 @@ def Auto():
                 if n != 'log':
                 # log文件用来判断有没有下载失败的影片，所以不需要加入搜索
                     try:
-
-                        driver.get(url + n)
+                        driver.set_page_load_timeout(10)
+                        while True:
+                            try:
+                                driver.get(url + n)
+                                break
+                            except TimeoutException:
+                                print("加载超时，启动F5刷新,等待5秒")
+                                pyautogui.click(x=509, y=33)
+                                pyautogui.hotkey('f5')
+                                driver.get(url + n)
+                                time.sleep(5)
                         Size = [x.text for x in driver.find_elements_by_xpath("//*[@class='col-sm-2 col-lg-1 hidden-xs text-right size']")]
                         # 获取搜索到的磁链的大小
                         content = driver.page_source.encode('utf-8')
@@ -80,7 +89,17 @@ def Auto():
                         print('%s 一共有 %d 个种子'%(n, len(Size)))
                         print("%s 的尺寸最大为 %s" % (n, Size[MaxVideoSizeIndex(Size)]))
                         time.sleep(2)
-                        driver.get(href[MaxVideoSizeIndex(Size)])
+                        driver.set_page_load_timeout(10)
+                        while True:
+                            try:
+                                driver.get(href[MaxVideoSizeIndex(Size)])
+                                break
+                            except TimeoutException:
+                                print("加载超时，启动F5刷新,等待5秒")
+                                pyautogui.click(x=509, y=33)
+                                pyautogui.hotkey('f5')
+                                driver.get(href[MaxVideoSizeIndex(Size)])
+                                time.sleep(5)
                         # 进入影片尺寸最大的影片磁链页
                         pyperclip.copy([x.text for x in driver.find_elements_by_xpath("//*[@class='magnet-link hidden-xs']")][0])
                         # 复制磁链

diff --git a/AutoSearchAndDownload_Thunder.py b/AutoSearchAndDownload_Thunder.py
@@ -9,7 +9,7 @@
 from bs4 import BeautifulSoup
 import cv2
 from skimage.measure import compare_ssim
-
+from selenium.common.exceptions import TimeoutException
 
 class AutoSearchAndDownload_Thunder:
     def main(self, Dir='F:\\pic\\test\\'):
@@ -126,7 +126,17 @@ def Auto():
                 if n != 'log':
                     # log文件用来判断有没有下载失败的影片，所以不需要加入搜索
                     try:
-                        driver.get(url + n)
+                        driver.set_page_load_timeout(10)
+                        while True:
+                            try:
+                                driver.get(url + n)
+                                break
+                            except TimeoutException:
+                                print("加载超时，启动F5刷新,等待5秒")
+                                pyautogui.click(x=509, y=33)
+                                pyautogui.hotkey('f5')
+                                driver.get(url + n)
+                                time.sleep(5)
                         Size = [x.text for x in driver.find_elements_by_xpath(
                             "//*[@class='col-sm-2 col-lg-1 hidden-xs text-right size']")]
                         # 获取搜索到的磁链的大小
@@ -138,7 +148,17 @@ def Auto():
                         print('%s 一共有 %d 个种子' % (n, len(Size)))
                         print("%s 的尺寸最大为 %s" % (n, Size[MaxVideoSizeIndex(Size)]))
                         time.sleep(2)
-                        driver.get(href[MaxVideoSizeIndex(Size)])
+                        driver.set_page_load_timeout(10)
+                        while True:
+                            try:
+                                driver.get(href[MaxVideoSizeIndex(Size)])
+                                break
+                            except TimeoutException:
+                                print("加载超时，启动F5刷新,等待5秒")
+                                pyautogui.click(x=509, y=33)
+                                pyautogui.hotkey('f5')
+                                driver.get(href[MaxVideoSizeIndex(Size)])
+                                time.sleep(5)
                         # 进入影片尺寸最大的影片磁链页
                         pyperclip.copy([x.text for x in driver.find_elements_by_xpath(
                             "//*[@class='magnet-link hidden-xs']")][0])

diff --git a/Crawl_141jav.py b/Crawl_141jav.py
@@ -7,6 +7,7 @@
 import pyperclip
 from lxml import etree
 import datetime
+from selenium.common.exceptions import TimeoutException
 
 class Crawl_141jav:
     def main(self, Dir='F:\\pic\\141jav\\', startTime= datetime.date.today()):
@@ -44,19 +45,42 @@ def scrapy():
                     f.close()
             for date in timeList:
                 try:
-                    driver.get(url+date)
+                    driver.set_page_load_timeout(10)
+                    while True:
+                        try:
+                            driver.get(url+date)
+                            break
+                        except TimeoutException:
+                            print("加载超时，启动F5刷新,等待5秒")
+                            pyautogui.click(x=509, y=33)
+                            pyautogui.hotkey('f5')
+                            driver.get(url+date)
+                            time.sleep(5)
                     if not os.path.exists(custom_path):
                         os.mkdir(custom_path)
                     if not os.path.exists(custom_path+date.replace('/','-')+'\\'):
                         os.mkdir(custom_path+date.replace('/', '-')+'\\')
+                    videoNumber = 0
                     for page in range(100):
                         try:
-                            driver.get(url+date+'?page='+str(page+1))
+                            driver.set_page_load_timeout(10)
+                            while True:
+                                try:
+                                    driver.get(url+date+'?page='+str(page+1))
+                                    break
+                                except TimeoutException:
+                                    print("加载超时，启动F5刷新，等待5秒")
+                                    pyautogui.click(x=509, y=33)
+                                    pyautogui.hotkey('f5')
+                                    driver.get(url+date+'?page='+str(page+1))
+                                    time.sleep(5)
                             content = driver.page_source.encode('utf-8')
                             html = etree.HTML(content)
                             soup = BeautifulSoup(content, 'lxml')
                             href = [x.attrib['src'] for x in html.xpath("//img[@class='image']")]
+                            videoNumber += len(href)
                             if len(href) == 0:
+                                print("%s 共 %d 部片！" % (date, videoNumber))
                                 break
                             name = [x.text.replace("\n", "") for x in html.xpath(
                                 "//h5[@class='title is-4 is-spaced']/a")]
@@ -74,11 +98,24 @@ def scrapy():
                                 pyautogui.hotkey('ctrlleft', 'V')
                                 time.sleep(1)
                                 pyautogui.press('enter')
-                                with open(custom_path + 'history.txt', 'a+') as f:
-                                    f.writelines(name[i])
-                                    f.writelines('\n')
-                                    f.close()
-                                print("%s 下载完成！" % (name[i]))
+                                time.sleep(1)
+                                while True: 
+                                    filelist = os.listdir(custom_path+date.replace('/', '-')+'\\')
+                                    if name[i] + '.jpg' in filelist:
+                                        with open(custom_path + 'history.txt', 'a+') as f:
+                                            f.writelines(name[i])
+                                            f.writelines('\n')
+                                            f.close()
+                                        print("%s 下载完成！" % (name[i]))
+                                        break   
+                                    else:
+                                        print("等待响应")
+                                        time.sleep(2)
+                                        pyautogui.hotkey('ctrlleft', 'V')  # 粘贴
+                                        time.sleep(1)
+                                        pyautogui.press('enter')  # 确认
+                                        time.sleep(1)
+                            time.sleep(2)    
                             driver_info.quit()
                         except:
                             print("%s 共 %d 页结束！" % (date, page+1))

diff --git a/Crawl_51luxu.py b/Crawl_51luxu.py
@@ -3,6 +3,7 @@
 from bs4 import BeautifulSoup
 import pyautogui
 from selenium.webdriver.support.ui import WebDriverWait
+from selenium.common.exceptions import TimeoutException
 import pyperclip
 import os
 
@@ -96,13 +97,26 @@ def scrapy(driver):
                         pyautogui.hotkey('ctrlleft', 'V') # 粘贴
                         time.sleep(1)
                         pyautogui.press('enter') # 确认
-                        time.sleep(0.2)
-                        with open(custom_path + 'history.txt', 'a+') as f:
-                            f.writelines(title)
-                            f.writelines('\n')
-                            f.close()
+                        time.sleep(1)
+                        while True:
+                            filelist = os.listdir(custom_path)
+                            if title + '.jpg' in filelist:
+                                with open(custom_path + 'history.txt', 'a+') as f:
+                                    f.writelines(title)
+                                    f.writelines('\n')
+                                    f.close()
+                                print("%s 下载完成！" % (title))
+                                break
+                            else:
+                                print("等待响应")
+                                time.sleep(2)
+                                pyautogui.hotkey('ctrlleft', 'V')  # 粘贴
+                                time.sleep(1)
+                                pyautogui.press('enter')  # 确认
+                                time.sleep(1)
                         # 在txt中加入当前下载的图片名字
                         print("%s 下载完成！"%(title))
+                    time.sleep(2)
                     driver1.quit()
                     print("第 %d 页爬完"%(page))
                     button =  "//*[@class='next page-numbers']"  #翻页按钮

diff --git a/Crawl_fc2.py b/Crawl_fc2.py
@@ -7,7 +7,7 @@
 from lxml import etree
 import os
 from bs4 import BeautifulSoup
-
+from selenium.common.exceptions import TimeoutException
 # 主要功能就是访问fc2所有影片按时间更新的详情页，然后挨个下载具体影片的视频截图
 class Crawl_fc2:
     def main(self, Dir='F:\\pic\\fc2\\', page=1):
@@ -46,13 +46,33 @@ def scrapy(driver):
                             continue
                         # 前文提到的判断是否下过，如果是，后面就不用进行了
                         # 进入相应链接的详情页
-                        driver_info.get(url_prefix + name[i] + '.html')
+                        driver_info.set_page_load_timeout(10)
+                        while True:
+                            try:
+                                driver_info.get(url_prefix + name[i] + '.html')
+                                break
+                            except TimeoutException:
+                                print("加载超时，启动F5刷新,等待5秒")
+                                pyautogui.click(x=509, y=33)
+                                pyautogui.hotkey('f5')
+                                driver_info.get(url_prefix + name[i] + '.html')
+                                time.sleep(5)
                         # 进入各个影片的详情页，因为链接就是名字加html，所以直接保存番号就行了
                         content = driver_info.page_source.encode('utf-8')
                         soup = BeautifulSoup(content, 'lxml')
                         href = url_prefix[:-5] + ''.join(re.findall(r'href="/uploadfile/.*?"',str(soup))).split("\"")[1].split("\"")[0]
                         # 进入详情页后，获取视频截图的链接
-                        driver_info.get(href) # 进入视频截图页面
+                        driver_info.set_page_load_timeout(10)
+                        while True:
+                            try:
+                                driver_info.get(href) # 进入视频截图页面
+                                break
+                            except TimeoutException:
+                                print("加载超时，启动F5刷新,等待5秒")
+                                pyautogui.click(x=509, y=33)
+                                pyautogui.hotkey('f5')
+                                driver_info.get(href)
+                                time.sleep(5)
                         wait = WebDriverWait(driver_info, 10)  # 等待浏览器相应，删除也可以
                         pyautogui.rightClick(x=500, y=500)  # 右击图片，位置可根据自己的屏幕调整
                         pyautogui.typewrite(['V'])  # 另存为的快捷键为 V
@@ -61,13 +81,25 @@ def scrapy(driver):
                         pyautogui.hotkey('ctrlleft', 'V')  # 粘贴
                         time.sleep(1)
                         pyautogui.press('enter')  # 确认
-                        with open(custom_path + 'history.txt', 'a+') as f:
-                            f.writelines(name[i])
-                            f.writelines('\n')
-                            f.close()
+                        time.sleep(1)
+                        while True:
+                            filelist = os.listdir(custom_path)
+                            if name[i] + '.jpg' in filelist:
+                                with open(custom_path + 'history.txt', 'a+') as f:
+                                    f.writelines(name[i])
+                                    f.writelines('\n')
+                                    f.close()
+                                print("%s 下载完成！" % (name[i]))
+                                break
+                            else:
+                                print("等待响应")
+                                time.sleep(2)
+                                pyautogui.hotkey('ctrlleft', 'V')  # 粘贴
+                                time.sleep(1)
+                                pyautogui.press('enter')  # 确认
+                                time.sleep(1)
                         # 在txt中加入当前下载的图片名字
-                        print("%s 下载完成！" % (name[i]))
-                        time.sleep(0.2)
+                    time.sleep(2)
                     driver_info.quit()
                     print("第 %d 页爬完" % (page + 1))
                     button = "//*[@href='https://fc2club.com/index.php?m=content&c=index&a=lists&catid=12&page=" + str(page + 2) + "']"  #翻页按钮

diff --git a/__pycache__/AutoSearchAndDownload.cpython-37.pyc b/__pycache__/AutoSearchAndDownload.cpython-37.pyc
diff --git a/__pycache__/AutoSearchAndDownload_Thunder.cpython-37.pyc b/__pycache__/AutoSearchAndDownload_Thunder.cpython-37.pyc
diff --git a/__pycache__/Crawl_141jav.cpython-37.pyc b/__pycache__/Crawl_141jav.cpython-37.pyc
diff --git a/__pycache__/Crawl_51luxu.cpython-37.pyc b/__pycache__/Crawl_51luxu.cpython-37.pyc
diff --git a/__pycache__/Crawl_fc2.cpython-37.pyc b/__pycache__/Crawl_fc2.cpython-37.pyc
diff --git a/__pycache__/pyautogui.cpython-37.pyc b/__pycache__/pyautogui.cpython-37.pyc