Skip to content

Commit

Permalink
💄 2020年4月11日17:56:59
Browse files: browse the repository at this point in the history
  • Loading branch information
ExcaliburEX committed Apr 11, 2020
1 parent 107b1a9 commit 6973857
Show file tree
Hide file tree
Showing 11 changed files with 150 additions and 28 deletions.
27 changes: 23 additions & 4 deletions AutoSearchAndDownload.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from bs4 import BeautifulSoup
import cv2
from skimage.measure import compare_ssim

from selenium.common.exceptions import TimeoutException

class AutoSearchAndDownload:
def main(self, Dir='F:\\pic\\test\\'):
Expand Down Expand Up @@ -69,8 +69,17 @@ def Auto():
if n != 'log':
# log文件用来判断有没有下载失败的影片,所以不需要加入搜索
try:

driver.get(url + n)
driver.set_page_load_timeout(10)
while True:
try:
driver.get(url + n)
break
except TimeoutException:
print("加载超时,启动F5刷新,等待5秒")
pyautogui.click(x=509, y=33)
pyautogui.hotkey('f5')
driver.get(url + n)
time.sleep(5)
Size = [x.text for x in driver.find_elements_by_xpath("//*[@class='col-sm-2 col-lg-1 hidden-xs text-right size']")]
# 获取搜索到的磁链的大小
content = driver.page_source.encode('utf-8')
Expand All @@ -80,7 +89,17 @@ def Auto():
print('%s 一共有 %d 个种子'%(n, len(Size)))
print("%s 的尺寸最大为 %s" % (n, Size[MaxVideoSizeIndex(Size)]))
time.sleep(2)
driver.get(href[MaxVideoSizeIndex(Size)])
driver.set_page_load_timeout(10)
while True:
try:
driver.get(href[MaxVideoSizeIndex(Size)])
break
except TimeoutException:
print("加载超时,启动F5刷新,等待5秒")
pyautogui.click(x=509, y=33)
pyautogui.hotkey('f5')
driver.get(href[MaxVideoSizeIndex(Size)])
time.sleep(5)
# 进入影片尺寸最大的影片磁链页
pyperclip.copy([x.text for x in driver.find_elements_by_xpath("//*[@class='magnet-link hidden-xs']")][0])
# 复制磁链
Expand Down
26 changes: 23 additions & 3 deletions AutoSearchAndDownload_Thunder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from bs4 import BeautifulSoup
import cv2
from skimage.measure import compare_ssim

from selenium.common.exceptions import TimeoutException

class AutoSearchAndDownload_Thunder:
def main(self, Dir='F:\\pic\\test\\'):
Expand Down Expand Up @@ -126,7 +126,17 @@ def Auto():
if n != 'log':
# log文件用来判断有没有下载失败的影片,所以不需要加入搜索
try:
driver.get(url + n)
driver.set_page_load_timeout(10)
while True:
try:
driver.get(url + n)
break
except TimeoutException:
print("加载超时,启动F5刷新,等待5秒")
pyautogui.click(x=509, y=33)
pyautogui.hotkey('f5')
driver.get(url + n)
time.sleep(5)
Size = [x.text for x in driver.find_elements_by_xpath(
"//*[@class='col-sm-2 col-lg-1 hidden-xs text-right size']")]
# 获取搜索到的磁链的大小
Expand All @@ -138,7 +148,17 @@ def Auto():
print('%s 一共有 %d 个种子' % (n, len(Size)))
print("%s 的尺寸最大为 %s" % (n, Size[MaxVideoSizeIndex(Size)]))
time.sleep(2)
driver.get(href[MaxVideoSizeIndex(Size)])
driver.set_page_load_timeout(10)
while True:
try:
driver.get(href[MaxVideoSizeIndex(Size)])
break
except TimeoutException:
print("加载超时,启动F5刷新,等待5秒")
pyautogui.click(x=509, y=33)
pyautogui.hotkey('f5')
driver.get(href[MaxVideoSizeIndex(Size)])
time.sleep(5)
# 进入影片尺寸最大的影片磁链页
pyperclip.copy([x.text for x in driver.find_elements_by_xpath(
"//*[@class='magnet-link hidden-xs']")][0])
Expand Down
51 changes: 44 additions & 7 deletions Crawl_141jav.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pyperclip
from lxml import etree
import datetime
from selenium.common.exceptions import TimeoutException

class Crawl_141jav:
def main(self, Dir='F:\\pic\\141jav\\', startTime= datetime.date.today()):
Expand Down Expand Up @@ -44,19 +45,42 @@ def scrapy():
f.close()
for date in timeList:
try:
driver.get(url+date)
driver.set_page_load_timeout(10)
while True:
try:
driver.get(url+date)
break
except TimeoutException:
print("加载超时,启动F5刷新,等待5秒")
pyautogui.click(x=509, y=33)
pyautogui.hotkey('f5')
driver.get(url+date)
time.sleep(5)
if not os.path.exists(custom_path):
os.mkdir(custom_path)
if not os.path.exists(custom_path+date.replace('/','-')+'\\'):
os.mkdir(custom_path+date.replace('/', '-')+'\\')
videoNumber = 0
for page in range(100):
try:
driver.get(url+date+'?page='+str(page+1))
driver.set_page_load_timeout(10)
while True:
try:
driver.get(url+date+'?page='+str(page+1))
break
except TimeoutException:
print("加载超时,启动F5刷新,等待5秒")
pyautogui.click(x=509, y=33)
pyautogui.hotkey('f5')
driver.get(url+date+'?page='+str(page+1))
time.sleep(5)
content = driver.page_source.encode('utf-8')
html = etree.HTML(content)
soup = BeautifulSoup(content, 'lxml')
href = [x.attrib['src'] for x in html.xpath("//img[@class='image']")]
videoNumber += len(href)
if len(href) == 0:
print("%s 共 %d 部片!" % (date, videoNumber))
break
name = [x.text.replace("\n", "") for x in html.xpath(
"//h5[@class='title is-4 is-spaced']/a")]
Expand All @@ -74,11 +98,24 @@ def scrapy():
pyautogui.hotkey('ctrlleft', 'V')
time.sleep(1)
pyautogui.press('enter')
with open(custom_path + 'history.txt', 'a+') as f:
f.writelines(name[i])
f.writelines('\n')
f.close()
print("%s 下载完成!" % (name[i]))
time.sleep(1)
while True:
filelist = os.listdir(custom_path+date.replace('/', '-')+'\\')
if name[i] + '.jpg' in filelist:
with open(custom_path + 'history.txt', 'a+') as f:
f.writelines(name[i])
f.writelines('\n')
f.close()
print("%s 下载完成!" % (name[i]))
break
else:
print("等待响应")
time.sleep(2)
pyautogui.hotkey('ctrlleft', 'V') # 粘贴
time.sleep(1)
pyautogui.press('enter') # 确认
time.sleep(1)
time.sleep(2)
driver_info.quit()
except:
print("%s 共 %d 页结束!" % (date, page+1))
Expand Down
24 changes: 19 additions & 5 deletions Crawl_51luxu.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from bs4 import BeautifulSoup
import pyautogui
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
import pyperclip
import os

Expand Down Expand Up @@ -96,13 +97,26 @@ def scrapy(driver):
pyautogui.hotkey('ctrlleft', 'V') # 粘贴
time.sleep(1)
pyautogui.press('enter') # 确认
time.sleep(0.2)
with open(custom_path + 'history.txt', 'a+') as f:
f.writelines(title)
f.writelines('\n')
f.close()
time.sleep(1)
while True:
filelist = os.listdir(custom_path)
if title + '.jpg' in filelist:
with open(custom_path + 'history.txt', 'a+') as f:
f.writelines(title)
f.writelines('\n')
f.close()
print("%s 下载完成!" % (title))
break
else:
print("等待响应")
time.sleep(2)
pyautogui.hotkey('ctrlleft', 'V') # 粘贴
time.sleep(1)
pyautogui.press('enter') # 确认
time.sleep(1)
# 在txt中加入当前下载的图片名字
print("%s 下载完成!"%(title))
time.sleep(2)
driver1.quit()
print("第 %d 页爬完"%(page))
button = "//*[@class='next page-numbers']" #翻页按钮
Expand Down
50 changes: 41 additions & 9 deletions Crawl_fc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from lxml import etree
import os
from bs4 import BeautifulSoup

from selenium.common.exceptions import TimeoutException
# 主要功能就是访问fc2所有影片按时间更新的详情页,然后挨个下载具体影片的视频截图
class Crawl_fc2:
def main(self, Dir='F:\\pic\\fc2\\', page=1):
Expand Down Expand Up @@ -46,13 +46,33 @@ def scrapy(driver):
continue
# 前文提到的判断是否下过,如果是,后面就不用进行了
# 进入相应链接的详情页
driver_info.get(url_prefix + name[i] + '.html')
driver_info.set_page_load_timeout(10)
while True:
try:
driver_info.get(url_prefix + name[i] + '.html')
break
except TimeoutException:
print("加载超时,启动F5刷新,等待5秒")
pyautogui.click(x=509, y=33)
pyautogui.hotkey('f5')
driver_info.get(url_prefix + name[i] + '.html')
time.sleep(5)
# 进入各个影片的详情页,因为链接就是名字加html,所以直接保存番号就行了
content = driver_info.page_source.encode('utf-8')
soup = BeautifulSoup(content, 'lxml')
href = url_prefix[:-5] + ''.join(re.findall(r'href="/uploadfile/.*?"',str(soup))).split("\"")[1].split("\"")[0]
# 进入详情页后,获取视频截图的链接
driver_info.get(href) # 进入视频截图页面
driver_info.set_page_load_timeout(10)
while True:
try:
driver_info.get(href) # 进入视频截图页面
break
except TimeoutException:
print("加载超时,启动F5刷新,等待5秒")
pyautogui.click(x=509, y=33)
pyautogui.hotkey('f5')
driver_info.get(href)
time.sleep(5)
wait = WebDriverWait(driver_info, 10) # 等待浏览器相应,删除也可以
pyautogui.rightClick(x=500, y=500) # 右击图片,位置可根据自己的屏幕调整
pyautogui.typewrite(['V']) # 另存为的快捷键为 V
Expand All @@ -61,13 +81,25 @@ def scrapy(driver):
pyautogui.hotkey('ctrlleft', 'V') # 粘贴
time.sleep(1)
pyautogui.press('enter') # 确认
with open(custom_path + 'history.txt', 'a+') as f:
f.writelines(name[i])
f.writelines('\n')
f.close()
time.sleep(1)
while True:
filelist = os.listdir(custom_path)
if name[i] + '.jpg' in filelist:
with open(custom_path + 'history.txt', 'a+') as f:
f.writelines(name[i])
f.writelines('\n')
f.close()
print("%s 下载完成!" % (name[i]))
break
else:
print("等待响应")
time.sleep(2)
pyautogui.hotkey('ctrlleft', 'V') # 粘贴
time.sleep(1)
pyautogui.press('enter') # 确认
time.sleep(1)
# 在txt中加入当前下载的图片名字
print("%s 下载完成!" % (name[i]))
time.sleep(0.2)
time.sleep(2)
driver_info.quit()
print("第 %d 页爬完" % (page + 1))
button = "//*[@href='https://fc2club.com/index.php?m=content&c=index&a=lists&catid=12&page=" + str(page + 2) + "']" #翻页按钮
Expand Down
Binary file modified __pycache__/AutoSearchAndDownload.cpython-37.pyc
Binary file not shown.
Binary file modified __pycache__/AutoSearchAndDownload_Thunder.cpython-37.pyc
Binary file not shown.
Binary file modified __pycache__/Crawl_141jav.cpython-37.pyc
Binary file not shown.
Binary file modified __pycache__/Crawl_51luxu.cpython-37.pyc
Binary file not shown.
Binary file modified __pycache__/Crawl_fc2.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/pyautogui.cpython-37.pyc
Binary file not shown.

0 comments on commit 6973857

Please sign in to comment.