diff --git a/AVDC_Main.py b/AVDC_Main.py index 4b5843444..c2b678339 100644 --- a/AVDC_Main.py +++ b/AVDC_Main.py @@ -33,7 +33,7 @@ def __init__(self, parent=None): self.Init_Ui() self.set_style() # 初始化需要的变量 - self.version = '3.963' + self.version = '3.964' self.m_drag = False self.m_DragPosition = 0 self.count_claw = 0 # 批量刮削次数 @@ -341,20 +341,18 @@ def Load_Config(self): self.Ui.comboBox_website_all.setCurrentIndex(0) elif config['common']['website'] == 'mgstage': self.Ui.comboBox_website_all.setCurrentIndex(1) - elif config['common']['website'] == 'fc2club': - self.Ui.comboBox_website_all.setCurrentIndex(2) elif config['common']['website'] == 'javbus': - self.Ui.comboBox_website_all.setCurrentIndex(3) + self.Ui.comboBox_website_all.setCurrentIndex(2) elif config['common']['website'] == 'jav321': - self.Ui.comboBox_website_all.setCurrentIndex(4) + self.Ui.comboBox_website_all.setCurrentIndex(3) elif config['common']['website'] == 'javdb': - self.Ui.comboBox_website_all.setCurrentIndex(5) + self.Ui.comboBox_website_all.setCurrentIndex(4) elif config['common']['website'] == 'avsox': - self.Ui.comboBox_website_all.setCurrentIndex(6) + self.Ui.comboBox_website_all.setCurrentIndex(5) elif config['common']['website'] == 'xcity': - self.Ui.comboBox_website_all.setCurrentIndex(7) + self.Ui.comboBox_website_all.setCurrentIndex(6) elif config['common']['website'] == 'dmm': - self.Ui.comboBox_website_all.setCurrentIndex(8) + self.Ui.comboBox_website_all.setCurrentIndex(7) self.Ui.lineEdit_success.setText(config['common']['success_output_folder']) self.Ui.lineEdit_fail.setText(config['common']['failed_output_folder']) # ========================================================================proxy @@ -514,8 +512,6 @@ def save_config_clicked(self): website = 'all' elif self.Ui.comboBox_website_all.currentText() == 'mgstage': # mgstage website = 'mgstage' - elif self.Ui.comboBox_website_all.currentText() == 'fc2club': # fc2club - website = 'fc2club' elif self.Ui.comboBox_website_all.currentText() == 'javbus': # javbus website = 'javbus' elif self.Ui.comboBox_website_all.currentText() == 'jav321': # jav321 @@ -1233,8 +1229,14 @@ def PrintFiles(self, path, name_file, cn_sub, leak, json_data, filepath, failed_ # ========================================================================thumb复制为fanart def copyRenameJpgToFanart(self, path, naming_rule): - if not os.path.exists(path + '/' + naming_rule + '-fanart.jpg'): - shutil.copy(path + '/' + naming_rule + '-thumb.jpg', path + '/' + naming_rule + '-fanart.jpg') + try: + if not os.path.exists(path + '/' + naming_rule + '-fanart.jpg'): + shutil.copy(path + '/' + naming_rule + '-thumb.jpg', path + '/' + naming_rule + '-fanart.jpg') + self.add_text_main('[+]Fanart Copied! ' + naming_rule + '-fanart.jpg') + else: + self.add_text_main('[+]Fanart Existed! ' + naming_rule + '-fanart.jpg') + except Exception as error_info: + self.add_text_main('[-]Error in copyRenameJpgToFanart: ' + str(error_info)) # ========================================================================移动视频、字幕 def pasteFileToFolder(self, filepath, path, naming_rule, failed_folder): @@ -1417,7 +1419,7 @@ def creatFolder(self, success_folder, json_data, config): # ========================================================================从指定网站获取json_data def get_json_data(self, mode, number, config, appoint_url): - if mode == 6: # javdb模式 + if mode == 5: # javdb模式 self.add_text_main('[!]Please Wait Three Seconds!') time.sleep(3) json_data = getDataFromJSON(number, config, mode, appoint_url) diff --git a/Function/Function.py b/Function/Function.py index ceb3b84b4..f70eba8d8 100644 --- a/Function/Function.py +++ b/Function/Function.py @@ -5,7 +5,7 @@ import json from PIL import Image from configparser import ConfigParser -from Getter import avsox, javbus, javdb, fc2fans_club, mgstage, dmm, jav321, xcity +from Getter import avsox, javbus, javdb, mgstage, dmm, jav321, xcity # ========================================================================获取config @@ -162,9 +162,7 @@ def getDataFromJSON(file_number, config, mode, appoint_url): # 从JSON返回元 json_data = json.loads(javbus.main(file_number, appoint_url)) # =======================================================================FC2-111111 elif 'FC2' in file_number.upper(): - json_data = json.loads(fc2fans_club.main(re.search('\d{4,}', file_number).group(), appoint_url)) - if getDataState(json_data) == 0: - json_data = json.loads(javdb.main(file_number, appoint_url)) + json_data = json.loads(javdb.main(file_number, appoint_url)) # =======================================================================ssni00321 elif re.match('\D{2,}00\d{3,}', file_number) and '-' not in file_number and '_' not in file_number: json_data = json.loads(dmm.main(file_number, appoint_url)) @@ -192,27 +190,25 @@ def getDataFromJSON(file_number, config, mode, appoint_url): # 从JSON返回元 } elif mode == 2: # 仅从mgstage json_data = json.loads(mgstage.main(file_number, appoint_url)) - elif mode == 3: # 仅从fc2club - json_data = json.loads(fc2fans_club.main(file_number, appoint_url)) - elif mode == 4: # 仅从javbus + elif mode == 3: # 仅从javbus if isuncensored: json_data = json.loads(javbus.main_uncensored(file_number, appoint_url)) elif re.search('\D+\.\d{2}\.\d{2}\.\d{2}', file_number): json_data = json.loads(javbus.main_us(file_number, appoint_url)) else: json_data = json.loads(javbus.main(file_number, appoint_url)) - elif mode == 5: # 仅从jav321 + elif mode == 4: # 仅从jav321 json_data = json.loads(jav321.main(file_number, isuncensored, appoint_url)) - elif mode == 6: # 仅从javdb + elif mode == 5: # 仅从javdb if re.search('\D+\.\d{2}\.\d{2}\.\d{2}', file_number): json_data = json.loads(javdb.main_us(file_number, appoint_url)) else: - json_data = json.loads(javdb.main(file_number, isuncensored, appoint_url)) - elif mode == 7: # 仅从avsox + json_data = json.loads(javdb.main(file_number, appoint_url, isuncensored)) + elif mode == 6: # 仅从avsox json_data = json.loads(avsox.main(file_number, appoint_url)) - elif mode == 8: # 仅从xcity + elif mode == 7: # 仅从xcity json_data = json.loads(xcity.main(file_number, appoint_url)) - elif mode == 9: # 仅从dmm + elif mode == 8: # 仅从dmm json_data = json.loads(dmm.main(file_number, appoint_url)) # ================================================网站规则添加结束================================================ diff --git a/Function/getHtml.py b/Function/getHtml.py index 0548efe03..3441a0a52 100644 --- a/Function/getHtml.py +++ b/Function/getHtml.py @@ -1,6 +1,7 @@ import requests import os from configparser import ConfigParser +import cloudscraper # ========================================================================获取config @@ -32,6 +33,16 @@ def get_proxies(proxy_type, proxy): # ========================================================================网页请求 +# 破解cf5秒盾 +def get_html_javdb(url): + scraper = cloudscraper.create_scraper() + # 发送请求,获得响应 + response = scraper.get(url) + # 获得网页源代码 + html = response.text + return html + + def get_html(url, cookies=None): proxy_type = '' retry_count = 0 diff --git a/Getter/avsox.py b/Getter/avsox.py index 62187175f..a3f2ba855 100644 --- a/Getter/avsox.py +++ b/Getter/avsox.py @@ -96,7 +96,7 @@ def getTag(a): # 获取演员 def getUrl(number): - response = get_html('https://avsox.host/cn/search/' + number) + response = get_html('https://avsox.website/cn/search/' + number) html = etree.fromstring(response, etree.HTMLParser()) # //table/tr[1]/td[1]/text() url_list = html.xpath('//*[@id="waterfall"]/div/a/@href') if len(url_list) > 0: @@ -105,11 +105,12 @@ def getUrl(number): '//*[@id="waterfall"]/div[' + str(i) + ']/a/div[@class="photo-info"]/span/date[1]/text()')).strip( " ['']") if number.upper() == number_get.upper(): - return i, response, str(html.xpath('//*[@id="waterfall"]/div[' + str(i) + ']/a/@href')).strip(" ['']") + page_url = 'https:' + url_list[i-1] + return i, response, page_url return 0, response, '' -def main(number, appoint_url): +def main(number, appoint_url=''): try: count, response, url = getUrl(number) if str(response) == 'ProxyError': @@ -122,7 +123,6 @@ def main(number, appoint_url): soup = BeautifulSoup(web, 'lxml') info = str(soup.find(attrs={'class': 'row movie'})) number = getNum(web) - print(1) dic = { 'actor': getActor(web), 'title': getTitle(web).strip(number).strip().replace(' ', '-'), @@ -143,7 +143,7 @@ def main(number, appoint_url): 'outline': '', 'score': '', 'website': url, - 'source': 'avsox.py', + 'source': 'avsox.website', } except TimeoutError: dic = { @@ -162,4 +162,4 @@ def main(number, appoint_url): # print(main('051119-917')) # print(main('032620_001')) -# print(main('032620_001', 'https://avsox.host/cn/movie/cb8d28437cff4e90')) \ No newline at end of file +# print(main('032620_001', 'https://avsox.website/cn/movie/cb8d28437cff4e90')) \ No newline at end of file diff --git a/Getter/dmm.py b/Getter/dmm.py index 87929b56e..ba72df6d7 100644 --- a/Getter/dmm.py +++ b/Getter/dmm.py @@ -4,173 +4,290 @@ from lxml import etree import json from Function.getHtml import get_html +from urllib.parse import urlencode -def getTitle(a): - html = etree.fromstring(a, etree.HTMLParser()) - result = html.xpath('//*[@id="title"]/text()')[0] +def getTitle(text): + html = etree.fromstring(text, etree.HTMLParser()) + result = html.xpath('//*[starts-with(@id, "title")]/text()')[0] return result -def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() - html = etree.fromstring(a, etree.HTMLParser()) - result = str(html.xpath("//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()")).strip(" ['']").replace( - "', '", ',') +def getActor(text): + # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() + html = etree.fromstring(text, etree.HTMLParser()) + result = ( + str( + html.xpath( + "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()" + ) + ) + .strip(" ['']") + .replace("', '", ",") + ) return result -def getActorPhoto(actor): # //*[@id="star_qdt"]/li/a/img - actor = actor.split(',') - d = {} - for i in actor: - if ',' not in i: - p = {i: ''} - d.update(p) - return d - - -def getStudio(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +def getStudio(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() try: - result1 = html.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/a/text()")[0] + result = html.xpath( + "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()" + )[0] except: - result1 = html.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/text()")[0] - return result1 - - -def getPublisher(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - try: - result1 = html.xpath("//td[contains(text(),'レーベル')]/following-sibling::td/a/text()")[0] - except: - result1 = html.xpath("//td[contains(text(),'レーベル')]/following-sibling::td/text()")[0] - return result1 + result = html.xpath( + "//td[contains(text(),'メーカー')]/following-sibling::td/text()" + )[0] + return result -def getRuntime(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0] - return re.search('\d+', str(result1)).group() +def getRuntime(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0] + return re.search(r"\d+", str(result)).group() -def getSeries(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +def getLabel(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() try: - result1 = html.xpath("//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()")[0] + result = html.xpath( + "//td[contains(text(),'レーベル:')]/following-sibling::td/a/text()" + )[0] except: - result1 = html.xpath("//td[contains(text(),'シリーズ:')]/following-sibling::td/text()")[0] - return result1 + result = html.xpath( + "//td[contains(text(),'レーベル:')]/following-sibling::td/text()" + )[0] + return result -def getNum(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +def getNum(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() try: - result1 = html.xpath("//td[contains(text(),'品番:')]/following-sibling::td/a/text()")[0] + result = html.xpath( + "//td[contains(text(),'品番:')]/following-sibling::td/a/text()" + )[0] except: - result1 = html.xpath("//td[contains(text(),'品番:')]/following-sibling::td/text()")[0] - return result1 + result = html.xpath( + "//td[contains(text(),'品番:')]/following-sibling::td/text()" + )[0] + return result def getYear(getRelease): try: - result = str(re.search('\d{4}', getRelease).group()) + result = str(re.search(r"\d{4}", getRelease).group()) return result except: return getRelease -def getRelease(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +def getRelease(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + try: + result = html.xpath( + "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()" + )[0].lstrip("\n") + except: + try: + result = html.xpath( + "//td[contains(text(),'発売日:')]/following-sibling::td/text()" + )[0].lstrip("\n") + except: + result = "----" + if result == "----": + try: + result = html.xpath( + "//td[contains(text(),'配信開始日:')]/following-sibling::td/a/text()" + )[0].lstrip("\n") + except: + try: + result = html.xpath( + "//td[contains(text(),'配信開始日:')]/following-sibling::td/text()" + )[0].lstrip("\n") + except: + pass + return result.replace("/", "-") + + +def getTag(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() try: - result1 = html.xpath("//td[contains(text(),'発売日:')]/following-sibling::td/a/text()")[0].lstrip('\n') + result = html.xpath( + "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()" + ) except: - result1 = html.xpath("//td[contains(text(),'発売日:')]/following-sibling::td/text()")[0].lstrip('\n') - return result1 + result = html.xpath( + "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()" + ) + return result -def getTag(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +def getCover(text, number): + html = etree.fromstring(text, etree.HTMLParser()) + cover_number = number try: - result1 = str(html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()")).strip(" ['']") + result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0] except: - result1 = str(html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/text()")).strip(" ['']") - return result1.replace("', '", ",") + # sometimes fanza modify _ to \u0005f for image id + if "_" in cover_number: + cover_number = cover_number.replace("_", r"\u005f") + try: + result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0] + except: + # (TODO) handle more edge case + # print(html) + # raise exception here, same behavior as before + # people's major requirement is fetching the picture + raise ValueError("can not find image") + return result -def getCover(htmlcode, number): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = html.xpath('//*[@id="' + number + '"]/@href')[0] +def getDirector(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + try: + result = html.xpath( + "//td[contains(text(),'監督:')]/following-sibling::td/a/text()" + )[0] + except: + result = html.xpath( + "//td[contains(text(),'監督:')]/following-sibling::td/text()" + )[0] return result -def getExtraFanart(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - old_list = html.xpath("//div[@id='sample-image-block']/a[@name='sample-image']/img/@src") - new_list = [] - for extrafanart in old_list: - new_list.append(extrafanart.replace('-', 'jp-')) - return new_list +def getOutline(text): + html = etree.fromstring(text, etree.HTMLParser()) + try: + result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace( + "\n", "" + ) + if result == "": + result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace( + "\n", "" + ) + except: + # (TODO) handle more edge case + # print(html) + return "" + return result -def getDirector(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() +def getSeries(text): try: - result1 = html.xpath("//td[contains(text(),'監督:')]/following-sibling::td/a/text()")[0] + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + try: + result = html.xpath( + "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()" + )[0] + except: + result = html.xpath( + "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()" + )[0] + return result except: - result1 = html.xpath("//td[contains(text(),'監督:')]/following-sibling::td/text()")[0] - return result1 + return "" + + +def getExtrafanart(htmlcode): # 获取剧照 + html_pather = re.compile(r'
') + html = html_pather.search(htmlcode) + if html: + html = html.group() + extrafanart_pather = re.compile(r':(\d+)分', htmlcode): - score = str(re.findall(r'影片评分:(\d+)分', htmlcode)).strip(" ['']") - score = float(int(score) / 100.0 * 5) - return str(score) - - -def getExtraFanart(htmlcode): # 获取剧照 - html = etree.fromstring(htmlcode, etree.HTMLParser()) - list_tmp = html.xpath("//div[@id='slider']/ul[@class='slides']/li/img[@class='responsive']/@src") - extrafanart_list = [] - for extrafanart in list_tmp: - extrafanart = 'https://fc2club.com' + extrafanart - extrafanart_list.append(extrafanart) - return extrafanart_list - - -def getTag(htmlcode): # 获取番号 - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()')) - return result.strip(" ['']").replace("'", '').replace(' ', '') - - -def getYear(release): - try: - result = re.search('\d{4}', release).group() - return result - except: - return '' - - -def main(number, appoint_url): - try: - url = 'https://fc2club.com//html/FC2-' + number + '.html' - if appoint_url: - url = appoint_url - htmlcode = get_html(url) - if str(htmlcode) == 'ProxyError': - raise TimeoutError - actor = getActor(htmlcode) - if len(actor) == 0: - actor = 'FC2系列' - dic = { - 'title': getTitle(htmlcode).strip(' '), - 'studio': getStudio(htmlcode), - 'score': getScore(htmlcode), - 'runtime': getYear(getRelease(htmlcode)), - 'actor': actor.replace('/', ','), - 'release': getRelease(number), - 'number': 'FC2-' + number, - 'tag': getTag(htmlcode), - 'actor_photo': getActorPhoto(actor), - 'cover': getCover(htmlcode), - 'extrafanart': getExtraFanart(htmlcode), - 'imagecut': 0, - 'director': '', - 'series': '', - 'publisher': '', - 'year': '', - 'outline': '', - 'website': 'https://fc2club.com//html/FC2-' + number + '.html', - 'source': 'fc2fans_club.py', - } - except TimeoutError: - dic = { - 'title': '', - 'website': 'timeout', - } - except Exception as error_info: - print('Error in fc2fans_club.main : ' + str(error_info)) - dic = { - 'title': '', - 'website': '', - } - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) - return js - -# print(main('1251689', '')) -# print(main('674239', "")) -# print(main('674239', "https://fc2club.com//html/FC2-674239.html")) diff --git a/Getter/jav321.py b/Getter/jav321.py index 84e1b22b5..05c8e7255 100644 --- a/Getter/jav321.py +++ b/Getter/jav321.py @@ -23,7 +23,7 @@ def getActor(response): return str(re.findall(r'(\S+)  ', response)).strip(" [',']").replace('\'', '') else: - return str(re.findall(r'女优: ([^<]+)  
', response)).strip(" [',']").replace('\'', '') + return str(re.findall(r'出演者: ([^<]+)  
', response)).strip(" [',']").replace('\'', '') def getStudio(response): @@ -31,7 +31,7 @@ def getStudio(response): def getRuntime(response): - return str(re.findall(r'播放时长: (\d+) \S+
', response)).strip(" ['']") + return str(re.findall(r'収録時間: (\d+) \S+
', response)).strip(" ['']") def getSeries(response): @@ -43,15 +43,15 @@ def getWebsite(detail_page): def getNum(response): - return str(re.findall(r'番号: (\S+)
', response)).strip(" ['']").upper() + return str(re.findall(r'品番: (\S+)
', response)).strip(" ['']").upper() def getScore(response): - if re.search(r'评分: ', response): - score = re.findall(r'评分: ', response)[0] + if re.search(r'平均評価: ', response): + score = re.findall(r'平均評価: ', response)[0] return str(float(score) / 10.0) else: - return str(re.findall(r'评分: ([^<]+)
', response)).strip(" [',']").replace('\'', '') + return str(re.findall(r'平均評価: ([^<]+)
', response)).strip(" [',']").replace('\'', '') def getYear(release): @@ -63,7 +63,7 @@ def getYear(release): def getRelease(response): - return str(re.findall(r'发行日期: (\d+-\d+-\d+)
', response)).strip(" ['']").replace('0000-00-00', '') + return str(re.findall(r'配信開始日: (\d+-\d+-\d+)
', response)).strip(" ['']").replace('0000-00-00', '') def getCover(detail_page): @@ -76,7 +76,8 @@ def getCover(detail_page): def getExtraFanart(htmlcode): - extrafanart_list = htmlcode.xpath("/html/body/div[@class='row'][2]/div[@class='col-md-3']/div[@class='col-xs-12 col-md-12']/p/a/img[@class='img-responsive']/@src") + extrafanart_list = htmlcode.xpath( + "/html/body/div[@class='row'][2]/div[@class='col-md-3']/div[@class='col-xs-12 col-md-12']/p/a/img[@class='img-responsive']/@src") return extrafanart_list @@ -85,7 +86,7 @@ def getCoverSmall(detail_page): "@class='img-responsive']/@src")).strip(" ['']") -def getTag(response): # 获取演员 +def getTag(response): # 获取标签 return re.findall(r'(\S+)', response) @@ -161,4 +162,5 @@ def main(number, appoint_url, isuncensored=False): print(main('ABP-905')) ''' # print(main('heyzo-1031', '')) +# print(main('ssni-645', '')) # print(main('ymdd-173', 'https://www.jav321.com/video/ymdd00173')) diff --git a/Getter/javbus.py b/Getter/javbus.py index 9caa50767..27e66294d 100644 --- a/Getter/javbus.py +++ b/Getter/javbus.py @@ -15,7 +15,7 @@ def getActorPhoto(htmlcode): l = i.a['href'] t = i.get_text() html = etree.fromstring(get_html(l), etree.HTMLParser()) - p = str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']") + p = 'https://javbus.com' + str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']") p2 = {t: p} d.update(p2) return d @@ -54,7 +54,7 @@ def getYear(getRelease): # 获取年份 def getCover(htmlcode): # 获取封面链接 doc = pq(htmlcode) image = doc('a.bigImage') - return image.attr('href') + return 'https://javbus.com' + image.attr('href') def getExtraFanart(htmlcode): # 获取封面链接 @@ -103,11 +103,11 @@ def getOutlineScore(number): # 获取简介 response = post_html("https://www.jav321.com/search", query={"sn": number}) detail_page = etree.fromstring(response, etree.HTMLParser()) outline = str(detail_page.xpath('/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()')).strip(" ['']") - if re.search(r'评分: ', response): - score = re.findall(r'评分: ', response)[0] + if re.search(r'平均評価: ', response): + score = re.findall(r'平均評価: ', response)[0] score = str(float(score) / 10.0) else: - score = str(re.findall(r'评分: ([^<]+)
', response)).strip(" [',']").replace('\'', '') + score = str(re.findall(r'平均評価: ([^<]+)
', response)).strip(" [',']").replace('\'', '') if outline == '': dmm_htmlcode = get_html( "https://www.dmm.co.jp/search/=/searchstr=" + number.replace('-', '') + "/sort=ranking/") @@ -118,7 +118,9 @@ def getOutlineScore(number): # 获取简介 if url_detail != '': dmm_detail = get_html(url_detail) html = etree.fromstring(dmm_detail, etree.HTMLParser()) - outline = str(html.xpath('//*[@class="mg-t0 mg-b20"]/text()')).strip(" ['']").replace('\\n', '').replace('\n', '') + outline = str(html.xpath('//*[@class="mg-t0 mg-b20"]/text()')).strip(" ['']").replace('\\n', + '').replace( + '\n', '') except Exception as error_info: print('Error in javbus.getOutlineScore : ' + str(error_info)) return outline, score @@ -132,15 +134,17 @@ def getSeries(htmlcode): def getCover_small(number): # 从avsox获取封面图 try: - htmlcode = get_html('https://avsox.host/cn/search/' + number) + htmlcode = get_html('https://avsox.website/cn/search/' + number) html = etree.fromstring(htmlcode, etree.HTMLParser()) counts = len(html.xpath("//div[@id='waterfall']/div/a/div")) if counts == 0: return '' for count in range(1, counts + 1): # 遍历搜索结果,找到需要的番号 - number_get = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-info']/span/date[1]/text()") + number_get = html.xpath( + "//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-info']/span/date[1]/text()") if len(number_get) > 0 and number_get[0].upper() == number.upper(): - cover_small = html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-frame']/img/@src")[0] + cover_small = \ + html.xpath("//div[@id='waterfall']/div[" + str(count) + "]/a/div[@class='photo-frame']/img/@src")[0] return cover_small except Exception as error_info: print('Error in javbus.getCover_small : ' + str(error_info)) @@ -166,12 +170,15 @@ def find_number(number): counts = len(html.xpath("//div[@id='waterfall']/div[@id='waterfall']/div")) if counts != 0: for count in range(1, counts + 1): # 遍历搜索结果,找到需要的番号 - number_get = html.xpath("//div[@id='waterfall']/div[@id='waterfall']/div[" + str(count) + "]/a[@class='movie-box']/div[@class='photo-info']/span/date[1]/text()")[0] + number_get = html.xpath("//div[@id='waterfall']/div[@id='waterfall']/div[" + str( + count) + "]/a[@class='movie-box']/div[@class='photo-info']/span/date[1]/text()")[0] number_get = number_get.upper() number = number.upper() - if number_get == number or number_get == number.replace('-', '') or number_get == number.replace('_', ''): + if number_get == number or number_get == number.replace('-', '') or number_get == number.replace('_', + ''): result_url = html.xpath( - "//div[@id='waterfall']/div[@id='waterfall']/div[" + str(count) + "]/a[@class='movie-box']/@href")[0] + "//div[@id='waterfall']/div[@id='waterfall']/div[" + str( + count) + "]/a[@class='movie-box']/@href")[0] return result_url # =======================================================================无码搜索 htmlcode = get_html('https://www.javbus.com/uncensored/search/' + number + '&type=1') @@ -180,7 +187,8 @@ def find_number(number): if counts == 0: return 'not found' for count in range(1, counts + 1): # 遍历搜索结果,找到需要的番号 - number_get = html.xpath("//div[@id='waterfall']/div[@id='waterfall']/div[" + str(count) + "]/a[@class='movie-box']/div[@class='photo-info']/span/date[1]/text()")[0] + number_get = html.xpath("//div[@id='waterfall']/div[@id='waterfall']/div[" + str( + count) + "]/a[@class='movie-box']/div[@class='photo-info']/span/date[1]/text()")[0] number_get = number_get.upper() number = number.upper() if number_get == number or number_get == number.replace('-', '') or number_get == number.replace('_', ''): @@ -299,9 +307,8 @@ def main_uncensored(number, appoint_url): return js -def main_us(number, appoint_url): +def main_us(number, appoint_url=''): try: - result_url = '' if appoint_url: result_url = appoint_url else: diff --git a/Getter/javdb.py b/Getter/javdb.py index da5fe98bb..1eb720c65 100644 --- a/Getter/javdb.py +++ b/Getter/javdb.py @@ -2,7 +2,7 @@ from bs4 import BeautifulSoup, SoupStrainer from lxml import etree import json -from Function.getHtml import get_html +from Function.getHtml import get_html_javdb from Function.getHtml import post_html @@ -17,8 +17,10 @@ def getTitle(htmlcode): def getActor(htmlcode): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = html.xpath('//strong[contains(text(),"演員")]/../span/text()') - result2 = html.xpath('//strong[contains(text(),"演員")]/../span/a/text()') + result1 = html.xpath( + '//strong[text()="演員:"]/../span/strong[@class="symbol female"][last()]/preceding-sibling::a/text()') + result2 = html.xpath( + '//strong[text()="Actor(s):"]/../span/strong[@class="symbol female"][last()]/preceding-sibling::a/text()') return result1 + result2 @@ -33,78 +35,80 @@ def getActorPhoto(actor): # //*[@id="star_qdt"]/li/a/img def getStudio(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']") + result1 = str(html.xpath('//strong[contains(text(),"片商:")]/../span/a/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"Maker:")]/../span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') def getPublisher(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"發行")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"發行")]/../span/a/text()')).strip(" ['']") + result1 = str(html.xpath('//strong[contains(text(),"發行:")]/../span/a/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"Publisher:")]/../span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') def getRuntime(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"時長")]/../span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').rstrip('mi') + result2 = str(html.xpath('//strong[contains(text(),"Duration:")]/../span/text()')).strip(" ['']") + return str(result1 + result2).strip('+').rstrip('mi').rstrip(' minute(s)') def getSeries(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']") + result1 = str(html.xpath('//strong[contains(text(),"系列:")]/../span/a/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"Series:")]/../span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') def getNumber(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) - result1 = str(html.xpath('//strong[contains(text(),"番號")]/../span/text()')).strip( + result1 = str(html.xpath('//strong[contains(text(),"番號:")]/../span/a/text()')).strip( " ['']").replace('_', '-') - result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip( + result2 = str(html.xpath('//strong[contains(text(),"ID:")]/../span/a/text()')).strip( " ['']").replace('_', '-') return str(result2 + result1).strip('+') -def getYear(getRelease): +def getYear(release): try: - result = str(re.search('\d{4}', getRelease).group()) + result = str(re.search('\d{4}', release).group()) return result except: - return getRelease + return release def getRelease(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']") + result1 = str(html.xpath('//strong[contains(text(),"日期:")]/../span/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"Released Date:")]/../span/text()')).strip(" ['']") return str(result1 + result2).strip('+') def getTag(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"類別")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')).strip(" ['']") + result1 = str(html.xpath('//strong[contains(text(),"類別:")]/../span/a/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"Tags:")]/../span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip( ',') -def getCover_small(htmlcode, count): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = html.xpath("//div[@class='grid-item column']/a[@class='box']/div/img/@data-src")[count] - if 'thumbs' not in result: - result = html.xpath("//div[@class='grid-item column']/a[@class='box']/div/img/@src")[count] - if not 'https' in result: - result = 'https:' + result - return result +# def getCover_small(htmlcode, count): +# html = etree.fromstring(htmlcode, etree.HTMLParser()) +# result = html.xpath("//div[@class='grid-item column']/a[@class='box']/div/img/@data-src")[count] +# if 'thumbs' not in result: +# result = html.xpath("//div[@class='grid-item column']/a[@class='box']/div/img/@src")[count] +# if not 'https' in result: +# result = 'https:' + result +# return result def getCover(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath("//img[@class='video-cover']/@src")).strip(" ['']") + result = html.xpath("//img[@class='video-cover']/@src") + if len(result) >= 1: + result = result[0] # 有时xpath找不到元素,所以要用bs4 if not result: soup = BeautifulSoup(htmlcode, 'lxml', parse_only=SoupStrainer('img', {'class': 'video-cover'})) @@ -116,13 +120,15 @@ def getCover(htmlcode): def getExtraFanart(htmlcode): # 获取封面链接 html = etree.fromstring(htmlcode, etree.HTMLParser()) extrafanart_list = html.xpath("//div[@class='message-body']/div[@class='tile-images preview-images']/a/@href") + if '#preview-video' in extrafanart_list: + extrafanart_list.remove('#preview-video') return extrafanart_list def getDirector(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']") + result1 = str(html.xpath('//strong[contains(text(),"導演:")]/../span/a/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"Director:")]/../span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') @@ -142,11 +148,11 @@ def getOutlineScore(number): # 获取简介 response = post_html("https://www.jav321.com/search", query={"sn": number}) detail_page = etree.fromstring(response, etree.HTMLParser()) outline = str(detail_page.xpath('/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()')).strip(" ['']") - if re.search(r'评分: ', response): - score = re.findall(r'评分: ', response)[0] + if re.search(r'平均評価: ', response): + score = re.findall(r'平均評価: ', response)[0] score = str(float(score) / 10.0) else: - score = str(re.findall(r'评分: ([^<]+)
', response)).strip(" [',']").replace('\'', '') + score = str(re.findall(r'平均評価: ([^<]+)
', response)).strip(" [',']").replace('\'', '') except Exception as error_info: print('Error in javdb.getOutlineScore : ' + str(error_info)) return outline, score @@ -154,10 +160,9 @@ def getOutlineScore(number): # 获取简介 def main(number, appoint_url, isuncensored=False): try: - result_url = '' if appoint_url == '': # ========================================================================搜索番号 - htmlcode = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ') + htmlcode = get_html_javdb('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ') if str(htmlcode) == 'ProxyError': raise TimeoutError html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() @@ -167,7 +172,6 @@ def main(number, appoint_url, isuncensored=False): raise Exception('Movie Data not found in javdb.main!') # ========================================================================遍历搜索结果,找到需要的番号所在URL count = 1 - number_get = '' movie_found = 0 for count in range(1, counts + 1): number_get = html.xpath( @@ -182,7 +186,7 @@ def main(number, appoint_url, isuncensored=False): else: result_url = appoint_url # ========================================================================请求、判断结果 - html_info = get_html(result_url).replace(u'\xa0', u' ') + html_info = get_html_javdb(result_url + '?locale=zh').replace(u'\xa0', u' ') if str(html_info) == 'ProxyError': raise TimeoutError # ========================================================================获取评分、简介 @@ -239,14 +243,13 @@ def main(number, appoint_url, isuncensored=False): return js -def main_us(number, appoint_url): +def main_us(number, appoint_url=''): try: - result_url = '' if appoint_url: result_url = appoint_url else: # ========================================================================搜索番号 - htmlcode = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ') + htmlcode = get_html_javdb('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ') if str(htmlcode) == 'ProxyError': raise TimeoutError html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() @@ -264,21 +267,35 @@ def main_us(number, appoint_url): series_get = html.xpath( '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\'][' + str( count) + ']/a[@class=\'box\']/div[@class=\'uid2\']/text()')[0] + if not series_get: + series_get = html.xpath( + '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column horz-cover\'][' + str( + count) + ']/a[@class=\'box\']/div[@class=\'uid2\']/text()')[0] date_get = html.xpath( '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\'][' + str( count) + ']/a[@class=\'box\']/div[@class=\'meta\']/text()')[0] + if not date_get: + date_get = html.xpath( + '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column horz-cover\'][' + str( + count) + ']/a[@class=\'box\']/div[@class=\'meta\']/text()')[0] if re.search('\d{4}-\d{1,2}-\d{1,2}', date_get): date_get = re.findall('\d{4}-\d{1,2}-\d{1,2}', date_get)[0] + elif re.search('\d{1,2}/\d{1,2}/\d{4}', date_get): + date_get_tmp = date_get + date_get = re.findall('\d{1,2}/\d{1,2}', date_get_tmp)[0] + year_get = re.findall('\d{4}', date_get_tmp)[0] + date_get = year_get + '-' + date_get series_get = series_get.replace(' ', '') - if (series_get.upper() == number_series.upper() or series_get.replace('-', '').upper() == number_series.upper()) \ - and number_date == date_get: + date_get = date_get.replace('/', '-') + if (series_get.upper() == number_series.upper() + or series_get.replace('-', '').upper() == number_series.upper()) and number_date == date_get: movie_found = 1 break if movie_found == 0: raise Exception('Movie Data not found in javdb.main_us!') result_url = 'https://javdb.com' + html.xpath('//*[@id="videos"]/div/div/a/@href')[count - 1] # ========================================================================请求、判断结果 - html_info = get_html(result_url).replace(u'\xa0', u' ') + html_info = get_html_javdb(result_url + '?locale=zh').replace(u'\xa0', u' ') if str(html_info) == 'ProxyError': raise TimeoutError # ========================================================================收集信息 @@ -334,13 +351,15 @@ def main_us(number, appoint_url): print(main('S2M-055')) print(main('LUXU-1217')) ''' +# print(main('IPX-604', '')) +# print(main('SSIS-084', '')) # print(main('abs-141', '')) # print(main('HYSD-00083', '')) # print(main('IESP-660', '')) # print(main('n1403', '')) # print(main('GANA-1910', '')) # print(main('heyzo-1031', '')) -# print(main_us('x-art.19.11.03')) +# print(main_us('x-art.19.11.03', '')) # print(main('032020-001', '')) # print(main('S2M-055', '')) # print(main('LUXU-1217', '')) diff --git a/Getter/mgstage.py b/Getter/mgstage.py index d64a54df9..698d65dc5 100644 --- a/Getter/mgstage.py +++ b/Getter/mgstage.py @@ -23,7 +23,7 @@ def getActor(htmlcode): def getActorPhoto(actor): d = {} for i in actor: - if ',' not in i or ')' in i: + if i and ',' not in i or ')' in i: p = {i: ''} d.update(p) return d diff --git a/Ui/AVDC.py b/Ui/AVDC.py index a3999b19c..1a6ca652e 100644 --- a/Ui/AVDC.py +++ b/Ui/AVDC.py @@ -16,7 +16,7 @@ def setupUi(self, AVDV): AVDV.setObjectName("AVDV") AVDV.resize(1025, 720) icon = QtGui.QIcon() - icon.addPixmap(QtGui.QPixmap("Img/ico.png"), QtGui.QIcon.Normal, QtGui.QIcon.Off) + icon.addPixmap(QtGui.QPixmap("../Img/ico.png"), QtGui.QIcon.Normal, QtGui.QIcon.Off) AVDV.setWindowIcon(icon) self.centralwidget = QtWidgets.QWidget(AVDV) self.centralwidget.setObjectName("centralwidget") @@ -213,7 +213,6 @@ def setupUi(self, AVDV): self.comboBox_website.addItem("") self.comboBox_website.addItem("") self.comboBox_website.addItem("") - self.comboBox_website.addItem("") self.label_2 = QtWidgets.QLabel(self.groupBox_7) self.label_2.setGeometry(QtCore.QRect(230, 90, 72, 21)) self.label_2.setObjectName("label_2") @@ -852,13 +851,12 @@ def retranslateUi(self, AVDV): self.pushButton_select_file.setText(_translate("AVDV", "选择文件")) self.comboBox_website.setItemText(0, _translate("AVDV", "All websites")) self.comboBox_website.setItemText(1, _translate("AVDV", "mgstage")) - self.comboBox_website.setItemText(2, _translate("AVDV", "fc2club")) - self.comboBox_website.setItemText(3, _translate("AVDV", "javbus")) - self.comboBox_website.setItemText(4, _translate("AVDV", "jav321")) - self.comboBox_website.setItemText(5, _translate("AVDV", "javdb")) - self.comboBox_website.setItemText(6, _translate("AVDV", "avsox")) - self.comboBox_website.setItemText(7, _translate("AVDV", "xcity")) - self.comboBox_website.setItemText(8, _translate("AVDV", "dmm")) + self.comboBox_website.setItemText(2, _translate("AVDV", "javbus")) + self.comboBox_website.setItemText(3, _translate("AVDV", "jav321")) + self.comboBox_website.setItemText(4, _translate("AVDV", "javdb")) + self.comboBox_website.setItemText(5, _translate("AVDV", "avsox")) + self.comboBox_website.setItemText(6, _translate("AVDV", "xcity")) + self.comboBox_website.setItemText(7, _translate("AVDV", "dmm")) self.label_2.setText(_translate("AVDV", "刮削网站:")) self.label_10.setText(_translate("AVDV", "刮削网址:")) self.label_12.setText(_translate("AVDV", "影片番号:")) diff --git a/Ui/AVDC.ui b/Ui/AVDC.ui index 2cf394c5a..1ff1d2cdd 100644 --- a/Ui/AVDC.ui +++ b/Ui/AVDC.ui @@ -656,11 +656,6 @@ p, li { white-space: pre-wrap; } mgstage - - - fc2club - - javbus diff --git a/update_check.json b/update_check.json index 793dda56d..0c6326d17 100644 --- a/update_check.json +++ b/update_check.json @@ -1,5 +1,5 @@ { - "version": "3.963", - "version_show":"3.963", + "version": "3.964", + "version_show":"3.964", "download": "https://github.com/moyy996/AVDC/releases" }