-
Notifications
You must be signed in to change notification settings - Fork 0
/
MetaScore.py
127 lines (113 loc) · 4.49 KB
/
MetaScore.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import requests
import re
from urllib.parse import quote_plus, quote
import asyncio
import aiohttp
class MetaError(Exception):
    """Error raised when a Metacritic request does not return HTTP 200.

    Attributes:
        code: The HTTP status code of the failed response, or ``None`` when
            the exception was raised without one.
    """

    def __init__(self, code=None):
        # ``code`` is optional so that a bare ``raise MetaError`` produces a
        # MetaError instead of a TypeError about the missing argument.
        if code is None:
            super().__init__()
        else:
            super().__init__(code)
        self.code = code
def image_getter(req_text):
    """Return the image URL of the first result thumbnail in *req_text*.

    The pattern looks for an ``<img src="...`` tag on the line directly
    after a ``<div class="result_thumbnail">`` opener; the returned value is
    whatever follows the last double quote of that match.

    Raises:
        IndexError: If the markup contains no such thumbnail.
    """
    thumbnail_pattern = r'(?<=<div class="result_thumbnail">\n)[ ]+<img src="[-\d\w:./]+'
    matches = re.findall(thumbnail_pattern, req_text)
    first_tag = matches[0].lstrip()
    # The match ends inside the src attribute, so the text after the last
    # quote is the URL itself.
    return first_tag.rpartition('"')[2]
def metacritic_parse(req_text):
    """Scrape titles, scores and the first description from search HTML.

    Returns:
        A list of ``[title, score]`` pairs (paired by position) followed by
        one trailing description string. A score is an ``int`` when the
        scraped text is numeric; otherwise the raw text is kept as-is.
        The numerals `` IV``, `` III`` and `` II`` in titles are rewritten
        to `` 4``, `` 3`` and `` 2``.

    Raises:
        IndexError: If no description paragraph is found in *req_text*.
    """
    title_pattern = r'class="product_title basic_stat"\S\n\s[ ]+[-/\w< ="]+>\n[ ]+[-\w: ]+(?=\n)'
    description_pattern = r'(?<=<p class="deck basic_stat">)[-\w :,.]+(?=</p>)'
    score_pattern = r'(?<=<span class="metascore_w medium game)[\w ]+">[\w \d]+(?=</span>)'
    # Order matters: " III" has to be handled before " II".
    numeral_rewrites = ((' IV', ' 4'), (' III', ' 3'), (' II', ' 2'))

    titles = []
    for chunk in re.findall(title_pattern, req_text):
        # The title text sits on the last line of the matched chunk.
        name = chunk.rsplit("\n", 1)[-1].lstrip()
        for numeral, digit in numeral_rewrites:
            name = re.sub(numeral, digit, name)
        titles.append(name)

    description = re.findall(description_pattern, req_text)[0]

    scores = []
    for chunk in re.findall(score_pattern, req_text):
        text = chunk.rsplit(">", 1)[-1].lstrip()
        try:
            scores.append(int(text))
        except ValueError:
            # Non-numeric score text is passed through unchanged.
            scores.append(text)

    result = [[name, score] for name, score in zip(titles, scores)]
    result.append(description)
    return result
# Sample markup matched by the patterns in metacritic_top():
#   <div class="metascore_w small game positive">96</div>
#   <span class="data textscore textscore_outstanding">9.1</span>
async def metacritic_top():
    """Scrape the first ten pages of Metacritic's PC metascore ranking.

    The first page is requested without a query string; every later page
    ``i`` is requested with ``?page=i``.

    Returns:
        A list of ``[title, metascore, user_score]`` lists. ``metascore`` is
        an ``int``; ``user_score`` is the scraped user score multiplied by
        ten and truncated to ``int``. Entries whose user score is ``'tbd'``
        or not greater than 7.0 are left out.

    Raises:
        MetaError: If any page answers with a status other than 200; the
            status is passed as the error code.
    """
    headers = {
        "authority": "www.metacritic.com",
        "user-agent": "Mozilla / 5.0(Windows NT 10.0;Win64;x64)"
    }
    url = "https://www.metacritic.com/browse/games/score/metascore/all/pc/filtered"

    site_texts = []
    # A single session is shared by all page requests instead of opening
    # and closing a new one for every page.
    async with aiohttp.ClientSession() as session:
        for page in range(10):
            page_url = url if page == 0 else url + '?page=' + str(page)
            async with session.get(page_url, headers=headers) as resp:
                # Check the status before downloading the body of a failed
                # response.
                if resp.status != 200:
                    raise MetaError(resp.status)
                site_texts.append(await resp.text())

    titles = []
    msrating = []
    ursrating = []
    for site in site_texts:
        msrating.extend(
            re.findall(r'(?<=<div class="metascore_w small game positive">)..', site))
        ursrating.extend(
            re.findall(r'(?<=<span class="data textscore textscore_)[\w]+">...', site))
        title_chunks = re.findall(
            r'class="basic_stat product_title"\S\n\s[ ]+[-/\w< ="]+>\n[ ]+[-\w: ]+(?=\n)', site)
        # The title text sits on the last line of each matched chunk.
        titles.extend(chunk.split("\n")[-1].lstrip() for chunk in title_chunks)

    # Keep only the three characters captured after the closing '">'.
    ursrating = [match.split('>')[-1] for match in ursrating]
    msrating = [int(score) for score in msrating]

    # NOTE(review): the three lists are paired purely by position, so an
    # entry missing one of the pieces would shift the pairing - confirm
    # against the live markup.
    top_games = []
    for title, metascore, user_text in zip(titles, msrating, ursrating):
        if user_text == 'tbd':
            continue
        user_score = float(user_text)
        if user_score > 7.0:
            top_games.append([title, metascore, int(user_score * 10)])
    return top_games
async def metacritic_search(question):
    """Search Metacritic games for *question* and scrape the result page.

    Args:
        question: The search text; it is percent-encoded with ``quote``
            before being placed in the URL path.

    Returns:
        The list produced by ``metacritic_parse`` for the result page, with
        the value returned by ``image_getter`` appended as the last element.

    Raises:
        MetaError: If the response status is not 200; the status is passed
            as the error code.
    """
    headers = {
        "authority": "www.metacritic.com",
        "user-agent": "Mozilla / 5.0(Windows NT 10.0;Win64;x64)"
    }
    url = "https://www.metacritic.com/search/game/{}/results?plats[3]=1&search_type=advanced".format(quote(question))
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            if resp.status != 200:
                # MetaError takes the status code; raising the bare class
                # would fail with a TypeError and lose the status.
                raise MetaError(resp.status)
            site_text = await resp.text()
    answer = metacritic_parse(site_text)
    answer.append(image_getter(site_text))
    return answer
# Running the module directly does nothing.
if __name__ == "__main__":
    pass