-
Notifications
You must be signed in to change notification settings - Fork 0
/
getComment.py
58 lines (52 loc) · 2.11 KB
/
getComment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# encoding: UTF-8
import re
import requests
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )
def getComment(contentToken): # 参数为每条微博对应的唯一的一个字符串,使用weibo.cn打开一条微博的评论就能看到
cook = {"Cookie": "your cookies"}
url = 'http://weibo.cn/comment/' + contentToken
html = requests.get(url, cookies=cook)
allComment = []
reGetComment = re.compile(ur'<div class="c" id="C.*?<a href=(.*?)</a>.*?<span class="ctt">(.*?)</span>.*?<span class="ct">(\d\d月\d\d日|[\d-]{10}) ([\d:]{4,10}) 来自(.*?)</span></div>')
reGetPage = re.compile(ur'1/(\d+)页')
content = re.findall(reGetComment, html.text)
commentPage = re.findall(reGetPage, html.text)
if len(commentPage) == 0:
NumPage = 1
else:
NumPage = int(commentPage[0])
filename = '微博的评论'
for i in range(len(content)):
oneComment = []
for j in range(5):
print content[i][j]
oneComment.append(content[i][j])
f = open(filename, 'a')
f.write(str(content[i][j]))
f.write('\n')
f.write('--------------------------------\n')
f.close()
print '-------------------------------'
allComment.append(oneComment)
if NumPage >= 2:
for page in range(2, NumPage+1):
nextPageUrl = 'http://weibo.cn/comment/' + contentToken + '?page=' + str(page)
html = requests.get(nextPageUrl, cookies=cook)
content = re.findall(reGetComment, html.text)
for i in range(len(content)):
oneComment = []
for j in range(5):
print content[i][j]
oneComment.append(content[i][j])
f2 = open(filename, 'a')
f2.write(str(content[i][j]))
f2.write('\n')
f2.write('--------------------------------\n')
f2.close()
print '-------------------------------'
allComment.append(oneComment)
# print allComment
print "DONE"
return allComment