forked from le31ei/Domain-monitor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mongodb_con.py
96 lines (95 loc) · 3.93 KB
/
mongodb_con.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# coding: utf-8
'''
Created on 2018-11-26
@author: guimaizi
'''
import config,time
from pymongo import MongoClient
class mongodb_con:
    """Thin wrapper around the ``target_domain`` MongoDB database.

    Every monitored root domain is stored in its own collection; the
    collection name is the domain with each ``.`` replaced by ``_``.

    Handy mongo shell commands kept from the original source:
        db.qq_com.find({"url":/.*Cookie*./})
        db.qq_com.find({"state":"0"}) .limit(10)
        db.qq_com.update({ "state" : {$ne:0}} ,{$set:{"state":0}},false,true)
    """

    def __init__(self):
        """Connect to MongoDB and authenticate against ``target_domain``.

        Connection settings (``ip``, ``port``, ``name``, ``password``) come
        from ``config.config().callback_mongo_config()``.
        """
        self.config_main = config.config()
        # Fetch the settings once instead of once per key.
        mongo_cfg = self.config_main.callback_mongo_config()
        self.client = MongoClient(mongo_cfg['ip'], mongo_cfg['port'])
        self.db_target_domain = self.client.target_domain
        # NOTE(review): Database.authenticate() only exists in PyMongo < 4;
        # kept as-is so the authentication behaviour does not change.
        self.db_target_domain.authenticate(mongo_cfg['name'], mongo_cfg['password'])

    def _collection(self, domain):
        """Return the collection that stores records for *domain*."""
        return self.db_target_domain[domain.replace('.', '_')]

    def into_target(self, domain, data):
        """Insert *data* into the collection for *domain*.

        :param domain: domain name used to select the collection
        :param data: a single document, or a list/tuple of documents

        Any exception raised while inserting is printed, not propagated
        (best-effort write).
        """
        try:
            collection = self._collection(domain)
            if isinstance(data, (list, tuple)):
                collection.insert_many(data)
            else:
                collection.insert_one(data)
        except Exception as e:
            print(e)

    def find(self, domain, url):
        """Return how many documents have their ``domain`` field equal to *url*.

        :param domain: domain name used to select the collection
        :param url: value compared (as a string) with the ``domain`` field
        """
        collection = self._collection(domain)
        # NOTE(review): Cursor.count() is deprecated in newer PyMongo releases;
        # count_documents() is the replacement once the minimum version allows.
        return collection.find({"domain": "%s" % url}).count()

    def callback_list_url(self, domain, limt):
        """Return a cursor over at most *limt* documents whose ``state`` is 0.

        Only the ``id`` and ``domain`` fields are requested.

        :param domain: domain name used to select the collection
        :param limt: maximum number of documents
        """
        collection = self._collection(domain)
        return collection.find({"state": 0}, {"id": 1, "domain": 1}).limit(limt)

    def callback_list_all_url(self, domain, limt):
        """Return a cursor over at most *limt* documents, any state.

        Only the ``id`` and ``url`` fields are requested.

        :param domain: domain name used to select the collection
        :param limt: maximum number of documents
        """
        collection = self._collection(domain)
        return collection.find({}, {"id": 1, "url": 1}).limit(limt)

    def callback_all_list(self, domain, limt):
        """Return a cursor over at most *limt* complete documents.

        :param domain: domain name used to select the collection
        :param limt: maximum number of documents
        """
        collection = self._collection(domain)
        return collection.find({}).limit(limt)

    def callback_update(self, Domain, list_url_data):
        """Mark each entry of *list_url_data* as ``state`` 1, refreshing changed pages.

        For every entry the stored ``html_size`` of the document with the same
        ``domain`` is divided by the entry's ``html_size``.  When the ratio is
        >= 1.2 or <= 0.8 the entry is printed and its ``html_size``, ``title``
        and today's date (``time``) are written together with ``state`` 1.
        Otherwise, or when the comparison cannot be made (no stored document,
        missing key, non-numeric or zero size), only ``state`` is set to 1.

        :param Domain: domain name used to select the collection
        :param list_url_data: iterable of dicts with ``domain``, ``html_size``
            and ``title`` keys
        """
        collection = self._collection(Domain)
        for data in list_url_data:
            fields = {"state": 1}
            try:
                stored_size = collection.find(
                    {"domain": data['domain']}, {"html_size": 1})[0]['html_size']
                ratio = stored_size / data['html_size']
                if ratio >= 1.2 or ratio <= 0.8:
                    print(data)
                    fields = {
                        "state": 1,
                        "html_size": data['html_size'],
                        "title": data['title'],
                        "time": time.strftime('%Y-%m-%d', time.localtime()),
                    }
            except (IndexError, KeyError, TypeError, ZeroDivisionError):
                # Comparison impossible: fall back to flagging the entry only.
                pass
            collection.update_one({"domain": data['domain']}, {"$set": fields})

    def update_date(self, Domain, url):
        """Set ``state`` to 1 on the document whose ``domain`` field equals *url*.

        :param Domain: domain name used to select the collection
        :param url: value matched against the ``domain`` field
        """
        collection = self._collection(Domain)
        collection.update_one({"domain": url}, {"$set": {"state": 1}})

    def update_all_date(self, Domain):
        """Print *Domain*, then reset ``state`` to 0 on every document where it is not 0.

        :param Domain: domain name used to select the collection
        """
        print(Domain)
        collection = self._collection(Domain)
        collection.update_many({"state": {"$ne": 0}}, {"$set": {"state": 0}})

    def close(self):
        """Close the underlying MongoDB client connection."""
        self.client.close()
if __name__ == '__main__':
    # Manual smoke test: print up to 10 documents with state 0 from the
    # collection selected by '.qq.com'.
    p = mongodb_con()
    try:
        for i in p.callback_list_url('.qq.com', 10):
            print(i)
    finally:
        # Always release the MongoDB connection, even if the query fails.
        p.close()