-
Notifications
You must be signed in to change notification settings - Fork 1
/
abp_list_check.py
executable file
·97 lines (82 loc) · 2.87 KB
/
abp_list_check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/python3
# (c) Alex Stanev <[email protected]>, https://stanev.org/abp
# The source code is distributed under GPLv3 license
import sys
import os.path
import urllib.parse
import socket
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
# Filter-syntax characters. An entry is cut at the first occurrence of each
# of them, which drops comments ('!'), section headers ('['), options ('$'),
# separators ('^'), negations ('~') and element-hiding parts ('#').
_CUT_SYMBOLS = ('#', '$', '~', '^', '[', '!')


def _extract_address(line):
    """Return the part of a filter line that precedes any filter syntax.

    The line is stripped of surrounding whitespace and then truncated at the
    first occurrence of every symbol in ``_CUT_SYMBOLS`` (applied in order).
    An empty string means the line was blank, a comment or a section header.
    """
    rule = line.strip()
    for sym in _CUT_SYMBOLS:
        pos = rule.find(sym)
        if pos != -1:
            rule = rule[:pos]
    return rule


def _trim_path(path):
    """Shorten a URL path so it contains no trailing dot and no wildcard.

    Paths of length 0 or 1 are returned unchanged. Otherwise the path is cut
    back to the preceding '/' while it ends with '.', and then, while it
    still contains '*', it is cut at the last '*' and back to the preceding
    '/'.
    """
    if len(path) > 1:
        while path.endswith('.'):
            path = path[:path.rfind('/')]
        while '*' in path:
            path = path[:path.rfind('*')]
            path = path[:path.rfind('/')]
    return path


def main(argv=None):
    """Check every entry of an AdBlockPlus list and print a report.

    argv -- command line as a list (program name, list file); defaults to
            ``sys.argv``.

    Exits with status 0 after printing the usage text when the argument
    count is wrong, and with status 1 when the list file does not exist.
    For every usable entry the resource is requested over HTTP; 404/410
    answers, server errors and connection failures are printed, and a
    summary of the counters is printed at the end.
    """
    if argv is None:
        argv = sys.argv

    print('Check AdBlockPlus filters for outdated entries\nver 0.3 (c) Alex Stanev, https://stanev.org/abp\n')

    if len(argv) != 2:
        print('Usage: %s [abp_list.txt]' % argv[0])
        sys.exit(0)

    if not os.path.exists(argv[1]):
        print('Could not find the list')
        sys.exit(1)

    # Applies to every socket opened by urlopen() below.
    socket.setdefaulttimeout(2)

    no_res = 0
    no_conn = 0
    no_host = 0
    skip = 0
    short = 0
    curr = 0

    # The interrupt handler wraps the whole scan so that Ctrl-C stops it
    # completely (not just the current entry) and the summary is still shown.
    try:
        with open(argv[1]) as abplist:
            for curr, line in enumerate(abplist, 1):
                rule = _extract_address(line)

                # comment, empty line or section header
                if rule == '':
                    skip += 1
                    continue

                # check for short entries
                if len(rule) < 3:
                    short += 1
                    print('%i: Too short : %s' % (curr, line), end='')
                    continue

                # Remove leading anchor pipes and exception markers. lstrip()
                # is safe for entries made only of these characters; they end
                # up with an empty host and are counted as indeterminable.
                rule = rule.lstrip('|@')

                # check for protocol idents
                if rule.startswith(('http://', 'https://')):
                    print('%i: Consider removing protocol identificator : %s' % (curr, line), end='')
                else:
                    rule = 'http://' + rule

                url = urllib.parse.urlparse(rule)

                # a missing or wildcard host cannot be requested
                if url[1] == '' or '*' in url[1]:
                    no_host += 1
                    continue

                path = _trim_path(url[2])

                # the host part may hold several comma separated domains
                for domain in url[1].split(','):
                    target = url[0] + '://' + domain + path
                    try:
                        # Only the status matters; close the response at once.
                        with urlopen(target):
                            pass
                    except HTTPError as e:
                        if e.code in (404, 410):
                            no_res += 1
                            print('%i: %i Resource not found : %s' % (curr, e.code, line), end='')
                        if e.code >= 500:
                            print('%i: %i Server error : %s' % (curr, e.code, line), end='')
                    except URLError as e:
                        no_conn += 1
                        print('%i: %s : %s : %s' % (curr, e.reason, line.strip(), url[0] + '://' + domain + path.strip()))
                    except Exception as e:
                        # Best effort: keep scanning, but do not hide the
                        # failure (malformed URL, read timeout, bad reply...).
                        print('%i: Unexpected error %r : %s' % (curr, e, line.strip()))
    except KeyboardInterrupt:
        print('Keyboard interrupt')

    print('\nChecked lines:%i\nNot found:%i\nConnection error:%i\nIndeterminable:%i\nToo short:%i\nSkipped:%i' % (curr, no_res, no_conn, no_host, short, skip))


if __name__ == '__main__':
    main()