-
Notifications
You must be signed in to change notification settings - Fork 0
/
spjson.py
61 lines (52 loc) · 1.75 KB
/
spjson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import json
import sqlite3
def _load_nodes(cur, howmany):
    """Return at most ``howmany`` page rows from the Pages/Links join.

    Each row is ``(inbound, old_rank, new_rank, id, url)`` where ``inbound``
    is the number of Links rows pointing at the page. Only pages whose
    ``html`` is not NULL and whose ``error`` is NULL are considered.
    """
    cur.execute('''SELECT COUNT(from_id) AS inbound, old_rank, new_rank, id, url
        FROM Pages JOIN Links ON Pages.id = Links.to_id
        WHERE html IS NOT NULL AND ERROR IS NULL
        GROUP BY id ORDER BY id,inbound''')
    nodes = []
    for row in cur:
        # Check the limit *before* appending so that exactly ``howmany``
        # rows are kept (checking afterwards keeps one row too many).
        if len(nodes) >= howmany:
            break
        nodes.append(row)
    return nodes


def _format_nodes(nodes, minrank, maxrank):
    """Return the JSON text for every node plus an id -> position mapping.

    ``new_rank`` is rescaled linearly so the smallest rank becomes 0 and the
    largest becomes 19. The caller guarantees ``maxrank != minrank``.
    """
    span = maxrank - minrank
    node_lines = []
    index_of = {}
    for position, row in enumerate(nodes):
        inbound, _old_rank, new_rank, page_id, url = row
        scaled = 19 * ((new_rank - minrank) / span)
        # json.dumps quotes and escapes the URL so that a '"' or '\' inside
        # it cannot break the generated JavaScript.
        node_lines.append(
            '{"weight":' + str(inbound) + ',"rank":' + str(scaled) + ','
            + ' "id":' + str(page_id) + ', "url":' + json.dumps(url) + '}'
        )
        index_of[page_id] = position
    return node_lines, index_of


def _format_links(cur, index_of):
    """Return the JSON text for every link whose two ends are both in ``index_of``.

    Links are expressed with the node positions (not the page ids) as
    ``source`` and ``target``; ``value`` is always 3.
    """
    cur.execute('''SELECT DISTINCT from_id, to_id FROM Links''')
    link_lines = []
    for from_id, to_id in cur:
        if from_id not in index_of or to_id not in index_of:
            continue
        link_lines.append(
            '{"source":' + str(index_of[from_id])
            + ',"target":' + str(index_of[to_id]) + ',"value":3}'
        )
    return link_lines


def main(howmany=None, db_path='spider.sqlite', out_path='spider.js'):
    """Export pages and links from the spider database as a JavaScript file.

    Parameters:
        howmany: maximum number of nodes to export. When ``None`` (the
            default) the user is prompted with "How many nodes? ".
        db_path: path of the SQLite database to read.
        out_path: path of the JavaScript file to write. It contains a single
            ``spiderJson = {...};`` assignment with "nodes" and "links".

    Raises:
        SystemExit: when no usable ranks are available (no rows selected,
            a NULL rank, or all selected ranks equal). The output file is
            not created or modified in that case.
        ValueError: when the prompted node count is not an integer.
    """
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        print("Creating JSON output on " + out_path + "...")
        if howmany is None:
            howmany = int(input("How many nodes? "))

        nodes = _load_nodes(cur, howmany)
        ranks = [row[2] for row in nodes]
        # Equal min and max would make the rescaling divide by zero.
        if not ranks or None in ranks or max(ranks) == min(ranks):
            print("Error - please run sprank.py to compute page rank")
            # quit() comes from the site module and is not always defined;
            # raising SystemExit directly is always available.
            raise SystemExit(1)

        node_lines, index_of = _format_nodes(nodes, min(ranks), max(ranks))
        link_lines = _format_links(cur, index_of)
        cur.close()
    finally:
        conn.close()

    # Everything is read and validated before the file is opened, so a
    # failure above never leaves a truncated output file behind.
    with open(out_path, 'w', encoding='utf-8') as fhand:
        fhand.write('spiderJson = {"nodes":[\n')
        fhand.write(',\n'.join(node_lines))
        fhand.write('],\n')
        fhand.write('"links":[\n')
        fhand.write(',\n'.join(link_lines))
        fhand.write(']};')
    print("Open force.html in a browser to view the visualization")


if __name__ == "__main__":
    main()