-
Notifications
You must be signed in to change notification settings - Fork 1
/
prepare_stations.py
executable file
· 122 lines (104 loc) · 3.85 KB
/
prepare_stations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python3
import urllib.request
import zipfile
import os.path
from collections import namedtuple
import csv
import sqlite3
import shutil
# One station record as produced by read_csv(): numeric id, display name,
# OR-ed TYPE_IDS bit flags, and the passenger frequency figure.
Station = namedtuple('Station', 'id name type frequence')

# Each means of transport gets its own bit (first entry = bit 0), so several
# of them can be combined into a single integer with bitwise OR.
TYPE_IDS = {
    label: 1 << bit
    for bit, label in enumerate((
        'Zug',
        'Bus',
        'Tram',
        'Schiff',
        'Metro',
        'Luftseilbahn',
        'Zahnradbahn',
        'Standseilbahn',
        'Aufzug',
    ))
}
# Stop ids that occur in the timetable database. write_stations() drops every
# station whose id (as a string) is not in this collection. Only membership
# is ever tested, so a frozenset gives constant-time lookups instead of a
# linear scan of a list for each station.
conn = sqlite3.connect('timetable.db')
try:
    USED_STOP_IDS = frozenset(
        row[0] for row in conn.execute('SELECT DISTINCT stop_id from stops')
    )
finally:
    # Release the database handle even when the query fails
    # (e.g. the "stops" table does not exist).
    conn.close()
# (abbreviated spelling, full spelling) pairs applied by full_names().
ABBRVS = [
    ('La Chaux-de-F', 'La Chaux-de-Fonds'),
    ('Belmont-s.-L.', 'Belmont-sur-Lausanne'),
]


def full_names(name):
    """Return *name* with every abbreviation from ABBRVS spelled out.

    Args:
        name: Station name, possibly containing an abbreviated place name.

    Returns:
        The name with each abbreviated spelling replaced by its full
        spelling. Names that already use the full spelling are returned
        unchanged, so the function is idempotent.
    """
    for short_name, long_name in ABBRVS:
        # str.replace returns a new string, so the result must be re-bound.
        # A full spelling may itself start with the abbreviation
        # ("La Chaux-de-Fonds" contains "La Chaux-de-F"); collapsing full
        # spellings first prevents expanding them into "...Fondsonds".
        name = name.replace(long_name, short_name).replace(short_name, long_name)
    return name
def latin_encoder(latin_csv_data, encoding='latin1'):
    """Lazily decode an iterable of byte strings into text.

    Despite its name, this generator *decodes*: each item is turned from
    bytes into str so that the csv module can consume the result.

    Args:
        latin_csv_data: Iterable yielding bytes objects (for example a
            binary file object, which yields one line per iteration).
        encoding: Codec used to decode every item. Defaults to 'latin1'.

    Yields:
        Each item decoded to str, in input order, with any line ending
        left untouched.
    """
    for line in latin_csv_data:
        yield line.decode(encoding)
def read_passenger_frequence(filename='data/passenger-frequence.csv',
                             encoding=None):
    """Load the passenger figure of each station from a ';'-delimited CSV.

    The file must have a header row. Two columns are read: ``lod``, whose
    last '/'-separated segment is used as the station id, and
    ``DTV average daily traffic``, which is parsed as an integer.

    Args:
        filename: Path of the CSV file to read.
        encoding: Text encoding passed to open(); None keeps the platform
            default.

    Returns:
        dict mapping the station id (str) to its traffic figure (int).
        When an id occurs more than once, the last row wins.

    Raises:
        KeyError: If one of the two columns is missing from the header.
        ValueError: If a traffic value is not an integer.
    """
    frequence_by_id = {}
    # newline='' lets the csv module do its own line-ending handling, as
    # the csv documentation requires for file objects.
    with open(filename, newline='', encoding=encoding) as csvfile:
        for row in csv.DictReader(csvfile, delimiter=';'):
            # rpartition returns the whole value when it contains no '/',
            # where indexing the result of rsplit would raise IndexError.
            ident = row['lod'].rpartition('/')[2]
            frequence_by_id[ident] = int(row['DTV average daily traffic'])
    return frequence_by_id
# Passenger figures keyed by station id string, loaded once at import time
# by read_passenger_frequence(); read_csv() looks station ids up in here.
FREQUENCE_BY_ID = read_passenger_frequence()
def read_csv(csv_file):
    """Yield one Station per usable row of the station CSV.

    Args:
        csv_file: Iterable of byte lines forming a ','-delimited CSV with
            a header row containing 'Nummer', 'Name' and 'Verkehrsmittel'.

    Yields:
        Station(id, name, type, frequence): the id is the integer 'Nummer',
        the name has its abbreviations expanded by full_names(), the type
        is the bitwise OR of the TYPE_IDS flags named in the
        '_'-separated 'Verkehrsmittel' column, and the frequence comes
        from FREQUENCE_BY_ID (0 when the id is not listed).

    Rows with an empty 'Verkehrsmittel' are skipped with a warning on
    stdout; an unknown transport type is reported and ignored.
    """
    records = csv.DictReader(latin_encoder(csv_file), delimiter=',')
    for record in records:
        station_id = int(record['Nummer'])
        station_name = full_names(record['Name'])
        transport = record['Verkehrsmittel']

        if len(transport) == 0:
            print("WARN: Skipping stations without verkehrsmittel: %s" % station_name)
            continue

        # The frequency table is keyed by the id as a string.
        passengers = FREQUENCE_BY_ID.get(str(station_id), 0)

        flags = 0
        for label in transport.split('_'):
            if label in TYPE_IDS:
                flags |= TYPE_IDS[label]
            else:
                print("WARN: Station %s type %s not found" % (station_name, label))

        yield Station(station_id, station_name, flags, passengers)
def write_stations(stations, sqlite_filename, used_stop_ids=None):
    """Write the given stations into a freshly created SQLite database.

    An existing file at *sqlite_filename* is deleted first. The database
    receives an "android_metadata" table, a "stations" table
    (id, name, types, frequency) and an FTS4 index "fts_stations" over the
    station names that uses "stations" as its content table.

    Args:
        stations: Iterable of objects with ``id``, ``name``, ``type`` and
            ``frequence`` attributes (e.g. Station tuples).
        sqlite_filename: Path of the database file to create.
        used_stop_ids: Collection of stop id strings to keep. A station is
            written only when ``str(station.id)`` is in it; every other
            station is skipped with a warning on stdout. Defaults to the
            module-level USED_STOP_IDS.

    Raises:
        sqlite3.Error: If a statement fails. The connection is closed in
            that case and uncommitted inserts are discarded.
    """
    if used_stop_ids is None:
        used_stop_ids = USED_STOP_IDS
    # Build the lookup once: membership is tested for every station.
    used_stop_ids = frozenset(used_stop_ids)

    if os.path.isfile(sqlite_filename):
        os.unlink(sqlite_filename)

    fts_table = 'fts_stations'
    table = 'stations'

    conn = sqlite3.connect(sqlite_filename)
    try:
        c = conn.cursor()
        # NOTE(review): presumably required by Android's SQLite helpers
        # when the file is shipped as an app asset - confirm.
        c.execute('''CREATE TABLE "android_metadata" ("locale" TEXT DEFAULT 'en_US')''')
        c.execute('''INSERT INTO "android_metadata" VALUES ('en_US')''')
        c.execute('''CREATE TABLE "%s" (
            id INTEGER PRIMARY KEY NOT NULL,
            name text NOT NULL,
            types INTEGER NOT NULL,
            frequency INTEGER NOT NULL
        )''' % (table,))
        c.execute('''CREATE VIRTUAL TABLE "%s" USING fts4(tokenize=simple, content="%s", name)''' % (fts_table, table))

        for station in stations:
            if str(station.id) not in used_stop_ids:
                print("WARN: Skipping %s (id %s)" % (station.name, station.id))
                continue
            c.execute(
                'INSERT INTO "%s" VALUES (?, ?, ?, ?)' % (table,),
                (station.id, station.name, station.type, station.frequence),
            )

        # The FTS table only references the content table, so its index
        # has to be rebuilt from the rows that were just inserted.
        c.execute("INSERT INTO %s(%s) VALUES('rebuild')" % (fts_table, fts_table))
        conn.commit()
    finally:
        conn.close()
# Script driver: fetch the published archive when it is not on disk yet,
# unpack the inner CSV archive, then build the station database from it.
url = 'http://data.geo.admin.ch/ch.bav.haltestellen-oev/data.zip'
data_file = "data.zip"
csv_zip_file_name = 'HST_MGMD_2017-05-01_CSV.ZIP'
csv_file_name = 'HST_MGMD_2017-05-01/Betriebspunkt.csv'
sqlite_filename = 'app/src/main/assets/stations.db'

# Download only once; an existing data.zip is reused as-is.
if not os.path.isfile(data_file):
    urllib.request.urlretrieve(url, data_file)

# data.zip contains a second archive that holds the actual CSV files.
if not os.path.isfile(csv_zip_file_name):
    with zipfile.ZipFile(data_file) as data_zip:
        data_zip.extract(csv_zip_file_name)

# Stream the CSV member straight out of the inner archive into the database.
with zipfile.ZipFile(csv_zip_file_name) as csv_zip, \
        csv_zip.open(csv_file_name, 'r') as csv_file:
    write_stations(read_csv(csv_file), sqlite_filename)