-
Notifications
You must be signed in to change notification settings - Fork 8
/
zipcodeParser.js
146 lines (132 loc) · 4.5 KB
/
zipcodeParser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
const request = require('request');
const async = require('async');
const fs = require('fs');
// Source pages: the Belgian Post zipcode tables, one in Dutch (nl) and one in French (fr).
const nl_url = "http://www.bpost2.be/zipcodes/files/zipcodes_num_nl_new.html";
const fr_url = "http://www.bpost2.be/zipcodes/files/zipcodes_num_fr_new.html";
// Nominatim search endpoint used for the lookups; the query text is appended directly after "q=".
// NEVER POINT THIS AT THE OFFICIAL nominatim.openstreetmap.org SERVER! THIS WOULD VIOLATE THE TOS.
const nominatim_url = "http://localhost:7070/search?format=json&limit=5&q="
// Pattern for extracting zipcode<->name pairs from one table row of the Belgian Post website.
// Capture group 1 is the four-digit zipcode (column0), capture group 2 the place name (column1).
// A row only matches when its third cell (column2) is non-empty: according to the original author
// these are the entries that have a sub-municipality ("Teilgemeinde"); the remaining postcodes are
// only administrative (and hard to look up in Nominatim).
const pattern = "(?:<td class=\"column0 style8 n\">)([0-9]{4})(?:<\/td>)(?:<td class=\"column1 style6 s\">)([^<]*)(?:<\/td>)(?:<td class=\"column2 style6 s\">)(?:[^<]+)(?:<\/td>)";
// Entry point: run the steps in series through async.waterfall, then report the outcome.
async.waterfall([init, getDataFr, getDataNl, nominatimLookup], (err, result) => {
    // Print whatever error and result the waterfall hands to its final callback.
    console.log(err);
    console.log(result);
});
/**
 * First waterfall step: creates the empty list that the following steps fill.
 *
 * @param {function(null, Array): void} callback - Invoked with `(null, [])`;
 *     every call receives a freshly created array.
 */
function init(callback) {
    const list = [];
    // Plain progress marker, in line with the "... finished" messages that
    // the data-fetching steps print.
    console.log("init");
    callback(null, list);
}
/**
 * Downloads the French zipcode table and appends one "<zipcode> <name>" entry
 * to `list` for every table row that matches `pattern`.
 *
 * @param {string[]} list - Accumulator handed over by the previous step; it is
 *     mutated in place and passed on.
 * @param {function(*, string[]=): void} callback - Invoked exactly once: with
 *     the request error, with an error string when a name still contains "&"
 *     after unescape(), or with `(null, list)` on success.
 */
function getDataFr(list, callback) {
    request(fr_url, {}, (err, res, body) => {
        if (err) {
            // Stop here: falling through would dereference an undefined body
            // and invoke the callback a second time.
            return callback(err);
        }
        // Compile the row pattern once instead of once per row.
        const rowPattern = new RegExp(pattern);
        // Remove whitespace between tags only, so cell contents stay untouched.
        const rows = body.replace(/>\s+</g, '><').split("</tr>");
        for (const row of rows) {
            const result = rowPattern.exec(row);
            if (result === null) {
                continue;
            }
            const name = unescape(result[2]);
            if (name.includes("&")) {
                // A leftover "&" means an entity was not translated. Abort so
                // the callback is not called again with a success result.
                return callback("Error: Failed to unescape():" + name);
            }
            list.push(result[1] + " " + name);
        }
        console.log("getDataFr finished");
        callback(null, list);
    });
}
/**
 * Downloads the Dutch zipcode table and appends one "<zipcode> <name>" entry
 * to `list` for every table row that matches `pattern`.
 *
 * @param {string[]} list - Accumulator handed over by the previous step; it is
 *     mutated in place and passed on.
 * @param {function(*, string[]=): void} callback - Invoked exactly once: with
 *     the request error, with an error string when a name still contains "&"
 *     after unescape(), or with `(null, list)` on success.
 */
function getDataNl(list, callback) {
    request(nl_url, {}, (err, res, body) => {
        if (err) {
            // Stop here: falling through would dereference an undefined body
            // and invoke the callback a second time.
            return callback(err);
        }
        // Compile the row pattern once instead of once per row.
        const rowPattern = new RegExp(pattern);
        // Remove whitespace between tags only, so cell contents stay untouched.
        const rows = body.replace(/>\s+</g, '><').split("</tr>");
        for (const row of rows) {
            const result = rowPattern.exec(row);
            if (result === null) {
                continue;
            }
            const name = unescape(result[2]);
            if (name.includes("&")) {
                // A leftover "&" means an entity was not translated. Abort so
                // the callback is not called again with a success result.
                return callback("Error: Failed to unescape(): " + name);
            }
            list.push(result[1] + " " + name);
        }
        console.log("getDataNl finished");
        callback(null, list);
    });
}
/**
 * Queries Nominatim for every "<zipcode> <name>" entry (at most 25 requests
 * in flight), keeps the entries for which Nominatim returns at least one
 * match, and writes the kept entries as a JSON array to zipCodes.json.
 *
 * @param {string[]} list - Entries as produced by the data-fetching steps.
 * @param {function(*, string[]=): void} callback - Invoked exactly once: with
 *     the first request or file-write error, or with `(null, lookupList)`
 *     after the file has been written.
 */
function nominatimLookup(list, callback) {
    const lookupList = [];
    let errorcount = 0;
    // The per-item continuation is named `done` so it does not shadow the
    // step's own `callback`, which must be called once the whole run is over.
    async.eachLimit(list, 25, (elem, done) => {
        // Request by postcode + name
        request(nominatim_url + elem, {}, (err, res, body) => {
            if (err) {
                // Stop here: falling through would parse an undefined body
                // and signal completion of this item twice.
                return done(err);
            }
            let matches;
            try {
                matches = JSON.parse(body);
            } catch (parseErr) {
                // A non-JSON answer (e.g. an HTML error page) only costs this
                // one entry instead of crashing the whole run.
                errorcount += 1;
                console.log("Skipping \"" + elem + "\": unparsable Nominatim response");
                return done();
            }
            // Keep the entry only when Nominatim actually found something.
            if (Array.isArray(matches) && matches.length > 0) {
                // Replace every "+" (not just the first) before decoding the
                // percent-encoded characters back to readable text.
                lookupList.push(decodeURI(elem.replace(/\+/g, " ")));
            }
            done();
        });
    }, (err) => {
        if (err != null) {
            return callback(err);
        }
        if (errorcount > 0) {
            console.log(errorcount + " Nominatim response(s) could not be parsed and were skipped");
        }
        try {
            fs.writeFileSync("./src/app/services/location-autocomplete/zipCodes.json", JSON.stringify(lookupList));
        } catch (writeErr) {
            return callback(writeErr);
        }
        callback(null, lookupList);
    });
}
/**
 * Replaces accented characters with their percent-encoded UTF-8 form
 * (e.g. "é" becomes "%C3%A9") so a name can be placed in a request URL.
 *
 * Each supported character is accepted both as a literal character and as
 * its named HTML entity ("&eacute;"). Any other text, including entities
 * that are not in the table, is returned unchanged.
 * Entity names follow http://www.javascripter.net/faq/accentedcharacters.htm
 *
 * Note: this file-level function shadows the deprecated built-in global of
 * the same name.
 *
 * @param {string} string - Text that may contain accented characters or
 *     their named HTML entities.
 * @returns {string} The text with every supported character percent-encoded.
 */
function unescape(string) {
    // Named HTML entity -> literal character. Kept inside the function so the
    // table is available no matter where in the file the function is called.
    const charactersByEntity = {
        eacute: "é",
        Eacute: "É",
        ccedil: "ç",
        Ccedil: "Ç",
        egrave: "è",
        Egrave: "È",
        agrave: "à",
        Agrave: "À",
        ocirc: "ô",
        Ocirc: "Ô",
        ecirc: "ê",
        Ecirc: "Ê",
        ucirc: "û",
        Ucirc: "Û",
        acirc: "â",
        Acirc: "Â",
    };
    const entityNames = Object.keys(charactersByEntity).join("|");
    const literalCharacters = Object.values(charactersByEntity).join("");
    // One pass over the text: either "&name;" (name captured) or a literal character.
    const matcher = new RegExp("&(" + entityNames + ");|[" + literalCharacters + "]", "g");
    return string.replace(matcher, (match, entityName) => {
        const character = entityName === undefined ? match : charactersByEntity[entityName];
        return encodeURIComponent(character);
    });
}