Skip to content

Commit

Permalink
#21 add support to load your own JSON schema
Browse files Browse the repository at this point in the history
  • Loading branch information
DenisCarriere committed Nov 5, 2015
1 parent 46fc3ca commit e536065
Show file tree
Hide file tree
Showing 12 changed files with 262 additions and 60 deletions.
6 changes: 3 additions & 3 deletions etl2osm/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ def process(infile, config, outfile, **kwargs):
:param ``format``: Data output format [shp, geojson, osm].
"""

data = Extract(infile)
data.transform(config)
data.save(outfile)
data = Extract(infile, **kwargs)
data.transform(config, **kwargs)
data.save(outfile, **kwargs)
return data


Expand Down
7 changes: 5 additions & 2 deletions etl2osm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
@click.argument('infile')
@click.option('--config', '-c', help='Config file for column transformation.')
@click.option('--outfile', '-o', help='Out file path to save.')
def cli(infile, config, outfile):
@click.option('--suffix', help='Filepath for suffix attribute function.')
@click.option('--direction', help='Filepath for direction attribute function.')
@click.option('--title_except', help='Filepath for title except attribute function.')
def cli(infile, config, outfile, **kwargs):
"""Command Line Interface for ETL2OSM"""

etl2osm.process(infile, config, outfile)
etl2osm.process(infile, config, outfile, **kwargs)
7 changes: 3 additions & 4 deletions etl2osm/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ def read_geojson(self, infile, **kwargs):
"""Reads a GeoJSON and gives the results in GeoJSON format"""

logging.info('Reading GeoJSON: %s' % infile)

with open(infile) as f:
geojson = json.load(f)

Expand Down Expand Up @@ -163,7 +162,7 @@ def read_osm(self, infile, **kwargs):
logging.info('Reading OSM: %s' % infile)
raise ValueError('Reading OSM not implemented')

def transform(self, config={}):
def transform(self, config={}, **kwargs):
""" Transform the data using the config file """

self.config = read_config(config)
Expand All @@ -174,11 +173,11 @@ def transform(self, config={}):
for x, feature in enumerate(self.features):
# Reproject data to WGS84
if not self.epsg == 'ESPG:4326':
feature = reproject(feature, self.crs, osr.SRS_WKT_WGS84)
feature = reproject(feature, self.crs, osr.SRS_WKT_WGS84, **kwargs)

# Transform Columns
if self.config:
feature = transform_columns(feature, self.config)
feature = transform_columns(feature, self.config, **kwargs)

# Save feature to self
self[x] = feature
Expand Down
2 changes: 1 addition & 1 deletion etl2osm/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


class Load(object):
def save(self, outfile):
def save(self, outfile, **kwargs):
""" Saves file to path """

extension = os.path.splitext(outfile)[1][1:]
Expand Down
10 changes: 10 additions & 0 deletions etl2osm/models/direction.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"N": "North",
"S": "South",
"E": "East",
"W": "West",
"NE": "Northeast",
"NW": "Northwest",
"SE": "Southeast",
"SW": "Southwest"
}
27 changes: 2 additions & 25 deletions etl2osm/models.py → etl2osm/models/suffix.json
Original file line number Diff line number Diff line change
@@ -1,27 +1,4 @@
# -*- coding: utf-8 -*-

cap_except = [
"'s",
"the",
"in",
"a",
"CR",
"US",
"SR"
]

direction = {
"N": "North",
"S": "South",
"E": "East",
"W": "West",
"NE": "Northeast",
"NW": "Northwest",
"SE": "Southeast",
"SW": "Southwest"
}

suffix = {
{
"ALY": "Alley",
"ANX": "Annex",
"ARC": "Arcade",
Expand Down Expand Up @@ -165,4 +142,4 @@
"WALK": "Walk",
"WAY": "Way",
"WLS": "Wells"
}
}
9 changes: 9 additions & 0 deletions etl2osm/models/title_except.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[
"'s",
"the",
"in",
"a",
"CR",
"US",
"SR"
]
48 changes: 37 additions & 11 deletions etl2osm/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,37 @@
import re
import json
import logging
import etl2osm
from six import string_types, binary_type
from collections import OrderedDict
from osgeo import osr, ogr
from etl2osm.models import suffix, direction, cap_except


true_list = ['True', 'true', '1', True, 1]


def load_json(model, **kwargs):
    """Resolve a model to its parsed JSON content.

    Resolution order:

    1. ``model`` is already a dict, list or tuple: returned unchanged.
    2. ``kwargs`` holds a non-``None`` entry keyed by ``model``: that entry
       replaces ``model`` (it may itself be a container or a file path).
    3. ``model`` is an existing file path: the file is parsed as JSON.
    4. ``<etl2osm package>/models/<model>.json`` exists: that file is parsed.

    :param model: Model name, path to a JSON file, or already-loaded data.
    :param kwargs: Optional overrides keyed by model name.
    :return: The parsed JSON data, or ``None`` when nothing could be resolved.
    """
    containers = (dict, list, tuple)

    # Check containers first: dicts and lists are unhashable, so testing them
    # for membership in kwargs would raise TypeError.
    if isinstance(model, containers):
        return model

    # An override set to None means "not provided" (command-line options that
    # were not passed arrive this way), so keep the model name in that case.
    override = kwargs.get(model)
    if override is not None:
        model = override
        if isinstance(model, containers):
            return model

    # Find user defined file path
    if os.path.exists(model):
        with open(model) as f:
            return json.load(f)

    # Look inside etl2osm [models] folder for .json files
    package_dir = os.path.dirname(etl2osm.__file__)
    path = os.path.join(package_dir, 'models', model + '.json')

    if os.path.exists(path):
        with open(path) as f:
            return json.load(f)

    # Keep returning None on a miss, but leave a trace for debugging.
    logging.warning('Unable to load JSON model: %s', model)
    return None


def regex_strip(value):
# ESRI Shapefiles have blank [SUB] characters at the end of fields
# Regex will search all characters but must END with a letter or number
Expand Down Expand Up @@ -81,7 +103,7 @@ def extract_epsg(crs):
return crs


def reproject(feature, crs_source, crs_target=4326):
def reproject(feature, crs_source, crs_target=4326, **kwargs):
# Source Projection
p1 = get_coordinate_rerefence_system(extract_epsg(crs_source))

Expand Down Expand Up @@ -158,13 +180,13 @@ def read_config(config):
return json.load(f, object_pairs_hook=OrderedDict)


def titlecase_except(value, exceptions=cap_except):
def titlecase_except(value, **kwargs):
if isinstance(value, (string_types, binary_type)):
word_list = re.split(' ', value)
final = []

for word in word_list:
if word in exceptions:
if word in load_json('title_except', **kwargs):
final.append(word)
else:
final.append(word.capitalize())
Expand All @@ -173,7 +195,7 @@ def titlecase_except(value, exceptions=cap_except):
return value


def clean_field(properties, conform):
def clean_field(properties, conform, **kwargs):
if 'field' in conform:
# STRING
if isinstance(conform, dict):
Expand Down Expand Up @@ -231,6 +253,8 @@ def clean_field(properties, conform):

# Replaces the abbreviated suffix (AVE=Avenue)
elif 'suffix' in conform['function']:
suffix = load_json('suffix', **kwargs)

if 'field' not in conform:
raise ValueError('[field] is missing using the Suffix Attribute Function.')
if properties[field]:
Expand All @@ -241,6 +265,8 @@ def clean_field(properties, conform):

# Replaces the abbreviated directions (NE=Northeast)
elif 'direction' in conform['function']:
direction = load_json('direction', **kwargs)

if 'field' not in conform:
raise ValueError('[field] is missing using the Direction Attribute Function.')
if properties[field]:
Expand Down Expand Up @@ -291,7 +317,7 @@ def clean_field(properties, conform):
return value


def transform_fields(properties, conform):
def transform_fields(properties, conform, **kwargs):
fields = OrderedDict()
for key in conform.keys():
value = None
Expand All @@ -301,28 +327,28 @@ def transform_fields(properties, conform):
if isinstance(conform[key], (string_types, binary_type)):
if conform[key] in properties:
value = properties[conform[key]]
value = clean_field(properties, conform[key])
value = clean_field(properties, conform[key], **kwargs)
fields.update(dict([(key, value)]))

# DICT
# Replace & join multiple fields together
elif isinstance(conform[key], (OrderedDict, dict)):
value = clean_field(properties, conform[key])
value = clean_field(properties, conform[key], **kwargs)
fields.update(dict([(key, value)]))

# LIST
# Join a values from a list
elif isinstance(conform[key], (list, tuple)):
value = clean_field(properties, conform[key])
value = clean_field(properties, conform[key], **kwargs)
fields.update(dict([(key, value)]))

return fields


def transform_columns(feature, config):
def transform_columns(feature, config, **kwargs):
config = read_config(config)
conform = config['conform']
feature['properties'] = transform_fields(feature['properties'], conform)
feature['properties'] = transform_fields(feature['properties'], conform, **kwargs)

return feature

Expand Down
10 changes: 10 additions & 0 deletions tests/models/direction.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"N": "North",
"S": "South",
"E": "East",
"W": "West",
"NE": "Northeast",
"NW": "Northwest",
"SE": "Southeast",
"SW": "Southwest"
}
Loading

0 comments on commit e536065

Please sign in to comment.