From 4659e6a479f75f45383ea4ca95a6243e5180d8bd Mon Sep 17 00:00:00 2001 From: Paco Nathan Date: Sat, 12 Jan 2013 22:59:20 -0800 Subject: [PATCH] parsing road info --- README.md | 2 +- src/main/clj/copa/core.clj | 40 +++++++++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c60a987..5d54b28 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ To build and run: lein clean lein uberjar rm -rf output/ - hadoop jar ./target/copa.jar data/copa.csv data/meta_tree.tsv data/meta_road.tsv out/trap out/park out/tree + hadoop jar ./target/copa.jar data/copa.csv data/meta_tree.tsv data/meta_road.tsv out/trap out/park out/tree out/road About Cascading diff --git a/src/main/clj/copa/core.clj b/src/main/clj/copa/core.clj index ce4991d..4b88ec4 100644 --- a/src/main/clj/copa/core.clj +++ b/src/main/clj/copa/core.clj @@ -54,23 +54,53 @@ (<- [?blurb, ?misc, ?geo, ?kind ?priv, ?tree_id, ?situs, ?tree_site, ?tree_species] (src ?blurb, ?misc, ?geo, ?kind) (re-matches #"^\s+Private\:\s+(\S+)\s+Tree ID\:\s+.*" ?misc) - (parse-tree ?misc :> _ ?priv, ?tree_id, ?situs, ?tree_site, ?raw_species) + (parse-tree ?misc :> _ + ?priv, ?tree_id, ?situs, ?tree_site, ?raw_species) ((c/comp s/trim s/lower-case) ?raw_species :> ?tree_species) (:trap (hfs-textline trap)) ) ) -(defn -main [in meta_tree meta_road trap park tree & args] +(defn parse-road [misc] + "parse the special fields in the road format" + (let [x (re-seq + #"^\s+Sequence\:.*\s+Year Constructed\:\s+(\d+)\s+Traffic Count\:\s+(\d+)\s+Traffic Index\:\s+(\w.*\w)\s+Traffic Class\:\s+(\w.*\w)\s+Traffic Date.*\s+Paving Length\:\s+(\d+)\s+Paving Width\:\s+(\d+)\s+Paving Area\:\s+(\d+)\s+Surface Type\:\s+(\w.*\w)\s+Surface Thickness.*\s+Bike Lane\:\s+(\w+)\s+Bus Route\:\s+(\w+)\s+Truck Route\:\s+(\w+)\s+Remediation.*$" + misc)] + (> (count x) 0) + (> (count (first x)) 1) + (first x)) + ) + + +(defn get-roads [src trap] + "filter/parse the road data" + (<- [?blurb, ?misc, ?geo, ?kind + ?year_construct ?traffic_count ?traffic_index ?traffic_class ?paving_length ?paving_width + ?paving_area ?surface_type ?bike_lane ?bus_route ?truck_route] + (src ?blurb, ?misc, ?geo, ?kind) + (re-matches #"^\s+Sequence\:.*\s+Year Constructed\:\s+(\d+)\s+Traffic.*" ?misc) + (parse-road ?misc :> _ + ?year_construct ?traffic_count ?traffic_index ?traffic_class ?paving_length ?paving_width + ?paving_area ?surface_type ?bike_lane ?bus_route ?truck_route) + (:trap (hfs-textline trap)) + ) + ) + + +(defn -main [in meta_tree meta_road trap park tree road & args] (let [gis (hfs-delimited in) tree_meta (hfs-delimited meta_tree :skip-header? true) road_meta (hfs-delimited meta_road :skip-header? true) - src (etl-gis gis trap)] + src (etl-gis gis (s/join "/" [trap "gis"]))] (?- (hfs-delimited park) - (get-parks src trap) + (get-parks src (s/join "/" [trap "park"])) ) (?- (hfs-delimited tree) - (get-trees src trap) + (get-trees src (s/join "/" [trap "tree"])) + ) + (?- (hfs-delimited road) + (get-roads src (s/join "/" [trap "road"])) ) ) )