Skip to content

Commit

Permalink
parsing road info
Browse files Browse the repository at this point in the history
  • Loading branch information
ceteri committed Jan 13, 2013
1 parent 7960c78 commit 4659e6a
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 6 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ To build and run:
lein clean
lein uberjar
rm -rf output/
hadoop jar ./target/copa.jar data/copa.csv data/meta_tree.tsv data/meta_road.tsv out/trap out/park out/tree
hadoop jar ./target/copa.jar data/copa.csv data/meta_tree.tsv data/meta_road.tsv out/trap out/park out/tree out/road


About Cascading
Expand Down
40 changes: 35 additions & 5 deletions src/main/clj/copa/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -54,23 +54,53 @@
(<- [?blurb, ?misc, ?geo, ?kind ?priv, ?tree_id, ?situs, ?tree_site, ?tree_species]
(src ?blurb, ?misc, ?geo, ?kind)
(re-matches #"^\s+Private\:\s+(\S+)\s+Tree ID\:\s+.*" ?misc)
(parse-tree ?misc :> _ ?priv, ?tree_id, ?situs, ?tree_site, ?raw_species)
(parse-tree ?misc :> _
?priv, ?tree_id, ?situs, ?tree_site, ?raw_species)
((c/comp s/trim s/lower-case) ?raw_species :> ?tree_species)
(:trap (hfs-textline trap))
)
)


(defn -main [in meta_tree meta_road trap park tree & args]
(defn parse-road [misc]
"parse the special fields in the road format"
(let [x (re-seq
#"^\s+Sequence\:.*\s+Year Constructed\:\s+(\d+)\s+Traffic Count\:\s+(\d+)\s+Traffic Index\:\s+(\w.*\w)\s+Traffic Class\:\s+(\w.*\w)\s+Traffic Date.*\s+Paving Length\:\s+(\d+)\s+Paving Width\:\s+(\d+)\s+Paving Area\:\s+(\d+)\s+Surface Type\:\s+(\w.*\w)\s+Surface Thickness.*\s+Bike Lane\:\s+(\w+)\s+Bus Route\:\s+(\w+)\s+Truck Route\:\s+(\w+)\s+Remediation.*$"
misc)]
(> (count x) 0)
(> (count (first x)) 1)
(first x))
)


(defn get-roads [src trap]
"filter/parse the road data"
(<- [?blurb, ?misc, ?geo, ?kind
?year_construct ?traffic_count ?traffic_index ?traffic_class ?paving_length ?paving_width
?paving_area ?surface_type ?bike_lane ?bus_route ?truck_route]
(src ?blurb, ?misc, ?geo, ?kind)
(re-matches #"^\s+Sequence\:.*\s+Year Constructed\:\s+(\d+)\s+Traffic.*" ?misc)
(parse-road ?misc :> _
?year_construct ?traffic_count ?traffic_index ?traffic_class ?paving_length ?paving_width
?paving_area ?surface_type ?bike_lane ?bus_route ?truck_route)
(:trap (hfs-textline trap))
)
)


(defn -main [in meta_tree meta_road trap park tree road & args]
(let [gis (hfs-delimited in)
tree_meta (hfs-delimited meta_tree :skip-header? true)
road_meta (hfs-delimited meta_road :skip-header? true)
src (etl-gis gis trap)]
src (etl-gis gis (s/join "/" [trap "gis"]))]
(?- (hfs-delimited park)
(get-parks src trap)
(get-parks src (s/join "/" [trap "park"]))
)
(?- (hfs-delimited tree)
(get-trees src trap)
(get-trees src (s/join "/" [trap "tree"]))
)
(?- (hfs-delimited road)
(get-roads src (s/join "/" [trap "road"]))
)
)
)

0 comments on commit 4659e6a

Please sign in to comment.