Skip to content

Commit

Permalink
adding tree geohash
Browse files Browse the repository at this point in the history
  • Loading branch information
ceteri committed Jan 13, 2013
1 parent 4659e6a commit 02e4128
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 44 deletions.
1 change: 1 addition & 0 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
[cascalog-checkpoint "0.2.0"]
[cascalog-more-taps "0.3.1-SNAPSHOT"]
[clojure-csv/clojure-csv "1.3.2"]
[org.clojars.sunng/geohash "1.0.1"]
]
:profiles {:dev {:dependencies [[midje-cascalog "0.4.0"]]}
:provided {:dependencies [[org.apache.hadoop/hadoop-core "0.20.2-dev"]]}}
Expand Down
78 changes: 49 additions & 29 deletions src/main/clj/copa/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
(:require [clojure.string :as s]
[cascalog [ops :as c] [vars :as v]]
[clojure-csv.core :as csv]
[geohash.core :as geo]
)
(:gen-class))

Expand All @@ -20,24 +21,13 @@
"generator to parse fields from the GIS source tap"
(<- [?blurb ?misc ?geo ?kind]
(gis ?line)
(parse-gis ?line :> ?blurb, ?misc, ?geo, ?kind)
(parse-gis ?line :> ?blurb ?misc ?geo ?kind)
(:distinct false)
(:trap (hfs-textline trap))
)
)


(defn get-parks
  "Cascalog query that keeps only the park records from `src`.

  `src` is a generator emitting ?blurb ?misc ?geo ?kind tuples. A tuple is
  kept when its ?misc field matches the \"Community Type: Park\" pattern
  below. `trap` is currently unused by this query."
  [src trap]
  (<- [?blurb, ?misc, ?geo, ?kind]
      (src ?blurb, ?misc, ?geo, ?kind)
      ;; No output vars are bound, so the predicate is used as a filter
      ;; (see the Cascalog documentation on filter predicates).
      (re-matches
        #"\s+Community Type\:\s+Park.*"
        ?misc)))


(defn parse-tree [misc]
"parse the special fields in the tree format"
(let [x (re-seq
Expand All @@ -49,14 +39,32 @@
)


(defn get-trees [src trap]
(defn geo-tree
  "Parse the geolocation field of a tree record.

  `geo` is expected to be a string made of three comma-separated runs of
  non-whitespace characters, optionally followed by trailing whitespace.
  Returns the match vector [whole-match first second third], or nil when
  `geo` does not have that shape. get-trees binds the three captured
  values as latitude, longitude and altitude."
  [geo]
  ;; The pattern is anchored at both ends, so there is at most one match;
  ;; re-find yields that match together with its capture groups, or nil.
  (re-find #"^(\S+),(\S+),(\S+)\s*$" geo))


(defn get-trees [src trap tree_meta]
"filter/parse the tree data"
(<- [?blurb, ?misc, ?geo, ?kind ?priv, ?tree_id, ?situs, ?tree_site, ?tree_species]
(src ?blurb, ?misc, ?geo, ?kind)
(<- [?blurb ?misc ?geo ?kind ?priv
?tree_id ?situs ?tree_site ?species ?wikipedia ?calflora ?min_height ?max_height
?tree_lat ?tree_lng ?tree_alt ?tree_geohash]
(src ?blurb ?misc ?geo ?kind)
(re-matches #"^\s+Private\:\s+(\S+)\s+Tree ID\:\s+.*" ?misc)
(parse-tree ?misc :> _
?priv, ?tree_id, ?situs, ?tree_site, ?raw_species)
((c/comp s/trim s/lower-case) ?raw_species :> ?tree_species)
?priv ?tree_id ?situs ?tree_site ?raw_species)
((c/comp s/trim s/lower-case) ?raw_species :> ?species)
(tree_meta ?species ?wikipedia ?calflora ?min_height ?max_height)
(geo-tree ?geo :> _ ?tree_lat ?tree_lng ?tree_alt)
(read-string ?tree_lat :> ?lat)
(read-string ?tree_lng :> ?lng)
(geo/encode ?lat ?lng 6 :> ?tree_geohash)
(:trap (hfs-textline trap))
)
)
Expand All @@ -73,34 +81,46 @@
)


(defn get-roads [src trap]
(defn get-roads [src trap road_meta]
"filter/parse the road data"
(<- [?blurb, ?misc, ?geo, ?kind
(<- [?blurb ?misc ?geo ?kind
?year_construct ?traffic_count ?traffic_index ?traffic_class ?paving_length ?paving_width
?paving_area ?surface_type ?bike_lane ?bus_route ?truck_route]
(src ?blurb, ?misc, ?geo, ?kind)
?paving_area ?pavement_type ?bike_lane ?bus_route ?truck_route ?albedo_new ?albedo_worn]
(src ?blurb ?misc ?geo ?kind)
(re-matches #"^\s+Sequence\:.*\s+Year Constructed\:\s+(\d+)\s+Traffic.*" ?misc)
(parse-road ?misc :> _
?year_construct ?traffic_count ?traffic_index ?traffic_class ?paving_length ?paving_width
?paving_area ?surface_type ?bike_lane ?bus_route ?truck_route)
?paving_area ?pavement_type ?bike_lane ?bus_route ?truck_route)
(road_meta ?pavement_type ?albedo_new ?albedo_worn)
(:trap (hfs-textline trap))
)
)


(defn -main [in meta_tree meta_road trap park tree road & args]
(defn get-parks
  "Cascalog query that keeps only the park records from `src`.

  `src` is a generator emitting ?blurb ?misc ?geo ?kind tuples. A tuple is
  kept when its ?misc field matches the \"Community Type: Park\" pattern
  below. `trap` is currently unused by this query."
  [src trap]
  (<- [?blurb ?misc ?geo ?kind]
      (src ?blurb ?misc ?geo ?kind)
      ;; No output vars are bound, so the predicate is used as a filter
      ;; (see the Cascalog documentation on filter predicates).
      (re-matches
        #"\s+Community Type\:\s+Park.*"
        ?misc)))


(defn -main [in meta_tree meta_road trap park_sink tree_sink road_sink & args]
(let [gis (hfs-delimited in)
tree_meta (hfs-delimited meta_tree :skip-header? true)
road_meta (hfs-delimited meta_road :skip-header? true)
src (etl-gis gis (s/join "/" [trap "gis"]))]
(?- (hfs-delimited park)
(get-parks src (s/join "/" [trap "park"]))
(?- (hfs-delimited tree_sink)
(get-trees src (s/join "/" [trap "tree"]) tree_meta)
)
(?- (hfs-delimited tree)
(get-trees src (s/join "/" [trap "tree"]))
(?- (hfs-delimited road_sink)
(get-roads src (s/join "/" [trap "road"]) road_meta)
)
(?- (hfs-delimited road)
(get-roads src (s/join "/" [trap "road"]))
(?- (hfs-delimited park_sink)
(get-parks src (s/join "/" [trap "park"]))
)
)
)
16 changes: 1 addition & 15 deletions src/main/java/copa/Main.java
Original file line number Diff line number Diff line change
@@ -1,21 +1,7 @@
/*
* Copyright (c) 2007-2012 Concurrent, Inc. All Rights Reserved.
* Copyright (c) 2007-2013 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package copa;
Expand Down

0 comments on commit 02e4128

Please sign in to comment.