Commit 00ad03d (0 parents)
Showing 30 changed files with 233,799 additions and 0 deletions.
@@ -0,0 +1,13 @@
*.class

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.ear

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
bin
@@ -0,0 +1,7 @@
OSM2Hive
========

License
-------

TODO
Binary file not shown.
@@ -0,0 +1,35 @@
OSM2Hive
========

Read-me
-------

OSM2Hive is a collection of user-defined functions (UDFs) for Hive that import OSM XML data.
It reads an OSM XML file loaded into a Hive table and parses it to create new tables in an
easier-to-use format. The application tests use the JUnit 4 framework.

Usage
-----

OSM2Hive has to be called directly from Hive. To do so, use the following commands in the Hive shell:

```
ADD JAR /path/to/osm2hive.jar;
CREATE TEMPORARY FUNCTION OSMImportNodes AS 'info.pavie.osm2hive.controller.HiveNodeImporter';
CREATE TEMPORARY FUNCTION OSMImportWays AS 'info.pavie.osm2hive.controller.HiveWayImporter';
CREATE TEMPORARY FUNCTION OSMImportRelations AS 'info.pavie.osm2hive.controller.HiveRelationImporter';
CREATE TABLE osmdata(osm_content STRING) STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '/path/to/data.osm' OVERWRITE INTO TABLE osmdata;
CREATE TABLE osmnodes AS SELECT OSMImportNodes(osm_content) FROM osmdata;
CREATE TABLE osmways AS SELECT OSMImportWays(osm_content) FROM osmdata;
CREATE TABLE osmrelations AS SELECT OSMImportRelations(osm_content) FROM osmdata;
```
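
These commands produce plain Hive tables, so they can be queried like any other. A minimal sketch of a follow-up query, assuming the output column names follow the field lists declared by the importers:

```
-- Sketch only: table names come from the commands above; column names (id, tags, ...)
-- are assumed to match the importers' declared field lists.
SELECT id, tags['name']
FROM osmrelations
WHERE tags['type'] = 'route';
```
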
That's all.

License
-------

Copyright 2014 Adrien PAVIE

See LICENSE for complete license.
@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="CGImap 0.0.2">
  <bounds minlat="54.0889580" minlon="12.2487570" maxlat="54.0913900" maxlon="12.2524800"/>
  <node id="298884269" lat="54.0901746" lon="12.2482632" user="SvenHRO" uid="46882" visible="true" version="1" changeset="676636" timestamp="2008-09-21T21:37:45Z"/>
  <node id="261728686" lat="54.0906309" lon="12.2441924" user="PikoWinter" uid="36744" visible="true" version="1" changeset="323878" timestamp="2008-05-03T13:39:23Z"/>
  <node id="1831881213" version="1" changeset="12370172" lat="54.0900666" lon="12.2539381" user="lafkor" uid="75625" visible="true" timestamp="2012-07-20T09:43:19Z">
    <tag k="name" v="Neu Broderstorf"/>
    <tag k="traffic_sign" v="city_limit"/>
  </node>
  <node id="298884272" lat="54.0901447" lon="12.2516513" user="SvenHRO" uid="46882" visible="true" version="1" changeset="676636" timestamp="2008-09-21T21:37:45Z"/>
  <way id="26659127" user="Masch" uid="55988" visible="true" version="5" changeset="4142606" timestamp="2010-03-16T11:47:08Z">
    <nd ref="298884269"/>
    <nd ref="298884272"/>
    <nd ref="261728686"/>
    <tag k="highway" v="unclassified"/>
    <tag k="name" v="Pastower Straße"/>
  </way>
  <relation id="56688" user="kmvar" uid="56190" visible="true" version="28" changeset="6947637" timestamp="2011-01-12T14:23:49Z">
    <member type="node" ref="298884269" role="stop"/>
    <member type="node" ref="261728686" role=""/>
    <member type="way" ref="26659127" role="path"/>
    <member type="node" ref="298884272" role=""/>
    <tag k="name" v="Küstenbus Linie 123"/>
    <tag k="network" v="VVW"/>
    <tag k="operator" v="Regionalverkehr Küste"/>
    <tag k="ref" v="123"/>
    <tag k="route" v="bus"/>
    <tag k="type" v="route"/>
  </relation>
</osm>
src/main/info/pavie/osm2hive/controller/HiveImporter.java (119 additions, 0 deletions)
@@ -0,0 +1,119 @@
package info.pavie.osm2hive.controller;

import info.pavie.osm2hive.model.osm.Element;
import info.pavie.osm2hive.model.xml.InvalidMarkupException;

import java.util.ArrayList;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

/**
 * Abstract class containing the functions common to all Hive importers.
 * See each inheriting class for more details.
 * @author Adrien PAVIE
 */
public abstract class HiveImporter extends GenericUDTF {
//ATTRIBUTES
    /** Hive String Handler **/
    protected StringObjectInspector stringOI;

    /** OSM XML Parser **/
    protected OSMParser parser;

    /** Parsed OSM elements **/
    protected Map<String,Element> elements;

//OTHER METHODS
    /**
     * Checks if the Hive function call is valid, and defines the stringOI attribute.
     * @param argOIs The Hive function arguments
     * @throws UDFArgumentException If the function call is invalid
     */
    protected void checkParameterOI(ObjectInspector[] argOIs) throws UDFArgumentException {
        if(argOIs.length != 1) {
            throw new UDFArgumentException("HiveImporter UDTF takes 1 argument: STRING");
        }

        ObjectInspector arg1 = argOIs[0]; //First parameter, corresponding to the OSM XML file content

        if(!(arg1 instanceof StringObjectInspector)) {
            throw new UDFArgumentException("HiveImporter UDTF takes 1 argument: STRING");
        }

        this.stringOI = (StringObjectInspector) arg1;
        this.parser = new OSMParser();
    }

    /**
     * @return The common field names (ID, UserID, Timestamp, ...)
     */
    protected ArrayList<String> getCommonFieldNames() {
        ArrayList<String> fieldNames = new ArrayList<String>();
        fieldNames.add("ID");
        fieldNames.add("UserID");
        fieldNames.add("Timestamp");
        fieldNames.add("IsVisible");
        fieldNames.add("Version");
        fieldNames.add("ChangesetID");
        fieldNames.add("Tags");

        return fieldNames;
    }

    /**
     * @return The common field object inspectors (for ID, UserID, Timestamp, ...)
     */
    protected ArrayList<ObjectInspector> getCommonFieldOIs() {
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
        fieldOIs.add(ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        return fieldOIs;
    }

    /**
     * Parses the given line from the arguments and returns the read Element.
     * @param args The Hive command arguments
     * @return The read OSM element, or null if not ready
     * @throws InvalidMarkupException If the line isn't a well-formed XML markup
     */
    protected Element preprocess(Object[] args) throws InvalidMarkupException {
        //Parse the received line
        String line = (String) stringOI.getPrimitiveJavaObject(args[0]);
        parser.parse(line);

        return (parser.isElementReady()) ? parser.getCurrentElement() : null;
    }

    /**
     * Fills a row array with the common data extracted from the given element.
     * @param row The row array (size should be >= 7)
     * @param elem The element to use
     */
    protected void fillRow(Object[] row, Element elem) {
        row[0] = elem.getId();
        row[1] = elem.getUid();
        row[2] = elem.getTimestamp();
        row[3] = elem.isVisible();
        row[4] = elem.getVersion();
        row[5] = elem.getChangeset();
        row[6] = elem.getTags();
    }

    @Override
    public void close() throws HiveException {}
}
src/main/info/pavie/osm2hive/controller/HiveNodeImporter.java (66 additions, 0 deletions)
@@ -0,0 +1,66 @@
package info.pavie.osm2hive.controller;

import info.pavie.osm2hive.model.osm.Element;
import info.pavie.osm2hive.model.osm.Node;
import info.pavie.osm2hive.model.xml.InvalidMarkupException;

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * This class reads an OSM XML file and creates rows for Hive (nodes only).
 * To use it, you need a JAR of this application, and in Hive:
 * ADD JAR /path/to/osm2hive.jar;
 * CREATE TEMPORARY FUNCTION OSMImportNodes AS 'info.pavie.osm2hive.controller.HiveNodeImporter';
 * CREATE TABLE osmdata(osm_content STRING) STORED AS TEXTFILE;
 * LOAD DATA LOCAL INPATH '/path/to/data.osm' OVERWRITE INTO TABLE osmdata;
 * CREATE TABLE osmnodes AS SELECT OSMImportNodes(osm_content) FROM osmdata;
 * @author Adrien PAVIE
 */
public class HiveNodeImporter extends HiveImporter {
//OTHER METHODS
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        //Check hive function call
        checkParameterOI(argOIs);

        //Expected output columns
        ArrayList<String> fieldNames = getCommonFieldNames();
        fieldNames.add("Latitude");
        fieldNames.add("Longitude");

        //Expected output types
        ArrayList<ObjectInspector> fieldOIs = getCommonFieldOIs();
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);

        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    @Override
    public void process(Object[] args) throws HiveException {
        try {
            Element current = preprocess(args);

            //Check if the element is valid and is a node
            if(current != null && current instanceof Node) {
                //Create result (7 common fields + latitude + longitude)
                Object[] currentRow = new Object[9];
                fillRow(currentRow, current);
                currentRow[7] = ((Node) current).getLat();
                currentRow[8] = ((Node) current).getLon();

                //Send result
                forward(currentRow);
            }
        } catch (InvalidMarkupException e) {
            throw new HiveException(e);
        }
    }
}
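
A quick way to exercise this importer once the osmnodes table from the README exists is a bounding-box filter on the two extra columns it adds. This is a sketch only; the column names are assumed from initialize() above, and the coordinates are simply the bounds of the sample extract shown earlier:

```
-- Sketch: assumes the osmnodes table from the README and the field names
-- declared in HiveNodeImporter.initialize() (Latitude, Longitude).
SELECT id, latitude, longitude, tags['name']
FROM osmnodes
WHERE latitude  BETWEEN 54.0889580 AND 54.0913900
  AND longitude BETWEEN 12.2487570 AND 12.2524800;
```
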
src/main/info/pavie/osm2hive/controller/HiveRelationImporter.java (78 additions, 0 deletions)
@@ -0,0 +1,78 @@
package info.pavie.osm2hive.controller;

import info.pavie.osm2hive.model.osm.Element;
import info.pavie.osm2hive.model.osm.Relation;
import info.pavie.osm2hive.model.xml.InvalidMarkupException;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * This class reads an OSM XML file and creates rows for Hive (relations only).
 * To use it, you need a JAR of this application, and in Hive:
 * ADD JAR /path/to/osm2hive.jar;
 * CREATE TEMPORARY FUNCTION OSMImportRelations AS 'info.pavie.osm2hive.controller.HiveRelationImporter';
 * CREATE TABLE osmdata(osm_content STRING) STORED AS TEXTFILE;
 * LOAD DATA LOCAL INPATH '/path/to/data.osm' OVERWRITE INTO TABLE osmdata;
 * CREATE TABLE osmrelations AS SELECT OSMImportRelations(osm_content) FROM osmdata;
 * @author Adrien PAVIE
 */
public class HiveRelationImporter extends HiveImporter {
//OTHER METHODS
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        //Check hive function call
        checkParameterOI(argOIs);

        //Expected output columns
        ArrayList<String> fieldNames = getCommonFieldNames();
        fieldNames.add("Members");

        //Expected output types
        ArrayList<ObjectInspector> fieldOIs = getCommonFieldOIs();
        fieldOIs.add(ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    @Override
    public void process(Object[] args) throws HiveException {
        try {
            Element current = preprocess(args);

            //Check if the element is valid and is a relation
            if(current != null && current instanceof Relation) {
                //Create result (7 common fields + members map)
                Object[] currentRow = new Object[8];
                fillRow(currentRow, current);

                //Create the members map (member reference -> role)
                Map<String,String> members = new HashMap<String,String>();
                for(String e : ((Relation) current).getMembers()) {
                    //Role ("null" if the member has no role)
                    String role = ((Relation) current).getMemberRole(e);
                    if(role.equals("")) { role = "null"; }

                    members.put(e, role);
                }

                currentRow[7] = members;

                //Send result
                forward(currentRow);
            }
        } catch (InvalidMarkupException e) {
            throw new HiveException(e);
        }
    }
}
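
Since the members column produced here is a plain Hive map, the member references and roles can be unpacked with a lateral view. A minimal sketch, assuming the osmrelations table and column names from the README setup:

```
-- Sketch: unpacks the members map (member reference -> role, "null" when empty)
-- produced by HiveRelationImporter; table and column names are assumptions.
SELECT id, member_ref, member_role
FROM osmrelations
LATERAL VIEW explode(members) m AS member_ref, member_role;
```
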