Skip to content
This repository has been archived by the owner on Jan 7, 2022. It is now read-only.

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Camilleh9 committed Nov 16, 2014
0 parents commit 00ad03d
Show file tree
Hide file tree
Showing 30 changed files with 233,799 additions and 0 deletions.
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
*.class

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.ear

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
bin
7 changes: 7 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
OSM2Hive
========

License
-------

TODO
Binary file added OSM2Hive.jar
Binary file not shown.
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
OSM2Hive
========

Read-me
-------

OSM2Hive is a collection of user-defined functions for Hive that allow importing OSM XML data.
It reads an XML file loaded into a Hive table, and parses it to create new tables in an
easier-to-use format. The application tests use the JUnit 4 framework.

Usage
-----

OSM2Hive has to be called directly in Hive. To do so, use the following commands (in Hive) :

```
ADD JAR /path/to/osm2hive.jar;
CREATE TEMPORARY FUNCTION OSMImportNodes AS 'info.pavie.osm2hive.controller.HiveNodeImporter';
CREATE TEMPORARY FUNCTION OSMImportWays AS 'info.pavie.osm2hive.controller.HiveWayImporter';
CREATE TEMPORARY FUNCTION OSMImportRelations AS 'info.pavie.osm2hive.controller.HiveRelationImporter';
CREATE TABLE osmdata(osm_content STRING) STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '/path/to/data.osm' OVERWRITE INTO TABLE osmdata;
CREATE TABLE osmnodes AS SELECT OSMImportNodes(osm_content) FROM osmdata;
CREATE TABLE osmways AS SELECT OSMImportWays(osm_content) FROM osmdata;
CREATE TABLE osmrelations AS SELECT OSMImportRelations(osm_content) FROM osmdata;
```

That's all.

License
-------

Copyright 2014 Adrien PAVIE

See LICENSE.md for the complete license.
30,583 changes: 30,583 additions & 0 deletions res/xml/bleruais.osm

Large diffs are not rendered by default.

201,019 changes: 201,019 additions & 0 deletions res/xml/cdg.osm

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions res/xml/sample.osm
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="CGImap 0.0.2">
<bounds minlat="54.0889580" minlon="12.2487570" maxlat="54.0913900" maxlon="12.2524800"/>
<node id="298884269" lat="54.0901746" lon="12.2482632" user="SvenHRO" uid="46882" visible="true" version="1" changeset="676636" timestamp="2008-09-21T21:37:45Z"/>
<node id="261728686" lat="54.0906309" lon="12.2441924" user="PikoWinter" uid="36744" visible="true" version="1" changeset="323878" timestamp="2008-05-03T13:39:23Z"/>
<node id="1831881213" version="1" changeset="12370172" lat="54.0900666" lon="12.2539381" user="lafkor" uid="75625" visible="true" timestamp="2012-07-20T09:43:19Z">
<tag k="name" v="Neu Broderstorf"/>
<tag k="traffic_sign" v="city_limit"/>
</node>
<node id="298884272" lat="54.0901447" lon="12.2516513" user="SvenHRO" uid="46882" visible="true" version="1" changeset="676636" timestamp="2008-09-21T21:37:45Z"/>
<way id="26659127" user="Masch" uid="55988" visible="true" version="5" changeset="4142606" timestamp="2010-03-16T11:47:08Z">
<nd ref="298884269"/>
<nd ref="298884272"/>
<nd ref="261728686"/>
<tag k="highway" v="unclassified"/>
<tag k="name" v="Pastower Straße"/>
</way>
<relation id="56688" user="kmvar" uid="56190" visible="true" version="28" changeset="6947637" timestamp="2011-01-12T14:23:49Z">
<member type="node" ref="298884269" role="stop"/>
<member type="node" ref="261728686" role=""/>
<member type="way" ref="26659127" role="path"/>
<member type="node" ref="298884272" role=""/>
<tag k="name" v="Küstenbus Linie 123"/>
<tag k="network" v="VVW"/>
<tag k="operator" v="Regionalverkehr Küste"/>
<tag k="ref" v="123"/>
<tag k="route" v="bus"/>
<tag k="type" v="route"/>
</relation>
</osm>
119 changes: 119 additions & 0 deletions src/main/info/pavie/osm2hive/controller/HiveImporter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
package info.pavie.osm2hive.controller;

import info.pavie.osm2hive.model.osm.Element;
import info.pavie.osm2hive.model.xml.InvalidMarkupException;

import java.util.ArrayList;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

/**
 * Abstract class, containing common functions to all Hive importers.
 * <p>
 * It validates the Hive function call, declares the output columns shared by
 * every OSM element, feeds the received text to the {@link OSMParser} and copies
 * the shared attributes of a parsed element into an output row.
 * See each inheriting class for more details.
 * @author Adrien PAVIE
 */
public abstract class HiveImporter extends GenericUDTF {
    //ATTRIBUTES
    /** Hive String Handler **/
    protected StringObjectInspector stringOI;

    /** OSM XML Parser **/
    protected OSMParser parser;

    /** Parsed OSM elements **/
    protected Map<String,Element> elements;

    //OTHER METHODS
    /**
     * Checks if hive function call is valid, and defines stringOI attribute.
     * A fresh {@link OSMParser} is also created for the call.
     * @param argOIs The hive function arguments
     * @throws UDFArgumentException If function call is invalid
     */
    protected void checkParameterOI(ObjectInspector[] argOIs) throws UDFArgumentException {
        if(argOIs.length != 1) {
            throw new UDFArgumentException("HiveImporter UDTF takes 1 argument: STRING");
        }

        //First (and only) parameter: the column holding the OSM XML content
        ObjectInspector arg1 = argOIs[0];

        if(!(arg1 instanceof StringObjectInspector)) {
            throw new UDFArgumentException("HiveImporter UDTF takes 1 argument: STRING");
        }

        this.stringOI = (StringObjectInspector) arg1;
        this.parser = new OSMParser();
    }

    /**
     * The returned list is a new, mutable instance, so callers can append their own columns.
     * @return The common field names (ID, UserID, Timestamp, ...).
     */
    protected ArrayList<String> getCommonFieldNames() {
        ArrayList<String> fieldNames = new ArrayList<String>();
        fieldNames.add("ID");
        fieldNames.add("UserID");
        fieldNames.add("Timestamp");
        fieldNames.add("IsVisible");
        fieldNames.add("Version");
        fieldNames.add("ChangesetID");
        fieldNames.add("Tags");

        return fieldNames;
    }

    /**
     * The inspectors are listed in the same order as the names from
     * {@link #getCommonFieldNames()}: string, long, string, boolean, int, long
     * and a string-to-string map.
     * @return The common field object inspectors (for ID, UserID, Timestamp, ...)
     */
    protected ArrayList<ObjectInspector> getCommonFieldOIs() {
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
        fieldOIs.add(ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        return fieldOIs;
    }

    /**
     * Parses the given line from arguments and returns the read Element
     * @param args The Hive command arguments
     * @return The read OSM element, or null if not ready (or if the received value is NULL)
     * @throws InvalidMarkupException If the line isn't a well-formed XML markup
     */
    protected Element preprocess(Object[] args) throws InvalidMarkupException {
        //Read the received line
        String line = stringOI.getPrimitiveJavaObject(args[0]);

        //A NULL value carries no markup: skip it instead of handing null to the parser
        if(line == null) {
            return null;
        }

        parser.parse(line);

        return (parser.isElementReady()) ? parser.getCurrentElement() : null;
    }

    /**
     * This methods fills a row array with common data extracted from the given element.
     * Indexes 0 to 6 are written, in the order given by {@link #getCommonFieldNames()}.
     * @param row The row array (size should be at least 7)
     * @param elem The element to use
     */
    protected void fillRow(Object[] row, Element elem) {
        row[0] = elem.getId();
        row[1] = elem.getUid();
        row[2] = elem.getTimestamp();
        row[3] = elem.isVisible();
        row[4] = elem.getVersion();
        row[5] = elem.getChangeset();
        row[6] = elem.getTags();
    }

    /**
     * Nothing to release at the end of the function call.
     */
    @Override
    public void close() throws HiveException {
        //Intentionally empty
    }
}
66 changes: 66 additions & 0 deletions src/main/info/pavie/osm2hive/controller/HiveNodeImporter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package info.pavie.osm2hive.controller;

import info.pavie.osm2hive.model.osm.Element;
import info.pavie.osm2hive.model.osm.Node;
import info.pavie.osm2hive.model.xml.InvalidMarkupException;

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * Hive table-generating function producing one row per OSM node read from OSM XML content.
 * Elements of any other kind are ignored.
 * To use it, you need to have a JAR of this application, and in Hive :
 * ADD JAR /path/to/osm2hive.jar;
 * CREATE TEMPORARY FUNCTION OSMImportNodes AS 'info.pavie.osm2hive.controller.HiveNodeImporter';
 * CREATE TABLE osmdata(osm_content STRING) STORED AS TEXTFILE;
 * LOAD DATA LOCAL INPATH '/path/to/data.osm' OVERWRITE INTO TABLE osmdata;
 * CREATE TABLE osmnodes AS SELECT OSMImportNodes(osm_content) FROM osmdata;
 * @author Adrien PAVIE
 */
public class HiveNodeImporter extends HiveImporter {
    //OTHER METHODS
    /**
     * Validates the function call and describes the output rows:
     * the common columns followed by Latitude and Longitude (both doubles).
     * @param argOIs The hive function arguments
     * @return The inspector describing the produced rows
     * @throws UDFArgumentException If function call is invalid
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        checkParameterOI(argOIs);

        //Common columns first, node-specific ones appended afterwards
        ArrayList<String> columnNames = getCommonFieldNames();
        ArrayList<ObjectInspector> columnOIs = getCommonFieldOIs();

        columnNames.add("Latitude");
        columnOIs.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);

        columnNames.add("Longitude");
        columnOIs.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);

        return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    }

    /**
     * Parses the received line and forwards a row when it completes a node.
     * @param args The Hive command arguments
     * @throws HiveException If the line isn't a well-formed XML markup, or if forwarding fails
     */
    @Override
    public void process(Object[] args) throws HiveException {
        try {
            Element parsed = preprocess(args);

            //Nothing to emit while no element is ready, or when it isn't a node
            //(instanceof is false for null, so no separate null test is needed)
            if(!(parsed instanceof Node)) {
                return;
            }

            Node node = (Node) parsed;

            //Common columns use indexes 0 to 6, coordinates come right after
            Object[] row = new Object[9];
            fillRow(row, parsed);
            row[7] = node.getLat();
            row[8] = node.getLon();

            forward(row);
        } catch (InvalidMarkupException e) {
            throw new HiveException(e);
        }
    }
}
78 changes: 78 additions & 0 deletions src/main/info/pavie/osm2hive/controller/HiveRelationImporter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package info.pavie.osm2hive.controller;

import info.pavie.osm2hive.model.osm.Element;
import info.pavie.osm2hive.model.osm.Relation;
import info.pavie.osm2hive.model.xml.InvalidMarkupException;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * This class reads an OSM XML file, and creates rows for Hive (Relations only).
 * To use it, you need to have a JAR of this application, and in Hive :
 * ADD JAR /path/to/osm2hive.jar;
 * CREATE TEMPORARY FUNCTION OSMImportRelations AS 'info.pavie.osm2hive.controller.HiveRelationImporter';
 * CREATE TABLE osmdata(osm_content STRING) STORED AS TEXTFILE;
 * LOAD DATA LOCAL INPATH '/path/to/data.osm' OVERWRITE INTO TABLE osmdata;
 * CREATE TABLE osmrelations AS SELECT OSMImportRelations(osm_content) FROM osmdata;
 * @author Adrien PAVIE
 */
public class HiveRelationImporter extends HiveImporter {
    //OTHER METHODS
    /**
     * Validates the function call and describes the output rows:
     * the common columns followed by a Members column (string-to-string map).
     * @param argOIs The hive function arguments
     * @return The inspector describing the produced rows
     * @throws UDFArgumentException If function call is invalid
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        //Check hive function call
        checkParameterOI(argOIs);

        //Expected output columns
        ArrayList<String> fieldNames = getCommonFieldNames();
        fieldNames.add("Members");

        //Expected output types
        ArrayList<ObjectInspector> fieldOIs = getCommonFieldOIs();
        fieldOIs.add(ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector));

        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Parses the received line and forwards a row when it completes a relation.
     * The Members column maps each value returned by Relation.getMembers() to its role;
     * a missing or empty role is stored as the string "null".
     * @param args The Hive command arguments
     * @throws HiveException If the line isn't a well-formed XML markup, or if forwarding fails
     */
    @Override
    public void process(Object[] args) throws HiveException {
        try {
            Element current = preprocess(args);

            //Check if element is valid and is a relation
            if(current != null && current instanceof Relation) {
                Relation relation = (Relation) current;

                //Create result
                Object[] currentRow = new Object[8];
                fillRow(currentRow, current);

                //Create members map
                Map<String,String> members = new HashMap<String,String>();
                for(String member : relation.getMembers()) {
                    //Role (guard against a missing role as well as an empty one)
                    String role = relation.getMemberRole(member);
                    if(role == null || role.equals("")) { role = "null"; }

                    members.put(member, role);
                }

                currentRow[7] = members;

                //Send result
                forward(currentRow);
            }
        } catch (InvalidMarkupException e) {
            throw new HiveException(e);
        }
    }
}
Loading

0 comments on commit 00ad03d

Please sign in to comment.