imotov · mattweber · Jul 25, 2014 · Jul 25, 2014
diff --git a/pom.xml b/pom.xml
@@ -38,8 +38,8 @@
     <!-- The Elasticsearch version that the project will be built with -->
     <!-- ============================================================= -->
     <properties>
-        <elasticsearch.version>1.2.0</elasticsearch.version>
-        <lucene.version>4.8.1</lucene.version>
+        <elasticsearch.version>1.3.0</elasticsearch.version>
+        <lucene.version>4.9.0</lucene.version>
     </properties>
 
     <!-- ============================================================= -->

diff --git a/src/main/java/org/elasticsearch/examples/nativescript/plugin/NativeScriptExamplesPlugin.java b/src/main/java/org/elasticsearch/examples/nativescript/plugin/NativeScriptExamplesPlugin.java
@@ -1,13 +1,6 @@
 package org.elasticsearch.examples.nativescript.plugin;
 
-import org.elasticsearch.examples.nativescript.script.IsPrimeSearchScript;
-import org.elasticsearch.examples.nativescript.script.LanguageModelScoreScript;
-import org.elasticsearch.examples.nativescript.script.LookupScript;
-import org.elasticsearch.examples.nativescript.script.CosineSimilarityScoreScript;
-import org.elasticsearch.examples.nativescript.script.PhraseScoreScript;
-import org.elasticsearch.examples.nativescript.script.TFIDFScoreScript;
-import org.elasticsearch.examples.nativescript.script.PopularityScoreScriptFactory;
-import org.elasticsearch.examples.nativescript.script.RandomSortScriptFactory;
+import org.elasticsearch.examples.nativescript.script.*;
 import org.elasticsearch.plugins.AbstractPlugin;
 import org.elasticsearch.script.ScriptModule;
 
@@ -50,5 +43,6 @@ public void onModule(ScriptModule module) {
         module.registerScript(CosineSimilarityScoreScript.SCRIPT_NAME, CosineSimilarityScoreScript.Factory.class);
         module.registerScript(PhraseScoreScript.SCRIPT_NAME, PhraseScoreScript.Factory.class);
         module.registerScript(LanguageModelScoreScript.SCRIPT_NAME, LanguageModelScoreScript.Factory.class);
+        module.registerScript(SplitTransformScript.SCRIPT_NAME, SplitTransformScript.Factory.class);
     }
 }
diff --git a/src/main/java/org/elasticsearch/examples/nativescript/script/SplitTransformScript.java b/src/main/java/org/elasticsearch/examples/nativescript/script/SplitTransformScript.java
@@ -0,0 +1,98 @@
+package org.elasticsearch.examples.nativescript.script;
+
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.collect.Maps;
+import org.elasticsearch.common.xcontent.support.XContentMapValues;
+import org.elasticsearch.script.AbstractExecutableScript;
+import org.elasticsearch.script.ExecutableScript;
+import org.elasticsearch.script.NativeScriptFactory;
+import org.elasticsearch.script.ScriptException;
+
+import java.util.Map;
+
+/**
+ * Script that takes an input field and splits its value
+ * by the configured delimiter.  "a,b,c" -> ["a","b","c"]
+ */
+public class SplitTransformScript extends AbstractExecutableScript {
+
+    public static final String SCRIPT_NAME = "split_transform_script";
+
+    private final String field;
+    private final String delimiter;
+
+    // expect size 1 because we should only have a "ctx" var
+    private final Map<String, Object> vars = Maps.newHashMapWithExpectedSize(1);
+
+    /**
+     * Native scripts are built using factories that are registered in the
+     * {@link org.elasticsearch.examples.nativescript.plugin.NativeScriptExamplesPlugin#onModule(org.elasticsearch.script.ScriptModule)}
+     * method when the plugin is loaded.
+     */
+    public static class Factory implements NativeScriptFactory {
+
+        /**
+         * This method is called for every document indexed on all shards
+         *
+         * @param params list of script parameters configured in the mapping
+         * @return new native script
+         * @throws ScriptException if the mandatory "field" parameter is missing
+         */
+        @Override
+        public ExecutableScript newScript(@Nullable Map<String, Object> params) {
+            // Example of a mandatory string parameter
+            // The XContentMapValues helper class can be used to simplify parameter parsing
+            String field = params == null ? null : XContentMapValues.nodeStringValue(params.get("field"), null);
+            if (field == null) {
+                throw new ScriptException("[" + SCRIPT_NAME + "]: Missing the field parameter");
+            }
+
+            // Example of an optional string parameter
+            String delimiter = params == null ? "," : XContentMapValues.nodeStringValue(params.get("delimiter"), ",");
+            return new SplitTransformScript(field, delimiter);
+        }
+    }
+
+    /**
+     * @param field     the field within _source to split
+     * @param delimiter the value to split on
+     */
+    private SplitTransformScript(String field, String delimiter) {
+        this.field = field;
+        this.delimiter = delimiter;
+    }
+
+    // this loads the ctx
+    @Override
+    public void setNextVar(String name, Object value) {
+        vars.put(name, value);
+    }
+
+    @Override
+    @SuppressWarnings("unchecked") // both casts are guarded by instanceof Map checks
+    public Object run() {
+        // extract the source from the ctx
+        Object ctxValue = vars.get("ctx");
+        if (!(ctxValue instanceof Map)) {
+            // we should always have a ctx, but if not, just return a null value
+            return null;
+        }
+        Map<String, Object> ctx = (Map<String, Object>) ctxValue;
+        Object sourceValue = ctx.get("_source");
+        if (sourceValue instanceof Map) {
+            Map<String, Object> source = (Map<String, Object>) sourceValue;
+            String fieldVal = XContentMapValues.nodeStringValue(source.get(field), null);
+            // check the extracted value (not the field name, which is never null here)
+            if (fieldVal != null) {
+                // split the value and only overwrite existing value if the split was successful
+                String[] splitVals = Strings.delimitedListToStringArray(fieldVal, delimiter);
+                if (splitVals.length > 1) {
+                    source.put(field, splitVals);
+                }
+            }
+        }
+        return ctx;
+    }
+}
diff --git a/src/test/java/org/elasticsearch/examples/nativescript/script/AbstractSearchScriptTests.java b/src/test/java/org/elasticsearch/examples/nativescript/script/AbstractSearchScriptTests.java
@@ -27,6 +27,7 @@ public Settings indexSettings() {
     @Override
     protected Settings nodeSettings(int nodeOrdinal) {
         return ImmutableSettings.settingsBuilder()
+                .put("plugins.load_classpath_plugins", true)
                 .put("gateway.type", "none")
                 .put(super.nodeSettings(nodeOrdinal))
                 .build();

diff --git a/src/test/java/org/elasticsearch/examples/nativescript/script/SplitTransformScriptTests.java b/src/test/java/org/elasticsearch/examples/nativescript/script/SplitTransformScriptTests.java
@@ -0,0 +1,124 @@
+package org.elasticsearch.examples.nativescript.script;
+
+import org.elasticsearch.action.ListenableActionFuture;
+import org.elasticsearch.action.index.IndexRequestBuilder;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.elasticsearch.index.query.FilterBuilders.termFilter;
+import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
+import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
+import static org.hamcrest.Matchers.equalTo;
+
+public class SplitTransformScriptTests extends AbstractSearchScriptTests {
+
+    private static final int SPLITTABLE_DOCS = 100;
+    private static final int TOTAL_DOCS = 105;
+
+    /**
+     * Indexes documents through two split transforms (default "," and configured "-")
+     * and checks with term filters and stored fields that the values were split.
+     */
+    @Test
+    public void testSplitTransformScript() throws Exception {
+        // strcomma = test default delimiter
+        // strdash = test configured delimiter
+        String typeMapping = XContentFactory.jsonBuilder().startObject().startObject("type")
+                .startArray("transform")
+                .startObject()
+                .field("script", SplitTransformScript.SCRIPT_NAME)
+                .startObject("params")
+                .field("field", "strcomma")
+                .endObject()
+                .field("lang", "native")
+                .endObject()
+                .startObject()
+                .field("script", SplitTransformScript.SCRIPT_NAME)
+                .startObject("params")
+                .field("field", "strdash")
+                .field("delimiter", "-")
+                .endObject()
+                .field("lang", "native")
+                .endObject()
+                .endArray()
+                .startObject("properties")
+                .startObject("strcomma").field("type", "string").field("index", "not_analyzed").field("store", true).endObject()
+                .startObject("strdash").field("type", "string").field("index", "not_analyzed").field("store", true).endObject()
+                .startObject("num").field("type", "integer").endObject()
+                .endObject().endObject().endObject()
+                .string();
+
+        assertAcked(prepareCreate("test").addMapping("type", typeMapping));
+
+        List<IndexRequestBuilder> docs = new ArrayList<IndexRequestBuilder>();
+        // Documents 0..99: strcomma/strdash hold the sequence 0..id, num = id
+        StringBuilder commaJoined = new StringBuilder();
+        StringBuilder dashJoined = new StringBuilder();
+        for (int id = 0; id < SPLITTABLE_DOCS; id++) {
+            if (id > 0) {
+                commaJoined.append(",");
+                dashJoined.append("-");
+            }
+            commaJoined.append(id);
+            dashJoined.append(id);
+            docs.add(client().prepareIndex("test", "type", Integer.toString(id))
+                    .setSource(XContentFactory.jsonBuilder().startObject()
+                            .field("strcomma", commaJoined.toString())
+                            .field("strdash", dashJoined.toString())
+                            .field("num", id)
+                            .endObject()));
+        }
+
+        // Documents 100..104 carry only num, leaving both transforms nothing to split
+        for (int id = SPLITTABLE_DOCS; id < TOTAL_DOCS; id++) {
+            docs.add(client().prepareIndex("test", "type", Integer.toString(id))
+                    .setSource(XContentFactory.jsonBuilder().startObject()
+                            .field("num", id)
+                            .endObject()));
+        }
+
+        indexRandom(true, docs);
+
+        for (int term = 0; term < TOTAL_DOCS; term++) {
+            // start both searches before blocking on either result
+            ListenableActionFuture<SearchResponse> pendingComma = searchByTerm("strcomma", term);
+            ListenableActionFuture<SearchResponse> pendingDash = searchByTerm("strdash", term);
+
+            SearchResponse commaResult = pendingComma.actionGet();
+            SearchResponse dashResult = pendingDash.actionGet();
+
+            assertNoFailures(commaResult);
+            assertNoFailures(dashResult);
+
+            // terms 100..104 were never indexed in either string field
+            int expectedHits = term >= SPLITTABLE_DOCS ? 0 : SPLITTABLE_DOCS - term;
+            assertHitCount(commaResult, expectedHits);
+            assertHitCount(dashResult, expectedHits);
+
+            // Verify that they were actually split
+            for (int hit = 0; hit < expectedHits; hit++) {
+                int commaNum = commaResult.getHits().getAt(hit).field("num").value();
+                int dashNum = dashResult.getHits().getAt(hit).field("num").value();
+                assertThat(commaResult.getHits().getAt(hit).field("strcomma").values().size(), equalTo(commaNum + 1));
+                assertThat(dashResult.getHits().getAt(hit).field("strdash").values().size(), equalTo(dashNum + 1));
+            }
+        }
+    }
+
+    /**
+     * Starts (without waiting for) a search on the "test" index for documents whose
+     * given field matches the term, requesting that field and "num" back.
+     */
+    private ListenableActionFuture<SearchResponse> searchByTerm(String fieldName, int term) {
+        return client().prepareSearch("test")
+                .setQuery(filteredQuery(matchAllQuery(), termFilter(fieldName, Integer.toString(term))))
+                .addFields(fieldName, "num")
+                .setSize(TOTAL_DOCS)
+                .execute();
+    }
+}