diff --git a/src/main/java/casia/isiteam/zdr/neo4j/index/CustomizeFullTextSearcher.java b/src/main/java/casia/isiteam/zdr/neo4j/index/CustomizeFullTextSearcher.java new file mode 100644 index 0000000..92f81b7 --- /dev/null +++ b/src/main/java/casia/isiteam/zdr/neo4j/index/CustomizeFullTextSearcher.java @@ -0,0 +1,150 @@ +package casia.isiteam.zdr.neo4j.index; +/** + *         ┏┓ ┏┓+ + + *        ┏┛┻━━━━━━━┛┻┓ + + + *        ┃       ┃ + *        ┃   ━   ┃ ++ + + + + *        █████━█████ ┃+ + *        ┃       ┃ + + *        ┃   ┻   ┃ + *        ┃       ┃ + + + *        ┗━━┓    ┏━┛ + * ┃   ┃ + *          ┃   ┃ + + + + + *          ┃   ┃ Code is far away from bug with the animal protecting + *          ┃   ┃ + + *          ┃   ┃ + *          ┃   ┃  + + *          ┃    ┗━━━┓ + + + *          ┃      ┣┓ + *          ┃      ┏┛ + *          ┗┓┓┏━━━┳┓┏┛ + + + + + *           ┃┫┫  ┃┫┫ + *           ┗┻┛  ┗┻┛+ + + + + */ + +import org.apache.lucene.queryparser.classic.ParseException; +import org.neo4j.graphdb.DependencyResolver; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.NotFoundException; +import org.neo4j.graphdb.schema.IndexDefinition; +import org.neo4j.graphdb.schema.Schema; +import org.neo4j.internal.kernel.api.IndexReference; +import org.neo4j.internal.kernel.api.exceptions.schema.IndexNotFoundKernelException; +import org.neo4j.kernel.api.KernelTransaction; +import org.neo4j.kernel.api.impl.fulltext.FulltextAdapter; +import org.neo4j.kernel.api.impl.fulltext.FulltextProcedures; +import org.neo4j.kernel.api.impl.fulltext.ScoreEntityIterator; +import org.neo4j.kernel.impl.api.KernelTransactionImplementation; +import org.neo4j.procedure.Context; +import org.neo4j.procedure.Description; +import org.neo4j.procedure.Name; +import org.neo4j.procedure.Procedure; +import org.neo4j.storageengine.api.EntityType; +import org.neo4j.storageengine.api.schema.IndexDescriptor; +import org.neo4j.util.FeatureToggles; + +import java.io.IOException; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + +import static org.neo4j.procedure.Mode.READ; + +/** + * @author YanchaoMa yanchaoma@foxmail.com + * @PACKAGE_NAME: casia.isiteam.zdr.neo4j.index + * @Description: TODO(自定义全文检索得分算法) + * @date 2019/8/16 14:30 + */ +public class CustomizeFullTextSearcher { + + private static final long INDEX_ONLINE_QUERY_TIMEOUT_SECONDS = FeatureToggles.getInteger( + FulltextProcedures.class, "INDEX_ONLINE_QUERY_TIMEOUT_SECONDS", 30); + + @Context + public KernelTransaction tx; + + @Context + public GraphDatabaseService db; + + @Context + public DependencyResolver resolver; + + @Context + public FulltextAdapter accessor; + + /** + * @param name:索引名 + * @param query:查询STRING + * @return + * @Description: TODO(使用SimHash计算得分) + */ + @Description("Query the given fulltext index. Returns the matching nodes and their lucene query score, ordered by score.") + @Procedure(name = "db.index.fulltext.queryNodesBySimHash", mode = READ) + public Stream queryFulltextForNodesBySimHash(@Name("indexName") String name, @Name("queryString") String query) + throws ParseException, IndexNotFoundKernelException, IOException { + IndexReference indexReference = getValidIndexReference(name); + awaitOnline(indexReference); + EntityType entityType = indexReference.schema().entityType(); + if (entityType != EntityType.NODE) { + throw new IllegalArgumentException("The '" + name + "' index (" + indexReference + ") is an index on " + entityType + + ", so it cannot be queried for nodes."); + } + ScoreEntityIterator resultIterator = accessor.query(tx, name, query); +// return resultIterator.stream() +// .map(result -> NodeOutput.forExistingEntityOrNull(db, result)) +// .filter(Objects::nonNull); + return null; + } + + private IndexReference getValidIndexReference(@Name("indexName") String name) { + IndexReference indexReference = tx.schemaRead().indexGetForName(name); + if (indexReference == IndexReference.NO_INDEX) { + throw new IllegalArgumentException("There is no such fulltext schema index: " + name); + } + return indexReference; + } + + private void awaitOnline(IndexReference indexReference) throws IndexNotFoundKernelException { + // We do the isAdded check on the transaction state first, because indexGetState will grab a schema read-lock, which can deadlock on the write-lock + // held by the index populator. Also, if we index was created in this transaction, then we will never see it come online in this transaction anyway. + // Indexes don't come online until the transaction that creates them has committed. + if (!((KernelTransactionImplementation) tx).txState().indexDiffSetsBySchema(indexReference.schema()).isAdded((IndexDescriptor) indexReference)) { + // If the index was not created in this transaction, then wait for it to come online before querying. + Schema schema = db.schema(); + IndexDefinition index = schema.getIndexByName(indexReference.name()); + schema.awaitIndexOnline(index, INDEX_ONLINE_QUERY_TIMEOUT_SECONDS, TimeUnit.SECONDS); + } + // If the index was created in this transaction, then we skip this check entirely. + // We will get an exception later, when we try to get an IndexReader, so this is fine. + } + + public static final class NodeOutput + { + public final Node node; + public final double score; + + protected NodeOutput( Node node, double score ) + { + this.node = node; + this.score = score; + } + +// public static FulltextProcedures.NodeOutput forExistingEntityOrNull(GraphDatabaseService db, ScoreEntityIterator.ScoreEntry result ) +// { +// try +// { +// return new FulltextProcedures.NodeOutput( db.getNodeById( result.entityId() ), result.score() ); +// } +// catch ( NotFoundException ignore ) +// { +// // This node was most likely deleted by a concurrent transaction, so we just ignore it. +// return null; +// } +// } + } + +} + diff --git a/src/main/java/casia/isiteam/zdr/neo4j/index/CustomizeScoreEntityIterator.java b/src/main/java/casia/isiteam/zdr/neo4j/index/CustomizeScoreEntityIterator.java new file mode 100644 index 0000000..4df4d2c --- /dev/null +++ b/src/main/java/casia/isiteam/zdr/neo4j/index/CustomizeScoreEntityIterator.java @@ -0,0 +1,210 @@ +package casia.isiteam.zdr.neo4j.index; +/** + *         ┏┓ ┏┓+ + + *        ┏┛┻━━━━━━━┛┻┓ + + + *        ┃       ┃ + *        ┃   ━   ┃ ++ + + + + *        █████━█████ ┃+ + *        ┃       ┃ + + *        ┃   ┻   ┃ + *        ┃       ┃ + + + *        ┗━━┓    ┏━┛ + * ┃   ┃ + *          ┃   ┃ + + + + + *          ┃   ┃ Code is far away from bug with the animal protecting + *          ┃   ┃ + + *          ┃   ┃ + *          ┃   ┃  + + *          ┃    ┗━━━┓ + + + *          ┃      ┣┓ + *          ┃      ┏┛ + *          ┗┓┓┏━━━┳┓┏┛ + + + + + *           ┃┫┫  ┃┫┫ + *           ┗┻┛  ┗┻┛+ + + + + */ + +import org.neo4j.kernel.api.impl.index.collector.ValuesIterator; + +import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +/** + * @author YanchaoMa yanchaoma@foxmail.com + * @PACKAGE_NAME: casia.isiteam.zdr.neo4j.index + * @Description: TODO(Iterator over entity ids together with their respective score.) + * @date 2019/8/16 16:41 + */ +public class CustomizeScoreEntityIterator implements Iterator { + private final ValuesIterator iterator; + private final Predicate predicate; + private CustomizeScoreEntityIterator.ScoreEntry next; + + CustomizeScoreEntityIterator( ValuesIterator sortedValuesIterator ) + { + this.iterator = sortedValuesIterator; + this.predicate = null; + } + + private CustomizeScoreEntityIterator( ValuesIterator sortedValuesIterator, Predicate predicate ) + { + this.iterator = sortedValuesIterator; + this.predicate = predicate; + } + + public Stream stream() + { + return StreamSupport.stream( Spliterators.spliteratorUnknownSize( this, Spliterator.ORDERED ), false ); + } + + @Override + public boolean hasNext() + { + while ( next == null && iterator.hasNext() ) + { + long entityId = iterator.next(); + float score = iterator.currentScore(); + CustomizeScoreEntityIterator.ScoreEntry tmp = new CustomizeScoreEntityIterator.ScoreEntry( entityId, score ); + if ( predicate == null || predicate.test( tmp ) ) + { + next = tmp; + } + } + return next != null; + } + + @Override + public CustomizeScoreEntityIterator.ScoreEntry next() + { + if ( hasNext() ) + { + CustomizeScoreEntityIterator.ScoreEntry tmp = next; + next = null; + return tmp; + } + else + { + throw new NoSuchElementException( "The iterator is exhausted" ); + } + } + + CustomizeScoreEntityIterator filter( Predicate predicate ) + { + if ( this.predicate != null ) + { + predicate = this.predicate.and( predicate ); + } + return new CustomizeScoreEntityIterator( iterator, predicate ); + } + + /** + * Merges the given iterators into a single iterator, that maintains the aggregate descending score sort order. + * + * @param iterators to concatenate + * @return a {@link CustomizeScoreEntityIterator} that iterates over all of the elements in all of the given iterators + */ + static CustomizeScoreEntityIterator mergeIterators( List iterators ) + { + return new CustomizeScoreEntityIterator.ConcatenatingScoreEntityIterator( iterators ); + } + + private static class ConcatenatingScoreEntityIterator extends CustomizeScoreEntityIterator + { + private final List iterators; + private final CustomizeScoreEntityIterator.ScoreEntry[] buffer; + private boolean fetched; + private CustomizeScoreEntityIterator.ScoreEntry nextHead; + + ConcatenatingScoreEntityIterator( List iterators ) + { + super( null ); + this.iterators = iterators; + this.buffer = new CustomizeScoreEntityIterator.ScoreEntry[iterators.size()]; + } + + @Override + public boolean hasNext() + { + if ( !fetched ) + { + fetch(); + } + return nextHead != null; + } + + private void fetch() + { + int candidateHead = -1; + for ( int i = 0; i < iterators.size(); i++ ) + { + CustomizeScoreEntityIterator.ScoreEntry entry = buffer[i]; + //Fill buffer if needed. + if ( entry == null && iterators.get( i ).hasNext() ) + { + entry = iterators.get( i ).next(); + buffer[i] = entry; + } + + //Check if entry might be candidate for next to return. + if ( entry != null && (nextHead == null || entry.score > nextHead.score) ) + { + nextHead = entry; + candidateHead = i; + } + } + if ( candidateHead != -1 ) + { + buffer[candidateHead] = null; + } + fetched = true; + } + + @Override + public CustomizeScoreEntityIterator.ScoreEntry next() + { + if ( hasNext() ) + { + fetched = false; + CustomizeScoreEntityIterator.ScoreEntry best = nextHead; + nextHead = null; + return best; + } + else + { + throw new NoSuchElementException( "The iterator is exhausted" ); + } + } + } + + /** + * A ScoreEntry consists of an entity id together with its score. + */ + static class ScoreEntry + { + private final long entityId; + private final float score; + + long entityId() + { + return entityId; + } + + float score() + { + return score; + } + + ScoreEntry( long entityId, float score ) + { + this.entityId = entityId; + this.score = score; + } + + @Override + public String toString() + { + return "ScoreEntry[entityId=" + entityId + ", score=" + score + "]"; + } + } +} diff --git a/src/test/java/casia/isiteam/zdr/neo4j/index/CustomizeFullTextSearcherTest.java b/src/test/java/casia/isiteam/zdr/neo4j/index/CustomizeFullTextSearcherTest.java new file mode 100644 index 0000000..87cc959 --- /dev/null +++ b/src/test/java/casia/isiteam/zdr/neo4j/index/CustomizeFullTextSearcherTest.java @@ -0,0 +1,239 @@ +package casia.isiteam.zdr.neo4j.index; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.RuleChain; +import org.junit.rules.Timeout; +import org.neo4j.graphdb.*; +import org.neo4j.graphdb.factory.GraphDatabaseBuilder; +import org.neo4j.graphdb.factory.GraphDatabaseFactory; +import org.neo4j.graphdb.factory.GraphDatabaseSettings; +import org.neo4j.harness.junit.Neo4jRule; +import org.neo4j.kernel.internal.GraphDatabaseAPI; +import org.neo4j.test.rule.CleanupRule; +import org.neo4j.test.rule.TestDirectory; +import org.neo4j.test.rule.VerboseTimeout; +import org.neo4j.test.rule.fs.DefaultFileSystemRule; + +import java.util.Arrays; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import static java.lang.String.format; +import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +/** + *         ┏┓ ┏┓+ + + *        ┏┛┻━━━━━━━┛┻┓ + + + *        ┃       ┃ + *        ┃   ━   ┃ ++ + + + + *        █████━█████ ┃+ + *        ┃       ┃ + + *        ┃   ┻   ┃ + *        ┃       ┃ + + + *        ┗━━┓    ┏━┛ + * ┃   ┃ + *          ┃   ┃ + + + + + *          ┃   ┃ Code is far away from bug with the animal protecting + *          ┃   ┃ + + *          ┃   ┃ + *          ┃   ┃  + + *          ┃    ┗━━━┓ + + + *          ┃      ┣┓ + *          ┃      ┏┛ + *          ┗┓┓┏━━━┳┓┏┛ + + + + + *           ┃┫┫  ┃┫┫ + *           ┗┻┛  ┗┻┛+ + + + + */ + +/** + * @author YanchaoMa yanchaoma@foxmail.com + * @PACKAGE_NAME: casia.isiteam.zdr.neo4j.index + * @Description: TODO(测试自定义得分计算) + * @date 2019/8/16 14:49 + */ +public class CustomizeFullTextSearcherTest { + + @Rule + public Neo4jRule neo4j = new Neo4jRule().withProcedure(CustomizeFullTextSearcher.class); + +// static final String QUERY_NODES = "CALL db.index.fulltext.queryNodesBySimHash(\"%s\", \"%s\")"; + static final String QUERY_NODES = "CALL db.index.fulltext.queryNodes(\"%s\", \"%s\")"; + + static final String NODE_CREATE = "CALL db.index.fulltext.createNodeIndex(\"%s\", %s, %s )"; + + private GraphDatabaseAPI db; + private GraphDatabaseBuilder builder; + + private final Timeout timeout = VerboseTimeout.builder().withTimeout(1, TimeUnit.HOURS).build(); + private final DefaultFileSystemRule fs = new DefaultFileSystemRule(); + private final TestDirectory testDirectory = TestDirectory.testDirectory(); + private final ExpectedException expectedException = ExpectedException.none(); + private final CleanupRule cleanup = new CleanupRule(); + + private static final Label LABEL = Label.label("Label"); + private static final String PROP = "prop"; + static final String NODE = "node"; + static final String RELATIONSHIP = "relationship"; + private static final String SCORE = "score"; + + @Rule + public final RuleChain rules = RuleChain.outerRule(timeout).around(fs).around(testDirectory).around(expectedException).around(cleanup); + + + @Before + public void before() { + GraphDatabaseFactory factory = new GraphDatabaseFactory(); + builder = factory.newEmbeddedDatabaseBuilder(testDirectory.databaseDir()); + builder.setConfig(GraphDatabaseSettings.store_internal_log_level, "DEBUG"); + } + + @After + public void tearDown() { + if (db != null) { + db.shutdown(); + } + } + + private GraphDatabaseAPI createDatabase() { + return (GraphDatabaseAPI) cleanup.add(builder.newGraphDatabase()); + } + + static String array(String... args) { + return Arrays.stream(args).map(s -> "\"" + s + "\"").collect(Collectors.joining(", ", "[", "]")); + } + + private void awaitIndexesOnline() { + try (Transaction tx = db.beginTx()) { + db.schema().awaitIndexesOnline(1, TimeUnit.MINUTES); + tx.success(); + } + } + + /** + * @param + * @return + * @Description: TODO(测试SimHash算法) + */ + @Test + public void queryFulltextForNodesBySimHash() { + + db = createDatabase(); + + try (Transaction tx = db.beginTx()) { + db.execute(format(NODE_CREATE, "nodes", array(LABEL.name()), array("prop1", "prop2"))).close(); + tx.success(); + } + long nodeId; + try (Transaction tx = db.beginTx()) { + Node node = db.createNode(LABEL); + nodeId = node.getId(); + node.setProperty("prop1", "foo"); + node.setProperty("prop2", "bar"); + tx.success(); + } + + awaitIndexesOnline(); + + try (Transaction tx = db.beginTx()) { + Node node = db.getNodeById(nodeId); + node.setProperty("prop2", 42); + tx.success(); + } + + try (Transaction tx = db.beginTx()) { + assertQueryFindsIds( db, true, "nodes", "foo", nodeId ); + Result result = db.execute(format(QUERY_NODES, "nodes", "bar")); + while (result.hasNext()) { + Map map = result.next(); + for (Map.Entry entry : map.entrySet()) { + System.out.println(entry.getKey() + " " + entry.getValue()); + } + } + result.close(); + tx.success(); + } + } + + /** + * @param + * @return + * @Description: TODO(测试SimHash算法) + */ + @Test + public void queryFulltextForNodesBySimHash_2() { + db = (GraphDatabaseAPI) neo4j.getGraphDatabaseService(); + + try (Transaction tx = db.beginTx()) { + db.execute(format(NODE_CREATE, "nodes", array(LABEL.name()), array("prop1", "prop2"))).close(); + tx.success(); + } + long nodeId; + try (Transaction tx = db.beginTx()) { + Node node = db.createNode(LABEL); + nodeId = node.getId(); + node.setProperty("prop1", "foo"); + node.setProperty("prop2", "bar"); + tx.success(); + } + + awaitIndexesOnline(); + + try (Transaction tx = db.beginTx()) { + Node node = db.getNodeById(nodeId); + node.setProperty("prop2", 42); + System.out.println(node.getId()); + System.out.println(node.getProperty("prop1")); + tx.success(); + } + + try (Transaction tx = db.beginTx()) { + assertQueryFindsIds( db, true, "nodes", "foo", nodeId ); + Result result = db.execute(format(QUERY_NODES, "nodes", "bar")); + while (result.hasNext()) { + Map map = result.next(); + for (Map.Entry entry : map.entrySet()) { + System.out.println(entry.getKey() + " " + entry.getValue()); + } + } + result.close(); + tx.success(); + } + } + + static void assertQueryFindsIds( GraphDatabaseService db, boolean queryNodes, String index, String query, long... ids ) + { + try ( Transaction tx = db.beginTx() ) + { + String queryCall = queryNodes ? QUERY_NODES : null; + Result result = db.execute( format( queryCall, index, query ) ); + int num = 0; + Double score = Double.MAX_VALUE; + while ( result.hasNext() ) + { + Map entry = result.next(); + Long nextId = ((Entity) entry.get( queryNodes ? NODE : RELATIONSHIP )).getId(); + Double nextScore = (Double) entry.get( SCORE ); +// assertThat( nextScore, lessThanOrEqualTo( score ) ); + score = nextScore; + if ( num < ids.length ) + { + assertEquals( format( "Result returned id %d, expected %d", nextId, ids[num] ), ids[num], nextId.longValue() ); + } + else + { + fail( format( "Result returned id %d, which is beyond the number of ids (%d) that were expected.", nextId, ids.length ) ); + } + num++; + } + assertEquals( "Number of results differ from expected", ids.length, num ); + tx.success(); + } + } + +} +