From 9b7ec3e91eaddf41ecbe7439dbe45db1a0941bfc Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Sun, 5 Nov 2023 17:56:39 +0800 Subject: [PATCH 01/16] chore: option rename --- .../hugegraph/computer/algorithm/sampling/RandomWalk.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index 33d738440..7bcdaa7b8 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -37,8 +37,8 @@ public class RandomWalk implements Computation { private static final Logger LOG = Log.logger(RandomWalk.class); - public static final String OPTION_WALK_PER_NODE = "randomwalk.walk_per_node"; - public static final String OPTION_WALK_LENGTH = "randomwalk.walk_length"; + public static final String OPTION_WALK_PER_NODE = "random_walk.walk_per_node"; + public static final String OPTION_WALK_LENGTH = "random_walk.walk_length"; /** * number of times per vertex(source vertex) walks From 7d760ee8890fa37f734bfd85cf4a40a758d41be4 Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Sun, 5 Nov 2023 20:16:00 +0800 Subject: [PATCH 02/16] feat: define second order random walk algorithm param --- .../algorithm/sampling/RandomWalk.java | 109 +++++++++++++++--- 1 file changed, 95 insertions(+), 14 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index 7bcdaa7b8..45be1588c 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -17,6 +17,9 @@ package org.apache.hugegraph.computer.algorithm.sampling; +import java.util.Iterator; +import java.util.Random; + import org.apache.hugegraph.computer.core.common.exception.ComputerException; import org.apache.hugegraph.computer.core.config.Config; import org.apache.hugegraph.computer.core.graph.edge.Edge; @@ -30,9 +33,6 @@ import org.apache.hugegraph.util.Log; import org.slf4j.Logger; -import java.util.Iterator; -import java.util.Random; - public class RandomWalk implements Computation { private static final Logger LOG = Log.logger(RandomWalk.class); @@ -40,20 +40,56 @@ public class RandomWalk implements Computation { public static final String OPTION_WALK_PER_NODE = "random_walk.walk_per_node"; public static final String OPTION_WALK_LENGTH = "random_walk.walk_length"; + public static final String OPTION_WEIGHT_PROPERTY = "random_walk.weight_property"; + public static final String OPTION_WEIGHT_MIN_THRESHOLD = "random_walk.weight_min_threshold"; + public static final String OPTION_WEIGHT_MAX_THRESHOLD = "random_walk.weight_max_threshold"; + + public static final String OPTION_RETURN_FACTOR = "random_walk.return_factor"; + public static final String OPTION_INOUT_FACTOR = "random_walk.inout_factor"; + + /** + * Random + */ + private Random random; + /** - * number of times per vertex(source vertex) walks + * Number of times per vertex(source vertex) walks */ private Integer walkPerNode; /** - * walk length + * Walk length */ private Integer walkLength; /** - * random + * Weight property, related to the walking probability */ - private Random random; + private String weightProperty; + + /** + * Weight less than this threshold will be truncated. + * Default 0 + */ + private Integer weightMinThreshold; + + /** + * Weight greater than this threshold will be truncated. + * Default Integer.MAX_VALUE + */ + private Integer weightMaxThreshold; + + /** + * Controls the probability of re-walk to a previously walked vertex. + * Default 1 + */ + private Double returnFactor; + + /** + * Controls whether to walk inward or outward. + * Default 1 + */ + private Double inOutFactor; @Override public String category() { @@ -67,23 +103,68 @@ public String name() { @Override public void init(Config config) { + this.random = new Random(); + this.walkPerNode = config.getInt(OPTION_WALK_PER_NODE, 3); if (this.walkPerNode <= 0) { throw new ComputerException("The param %s must be greater than 0, " + - "actual got '%s'", - OPTION_WALK_PER_NODE, this.walkPerNode); + "actual got '%s'", + OPTION_WALK_PER_NODE, this.walkPerNode); } - LOG.info("[RandomWalk] algorithm param, {}: {}", OPTION_WALK_PER_NODE, walkPerNode); + LOG.info("[RandomWalk] algorithm param, {}: {}", OPTION_WALK_PER_NODE, this.walkPerNode); this.walkLength = config.getInt(OPTION_WALK_LENGTH, 3); if (this.walkLength <= 0) { throw new ComputerException("The param %s must be greater than 0, " + - "actual got '%s'", - OPTION_WALK_LENGTH, this.walkLength); + "actual got '%s'", + OPTION_WALK_LENGTH, this.walkLength); } - LOG.info("[RandomWalk] algorithm param, {}: {}", OPTION_WALK_LENGTH, walkLength); + LOG.info("[RandomWalk] algorithm param, {}: {}", OPTION_WALK_LENGTH, this.walkLength); - this.random = new Random(); + this.weightProperty = config.getString(OPTION_WEIGHT_PROPERTY, ""); + LOG.info("[RandomWalk] algorithm param, {}: {}", + OPTION_WEIGHT_PROPERTY, this.weightProperty); + + this.weightMinThreshold = config.getInt(OPTION_WEIGHT_MIN_THRESHOLD, 0); + if (this.weightMinThreshold < 0) { + throw new ComputerException("The param %s must be greater than or equal 0, " + + "actual got '%s'", + OPTION_WEIGHT_MIN_THRESHOLD, this.weightMinThreshold); + } + LOG.info("[RandomWalk] algorithm param, {}: {}", + OPTION_WEIGHT_MIN_THRESHOLD, this.weightMinThreshold); + + this.weightMaxThreshold = config.getInt(OPTION_WEIGHT_MAX_THRESHOLD, Integer.MAX_VALUE); + if (this.weightMaxThreshold < 0) { + throw new ComputerException("The param %s must be greater than or equal 0, " + + "actual got '%s'", + OPTION_WEIGHT_MAX_THRESHOLD, this.weightMaxThreshold); + } + LOG.info("[RandomWalk] algorithm param, {}: {}", + OPTION_WEIGHT_MAX_THRESHOLD, this.weightMaxThreshold); + + if (this.weightMinThreshold > this.weightMaxThreshold) { + throw new ComputerException("%s must be greater than or equal %s, ", + OPTION_WEIGHT_MAX_THRESHOLD, OPTION_WEIGHT_MIN_THRESHOLD); + } + + this.returnFactor = config.getDouble(OPTION_RETURN_FACTOR, 1); + if (this.returnFactor <= 0) { + throw new ComputerException("The param %s must be greater than 0, " + + "actual got '%s'", + OPTION_RETURN_FACTOR, this.returnFactor); + } + LOG.info("[RandomWalk] algorithm param, {}: {}", + OPTION_RETURN_FACTOR, this.returnFactor); + + this.inOutFactor = config.getDouble(OPTION_INOUT_FACTOR, 1); + if (this.inOutFactor <= 0) { + throw new ComputerException("The param %s must be greater than 0, " + + "actual got '%s'", + OPTION_INOUT_FACTOR, this.inOutFactor); + } + LOG.info("[RandomWalk] algorithm param, {}: {}", + OPTION_INOUT_FACTOR, this.inOutFactor); } @Override From 5d8eb983fb0d8fb7daa14d9d2938a00fb4b8a8db Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Sun, 5 Nov 2023 20:47:53 +0800 Subject: [PATCH 03/16] chore: code format --- .../algorithm/sampling/RandomWalkMessage.java | 6 ++-- .../algorithm/sampling/RandomWalkOutput.java | 16 ++++----- .../algorithm/sampling/RandomWalkParams.java | 12 +++---- .../algorithm/sampling/RandomWalkTest.java | 33 ++++++++++--------- 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkMessage.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkMessage.java index bf32ee75c..45fa5db48 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkMessage.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkMessage.java @@ -17,6 +17,9 @@ package org.apache.hugegraph.computer.algorithm.sampling; +import java.io.IOException; +import java.util.List; + import org.apache.hugegraph.computer.core.graph.value.BooleanValue; import org.apache.hugegraph.computer.core.graph.value.IdList; import org.apache.hugegraph.computer.core.graph.value.Value; @@ -24,9 +27,6 @@ import org.apache.hugegraph.computer.core.io.RandomAccessInput; import org.apache.hugegraph.computer.core.io.RandomAccessOutput; -import java.io.IOException; -import java.util.List; - public class RandomWalkMessage implements Value.CustomizeValue> { /** diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java index ad43d5bd7..47ef2d58f 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java @@ -17,23 +17,23 @@ package org.apache.hugegraph.computer.algorithm.sampling; +import java.util.ArrayList; +import java.util.List; + import org.apache.hugegraph.computer.core.graph.value.IdListList; import org.apache.hugegraph.computer.core.graph.vertex.Vertex; import org.apache.hugegraph.computer.core.output.hg.HugeGraphOutput; -import java.util.ArrayList; -import java.util.List; - public class RandomWalkOutput extends HugeGraphOutput> { @Override protected void prepareSchema() { this.client().schema().propertyKey(this.name()) - .asText() - .writeType(this.writeType()) - .valueList() - .ifNotExist() - .create(); + .asText() + .writeType(this.writeType()) + .valueList() + .ifNotExist() + .create(); } @Override diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkParams.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkParams.java index 273d7fd67..cd8b49688 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkParams.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkParams.java @@ -17,24 +17,24 @@ package org.apache.hugegraph.computer.algorithm.sampling; +import java.util.Map; + import org.apache.hugegraph.computer.algorithm.AlgorithmParams; import org.apache.hugegraph.computer.core.config.ComputerOptions; import org.apache.hugegraph.computer.core.graph.value.IdListList; -import java.util.Map; - public class RandomWalkParams implements AlgorithmParams { @Override public void setAlgorithmParameters(Map params) { this.setIfAbsent(params, ComputerOptions.WORKER_COMPUTATION_CLASS, - RandomWalk.class.getName()); + RandomWalk.class.getName()); this.setIfAbsent(params, ComputerOptions.ALGORITHM_MESSAGE_CLASS, - RandomWalkMessage.class.getName()); + RandomWalkMessage.class.getName()); this.setIfAbsent(params, ComputerOptions.ALGORITHM_RESULT_CLASS, - IdListList.class.getName()); + IdListList.class.getName()); this.setIfAbsent(params, ComputerOptions.OUTPUT_CLASS, - RandomWalkOutput.class.getName()); + RandomWalkOutput.class.getName()); this.setIfAbsent(params, RandomWalk.OPTION_WALK_PER_NODE, "3"); this.setIfAbsent(params, RandomWalk.OPTION_WALK_LENGTH, "3"); diff --git a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java index 5c09af0a7..d5cfd3a44 100644 --- a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java +++ b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java @@ -17,8 +17,11 @@ package org.apache.hugegraph.computer.algorithm.sampling; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; + import org.apache.hugegraph.computer.algorithm.AlgorithmTestBase; import org.apache.hugegraph.computer.core.config.ComputerOptions; import org.apache.hugegraph.computer.core.graph.id.Id; @@ -34,10 +37,8 @@ import org.junit.Test; import org.slf4j.Logger; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; public class RandomWalkTest extends AlgorithmTestBase { @@ -59,14 +60,14 @@ public static void setup() { SchemaManager schema = client.schema(); schema.vertexLabel("user") - .useCustomizeStringId() - .ifNotExist() - .create(); + .useCustomizeStringId() + .ifNotExist() + .create(); schema.edgeLabel("know") - .sourceLabel("user") - .targetLabel("user") - .ifNotExist() - .create(); + .sourceLabel("user") + .targetLabel("user") + .ifNotExist() + .create(); GraphManager graph = client.graph(); Vertex vA = graph.addVertex(T.LABEL, "user", T.ID, "A"); @@ -111,11 +112,11 @@ public static class RandomWalkTestParams extends RandomWalkParams { @Override public void setAlgorithmParameters(Map params) { this.setIfAbsent(params, ComputerOptions.OUTPUT_CLASS, - RandomWalkTest.RandomWalkTestOutput.class.getName()); + RandomWalkTest.RandomWalkTestOutput.class.getName()); this.setIfAbsent(params, RandomWalk.OPTION_WALK_PER_NODE, - WALK_PER_NODE.toString()); + WALK_PER_NODE.toString()); this.setIfAbsent(params, RandomWalk.OPTION_WALK_LENGTH, - WALK_LENGTH.toString()); + WALK_LENGTH.toString()); super.setAlgorithmParameters(params); } From 3e4da8d1f22a15d0d5e357be4028df18d10a30c6 Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Sun, 5 Nov 2023 23:13:01 +0800 Subject: [PATCH 04/16] feat: add defaultWeight param --- .../algorithm/sampling/RandomWalk.java | 43 +++++++++++++------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index 45be1588c..232cebbf3 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -41,8 +41,9 @@ public class RandomWalk implements Computation { public static final String OPTION_WALK_LENGTH = "random_walk.walk_length"; public static final String OPTION_WEIGHT_PROPERTY = "random_walk.weight_property"; - public static final String OPTION_WEIGHT_MIN_THRESHOLD = "random_walk.weight_min_threshold"; - public static final String OPTION_WEIGHT_MAX_THRESHOLD = "random_walk.weight_max_threshold"; + public static final String OPTION_DEFAULT_WEIGHT = "random_walk.default_weight"; + public static final String OPTION_MIN_WEIGHT_THRESHOLD = "random_walk.min_weight_threshold"; + public static final String OPTION_MAX_WEIGHT_THRESHOLD = "random_walk.max_weight_threshold"; public static final String OPTION_RETURN_FACTOR = "random_walk.return_factor"; public static final String OPTION_INOUT_FACTOR = "random_walk.inout_factor"; @@ -67,17 +68,22 @@ public class RandomWalk implements Computation { */ private String weightProperty; + /** + * Default 1 + */ + private Double defaultWeight; + /** * Weight less than this threshold will be truncated. * Default 0 */ - private Integer weightMinThreshold; + private Integer minWeightThreshold; /** * Weight greater than this threshold will be truncated. * Default Integer.MAX_VALUE */ - private Integer weightMaxThreshold; + private Integer maxWeightThreshold; /** * Controls the probability of re-walk to a previously walked vertex. @@ -125,27 +131,36 @@ public void init(Config config) { LOG.info("[RandomWalk] algorithm param, {}: {}", OPTION_WEIGHT_PROPERTY, this.weightProperty); - this.weightMinThreshold = config.getInt(OPTION_WEIGHT_MIN_THRESHOLD, 0); - if (this.weightMinThreshold < 0) { + this.defaultWeight = config.getDouble(OPTION_DEFAULT_WEIGHT, 1); + if (this.defaultWeight <= 0) { + throw new ComputerException("The param %s must be greater than 0, " + + "actual got '%s'", + OPTION_DEFAULT_WEIGHT, this.defaultWeight); + } + LOG.info("[RandomWalk] algorithm param, {}: {}", + OPTION_DEFAULT_WEIGHT, this.defaultWeight); + + this.minWeightThreshold = config.getInt(OPTION_MIN_WEIGHT_THRESHOLD, 0); + if (this.minWeightThreshold < 0) { throw new ComputerException("The param %s must be greater than or equal 0, " + "actual got '%s'", - OPTION_WEIGHT_MIN_THRESHOLD, this.weightMinThreshold); + OPTION_MIN_WEIGHT_THRESHOLD, this.minWeightThreshold); } LOG.info("[RandomWalk] algorithm param, {}: {}", - OPTION_WEIGHT_MIN_THRESHOLD, this.weightMinThreshold); + OPTION_MIN_WEIGHT_THRESHOLD, this.minWeightThreshold); - this.weightMaxThreshold = config.getInt(OPTION_WEIGHT_MAX_THRESHOLD, Integer.MAX_VALUE); - if (this.weightMaxThreshold < 0) { + this.maxWeightThreshold = config.getInt(OPTION_MAX_WEIGHT_THRESHOLD, Integer.MAX_VALUE); + if (this.maxWeightThreshold < 0) { throw new ComputerException("The param %s must be greater than or equal 0, " + "actual got '%s'", - OPTION_WEIGHT_MAX_THRESHOLD, this.weightMaxThreshold); + OPTION_MAX_WEIGHT_THRESHOLD, this.maxWeightThreshold); } LOG.info("[RandomWalk] algorithm param, {}: {}", - OPTION_WEIGHT_MAX_THRESHOLD, this.weightMaxThreshold); + OPTION_MAX_WEIGHT_THRESHOLD, this.maxWeightThreshold); - if (this.weightMinThreshold > this.weightMaxThreshold) { + if (this.minWeightThreshold > this.maxWeightThreshold) { throw new ComputerException("%s must be greater than or equal %s, ", - OPTION_WEIGHT_MAX_THRESHOLD, OPTION_WEIGHT_MIN_THRESHOLD); + OPTION_MAX_WEIGHT_THRESHOLD, OPTION_MIN_WEIGHT_THRESHOLD); } this.returnFactor = config.getDouble(OPTION_RETURN_FACTOR, 1); From a2b6efc4635f334729efe7d374b6004e19c05d40 Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Tue, 7 Nov 2023 23:52:58 +0800 Subject: [PATCH 05/16] feat: implement second order random walk algorithm --- .../algorithm/sampling/RandomWalk.java | 125 ++++++++++++++++-- .../algorithm/sampling/RandomWalkMessage.java | 21 ++- 2 files changed, 131 insertions(+), 15 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index 232cebbf3..cdd48d1d6 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -17,7 +17,9 @@ package org.apache.hugegraph.computer.algorithm.sampling; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; import java.util.Random; import org.apache.hugegraph.computer.core.common.exception.ComputerException; @@ -25,8 +27,10 @@ import org.apache.hugegraph.computer.core.graph.edge.Edge; import org.apache.hugegraph.computer.core.graph.edge.Edges; import org.apache.hugegraph.computer.core.graph.id.Id; +import org.apache.hugegraph.computer.core.graph.value.DoubleValue; import org.apache.hugegraph.computer.core.graph.value.IdList; import org.apache.hugegraph.computer.core.graph.value.IdListList; +import org.apache.hugegraph.computer.core.graph.value.Value; import org.apache.hugegraph.computer.core.graph.vertex.Vertex; import org.apache.hugegraph.computer.core.worker.Computation; import org.apache.hugegraph.computer.core.worker.ComputationContext; @@ -191,14 +195,16 @@ public void compute0(ComputationContext context, Vertex vertex) { if (vertex.numEdges() <= 0) { // isolated vertex - this.savePath(vertex, message.path()); // save result + this.savePath(vertex, message.path()); vertex.inactivate(); return; } + vertex.edges().forEach(edge -> message.addToPreVertexAdjacence(edge.targetId())); + for (int i = 0; i < walkPerNode; ++i) { // random select one edge and walk - Edge selectedEdge = this.randomSelectEdge(vertex.edges()); + Edge selectedEdge = this.randomSelectEdge(null, null, vertex.edges()); context.sendMessage(selectedEdge.targetId(), message); } } @@ -208,9 +214,11 @@ public void compute(ComputationContext context, Vertex vertex, Iterator messages) { while (messages.hasNext()) { RandomWalkMessage message = messages.next(); + // the last id of path is the previous id + Id preVertexId = message.path().getLast(); if (message.isFinish()) { - this.savePath(vertex, message.path()); // save result + this.savePath(vertex, message.path()); vertex.inactivate(); continue; @@ -219,7 +227,7 @@ public void compute(ComputationContext context, Vertex vertex, message.addToPath(vertex); if (vertex.numEdges() <= 0) { - // there is nowhere to walk,finish eariler + // there is nowhere to walk, finish eariler message.finish(); context.sendMessage(this.getSourceId(message.path()), message); @@ -233,7 +241,7 @@ public void compute(ComputationContext context, Vertex vertex, if (vertex.id().equals(sourceId)) { // current vertex is the source vertex,no need to send message once more - this.savePath(vertex, message.path()); // save result + this.savePath(vertex, message.path()); } else { context.sendMessage(sourceId, message); } @@ -242,8 +250,11 @@ public void compute(ComputationContext context, Vertex vertex, continue; } + vertex.edges().forEach(edge -> message.addToPreVertexAdjacence(edge.targetId())); + // random select one edge and walk - Edge selectedEdge = this.randomSelectEdge(vertex.edges()); + Edge selectedEdge = this.randomSelectEdge(preVertexId, message.preVertexAdjacence(), + vertex.edges()); context.sendMessage(selectedEdge.targetId(), message); } } @@ -251,20 +262,108 @@ public void compute(ComputationContext context, Vertex vertex, /** * random select one edge */ - private Edge randomSelectEdge(Edges edges) { - Edge selectedEdge = null; - int randomNum = random.nextInt(edges.size()); + private Edge randomSelectEdge(Id preVertexId, IdList preVertexAdjacenceIdList, Edges edges) { + List weightList = new ArrayList<>(); - int i = 0; Iterator iterator = edges.iterator(); while (iterator.hasNext()) { - selectedEdge = iterator.next(); - if (i == randomNum) { + Edge edge = iterator.next(); + // calculate weight + Value weight = this.getWeight(edge); + Double finalWeight = this.calculateWeight(preVertexId, preVertexAdjacenceIdList, + edge.targetId(), weight); + weightList.add(finalWeight); + } + + int selectedIndex = this.randomSelectIndex(weightList); + Edge selectedEdge = this.selectEdge(edges.iterator(), selectedIndex); + return selectedEdge; + } + + /** + * get edge weight by weight property + */ + private Value getWeight(Edge edge) { + Value weight = edge.property(this.weightProperty); + if (weight == null) { + weight.assign(new DoubleValue(this.defaultWeight)); + } + + if (!weight.isNumber()) { + throw new ComputerException("The value of %s must be a numeric value, " + + "actual got '%s'", + this.weightProperty, weight.string()); + } + return weight; + } + + /** + * calculate edge weight + */ + private Double calculateWeight(Id preVertexId, IdList preVertexAdjacenceIdList, + Id nextVertexId, Value weight) { + /* + * 3 types of vertices. + * 1. current vertex, called v + * 2. previous vertex, called t + * 3. current vertex outer vertex, called x(x1, x2.. xn) + * + * Definition of weight correction coefficient α: + * if distance(t, x) = 0, then α = 1.0 / returnFactor + * if distance(t, x) = 1, then α = 1.0 + * if distance(t, x) = 2, then α = 1.0 / inOutFactor + * + * Final edge weight π(v, x) = α * edgeWeight + */ + Double finalWeight = 0.0; + if (preVertexId != null && preVertexId.equals(nextVertexId)) { + // distance(t, x) = 0 + finalWeight = 1.0 / this.returnFactor * (Double) weight.value(); + } else if (preVertexAdjacenceIdList != null + && preVertexAdjacenceIdList.contains(nextVertexId)) { + // distance(t, x) = 1 + finalWeight = 1.0 * (Double) weight.value(); + } else { + // distance(t, x) = 2 + finalWeight = 1.0 / this.inOutFactor * (Double) weight.value(); + } + return finalWeight; + } + + /** + * random select index + */ + private int randomSelectIndex(List weightList) { + int selectedIndex = 0; + double totalWeight = weightList.stream().mapToDouble(Double::doubleValue).sum(); + double randomNum = random.nextDouble() * totalWeight; // [0, totalWeight) + + // determine which interval the random number falls into + double cumulativeWeight = 0; + for (int i = 0; i < weightList.size(); ++i) { + cumulativeWeight += weightList.get(i); + if (randomNum < cumulativeWeight) { + selectedIndex = i; break; } - i++; } + return selectedIndex; + } + /** + * select edge from iterator by index + */ + private Edge selectEdge(Iterator iterator, int selectedIndex) { + Edge selectedEdge = null; + + int index = 0; + while (iterator.hasNext()) { + selectedEdge = iterator.next(); + if (index == selectedIndex) { + break; + } + index++; + } return selectedEdge; } diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkMessage.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkMessage.java index 45fa5db48..6d92781ac 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkMessage.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkMessage.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.List; +import org.apache.hugegraph.computer.core.graph.id.Id; import org.apache.hugegraph.computer.core.graph.value.BooleanValue; import org.apache.hugegraph.computer.core.graph.value.IdList; import org.apache.hugegraph.computer.core.graph.value.Value; @@ -30,28 +31,36 @@ public class RandomWalkMessage implements Value.CustomizeValue> { /** - * random walk path + * Previous vertex adjacent(out edge) vertex id list + */ + private final IdList preVertexAdjacence; + + /** + * Random walk path */ private final IdList path; /** - * finish flag + * Finish flag */ private BooleanValue isFinish; public RandomWalkMessage() { + this.preVertexAdjacence = new IdList(); this.path = new IdList(); this.isFinish = new BooleanValue(false); } @Override public void read(RandomAccessInput in) throws IOException { + this.preVertexAdjacence.read(in); this.path.read(in); this.isFinish.read(in); } @Override public void write(RandomAccessOutput out) throws IOException { + this.preVertexAdjacence.write(out); this.path.write(out); this.isFinish.write(out); } @@ -61,6 +70,14 @@ public List value() { return this.path.value(); } + public IdList preVertexAdjacence() { + return this.preVertexAdjacence; + } + + public void addToPreVertexAdjacence(Id vertexId) { + this.preVertexAdjacence.add(vertexId); + } + public IdList path() { return this.path; } From 0c14381a7a2e2a0021210c7659bff4c9010efa4e Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Wed, 8 Nov 2023 00:04:26 +0800 Subject: [PATCH 06/16] feat: ListValue support getFirst --- .../hugegraph/computer/algorithm/sampling/RandomWalk.java | 3 ++- .../hugegraph/computer/core/graph/value/ListValue.java | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index cdd48d1d6..ffb257c3d 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -73,6 +73,7 @@ public class RandomWalk implements Computation { private String weightProperty; /** + * Biased walk * Default 1 */ private Double defaultWeight; @@ -372,7 +373,7 @@ private Edge selectEdge(Iterator iterator, int selectedIndex) { */ private Id getSourceId(IdList path) { // the first id of path is the source id - return path.get(0); + return path.getFirst(); } /** diff --git a/computer-api/src/main/java/org/apache/hugegraph/computer/core/graph/value/ListValue.java b/computer-api/src/main/java/org/apache/hugegraph/computer/core/graph/value/ListValue.java index 4813ec19c..c9f5d05ed 100644 --- a/computer-api/src/main/java/org/apache/hugegraph/computer/core/graph/value/ListValue.java +++ b/computer-api/src/main/java/org/apache/hugegraph/computer/core/graph/value/ListValue.java @@ -90,6 +90,13 @@ public T get(int index) { return this.values.get(index); } + public T getFirst() { + if (this.values.size() == 0) { + throw new NoSuchElementException("The list is empty"); + } + return this.values.get(0); + } + public T getLast() { int index = this.values.size() - 1; if (index < 0) { From 5914bfffb459c0db4f5d6df247695a50e5c90abe Mon Sep 17 00:00:00 2001 From: diaohancai Date: Wed, 8 Nov 2023 20:52:28 +0800 Subject: [PATCH 07/16] bug: null point --- .../hugegraph/computer/algorithm/sampling/RandomWalk.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index ffb257c3d..b232f84ac 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -287,7 +287,7 @@ private Edge randomSelectEdge(Id preVertexId, IdList preVertexAdjacenceIdList, E private Value getWeight(Edge edge) { Value weight = edge.property(this.weightProperty); if (weight == null) { - weight.assign(new DoubleValue(this.defaultWeight)); + weight = new DoubleValue(this.defaultWeight); } if (!weight.isNumber()) { @@ -320,8 +320,8 @@ private Double calculateWeight(Id preVertexId, IdList preVertexAdjacenceIdList, if (preVertexId != null && preVertexId.equals(nextVertexId)) { // distance(t, x) = 0 finalWeight = 1.0 / this.returnFactor * (Double) weight.value(); - } else if (preVertexAdjacenceIdList != null - && preVertexAdjacenceIdList.contains(nextVertexId)) { + } else if (preVertexAdjacenceIdList != null && + preVertexAdjacenceIdList.contains(nextVertexId)) { // distance(t, x) = 1 finalWeight = 1.0 * (Double) weight.value(); } else { From 77d3752e360ec809ddf686625c999010676611c5 Mon Sep 17 00:00:00 2001 From: diaohancai Date: Wed, 8 Nov 2023 20:55:14 +0800 Subject: [PATCH 08/16] chore: supplementary test parameters --- .../algorithm/sampling/RandomWalkTest.java | 51 +++++++++++++++---- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java index d5cfd3a44..f3829a81b 100644 --- a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java +++ b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java @@ -42,6 +42,8 @@ public class RandomWalkTest extends AlgorithmTestBase { + private static final String PROPERTY_KEY = "frequency"; + private static final Map> EXPECT_WALK_PATH = ImmutableMap.of( "F", ImmutableList.of( @@ -59,6 +61,10 @@ public static void setup() { HugeClient client = client(); SchemaManager schema = client.schema(); + schema.propertyKey(PROPERTY_KEY) + .asInt() + .ifNotExist() + .create(); schema.vertexLabel("user") .useCustomizeStringId() .ifNotExist() @@ -66,6 +72,7 @@ public static void setup() { schema.edgeLabel("know") .sourceLabel("user") .targetLabel("user") + .properties(PROPERTY_KEY) .ifNotExist() .create(); @@ -81,17 +88,17 @@ public static void setup() { Vertex vF = graph.addVertex(T.LABEL, "user", T.ID, "F"); Vertex vG = graph.addVertex(T.LABEL, "user", T.ID, "G"); - vA.addEdge("know", vB); - vA.addEdge("know", vC); - vA.addEdge("know", vD); - vB.addEdge("know", vC); - vC.addEdge("know", vA); - vC.addEdge("know", vE); - vD.addEdge("know", vA); - vD.addEdge("know", vC); - vE.addEdge("know", vD); - - vF.addEdge("know", vG); + vA.addEdge("know", vB, PROPERTY_KEY, 9); + vA.addEdge("know", vC, PROPERTY_KEY, 1); + vA.addEdge("know", vD, PROPERTY_KEY, 3); + vB.addEdge("know", vC, PROPERTY_KEY, 2); + vC.addEdge("know", vA, PROPERTY_KEY, 1); + vC.addEdge("know", vE, PROPERTY_KEY, 2); + vD.addEdge("know", vA, PROPERTY_KEY, 7); + vD.addEdge("know", vC, PROPERTY_KEY, 1); + vE.addEdge("know", vD, PROPERTY_KEY, 8); + + vF.addEdge("know", vG, PROPERTY_KEY, 5); } @AfterClass @@ -109,6 +116,14 @@ public static class RandomWalkTestParams extends RandomWalkParams { private static Integer WALK_PER_NODE = 3; private static Integer WALK_LENGTH = 3; + private static String WEIGHT_PROPERTY = PROPERTY_KEY; + private static Double DEFAULT_WEIGHT = 1.0; + private static Integer MIN_WEIGHT_THRESHOLD = 0; + private static Integer MAX_WEIGHT_THRESHOLD = Integer.MAX_VALUE; + + private static Double RETURN_FACTOR = 2.0; + private static Double INOUT_FACTOR = 1.0 / 2.0; + @Override public void setAlgorithmParameters(Map params) { this.setIfAbsent(params, ComputerOptions.OUTPUT_CLASS, @@ -118,6 +133,20 @@ public void setAlgorithmParameters(Map params) { this.setIfAbsent(params, RandomWalk.OPTION_WALK_LENGTH, WALK_LENGTH.toString()); + this.setIfAbsent(params, RandomWalk.OPTION_WEIGHT_PROPERTY, + WEIGHT_PROPERTY); + this.setIfAbsent(params, RandomWalk.OPTION_DEFAULT_WEIGHT, + DEFAULT_WEIGHT.toString()); + this.setIfAbsent(params, RandomWalk.OPTION_MIN_WEIGHT_THRESHOLD, + MIN_WEIGHT_THRESHOLD.toString()); + this.setIfAbsent(params, RandomWalk.OPTION_MAX_WEIGHT_THRESHOLD, + MAX_WEIGHT_THRESHOLD.toString()); + + this.setIfAbsent(params, RandomWalk.OPTION_RETURN_FACTOR, + RETURN_FACTOR.toString()); + this.setIfAbsent(params, RandomWalk.OPTION_INOUT_FACTOR, + INOUT_FACTOR.toString()); + super.setAlgorithmParameters(params); } } From 70d8f0f301788af2893d8558d342f9bf1780f3c7 Mon Sep 17 00:00:00 2001 From: diaohancai Date: Wed, 8 Nov 2023 21:10:49 +0800 Subject: [PATCH 09/16] optimize: change weightThreshold parameter to Double type --- .../hugegraph/computer/algorithm/sampling/RandomWalk.java | 8 ++++---- .../computer/algorithm/sampling/RandomWalkTest.java | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index b232f84ac..3eb24122b 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -82,13 +82,13 @@ public class RandomWalk implements Computation { * Weight less than this threshold will be truncated. * Default 0 */ - private Integer minWeightThreshold; + private Double minWeightThreshold; /** * Weight greater than this threshold will be truncated. * Default Integer.MAX_VALUE */ - private Integer maxWeightThreshold; + private Double maxWeightThreshold; /** * Controls the probability of re-walk to a previously walked vertex. @@ -145,7 +145,7 @@ public void init(Config config) { LOG.info("[RandomWalk] algorithm param, {}: {}", OPTION_DEFAULT_WEIGHT, this.defaultWeight); - this.minWeightThreshold = config.getInt(OPTION_MIN_WEIGHT_THRESHOLD, 0); + this.minWeightThreshold = config.getDouble(OPTION_MIN_WEIGHT_THRESHOLD, 0.0); if (this.minWeightThreshold < 0) { throw new ComputerException("The param %s must be greater than or equal 0, " + "actual got '%s'", @@ -154,7 +154,7 @@ public void init(Config config) { LOG.info("[RandomWalk] algorithm param, {}: {}", OPTION_MIN_WEIGHT_THRESHOLD, this.minWeightThreshold); - this.maxWeightThreshold = config.getInt(OPTION_MAX_WEIGHT_THRESHOLD, Integer.MAX_VALUE); + this.maxWeightThreshold = config.getDouble(OPTION_MAX_WEIGHT_THRESHOLD, Double.MAX_VALUE); if (this.maxWeightThreshold < 0) { throw new ComputerException("The param %s must be greater than or equal 0, " + "actual got '%s'", diff --git a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java index f3829a81b..1bb57e7f9 100644 --- a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java +++ b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java @@ -118,8 +118,8 @@ public static class RandomWalkTestParams extends RandomWalkParams { private static String WEIGHT_PROPERTY = PROPERTY_KEY; private static Double DEFAULT_WEIGHT = 1.0; - private static Integer MIN_WEIGHT_THRESHOLD = 0; - private static Integer MAX_WEIGHT_THRESHOLD = Integer.MAX_VALUE; + private static Double MIN_WEIGHT_THRESHOLD = 0.0; + private static Double MAX_WEIGHT_THRESHOLD = Double.MAX_VALUE; private static Double RETURN_FACTOR = 2.0; private static Double INOUT_FACTOR = 1.0 / 2.0; From 548148e4691a80cd96cbb83b20b4918fea0bbc14 Mon Sep 17 00:00:00 2001 From: diaohancai Date: Wed, 8 Nov 2023 21:28:01 +0800 Subject: [PATCH 10/16] optimize: weight threshold truncation --- .../hugegraph/computer/algorithm/sampling/RandomWalk.java | 8 ++++++++ .../computer/algorithm/sampling/RandomWalkTest.java | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index 3eb24122b..ceaaaa48d 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -295,6 +295,14 @@ private Value getWeight(Edge edge) { "actual got '%s'", this.weightProperty, weight.string()); } + + // weight threshold truncation + if ((Double) weight.value() < this.minWeightThreshold) { + weight = new DoubleValue(this.minWeightThreshold); + } + if ((Double) weight.value() > this.maxWeightThreshold) { + weight = new DoubleValue(this.maxWeightThreshold); + } return weight; } diff --git a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java index 1bb57e7f9..c4e2940e6 100644 --- a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java +++ b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java @@ -118,8 +118,8 @@ public static class RandomWalkTestParams extends RandomWalkParams { private static String WEIGHT_PROPERTY = PROPERTY_KEY; private static Double DEFAULT_WEIGHT = 1.0; - private static Double MIN_WEIGHT_THRESHOLD = 0.0; - private static Double MAX_WEIGHT_THRESHOLD = Double.MAX_VALUE; + private static Double MIN_WEIGHT_THRESHOLD = 3.0; + private static Double MAX_WEIGHT_THRESHOLD = 7.0; private static Double RETURN_FACTOR = 2.0; private static Double INOUT_FACTOR = 1.0 / 2.0; From b4b94b161a5b7f06d73d80533485752a631ddd16 Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Mon, 13 Nov 2023 16:06:42 +0800 Subject: [PATCH 11/16] optimize: method optimize --- .../algorithm/sampling/RandomWalk.java | 53 +++++++------------ 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index ceaaaa48d..6426f43e7 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -122,7 +122,6 @@ public void init(Config config) { "actual got '%s'", OPTION_WALK_PER_NODE, this.walkPerNode); } - LOG.info("[RandomWalk] algorithm param, {}: {}", OPTION_WALK_PER_NODE, this.walkPerNode); this.walkLength = config.getInt(OPTION_WALK_LENGTH, 3); if (this.walkLength <= 0) { @@ -130,11 +129,8 @@ public void init(Config config) { "actual got '%s'", OPTION_WALK_LENGTH, this.walkLength); } - LOG.info("[RandomWalk] algorithm param, {}: {}", OPTION_WALK_LENGTH, this.walkLength); this.weightProperty = config.getString(OPTION_WEIGHT_PROPERTY, ""); - LOG.info("[RandomWalk] algorithm param, {}: {}", - OPTION_WEIGHT_PROPERTY, this.weightProperty); this.defaultWeight = config.getDouble(OPTION_DEFAULT_WEIGHT, 1); if (this.defaultWeight <= 0) { @@ -142,8 +138,6 @@ public void init(Config config) { "actual got '%s'", OPTION_DEFAULT_WEIGHT, this.defaultWeight); } - LOG.info("[RandomWalk] algorithm param, {}: {}", - OPTION_DEFAULT_WEIGHT, this.defaultWeight); this.minWeightThreshold = config.getDouble(OPTION_MIN_WEIGHT_THRESHOLD, 0.0); if (this.minWeightThreshold < 0) { @@ -151,8 +145,6 @@ public void init(Config config) { "actual got '%s'", OPTION_MIN_WEIGHT_THRESHOLD, this.minWeightThreshold); } - LOG.info("[RandomWalk] algorithm param, {}: {}", - OPTION_MIN_WEIGHT_THRESHOLD, this.minWeightThreshold); this.maxWeightThreshold = config.getDouble(OPTION_MAX_WEIGHT_THRESHOLD, Double.MAX_VALUE); if (this.maxWeightThreshold < 0) { @@ -160,8 +152,6 @@ public void init(Config config) { "actual got '%s'", OPTION_MAX_WEIGHT_THRESHOLD, this.maxWeightThreshold); } - LOG.info("[RandomWalk] algorithm param, {}: {}", - OPTION_MAX_WEIGHT_THRESHOLD, this.maxWeightThreshold); if (this.minWeightThreshold > this.maxWeightThreshold) { throw new ComputerException("%s must be greater than or equal %s, ", @@ -174,8 +164,6 @@ public void init(Config config) { "actual got '%s'", OPTION_RETURN_FACTOR, this.returnFactor); } - LOG.info("[RandomWalk] algorithm param, {}: {}", - OPTION_RETURN_FACTOR, this.returnFactor); this.inOutFactor = config.getDouble(OPTION_INOUT_FACTOR, 1); if (this.inOutFactor <= 0) { @@ -183,8 +171,6 @@ public void init(Config config) { "actual got '%s'", OPTION_INOUT_FACTOR, this.inOutFactor); } - LOG.info("[RandomWalk] algorithm param, {}: {}", - OPTION_INOUT_FACTOR, this.inOutFactor); } @Override @@ -269,10 +255,10 @@ private Edge randomSelectEdge(Id preVertexId, IdList preVertexAdjacenceIdList, E Iterator iterator = edges.iterator(); while (iterator.hasNext()) { Edge edge = iterator.next(); - // calculate weight - Value weight = this.getWeight(edge); - Double finalWeight = this.calculateWeight(preVertexId, preVertexAdjacenceIdList, - edge.targetId(), weight); + // calculate edge weight + double weight = this.getEdgeWeight(edge); + Double finalWeight = this.calculateEdgeWeight(preVertexId, preVertexAdjacenceIdList, + edge.targetId(), weight); weightList.add(finalWeight); } @@ -282,35 +268,36 @@ private Edge randomSelectEdge(Id preVertexId, IdList preVertexAdjacenceIdList, E } /** - * get edge weight by weight property + * get the weight of an edge by its weight property */ - private Value getWeight(Edge edge) { - Value weight = edge.property(this.weightProperty); - if (weight == null) { - weight = new DoubleValue(this.defaultWeight); + private double getEdgeWeight(Edge edge) { + Value property = edge.property(this.weightProperty); + if (property == null) { + property = new DoubleValue(this.defaultWeight); } - if (!weight.isNumber()) { + if (!property.isNumber()) { throw new ComputerException("The value of %s must be a numeric value, " + "actual got '%s'", - this.weightProperty, weight.string()); + this.weightProperty, property.string()); } // weight threshold truncation - if ((Double) weight.value() < this.minWeightThreshold) { + DoubleValue weight = (DoubleValue) property; + if (weight.doubleValue() < this.minWeightThreshold) { weight = new DoubleValue(this.minWeightThreshold); } - if ((Double) weight.value() > this.maxWeightThreshold) { + if (weight.doubleValue() > this.maxWeightThreshold) { weight = new DoubleValue(this.maxWeightThreshold); } - return weight; + return weight.doubleValue(); } /** * calculate edge weight */ - private Double calculateWeight(Id preVertexId, IdList preVertexAdjacenceIdList, - Id nextVertexId, Value weight) { + private Double calculateEdgeWeight(Id preVertexId, IdList preVertexAdjacenceIdList, + Id nextVertexId, double weight) { /* * 3 types of vertices. * 1. current vertex, called v @@ -327,14 +314,14 @@ private Double calculateWeight(Id preVertexId, IdList preVertexAdjacenceIdList, Double finalWeight = 0.0; if (preVertexId != null && preVertexId.equals(nextVertexId)) { // distance(t, x) = 0 - finalWeight = 1.0 / this.returnFactor * (Double) weight.value(); + finalWeight = 1.0 / this.returnFactor * weight; } else if (preVertexAdjacenceIdList != null && preVertexAdjacenceIdList.contains(nextVertexId)) { // distance(t, x) = 1 - finalWeight = 1.0 * (Double) weight.value(); + finalWeight = 1.0 * weight; } else { // distance(t, x) = 2 - finalWeight = 1.0 / this.inOutFactor * (Double) weight.value(); + finalWeight = 1.0 / this.inOutFactor * weight; } return finalWeight; } From 7d6e8ca8c5a4a5317b835ad14c596c231981c282 Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Mon, 13 Nov 2023 16:56:22 +0800 Subject: [PATCH 12/16] chore: nullableKeys test --- .../computer/algorithm/sampling/RandomWalkTest.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java index c4e2940e6..2d1a00c6e 100644 --- a/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java +++ b/computer-test/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkTest.java @@ -62,7 +62,7 @@ public static void setup() { SchemaManager schema = client.schema(); schema.propertyKey(PROPERTY_KEY) - .asInt() + .asDouble() .ifNotExist() .create(); schema.vertexLabel("user") @@ -73,6 +73,7 @@ public static void setup() { .sourceLabel("user") .targetLabel("user") .properties(PROPERTY_KEY) + .nullableKeys(PROPERTY_KEY) .ifNotExist() .create(); @@ -89,10 +90,10 @@ public static void setup() { Vertex vG = graph.addVertex(T.LABEL, "user", T.ID, "G"); vA.addEdge("know", vB, PROPERTY_KEY, 9); - vA.addEdge("know", vC, PROPERTY_KEY, 1); + vA.addEdge("know", vC); vA.addEdge("know", vD, PROPERTY_KEY, 3); vB.addEdge("know", vC, PROPERTY_KEY, 2); - vC.addEdge("know", vA, PROPERTY_KEY, 1); + vC.addEdge("know", vA); vC.addEdge("know", vE, PROPERTY_KEY, 2); vD.addEdge("know", vA, PROPERTY_KEY, 7); vD.addEdge("know", vC, PROPERTY_KEY, 1); From 18f1260891ef0182d0fcaa78f3aef6aeeabe612b Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Wed, 15 Nov 2023 15:22:22 +0800 Subject: [PATCH 13/16] refactor: double value simplification --- .../algorithm/sampling/RandomWalk.java | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index 6426f43e7..62ee44e9a 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -257,7 +257,7 @@ private Edge randomSelectEdge(Id preVertexId, IdList preVertexAdjacenceIdList, E Edge edge = iterator.next(); // calculate edge weight double weight = this.getEdgeWeight(edge); - Double finalWeight = this.calculateEdgeWeight(preVertexId, preVertexAdjacenceIdList, + double finalWeight = this.calculateEdgeWeight(preVertexId, preVertexAdjacenceIdList, edge.targetId(), weight); weightList.add(finalWeight); } @@ -271,32 +271,33 @@ private Edge randomSelectEdge(Id preVertexId, IdList preVertexAdjacenceIdList, E * get the weight of an edge by its weight property */ private double getEdgeWeight(Edge edge) { + double weight = this.defaultWeight; + Value property = edge.property(this.weightProperty); - if (property == null) { - property = new DoubleValue(this.defaultWeight); - } + if (property != null) { + if (!property.isNumber()) { + throw new ComputerException("The value of %s must be a numeric value, " + + "actual got '%s'", + this.weightProperty, property.string()); + } - if (!property.isNumber()) { - throw new ComputerException("The value of %s must be a numeric value, " + - "actual got '%s'", - this.weightProperty, property.string()); + weight = ((DoubleValue) property).doubleValue(); } // weight threshold truncation - DoubleValue weight = (DoubleValue) property; - if (weight.doubleValue() < this.minWeightThreshold) { - weight = new DoubleValue(this.minWeightThreshold); + if (weight < this.minWeightThreshold) { + weight = this.minWeightThreshold; } - if (weight.doubleValue() > this.maxWeightThreshold) { - weight = new DoubleValue(this.maxWeightThreshold); + if (weight > this.maxWeightThreshold) { + weight = this.maxWeightThreshold; } - return weight.doubleValue(); + return weight; } /** * calculate edge weight */ - private Double calculateEdgeWeight(Id preVertexId, IdList preVertexAdjacenceIdList, + private double calculateEdgeWeight(Id preVertexId, IdList preVertexAdjacenceIdList, Id nextVertexId, double weight) { /* * 3 types of vertices. @@ -311,7 +312,7 @@ private Double calculateEdgeWeight(Id preVertexId, IdList preVertexAdjacenceIdLi * * Final edge weight π(v, x) = α * edgeWeight */ - Double finalWeight = 0.0; + double finalWeight = 0.0; if (preVertexId != null && preVertexId.equals(nextVertexId)) { // distance(t, x) = 0 finalWeight = 1.0 / this.returnFactor * weight; From 9a93341445de75a0a0bff317dd1934f22d70bde7 Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Wed, 15 Nov 2023 15:28:28 +0800 Subject: [PATCH 14/16] chore: add todo note --- .../apache/hugegraph/computer/algorithm/community/lpa/Lpa.java | 2 ++ .../computer/algorithm/path/rings/RingsDetectionOutput.java | 2 ++ .../computer/algorithm/path/rings/filter/SpreadFilter.java | 2 ++ .../hugegraph/computer/algorithm/sampling/RandomWalk.java | 3 +++ .../computer/algorithm/sampling/RandomWalkOutput.java | 2 ++ 5 files changed, 11 insertions(+) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/community/lpa/Lpa.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/community/lpa/Lpa.java index 1bb34413e..49dcd464b 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/community/lpa/Lpa.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/community/lpa/Lpa.java @@ -79,6 +79,8 @@ private Id voteLabel(Iterator messages) { } // Calculate the labels with maximum frequency + // TODO: use primitive array instead, like DoubleArray, + // in order to reduce memory fragmentation generated during calculations List maxLabels = new ArrayList<>(); int maxFreq = 1; for (Map.Entry e : labels.entrySet()) { diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/RingsDetectionOutput.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/RingsDetectionOutput.java index d16b499a8..e49a26989 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/RingsDetectionOutput.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/RingsDetectionOutput.java @@ -39,6 +39,8 @@ protected void prepareSchema() { @Override protected List value(Vertex vertex) { IdListList value = vertex.value(); + // TODO: use primitive array instead, like DoubleArray, + // in order to reduce memory fragmentation generated during calculations List propValues = new ArrayList<>(); for (int i = 0; i < value.size(); i++) { propValues.add(value.get(i).toString()); diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/filter/SpreadFilter.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/filter/SpreadFilter.java index 9fdccbb5f..8688fdc4a 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/filter/SpreadFilter.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/filter/SpreadFilter.java @@ -128,6 +128,8 @@ private static List expressions( if (filter.size() == 0) { return PASS; } + // TODO: use primitive array instead, like DoubleArray, + // in order to reduce memory fragmentation generated during calculations List expressions = new ArrayList<>(); if (filter.containsKey(ALL)) { expressions.add(filter.get(ALL)); diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java index 62ee44e9a..a74cd4888 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalk.java @@ -250,6 +250,8 @@ public void compute(ComputationContext context, Vertex vertex, * random select one edge */ private Edge randomSelectEdge(Id preVertexId, IdList preVertexAdjacenceIdList, Edges edges) { + // TODO: use primitive array instead, like DoubleArray, + // in order to reduce memory fragmentation generated during calculations List weightList = new ArrayList<>(); Iterator iterator = edges.iterator(); @@ -259,6 +261,7 @@ private Edge randomSelectEdge(Id preVertexId, IdList preVertexAdjacenceIdList, E double weight = this.getEdgeWeight(edge); double finalWeight = this.calculateEdgeWeight(preVertexId, preVertexAdjacenceIdList, edge.targetId(), weight); + // TODO: improve to avoid OOM weightList.add(finalWeight); } diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java index 47ef2d58f..4b009e9aa 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java @@ -39,6 +39,8 @@ protected void prepareSchema() { @Override protected List value(Vertex vertex) { IdListList value = vertex.value(); + // TODO: use primitive array instead, like DoubleArray, + // in order to reduce memory fragmentation generated during calculations List propValues = new ArrayList<>(); for (int i = 0; i < value.size(); i++) { propValues.add(value.get(i).toString()); From 37d2aa738f4e33572d6ec18706689c9ccd0dddea Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Fri, 17 Nov 2023 16:54:07 +0800 Subject: [PATCH 15/16] fix: can not load edge properties --- .../hugegraph/computer/algorithm/sampling/RandomWalkParams.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkParams.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkParams.java index cd8b49688..a8d9fd817 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkParams.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkParams.java @@ -33,6 +33,8 @@ public void setAlgorithmParameters(Map params) { RandomWalkMessage.class.getName()); this.setIfAbsent(params, ComputerOptions.ALGORITHM_RESULT_CLASS, IdListList.class.getName()); + this.setIfAbsent(params, ComputerOptions.INPUT_FILTER_CLASS, + EXTRACTALLPROPERTYINPUTFILTER_CLASS_NAME); this.setIfAbsent(params, ComputerOptions.OUTPUT_CLASS, RandomWalkOutput.class.getName()); From 8cc17349aacb11c59bb8c48f79f749cb2515d512 Mon Sep 17 00:00:00 2001 From: diaohancai <550630588@qq.com> Date: Mon, 20 Nov 2023 16:31:19 +0800 Subject: [PATCH 16/16] chore: remove TODO --- .../apache/hugegraph/computer/algorithm/community/lpa/Lpa.java | 2 -- .../computer/algorithm/path/rings/RingsDetectionOutput.java | 2 -- .../computer/algorithm/path/rings/filter/SpreadFilter.java | 2 -- .../hugegraph/computer/algorithm/sampling/RandomWalkOutput.java | 2 -- 4 files changed, 8 deletions(-) diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/community/lpa/Lpa.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/community/lpa/Lpa.java index 49dcd464b..1bb34413e 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/community/lpa/Lpa.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/community/lpa/Lpa.java @@ -79,8 +79,6 @@ private Id voteLabel(Iterator messages) { } // Calculate the labels with maximum frequency - // TODO: use primitive array instead, like DoubleArray, - // in order to reduce memory fragmentation generated during calculations List maxLabels = new ArrayList<>(); int maxFreq = 1; for (Map.Entry e : labels.entrySet()) { diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/RingsDetectionOutput.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/RingsDetectionOutput.java index e49a26989..d16b499a8 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/RingsDetectionOutput.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/RingsDetectionOutput.java @@ -39,8 +39,6 @@ protected void prepareSchema() { @Override protected List value(Vertex vertex) { IdListList value = vertex.value(); - // TODO: use primitive array instead, like DoubleArray, - // in order to reduce memory fragmentation generated during calculations List propValues = new ArrayList<>(); for (int i = 0; i < value.size(); i++) { propValues.add(value.get(i).toString()); diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/filter/SpreadFilter.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/filter/SpreadFilter.java index 8688fdc4a..9fdccbb5f 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/filter/SpreadFilter.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/path/rings/filter/SpreadFilter.java @@ -128,8 +128,6 @@ private static List expressions( if (filter.size() == 0) { return PASS; } - // TODO: use primitive array instead, like DoubleArray, - // in order to reduce memory fragmentation generated during calculations List expressions = new ArrayList<>(); if (filter.containsKey(ALL)) { expressions.add(filter.get(ALL)); diff --git a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java index 4b009e9aa..47ef2d58f 100644 --- a/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java +++ b/computer-algorithm/src/main/java/org/apache/hugegraph/computer/algorithm/sampling/RandomWalkOutput.java @@ -39,8 +39,6 @@ protected void prepareSchema() { @Override protected List value(Vertex vertex) { IdListList value = vertex.value(); - // TODO: use primitive array instead, like DoubleArray, - // in order to reduce memory fragmentation generated during calculations List propValues = new ArrayList<>(); for (int i = 0; i < value.size(); i++) { propValues.add(value.get(i).toString());