From 2951a0c0f0b6da107b91b98c5f8569172466bf69 Mon Sep 17 00:00:00 2001 From: wyatt-joyner-pometry Date: Mon, 25 Nov 2024 16:17:15 +0000 Subject: [PATCH] add fast_rp algorithm (#1867) * add fast_rp algorithm Added Rust implementation; Rust test; Python integration; Python test * remove unnecessary imports in test_algorithms; fix rust doc for fastrp * implement AsOrd for float vecs and update fastRP to return partially ordered AlgorithmResult; make tidy NOTE: make tidy applied formatting to: python/tests/graphql/test_nodes_property_filter.py, python/tests/graphql/test_graph_nodes_edges_property_filter.py, and python/python/raphtory/__init__.pyi --------- Co-authored-by: Ben Steer --- python/python/raphtory/__init__.pyi | 440 ++++++++++++++---- .../python/raphtory/algorithms/__init__.pyi | 23 + .../test_graph_nodes_edges_property_filter.py | 315 ++++--------- .../graphql/test_nodes_property_filter.py | 193 +++----- python/tests/test_algorithms.py | 170 ++++++- raphtory/src/algorithms/algorithm_result.rs | 7 + raphtory/src/algorithms/embeddings/fast_rp.rs | 428 +++++++++++++++++ raphtory/src/algorithms/embeddings/mod.rs | 3 + raphtory/src/algorithms/mod.rs | 1 + raphtory/src/python/graph/algorithm_result.rs | 13 + raphtory/src/python/packages/algorithms.rs | 33 ++ raphtory/src/python/packages/base_modules.rs | 1 + 12 files changed, 1192 insertions(+), 435 deletions(-) create mode 100644 raphtory/src/algorithms/embeddings/fast_rp.rs create mode 100644 raphtory/src/algorithms/embeddings/mod.rs diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 0f53ef53e..5a50e12a4 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -210,6 +210,7 @@ class DiskGraphStorage(object): def __repr__(self): """Return repr(self).""" + def append_node_temporal_properties(self, location, chunk_size=20000000): ... def graph_dir(self): ... @staticmethod def load_from_dir(graph_dir): ... 
@@ -1200,12 +1201,18 @@ class Graph(GraphView): """Create and return a new object. See help(type) for accurate signature.""" def __reduce__(self): ... - def add_constant_properties(self, properties: PropInput): + def add_constant_properties(self, properties: PropInput) -> None: """ Adds static properties to the graph. Arguments: properties (PropInput): The static properties of the graph. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def add_edge( @@ -1227,7 +1234,10 @@ class Graph(GraphView): layer (str, optional): The layer of the edge. Returns: - MutableEdge: The added edge + MutableEdge: The added edge. + + Raises: + GraphError: If the operation fails. """ def add_node( @@ -1245,17 +1255,27 @@ class Graph(GraphView): id (str|int): The id of the node. properties (PropInput, optional): The properties of the node. node_type (str, optional): The optional string which will be used as a node type + Returns: - MutableNode: The added node + MutableNode: The added node. + + Raises: + GraphError: If the operation fails. """ - def add_property(self, timestamp: TimeInput, properties: PropInput): + def add_property(self, timestamp: TimeInput, properties: PropInput) -> None: """ Adds properties to the graph. Arguments: timestamp (TimeInput): The timestamp of the temporal property. properties (PropInput): The temporal properties of the graph. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def cache(self, path: str): @@ -1284,8 +1304,12 @@ class Graph(GraphView): id (str|int): The id of the node. properties (PropInput, optional): The properties of the node. node_type (str, optional): The optional string which will be used as a node type + Returns: - MutableNode: The created node + MutableNode: The created node. + + Raises: + GraphError: If the operation fails. 
""" @staticmethod @@ -1321,62 +1345,148 @@ class Graph(GraphView): List[str] """ - def import_edge(self, edge: Edge, force: bool = False) -> Edge: + def import_edge(self, edge: Edge, merge: bool = False): """ Import a single edge into the graph. - This function takes a PyEdge object and an optional boolean flag. If the flag is set to true, - the function will force the import of the edge even if it already exists in the graph. - Arguments: + edge (Edge): An Edge object representing the edge to be imported. + merge (bool): An optional boolean flag. + If merge is false, the function will return an error if the imported edge already exists in the graph. + If merge is true, the function merges the histories of the imported edge and the existing edge (in the graph). + + Returns: + EdgeView: An EdgeView object if the edge was successfully imported. + + Raises: + GraphError: If the operation fails. + """ - edge (Edge): A PyEdge object representing the edge to be imported. - force (bool): An optional boolean flag indicating whether to force the import of the edge. + def import_edge_as(self, edge: Edge, new_id: tuple, merge: bool = False): + """ + Import a single edge into the graph with new id. + + Arguments: + edge (Edge): An Edge object representing the edge to be imported. + new_id (tuple): The ID of the new edge. It's a tuple of the source and destination node ids. + merge (bool): An optional boolean flag. + If merge is false, the function will return an error if the imported edge already exists in the graph. + If merge is true, the function merges the histories of the imported edge and the existing edge (in the graph). Returns: - Edge: A Result object which is Ok if the edge was successfully imported, and Err otherwise. + EdgeView: An EdgeView object if the edge was successfully imported. + + Raises: + GraphError: If the operation fails. 
""" - def import_edges(self, edges: List[Edge], force: bool = False): + def import_edges(self, edges: List[Edge], merge: bool = False) -> None: """ Import multiple edges into the graph. - This function takes a vector of PyEdge objects and an optional boolean flag. If the flag is set to true, - the function will force the import of the edges even if they already exist in the graph. - Arguments: + edges (List[Edge]): A list of Edge objects representing the edges to be imported. + merge (bool): An optional boolean flag. + If merge is false, the function will return an error if any of the imported edges already exists in the graph. + If merge is true, the function merges the histories of the imported edges and the existing edges (in the graph). + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + + def import_edges_as(self, edges, new_ids, merge=False): + """ + Import multiple edges into the graph with new ids. + + Arguments: edges (List[Edge]): A list of Edge objects representing the edges to be imported. - force (bool): An optional boolean flag indicating whether to force the import of the edges. + new_ids (List[tuple]): The IDs of the new edges. It's a list of tuples of the source and destination node ids. + merge (bool): An optional boolean flag. + If merge is false, the function will return an error if any of the imported edges already exists in the graph. + If merge is true, the function merges the histories of the imported edges and the existing edges (in the graph). + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ - def import_node(self, node: Node, force: bool = False) -> Node: + def import_node(self, node: Node, merge: bool = False) -> Node: """ Import a single node into the graph. - This function takes a PyNode object and an optional boolean flag. 
If the flag is set to true, - the function will force the import of the node even if it already exists in the graph. + Arguments: + node (Node): A Node object representing the node to be imported. + merge (bool): An optional boolean flag. + If merge is false, the function will return an error if the imported node already exists in the graph. + If merge is true, the function merges the histories of the imported node and the existing node (in the graph). + + Returns: + Node: A node object if the node was successfully imported. + + Raises: + GraphError: If the operation fails. + """ + + def import_node_as( + self, node: Node, new_id: str | int, merge: bool = False + ) -> Node: + """ + Import a single node into the graph with new id. Arguments: node (Node): A Node object representing the node to be imported. - force (bool): An optional boolean flag indicating whether to force the import of the node. + new_id (str|int): The new node id. + merge (bool): An optional boolean flag. + If merge is false, the function will return an error if the imported node already exists in the graph. + If merge is true, the function merges the histories of the imported node and the existing node (in the graph). Returns: - Node: A Result object which is Ok if the node was successfully imported, and Err otherwise. + Node: A node object if the node was successfully imported. + + Raises: + GraphError: If the operation fails. """ - def import_nodes(self, nodes: List[Node], force: bool = False): + def import_nodes(self, nodes: List[Node], merge: bool = False) -> None: """ Import multiple nodes into the graph. - This function takes a vector of PyNode objects and an optional boolean flag. If the flag is set to true, - the function will force the import of the nodes even if they already exist in the graph. + Arguments: + nodes (List[Node]): A vector of Node objects representing the nodes to be imported. + merge (bool): An optional boolean flag. 
+ If merge is false, the function will return an error if any of the imported nodes already exists in the graph. + If merge is true, the function merges the histories of the imported nodes and the existing nodes (in the graph). + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + + def import_nodes_as( + self, nodes: List[Node], new_ids: List[str | int], merge: bool = False + ) -> None: + """ + Import multiple nodes into the graph with new ids. Arguments: + nodes (List[Node]): A vector of Node objects representing the nodes to be imported. + new_ids (List[str|int]): A list of node IDs to use for the imported nodes. + merge (bool): An optional boolean flag. + If merge is false, the function will return an error if any of the imported nodes already exists in the graph. + If merge is true, the function merges the histories of the imported nodes and the existing nodes (in the graph). - nodes (List[Node]): A vector of PyNode objects representing the nodes to be imported. - force (bool): An optional boolean flag indicating whether to force the import of the nodes. + Returns: + None: This function does not return a value, if the operation is successful. + Raises: + GraphError: If the operation fails. """ def largest_connected_component(self): @@ -1415,7 +1525,7 @@ class Graph(GraphView): shared_constant_properties: PropInput = None, layer: str = None, layer_col: str = None, - ): + ) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1427,6 +1537,12 @@ class Graph(GraphView): shared_constant_properties (PropInput): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) layer (str): The edge layer name (optional) Defaults to None. layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. + + Returns: + None: This function does not return a value, if the operation is successful. 
+ + Raises: + GraphError: If the operation fails. """ def load_edge_props_from_parquet( @@ -1438,7 +1554,7 @@ class Graph(GraphView): shared_constant_properties: PropInput = None, layer: str = None, layer_col: str = None, - ): + ) -> None: """ Load edge properties from parquet file @@ -1450,6 +1566,12 @@ class Graph(GraphView): shared_constant_properties (PropInput): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) layer (str): The edge layer name (optional) Defaults to None. layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def load_edges_from_pandas( @@ -1463,7 +1585,7 @@ class Graph(GraphView): shared_constant_properties: PropInput = None, layer: str = None, layer_col: str = None, - ): + ) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1477,6 +1599,12 @@ class Graph(GraphView): shared_constant_properties (PropInput): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def load_edges_from_parquet( @@ -1490,7 +1618,7 @@ class Graph(GraphView): shared_constant_properties: PropInput = None, layer: str = None, layer_col: str = None, - ): + ) -> None: """ Load edges from a Parquet file into the graph. @@ -1504,6 +1632,12 @@ class Graph(GraphView): shared_constant_properties (PropInput): A dictionary of constant properties that will be added to every edge. Defaults to None. 
(optional) layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ @staticmethod @@ -1526,7 +1660,7 @@ class Graph(GraphView): node_type_col: str = None, constant_properties: List[str] = None, shared_constant_properties: PropInput = None, - ): + ) -> None: """ Load node properties from a Pandas DataFrame. @@ -1537,6 +1671,12 @@ class Graph(GraphView): node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) shared_constant_properties (PropInput): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def load_node_props_from_parquet( @@ -1547,7 +1687,7 @@ class Graph(GraphView): node_type_col: str = None, constant_properties: List[str] = None, shared_constant_properties: PropInput = None, - ): + ) -> None: """ Load node properties from a parquet file. @@ -1558,6 +1698,12 @@ class Graph(GraphView): node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) shared_constant_properties (PropInput): A dictionary of constant properties that will be added to every node. Defaults to None. 
(optional) + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def load_nodes_from_pandas( @@ -1570,7 +1716,7 @@ class Graph(GraphView): properties: List[str] = None, constant_properties: List[str] = None, shared_constant_properties: PropInput = None, - ): + ) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -1583,6 +1729,12 @@ class Graph(GraphView): properties (List[str]): List of node property column names. Defaults to None. (optional) constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) shared_constant_properties (PropInput): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def load_nodes_from_parquet( @@ -1595,7 +1747,7 @@ class Graph(GraphView): properties: List[str] = None, constant_properties: List[str] = None, shared_constant_properties: PropInput = None, - ): + ) -> None: """ Load nodes from a Parquet file into the graph. @@ -1608,6 +1760,12 @@ class Graph(GraphView): properties (List[str]): List of node property column names. Defaults to None. (optional) constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) shared_constant_properties (PropInput): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. 
""" def node(self, id: str | int) -> Node: @@ -1618,7 +1776,7 @@ class Graph(GraphView): id (str|int): the node id Returns: - Node: the node with the specified id, or None if the node does not exist + Node: The node object with the specified id, or None if the node does not exist """ def persist_as_disk_graph(self, graph_dir): @@ -1652,13 +1810,18 @@ class Graph(GraphView): """ def to_disk_graph(self, graph_dir): ... - def update_constant_properties(self, properties: PropInput): + def update_constant_properties(self, properties: PropInput) -> None: """ Updates static properties to the graph. Arguments: properties (PropInput): The static properties of the graph. + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def write_updates(self): @@ -3533,7 +3696,7 @@ class PersistentGraph(GraphView): """Create and return a new object. See help(type) for accurate signature.""" def __reduce__(self): ... - def add_constant_properties(self, properties: dict): + def add_constant_properties(self, properties: dict) -> None: """ Adds static properties to the graph. @@ -3541,7 +3704,10 @@ class PersistentGraph(GraphView): properties (dict): The static properties of the graph. Returns: - None + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def add_edge( @@ -3551,7 +3717,7 @@ class PersistentGraph(GraphView): dst: str | int, properties: dict = None, layer: str = None, - ): + ) -> None: """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -3563,7 +3729,10 @@ class PersistentGraph(GraphView): layer (str): The layer of the edge. Returns: - None + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. 
""" def add_node( @@ -3572,7 +3741,7 @@ class PersistentGraph(GraphView): id: str | int, properties: dict = None, node_type: str = None, - ): + ) -> None: """ Adds a new node with the given id and properties to the graph. @@ -3583,10 +3752,13 @@ class PersistentGraph(GraphView): node_type (str) : The optional string which will be used as a node type Returns: - None + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ - def add_property(self, timestamp: TimeInput, properties: dict): + def add_property(self, timestamp: TimeInput, properties: dict) -> None: """ Adds properties to the graph. @@ -3595,7 +3767,10 @@ class PersistentGraph(GraphView): properties (dict): The temporal properties of the graph. Returns: - None + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def cache(self, path: str): @@ -3627,6 +3802,9 @@ class PersistentGraph(GraphView): Returns: MutableNode + + Raises: + GraphError: If the operation fails. """ def delete_edge( @@ -3643,6 +3821,9 @@ class PersistentGraph(GraphView): Returns: The deleted edge + + Raises: + GraphError: If the operation fails. """ @staticmethod @@ -3666,7 +3847,7 @@ class PersistentGraph(GraphView): dst (str | int): the destination node id Returns: - the edge with the specified source and destination nodes, or None if the edge does not exist + The edge with the specified source and destination nodes, or None if the edge does not exist """ def event_graph(self): @@ -3680,63 +3861,153 @@ class PersistentGraph(GraphView): A list of node types """ - def import_edge(self, edge: Edge, force: bool = False) -> Edge: + def import_edge(self, edge: Edge, merge: bool = False) -> Edge: """ Import a single edge into the graph. - This function takes a PyEdge object and an optional boolean flag. 
If the flag is set to true, - the function will force the import of the edge even if it already exists in the graph. + This function takes an edge object and an optional boolean flag. If the flag is set to true, + the function will merge the import of the edge even if it already exists in the graph. Arguments: + edge (Edge): An edge object representing the edge to be imported. + merge (bool): An optional boolean flag indicating whether to merge the import of the edge. Defaults to False. + + Returns: + Edge: The imported edge. + + Raises: + GraphError: If the operation fails. + """ + + def import_edge_as(self, edge: Edge, new_id: tuple, merge: bool = False) -> Edge: + """ + Import a single edge into the graph with new id. + + This function takes an edge object, a new edge id and an optional boolean flag. If the flag is set to true, + the function will merge the import of the edge even if it already exists in the graph. - edge (Edge): A PyEdge object representing the edge to be imported. - force (bool): An optional boolean flag indicating whether to force the import of the edge. + Arguments: + edge (Edge): An edge object representing the edge to be imported. + new_id (tuple): The ID of the new edge. It's a tuple of the source and destination node ids. + merge (bool): An optional boolean flag indicating whether to merge the import of the edge. Defaults to False. Returns: Edge: The imported edge. + + Raises: + GraphError: If the operation fails. """ - def import_edges(self, edges: List[Edge], force: bool = False): + def import_edges(self, edges: List[Edge], merge: bool = False) -> None: """ Import multiple edges into the graph. - This function takes a vector of PyEdge objects and an optional boolean flag. If the flag is set to true, - the function will force the import of the edges even if they already exist in the graph. + This function takes a vector of edge objects and an optional boolean flag. 
If the flag is set to true, + the function will merge the import of the edges even if they already exist in the graph. Arguments: + edges (List[Edge]): A vector of edge objects representing the edges to be imported. + merge (bool): An optional boolean flag indicating whether to merge the import of the edges. Defaults to False. - edges (List[Edge]): A vector of PyEdge objects representing the edges to be imported. - force (bool): An optional boolean flag indicating whether to force the import of the edges. + Returns: + None: This function does not return a value, if the operation is successful. + Raises: + GraphError: If the operation fails. """ - def import_node(self, node: Node, force: bool = False): + def import_edges_as(self, edges: List[Edge], new_ids, merge: bool = False) -> None: + """ + Import multiple edges into the graph with new ids. + + This function takes a vector of edge objects, a list of new edge ids and an optional boolean flag. If the flag is set to true, + the function will merge the import of the edges even if they already exist in the graph. + + Arguments: + edges (List[Edge]): A vector of edge objects representing the edges to be imported. + merge (bool): An optional boolean flag indicating whether to merge the import of the edges. Defaults to False. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + + def import_node(self, node: Node, merge: bool = False): """ Import a single node into the graph. - This function takes a PyNode object and an optional boolean flag. If the flag is set to true, - the function will force the import of the node even if it already exists in the graph. + This function takes a node object and an optional boolean flag. If the flag is set to true, + the function will merge the import of the node even if it already exists in the graph. Arguments: - node (Node): A PyNode object representing the node to be imported. 
- force (bool): An optional boolean flag indicating whether to force the import of the node. + node (Node): A node object representing the node to be imported. + merge (bool): An optional boolean flag indicating whether to merge the import of the node. Defaults to False. Returns: - Result, GraphError> - A Result object which is Ok if the node was successfully imported, and Err otherwise. + NodeView: A nodeview object if the node was successfully imported, and an error otherwise. + + Raises: + GraphError: If the operation fails. """ - def import_nodes(self, nodes: List[Node], force: bool = False): + def import_node_as(self, node: Node, new_id: str | int, merge: bool = False): + """ + Import a single node into the graph with new id. + + This function takes a node object, a new node id and an optional boolean flag. If the flag is set to true, + the function will merge the import of the node even if it already exists in the graph. + + Arguments: + node (Node): A node object representing the node to be imported. + new_id (str|int): The new node id. + merge (bool): An optional boolean flag indicating whether to merge the import of the node. Defaults to False. + + Returns: + NodeView: A nodeview object if the node was successfully imported, and an error otherwise. + + Raises: + GraphError: If the operation fails. + """ + + def import_nodes(self, nodes: List[Node], merge: bool = False) -> None: """ Import multiple nodes into the graph. - This function takes a vector of PyNode objects and an optional boolean flag. If the flag is set to true, - the function will force the import of the nodes even if they already exist in the graph. + This function takes a vector of node objects and an optional boolean flag. If the flag is set to true, + the function will merge the import of the nodes even if they already exist in the graph. + + Arguments: + nodes (List[Node]): A vector of node objects representing the nodes to be imported. 
+ merge (bool): An optional boolean flag indicating whether to merge the import of the nodes. Defaults to False. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + + def import_nodes_as( + self, nodes: List[Node], new_ids: List[str | int], merge: bool = False + ) -> None: + """ + Import multiple nodes into the graph with new ids. + + This function takes a vector of node objects, a list of new node ids and an optional boolean flag. If the flag is set to true, + the function will merge the import of the nodes even if they already exist in the graph. Arguments: + nodes (List[Node]): A vector of node objects representing the nodes to be imported. + new_ids (List[str|int]): A list of node IDs to use for the imported nodes. + merge (bool): An optional boolean flag indicating whether to merge the import of the nodes. Defaults to False. - nodes (List[Node]): A vector of PyNode objects representing the nodes to be imported. - force (bool): An optional boolean flag indicating whether to force the import of the nodes. + Returns: + None: This function does not return a value, if the operation is successful. + Raises: + GraphError: If the operation fails. """ @staticmethod @@ -3773,8 +4044,9 @@ class PersistentGraph(GraphView): dst (str): The column name for the destination node ids. layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -3799,8 +4071,9 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. 
layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -3829,7 +4102,7 @@ class PersistentGraph(GraphView): layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -3858,7 +4131,7 @@ class PersistentGraph(GraphView): layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -3889,8 +4162,9 @@ class PersistentGraph(GraphView): shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -3921,8 +4195,9 @@ class PersistentGraph(GraphView): shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. 
(cannot be used in combination with layer_col) layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -3961,7 +4236,7 @@ class PersistentGraph(GraphView): shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -3988,7 +4263,7 @@ class PersistentGraph(GraphView): shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -4017,8 +4292,9 @@ class PersistentGraph(GraphView): properties (List[str]): List of node property column names. Defaults to None. (optional) constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -4047,8 +4323,9 @@ class PersistentGraph(GraphView): properties (List[str]): List of node property column names. Defaults to None. (optional) constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. 
Defaults to None. (optional) + Returns: - None: If the operation is successful. + None: This function does not return a value, if the operation is successful. Raises: GraphError: If the operation fails. @@ -4062,7 +4339,7 @@ class PersistentGraph(GraphView): id (str | int): the node id Returns: - the node with the specified id, or None if the node does not exist + The node with the specified id, or None if the node does not exist """ def persistent_graph(self): ... @@ -4090,7 +4367,7 @@ class PersistentGraph(GraphView): bytes """ - def update_constant_properties(self, properties: dict): + def update_constant_properties(self, properties: dict) -> None: """ Updates static properties to the graph. @@ -4098,7 +4375,10 @@ class PersistentGraph(GraphView): properties (dict): The static properties of the graph. Returns: - None + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. """ def write_updates(self): diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index dd41e3282..c98f4dbbb 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -214,6 +214,29 @@ def directed_graph_density(g: GraphView): float : Directed graph density of G. """ +def fast_rp( + g: GraphView, + embedding_dim: int, + normalization_strength: float, + iter_weights: list[float], + seed: Optional[int] = None, + threads: Optional[int] = None, +) -> AlgorithmResult: + """ + Computes embedding vectors for each vertex of an undirected/bidirectional graph according to the Fast RP algorithm. + Original Paper: https://doi.org/10.48550/arXiv.1908.11512 + Arguments: + g (GraphView): The graph view on which embeddings are generated. + embedding_dim (int): The size (dimension) of the generated embeddings. 
+ normalization_strength (float): The extent to which high-degree vertices should be discounted (range: 1-0) + iter_weights (list[float]): The scalar weights to apply to the results of each iteration + seed (int, optional): The seed for initialisation of random vectors + threads (int, optional): The number of threads to be used for parallel execution. + + Returns: + AlgorithmResult: Vertices mapped to their corresponding embedding vectors + """ + def fruchterman_reingold( graph: GraphView, iterations: int | None = 100, diff --git a/python/tests/graphql/test_graph_nodes_edges_property_filter.py b/python/tests/graphql/test_graph_nodes_edges_property_filter.py index d68b44918..eaf8dc68b 100644 --- a/python/tests/graphql/test_graph_nodes_edges_property_filter.py +++ b/python/tests/graphql/test_graph_nodes_edges_property_filter.py @@ -11,27 +11,73 @@ def create_test_graph(g): - g.add_node(1, "a", properties={"prop1": 60, "prop2": 31.3, "prop3": "abc123", "prop4": True, "prop5": [1, 2, 3]}) - g.add_node(1, "b", properties={"prop1": 10, "prop2": 31.3, "prop3": "abc223", "prop4": False}) - g.add_node(1, "c", properties={"prop1": 20, "prop2": 31.3, "prop3": "abc333", "prop4": True, "prop5": [5, 6, 7]}) - g.add_node(1, "d", properties={"prop1": 30, "prop2": 31.3, "prop3": "abc444", "prop4": False}) + g.add_node( + 1, + "a", + properties={ + "prop1": 60, + "prop2": 31.3, + "prop3": "abc123", + "prop4": True, + "prop5": [1, 2, 3], + }, + ) + g.add_node( + 1, + "b", + properties={"prop1": 10, "prop2": 31.3, "prop3": "abc223", "prop4": False}, + ) + g.add_node( + 1, + "c", + properties={ + "prop1": 20, + "prop2": 31.3, + "prop3": "abc333", + "prop4": True, + "prop5": [5, 6, 7], + }, + ) + g.add_node( + 1, + "d", + properties={"prop1": 30, "prop2": 31.3, "prop3": "abc444", "prop4": False}, + ) g.add_edge( 2, "a", "d", - properties={"eprop1": 60, "eprop2": 0.4, "eprop3": "xyz123", "eprop4": True, "eprop5": [1, 2, 3]}, + properties={ + "eprop1": 60, + "eprop2": 0.4, + "eprop3": 
"xyz123", + "eprop4": True, + "eprop5": [1, 2, 3], + }, ) g.add_edge( 2, "b", "d", - properties={"eprop1": 10, "eprop2": 1.7, "eprop3": "xyz123", "eprop4": True, "eprop5": [3, 4, 5]}, + properties={ + "eprop1": 10, + "eprop2": 1.7, + "eprop3": "xyz123", + "eprop4": True, + "eprop5": [3, 4, 5], + }, ) g.add_edge( 2, "c", "d", - properties={"eprop1": 30, "eprop2": 6.4, "eprop3": "xyz123", "eprop4": False, "eprop5": [10]}, + properties={ + "eprop1": 30, + "eprop2": 6.4, + "eprop3": "xyz123", + "eprop4": False, + "eprop5": [10], + }, ) return g @@ -64,7 +110,9 @@ def run_graphql_error_test(query, expected_error_message, graph): match = re.search(r'"message":"(.*?)"', full_error_message) error_message = match.group(1) if match else "" - assert error_message == expected_error_message, f"Expected '{expected_error_message}', but got '{error_message}'" + assert ( + error_message == expected_error_message + ), f"Expected '{expected_error_message}', but got '{error_message}'" @pytest.mark.parametrize("graph", [Graph, PersistentGraph]) @@ -88,17 +136,7 @@ def test_graph_node_property_filter_equal(graph): } } """ - expected_output = { - "graph": { - "nodeFilter": { - "nodes": { - "list": [ - {"name": "a"} - ] - } - } - } - } + expected_output = {"graph": {"nodeFilter": {"nodes": {"list": [{"name": "a"}]}}}} run_graphql_test(query, expected_output, graph()) @@ -173,16 +211,7 @@ def test_graph_node_property_filter_not_equal(graph): } """ expected_output = { - "graph": { - "nodeFilter": { - "nodes": { - "list": [ - {"name": "b"}, - {"name": "d"} - ] - } - } - } + "graph": {"nodeFilter": {"nodes": {"list": [{"name": "b"}, {"name": "d"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -257,17 +286,7 @@ def test_graph_node_property_filter_greater_than_or_equal(graph): } } """ - expected_output = { - "graph": { - "nodeFilter": { - "nodes": { - "list": [ - {"name": "a"} - ] - } - } - } - } + expected_output = {"graph": {"nodeFilter": {"nodes": {"list": [{"name": "a"}]}}}} 
run_graphql_test(query, expected_output, graph()) @@ -344,13 +363,7 @@ def test_graph_node_property_filter_less_than_or_equal(graph): expected_output = { "graph": { "nodeFilter": { - "nodes": { - "list": [ - {"name": "b"}, - {"name": "c"}, - {"name": "d"} - ] - } + "nodes": {"list": [{"name": "b"}, {"name": "c"}, {"name": "d"}]} } } } @@ -427,17 +440,7 @@ def test_graph_node_property_filter_greater_than(graph): } } """ - expected_output = { - "graph": { - "nodeFilter": { - "nodes": { - "list": [ - {"name": "a"} - ] - } - } - } - } + expected_output = {"graph": {"nodeFilter": {"nodes": {"list": [{"name": "a"}]}}}} run_graphql_test(query, expected_output, graph()) @@ -512,16 +515,7 @@ def test_graph_node_property_filter_less_than(graph): } """ expected_output = { - "graph": { - "nodeFilter": { - "nodes": { - "list": [ - {"name": "b"}, - {"name": "c"} - ] - } - } - } + "graph": {"nodeFilter": {"nodes": {"list": [{"name": "b"}, {"name": "c"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -596,16 +590,7 @@ def test_graph_node_property_filter_is_none(graph): } """ expected_output = { - "graph": { - "nodeFilter": { - "nodes": { - "list": [ - {"name": "b"}, - {"name": "d"} - ] - } - } - } + "graph": {"nodeFilter": {"nodes": {"list": [{"name": "b"}, {"name": "d"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -631,16 +616,7 @@ def test_graph_node_property_filter_is_some(graph): } """ expected_output = { - "graph": { - "nodeFilter": { - "nodes": { - "list": [ - {"name": "a"}, - {"name": "c"} - ] - } - } - } + "graph": {"nodeFilter": {"nodes": {"list": [{"name": "a"}, {"name": "c"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -667,16 +643,7 @@ def test_graph_node_property_filter_any(graph): } """ expected_output = { - "graph": { - "nodeFilter": { - "nodes": { - "list": [ - {"name": "b"}, - {"name": "d"} - ] - } - } - } + "graph": {"nodeFilter": {"nodes": {"list": [{"name": "b"}, {"name": "d"}]}}} } run_graphql_test(query, expected_output, 
graph()) @@ -702,15 +669,7 @@ def test_node_property_filter_any_empty_list(graph): } } """ - expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [] - } - } - } - } + expected_output = {"graph": {"nodes": {"nodeFilter": {"list": []}}}} run_graphql_test(query, expected_output, graph()) @@ -785,16 +744,7 @@ def test_graph_node_property_filter_not_any(graph): } """ expected_output = { - "graph": { - "nodeFilter": { - "nodes": { - "list": [ - {"name": "a"}, - {"name": "c"} - ] - } - } - } + "graph": {"nodeFilter": {"nodes": {"list": [{"name": "a"}, {"name": "c"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -824,12 +774,7 @@ def test_node_property_filter_not_any_empty_list(graph): "graph": { "nodes": { "nodeFilter": { - "list": [ - {"name": "a"}, - {"name": "b"}, - {"name": "c"}, - {"name": "d"} - ] + "list": [{"name": "a"}, {"name": "b"}, {"name": "c"}, {"name": "d"}] } } } @@ -912,14 +857,7 @@ def test_graph_edge_property_filter_equal(graph): expected_output = { "graph": { "edgeFilter": { - "edges": { - "list": [ - { - "src": {"name": "a"}, - "dst": {"name": "d"} - } - ] - } + "edges": {"list": [{"src": {"name": "a"}, "dst": {"name": "d"}}]} } } } @@ -1052,14 +990,7 @@ def test_graph_edge_property_filter_not_equal(graph): expected_output = { "graph": { "edgeFilter": { - "edges": { - "list": [ - { - "src": {"name": "c"}, - "dst": {"name": "d"} - } - ] - } + "edges": {"list": [{"src": {"name": "c"}, "dst": {"name": "d"}}]} } } } @@ -1194,14 +1125,7 @@ def test_graph_edge_property_filter_greater_than_or_equal(graph): expected_output = { "graph": { "edgeFilter": { - "edges": { - "list": [ - { - "src": {"name": "a"}, - "dst": {"name": "d"} - } - ] - } + "edges": {"list": [{"src": {"name": "a"}, "dst": {"name": "d"}}]} } } } @@ -1338,14 +1262,8 @@ def test_graph_edge_property_filter_less_than_or_equal(graph): "edgeFilter": { "edges": { "list": [ - { - "src": {"name": "b"}, - "dst": {"name": "d"} - }, - { - "src": {"name": "c"}, - "dst": 
{"name": "d"} - } + {"src": {"name": "b"}, "dst": {"name": "d"}}, + {"src": {"name": "c"}, "dst": {"name": "d"}}, ] } } @@ -1482,14 +1400,7 @@ def test_graph_edge_property_filter_greater_than(graph): expected_output = { "graph": { "edgeFilter": { - "edges": { - "list": [ - { - "src": {"name": "a"}, - "dst": {"name": "d"} - } - ] - } + "edges": {"list": [{"src": {"name": "a"}, "dst": {"name": "d"}}]} } } } @@ -1624,14 +1535,7 @@ def test_graph_edge_property_filter_less_than(graph): expected_output = { "graph": { "edgeFilter": { - "edges": { - "list": [ - { - "src": {"name": "b"}, - "dst": {"name": "d"} - } - ] - } + "edges": {"list": [{"src": {"name": "b"}, "dst": {"name": "d"}}]} } } } @@ -1762,15 +1666,7 @@ def test_graph_edge_property_filter_is_none(graph): } } """ - expected_output = { - "graph": { - "edgeFilter": { - "edges": { - "list": [] - } - } - } - } + expected_output = {"graph": {"edgeFilter": {"edges": {"list": []}}}} run_graphql_test(query, expected_output, graph()) @@ -1825,18 +1721,9 @@ def test_graph_edge_property_filter_is_some(graph): "edgeFilter": { "edges": { "list": [ - { - "src": {"name": "a"}, - "dst": {"name": "d"} - }, - { - "src": {"name": "b"}, - "dst": {"name": "d"} - }, - { - "src": {"name": "c"}, - "dst": {"name": "d"} - } + {"src": {"name": "a"}, "dst": {"name": "d"}}, + {"src": {"name": "b"}, "dst": {"name": "d"}}, + {"src": {"name": "c"}, "dst": {"name": "d"}}, ] } } @@ -1897,14 +1784,8 @@ def test_graph_edge_property_filter_any(graph): "edgeFilter": { "edges": { "list": [ - { - "src": {"name": "b"}, - "dst": {"name": "d"} - }, - { - "src": {"name": "c"}, - "dst": {"name": "d"} - } + {"src": {"name": "b"}, "dst": {"name": "d"}}, + {"src": {"name": "c"}, "dst": {"name": "d"}}, ] } } @@ -1961,15 +1842,7 @@ def test_graph_edge_property_filter_any_empty_list(graph): } } """ - expected_output = { - "graph": { - "edgeFilter": { - "edges": { - "list": [] - } - } - } - } + expected_output = {"graph": {"edgeFilter": {"edges": {"list": []}}}} 
run_graphql_test(query, expected_output, graph()) @@ -2075,14 +1948,7 @@ def test_graph_edge_property_filter_not_any(graph): expected_output = { "graph": { "edgeFilter": { - "edges": { - "list": [ - { - "src": {"name": "a"}, - "dst": {"name": "d"} - } - ] - } + "edges": {"list": [{"src": {"name": "a"}, "dst": {"name": "d"}}]} } } } @@ -2142,18 +2008,9 @@ def test_graph_edge_property_filter_not_any_empty_list(graph): "edgeFilter": { "edges": { "list": [ - { - "src": {"name": "a"}, - "dst": {"name": "d"} - }, - { - "src": {"name": "b"}, - "dst": {"name": "d"} - }, - { - "src": {"name": "c"}, - "dst": {"name": "d"} - } + {"src": {"name": "a"}, "dst": {"name": "d"}}, + {"src": {"name": "b"}, "dst": {"name": "d"}}, + {"src": {"name": "c"}, "dst": {"name": "d"}}, ] } } diff --git a/python/tests/graphql/test_nodes_property_filter.py b/python/tests/graphql/test_nodes_property_filter.py index af6e84402..868576cd0 100644 --- a/python/tests/graphql/test_nodes_property_filter.py +++ b/python/tests/graphql/test_nodes_property_filter.py @@ -11,27 +11,73 @@ def create_test_graph(g): - g.add_node(1, "a", properties={"prop1": 60, "prop2": 31.3, "prop3": "abc123", "prop4": True, "prop5": [1, 2, 3]}) - g.add_node(1, "b", properties={"prop1": 10, "prop2": 31.3, "prop3": "abc223", "prop4": False}) - g.add_node(1, "c", properties={"prop1": 20, "prop2": 31.3, "prop3": "abc333", "prop4": True, "prop5": [5, 6, 7]}) - g.add_node(1, "d", properties={"prop1": 30, "prop2": 31.3, "prop3": "abc444", "prop4": False}) + g.add_node( + 1, + "a", + properties={ + "prop1": 60, + "prop2": 31.3, + "prop3": "abc123", + "prop4": True, + "prop5": [1, 2, 3], + }, + ) + g.add_node( + 1, + "b", + properties={"prop1": 10, "prop2": 31.3, "prop3": "abc223", "prop4": False}, + ) + g.add_node( + 1, + "c", + properties={ + "prop1": 20, + "prop2": 31.3, + "prop3": "abc333", + "prop4": True, + "prop5": [5, 6, 7], + }, + ) + g.add_node( + 1, + "d", + properties={"prop1": 30, "prop2": 31.3, "prop3": "abc444", "prop4": 
False}, + ) g.add_edge( 2, "a", "d", - properties={"eprop1": 60, "eprop2": 0.4, "eprop3": "xyz123", "eprop4": True, "eprop5": [1, 2, 3]}, + properties={ + "eprop1": 60, + "eprop2": 0.4, + "eprop3": "xyz123", + "eprop4": True, + "eprop5": [1, 2, 3], + }, ) g.add_edge( 2, "b", "d", - properties={"eprop1": 10, "eprop2": 1.7, "eprop3": "xyz123", "eprop4": True, "eprop5": [3, 4, 5]}, + properties={ + "eprop1": 10, + "eprop2": 1.7, + "eprop3": "xyz123", + "eprop4": True, + "eprop5": [3, 4, 5], + }, ) g.add_edge( 2, "c", "d", - properties={"eprop1": 30, "eprop2": 6.4, "eprop3": "xyz123", "eprop4": False, "eprop5": [10]}, + properties={ + "eprop1": 30, + "eprop2": 6.4, + "eprop3": "xyz123", + "eprop4": False, + "eprop5": [10], + }, ) return g @@ -64,7 +110,9 @@ def run_graphql_error_test(query, expected_error_message, graph): match = re.search(r'"message":"(.*?)"', full_error_message) error_message = match.group(1) if match else "" - assert error_message == expected_error_message, f"Expected '{expected_error_message}', but got '{error_message}'" + assert ( + error_message == expected_error_message + ), f"Expected '{expected_error_message}', but got '{error_message}'" @pytest.mark.parametrize("graph", [Graph, PersistentGraph]) @@ -88,19 +136,7 @@ def test_node_property_filter_equal(graph): } } """ - expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [ - { - "name": "a" - } - ] - } - } - } - } + expected_output = {"graph": {"nodes": {"nodeFilter": {"list": [{"name": "a"}]}}}} run_graphql_test(query, expected_output, graph()) @@ -175,16 +211,7 @@ def test_node_property_filter_not_equal(graph): } """ expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [ - {"name": "b"}, - {"name": "d"} - ] - } - } - } + "graph": {"nodes": {"nodeFilter": {"list": [{"name": "b"}, {"name": "d"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -259,17 +286,7 @@ def test_node_property_filter_greater_than_or_equal(graph): } } """ - 
expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [ - {"name": "a"} - ] - } - } - } - } + expected_output = {"graph": {"nodes": {"nodeFilter": {"list": [{"name": "a"}]}}}} run_graphql_test(query, expected_output, graph()) @@ -346,13 +363,7 @@ def test_node_property_filter_less_than_or_equal(graph): expected_output = { "graph": { "nodes": { - "nodeFilter": { - "list": [ - {"name": "b"}, - {"name": "c"}, - {"name": "d"} - ] - } + "nodeFilter": {"list": [{"name": "b"}, {"name": "c"}, {"name": "d"}]} } } } @@ -429,17 +440,7 @@ def test_node_property_filter_greater_than(graph): } } """ - expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [ - {"name": "a"} - ] - } - } - } - } + expected_output = {"graph": {"nodes": {"nodeFilter": {"list": [{"name": "a"}]}}}} run_graphql_test(query, expected_output, graph()) @@ -514,16 +515,7 @@ def test_node_property_filter_less_than(graph): } """ expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [ - {"name": "b"}, - {"name": "c"} - ] - } - } - } + "graph": {"nodes": {"nodeFilter": {"list": [{"name": "b"}, {"name": "c"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -598,16 +590,7 @@ def test_node_property_filter_is_none(graph): } """ expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [ - {"name": "b"}, - {"name": "d"} - ] - } - } - } + "graph": {"nodes": {"nodeFilter": {"list": [{"name": "b"}, {"name": "d"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -633,16 +616,7 @@ def test_node_property_filter_is_some(graph): } """ expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [ - {"name": "a"}, - {"name": "c"} - ] - } - } - } + "graph": {"nodes": {"nodeFilter": {"list": [{"name": "a"}, {"name": "c"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -669,16 +643,7 @@ def test_node_property_filter_any(graph): } """ expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [ - 
{"name": "b"}, - {"name": "d"} - ] - } - } - } + "graph": {"nodes": {"nodeFilter": {"list": [{"name": "b"}, {"name": "d"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -704,15 +669,7 @@ def test_node_property_filter_any_empty_list(graph): } } """ - expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [] - } - } - } - } + expected_output = {"graph": {"nodes": {"nodeFilter": {"list": []}}}} run_graphql_test(query, expected_output, graph()) @@ -787,16 +744,7 @@ def test_node_property_filter_not_any(graph): } """ expected_output = { - "graph": { - "nodes": { - "nodeFilter": { - "list": [ - {"name": "a"}, - {"name": "c"} - ] - } - } - } + "graph": {"nodes": {"nodeFilter": {"list": [{"name": "a"}, {"name": "c"}]}}} } run_graphql_test(query, expected_output, graph()) @@ -826,12 +774,7 @@ def test_node_property_filter_not_any_empty_list(graph): "graph": { "nodes": { "nodeFilter": { - "list": [ - {"name": "a"}, - {"name": "b"}, - {"name": "c"}, - {"name": "d"} - ] + "list": [{"name": "a"}, {"name": "b"}, {"name": "c"}, {"name": "d"}] } } } diff --git a/python/tests/test_algorithms.py b/python/tests/test_algorithms.py index 47112b169..ba90487ab 100644 --- a/python/tests/test_algorithms.py +++ b/python/tests/test_algorithms.py @@ -2,7 +2,7 @@ import pandas as pd import pandas.core.frame -from raphtory import Graph, PersistentGraph +from raphtory import Graph from raphtory import algorithms from raphtory import graph_loader @@ -545,3 +545,171 @@ def test_max_weight_matching(): assert max_weight.dst(2).id == 3 assert max_weight.dst(3) is None + + +def test_fast_rp(): + g = Graph() + edges = [ + (1, 2, 1), + (1, 3, 1), + (2, 3, 1), + (4, 5, 1), + (4, 6, 1), + (4, 7, 1), + (5, 6, 1), + (5, 7, 1), + (6, 7, 1), + (6, 8, 1), + ] + for src, dst, ts in edges: + g.add_edge(ts, src, dst) + + result = algorithms.fast_rp(g, 16, 1.0, [1.0, 1.0], 42).get_all_with_names() + baseline = { + "7": [ + 0.0, + 3.3635856610148585, + -1.6817928305074292, + 
-1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + 0.0, + -1.6817928305074292, + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + ], + "6": [ + -1.6817928305074292, + 5.045378491522287, + -1.6817928305074292, + 0.0, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + -1.6817928305074292, + 0.0, + 0.0, + -3.3635856610148585, + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + ], + "5": [ + 0.0, + 3.3635856610148585, + -1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + 0.0, + -1.6817928305074292, + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + ], + "2": [ + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + 3.3635856610148585, + 1.6817928305074292, + 1.6817928305074292, + 3.3635856610148585, + -3.3635856610148585, + 0.0, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + -3.3635856610148585, + ], + "8": [ + -1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + 0.0, + -1.6817928305074292, + 1.6817928305074292, + 0.0, + 0.0, + 0.0, + ], + "3": [ + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + 3.3635856610148585, + 1.6817928305074292, + 1.6817928305074292, + 3.3635856610148585, + -3.3635856610148585, + 0.0, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + -3.3635856610148585, + ], + "1": [ + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + 3.3635856610148585, + 1.6817928305074292, + 1.6817928305074292, + 3.3635856610148585, + -3.3635856610148585, + 0.0, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + 
-3.3635856610148585, + ], + "4": [ + 0.0, + 3.3635856610148585, + -1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + 0.0, + -1.6817928305074292, + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + ], + } + + assert result == baseline diff --git a/raphtory/src/algorithms/algorithm_result.rs b/raphtory/src/algorithms/algorithm_result.rs index 02272a6ce..92fe1fe2f 100644 --- a/raphtory/src/algorithms/algorithm_result.rs +++ b/raphtory/src/algorithms/algorithm_result.rs @@ -34,6 +34,13 @@ impl AsOrd<(OrderedFloat, OrderedFloat)> for (T, T) { } } +impl AsOrd>> for Vec { + fn as_ord(&self) -> &Vec> { + // Safety: OrderedFloat is #[repr(transparent)] and has no invalid values, i.e. there is no physical difference between OrderedFloat and Float. + unsafe { &*(self as *const Vec as *const Vec>) } + } +} + /// An 'AlgorithmRepr' struct that represents the string output in the terminal after running an algorithm. /// /// It returns the algorithm name, number of nodes in the graph, and the result type. 
diff --git a/raphtory/src/algorithms/embeddings/fast_rp.rs b/raphtory/src/algorithms/embeddings/fast_rp.rs new file mode 100644 index 000000000..bf50cf422 --- /dev/null +++ b/raphtory/src/algorithms/embeddings/fast_rp.rs @@ -0,0 +1,428 @@ +use crate::{ + algorithms::algorithm_result::AlgorithmResult, + core::{entities::VID, state::compute_state::ComputeStateVec}, + db::{ + api::view::{NodeViewOps, StaticGraphViewOps}, + task::{ + context::Context, + node::eval_node::EvalNodeView, + task::{ATask, Job, Step}, + task_runner::TaskRunner, + }, + }, +}; +use ordered_float::OrderedFloat; +use rand::prelude::*; +use rayon::prelude::*; +use std::sync::Arc; + +#[derive(Clone, Debug, Default)] +struct FastRPState { + embedding_state: Vec, +} + +/// Computes the embeddings of each vertex of a graph using the Fast RP algorithm +/// +/// # Arguments +/// +/// * `graph` - A reference to the graph +/// * `embedding_dim` - The size of the generated embeddings +/// * `normalization_strength` - The extent to which high-degree vertices should be discounted (range: 1-0) +/// * `iter_weights` - The scalar weights to apply to the results of each iteration +/// * `seed` - The seed for initialisation of random vectors +/// * `threads` - Number of threads to use +/// +/// # Returns: +/// +/// An AlgorithmResult containing the mapping from the node to its embedding +/// +pub fn fast_rp( + graph: &G, + embedding_dim: usize, + normalization_strength: f64, + iter_weights: Vec, + seed: Option, + threads: Option, +) -> AlgorithmResult, Vec>> +where + G: StaticGraphViewOps, +{ + let ctx: Context = graph.into(); + let m = graph.count_nodes() as f64; + let s = m.sqrt(); + let beta = normalization_strength - 1.0; + let num_iters = iter_weights.len() - 1; + let weights = Arc::new(iter_weights); + let seed = seed.unwrap_or(rand::thread_rng().gen()); + + // initialize each vertex with a random vector according to FastRP's construction rules + let step1 = { + let weights = Arc::clone(&weights); + 
ATask::new(move |vv| { + let l = ((vv.degree() as f64) / (m * 2.0)).powf(beta); + let choices = [ + (l * s.sqrt(), 1.0 / (s * 2.0)), + (0.0, 1.0 - (1.0 / s)), + (-l * s.sqrt(), 1.0 / (s * 2.0)), + ]; + let mut rng = SmallRng::seed_from_u64(vv.node.0 as u64 ^ seed); + let state: &mut FastRPState = vv.get_mut(); + state.embedding_state = (0..embedding_dim) + .map(|_| choices.choose_weighted(&mut rng, |item| item.1).unwrap().0 * weights[0]) + .collect(); + Step::Continue + }) + }; + + // sum each vector from neighbours and scale + let step2 = ATask::new(move |vv: &mut EvalNodeView| { + // for neighbor, for i, add neighbors.prev[i] to current state + // scale state by iteration weight + let weights = Arc::clone(&weights); + let ss = vv.eval_graph.ss; + // TODO: rewrite using iters? + for neighbour in vv.neighbours() { + for i in 0..embedding_dim { + vv.get_mut().embedding_state[i] += neighbour.prev().embedding_state[i]; + } + } + for value in vv.get_mut().embedding_state.iter_mut() { + *value *= weights[ss]; + } + + Step::Continue + }); + + let mut runner: TaskRunner = TaskRunner::new(ctx); + let results_type = std::any::type_name::(); + + let res = runner.run( + vec![Job::new(step1)], + vec![Job::read_only(step2)], + None, + |_, _, _, local: Vec| { + graph + .nodes() + .par_iter() + .map(|node| { + let VID(id) = node.node; + let embedding = local[id].embedding_state.clone(); + (id, embedding) + }) + .collect() + }, + threads, + num_iters, + None, + None, + ); + + // TODO: add flag to optionally normalize results + + AlgorithmResult::new(graph.clone(), "Fast RP", results_type, res) +} + +#[cfg(test)] +mod fast_rp_test { + use super::*; + use crate::{db::api::mutation::AdditionOps, prelude::*, test_storage}; + use std::collections::HashMap; + + #[test] + fn simple_fast_rp_test() { + let graph = Graph::new(); + + let edges = vec![ + (1, 2, 1), + (1, 3, 1), + (2, 3, 1), + (4, 5, 1), + (4, 6, 1), + (4, 7, 1), + (5, 6, 1), + (5, 7, 1), + (6, 7, 1), + (6, 8, 1), + ]; + + for 
(src, dst, ts) in edges { + graph.add_edge(ts, src, dst, NO_PROPS, None).unwrap(); + } + + let baseline = HashMap::from([ + ( + "7", + [ + 0.0, + 3.3635856610148585, + -1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + 0.0, + -1.6817928305074292, + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + ], + ), + ( + "6", + [ + -1.6817928305074292, + 5.045378491522287, + -1.6817928305074292, + 0.0, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + -1.6817928305074292, + 0.0, + 0.0, + -3.3635856610148585, + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + ], + ), + ( + "5", + [ + 0.0, + 3.3635856610148585, + -1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + 0.0, + -1.6817928305074292, + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + ], + ), + ( + "2", + [ + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + 3.3635856610148585, + 1.6817928305074292, + 1.6817928305074292, + 3.3635856610148585, + -3.3635856610148585, + 0.0, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + -3.3635856610148585, + ], + ), + ( + "8", + [ + -1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + 0.0, + -1.6817928305074292, + 1.6817928305074292, + 0.0, + 0.0, + 0.0, + ], + ), + ( + "3", + [ + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + 3.3635856610148585, + 1.6817928305074292, + 1.6817928305074292, + 3.3635856610148585, + -3.3635856610148585, + 0.0, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + -3.3635856610148585, + ], + ), + ( + "1", + [ + 
1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 0.0, + 0.0, + 3.3635856610148585, + 1.6817928305074292, + 1.6817928305074292, + 3.3635856610148585, + -3.3635856610148585, + 0.0, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + -3.3635856610148585, + ], + ), + ( + "4", + [ + 0.0, + 3.3635856610148585, + -1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + 1.6817928305074292, + 0.0, + -1.6817928305074292, + 0.0, + 0.0, + -1.6817928305074292, + 1.6817928305074292, + 1.6817928305074292, + -1.6817928305074292, + -1.6817928305074292, + ], + ), + ]); + + test_storage!(&graph, |graph| { + let results = + fast_rp(graph, 16, 1.0, vec![1.0, 1.0], Some(42), None).get_all_with_names(); + for (v_id, embedding) in results { + assert_eq!(embedding, *baseline.get(v_id.as_str()).unwrap()); + } + }); + } + + // NOTE(Wyatt): the simple fast_rp test is more of a validation of idempotency than correctness (although the results are expected) + // This test-- in progress-- is going to validate that the algorithm preserves the pairwise topological distances + /* + use crate::io::csv_loader::CsvLoader; + use serde::{Deserialize, Serialize}; + use std::path::PathBuf; + + fn print_samples(map: &HashMap>, n: usize) { + let mut count = 0; + + for (key, value) in map { + println!("Key: {}, Value: {:#?}", key, value); + + count += 1; + if count >= n { + break; + } + } + } + + fn top_k_neighbors( + data: &HashMap>, + k: usize, + ) -> HashMap> { + let mut neighbors: HashMap> = HashMap::new(); + + // Iterate over each ID to find its top K neighbors + for (id, vector) in data { + // Collect distances to all other IDs + let mut distances: Vec<(&String, f64)> = Vec::new(); + for (other_id, other_vector) in data { + if id == other_id { + continue; // Skip self + } + // Compute Euclidean distance + let distance = euclidean_distance(vector, other_vector); + distances.push((other_id, distance)); + } + // Sort the distances in ascending 
order + distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); + // Collect top K neighbor IDs + let top_k: Vec = distances + .iter() + .take(k) + .map(|(other_id, _)| (*other_id).clone()) + .collect(); + // Insert into the neighbors map + neighbors.insert(id.clone(), top_k); + } + + neighbors + } + + fn euclidean_distance(a: &Vec, b: &Vec) -> f64 { + assert_eq!(a.len(), b.len(), "Vectors must be of the same length"); + a.iter() + .zip(b.iter()) + .map(|(&x, &y)| (x - y).powi(2)) + .sum::() + .sqrt() + } + + #[test] + fn big_fast_rp_test() { + let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + d.push("resources/test"); + let loader = CsvLoader::new(d.join("test.csv")).set_delimiter(","); + let graph = Graph::new(); + + #[derive(Deserialize, Serialize, Debug)] + struct CsvEdge { + src: u64, + dst: u64, + } + + loader + .load_into_graph(&graph, |e: CsvEdge, g| { + g.add_edge(1, e.src, e.dst, NO_PROPS, None).unwrap(); + g.add_edge(1, e.dst, e.src, NO_PROPS, None).unwrap(); + }) + .unwrap(); + + test_storage!(&graph, |graph| { + let results = fast_rp( + graph, + 32, + 1.0, + vec![1.0, 1.0, 0.5], + Some(42), + None, + ).get_all_with_names(); + // println!("Result: {:#?}", results); + print_samples(&results, 10); + }); + } + */ +} diff --git a/raphtory/src/algorithms/embeddings/mod.rs b/raphtory/src/algorithms/embeddings/mod.rs new file mode 100644 index 000000000..98e406485 --- /dev/null +++ b/raphtory/src/algorithms/embeddings/mod.rs @@ -0,0 +1,3 @@ +mod fast_rp; + +pub use fast_rp::fast_rp; diff --git a/raphtory/src/algorithms/mod.rs b/raphtory/src/algorithms/mod.rs index 8f21dd870..91710fc55 100644 --- a/raphtory/src/algorithms/mod.rs +++ b/raphtory/src/algorithms/mod.rs @@ -34,6 +34,7 @@ pub mod bipartite; pub mod components; pub mod cores; pub mod dynamics; +pub mod embeddings; pub mod layout; pub mod metrics; pub mod motifs; diff --git a/raphtory/src/python/graph/algorithm_result.rs b/raphtory/src/python/graph/algorithm_result.rs index 
fe08e3acd..7429b5020 100644 --- a/raphtory/src/python/graph/algorithm_result.rs +++ b/raphtory/src/python/graph/algorithm_result.rs @@ -312,6 +312,19 @@ py_algorithm_result_new_ord_hash_eq!( Vec<(i64, String)> ); +py_algorithm_result!( + AlgorithmResultVecF64, + DynamicGraph, + Vec<f64>, + Vec<OrderedFloat<f64>> +); +py_algorithm_result_partial_ord!( + AlgorithmResultVecF64, + DynamicGraph, + Vec<f64>, + Vec<OrderedFloat<f64>> +); + py_algorithm_result!(AlgorithmResultUsize, DynamicGraph, usize, usize); py_algorithm_result_new_ord_hash_eq!(AlgorithmResultUsize, DynamicGraph, usize, usize); diff --git a/raphtory/src/python/packages/algorithms.rs b/raphtory/src/python/packages/algorithms.rs index 5c6ff5b41..875bf5777 100644 --- a/raphtory/src/python/packages/algorithms.rs +++ b/raphtory/src/python/packages/algorithms.rs @@ -15,6 +15,7 @@ use crate::{ }, components, dynamics::temporal::epidemics::{temporal_SEIR as temporal_SEIR_rs, Infected, SeedError}, + embeddings::fast_rp as fast_rp_rs, layout::{ cohesive_fruchterman_reingold::cohesive_fruchterman_reingold as cohesive_fruchterman_reingold_rs, fruchterman_reingold::fruchterman_reingold_unbounded as fruchterman_reingold_rs, @@ -911,3 +912,35 @@ pub fn max_weight_matching( verify_optimum_flag, ) } + +/// Computes embedding vectors for each vertex of an undirected/bidirectional graph according to the Fast RP algorithm. +/// Original Paper: https://doi.org/10.48550/arXiv.1908.11512 +/// Arguments: +/// g (GraphView): The graph view on which embeddings are generated. +/// embedding_dim (int): The size (dimension) of the generated embeddings. +/// normalization_strength (float): The extent to which high-degree vertices should be discounted (range: 1-0) +/// iter_weights (list[float]): The scalar weights to apply to the results of each iteration +/// seed (int, optional): The seed for initialisation of random vectors +/// threads (int, optional): The number of threads to be used for parallel execution.
+/// +/// Returns: +/// AlgorithmResult: Vertices mapped to their corresponding embedding vectors +#[pyfunction] +#[pyo3[signature = (g, embedding_dim, normalization_strength, iter_weights, seed=None, threads=None)]] +pub fn fast_rp( + g: &PyGraphView, + embedding_dim: usize, + normalization_strength: f64, + iter_weights: Vec<f64>, + seed: Option<u64>, + threads: Option<usize>, +) -> AlgorithmResult<DynamicGraph, Vec<f64>, Vec<OrderedFloat<f64>>> { + fast_rp_rs( + &g.graph, + embedding_dim, + normalization_strength, + iter_weights, + seed, + threads, + ) +} diff --git a/raphtory/src/python/packages/base_modules.rs b/raphtory/src/python/packages/base_modules.rs index 792b6c8c0..9422667da 100644 --- a/raphtory/src/python/packages/base_modules.rs +++ b/raphtory/src/python/packages/base_modules.rs @@ -93,6 +93,7 @@ pub fn base_algorithm_module(py: Python<'_>) -> Result<Bound<PyModule>, PyErr> { in_component, out_components, out_component, + fast_rp, global_temporal_three_node_motif, global_temporal_three_node_motif_multi, local_temporal_three_node_motifs,