From caa89273505ffbe97afe46479bf16035ab8c918e Mon Sep 17 00:00:00 2001 From: Shivam <4599890+shivam-880@users.noreply.github.com> Date: Wed, 21 Aug 2024 17:14:04 +0100 Subject: [PATCH] Tidy/ingestor (#1714) * fix dfview * fix more tests * fmt * fix time, src, dst order * const_prop rename * replace layer, layer_in_df with layer_name and layer_col * replace node_type_in_df with node_type_col * fix notebook * Attemted merge * fmt * missed a const * forgot const * fixed tests, added node types * Fix test * Added extra tests for node_type and layer - changed function name * update stubs + black * fixed after merge --------- Co-authored-by: Shivam Kapoor <4599890+iamsmkr@users.noreply.github.com> Co-authored-by: miratepuffin --- examples/python/socio-patterns/example.ipynb | 66 +- python/python/raphtory/__init__.pyi | 1366 ++++++----------- .../python/raphtory/algorithms/__init__.pyi | 25 +- .../python/raphtory/graph_loader/__init__.pyi | 11 +- python/python/raphtory/graphql/__init__.pyi | 40 +- python/python/raphtory/vectors/__init__.pyi | 32 +- python/tests/test_algorithms.py | 18 +- python/tests/test_disk_graph.py | 21 +- python/tests/test_graph_conversions.py | 107 +- python/tests/test_graphql.py | 658 ++++++-- python/tests/test_iterables.py | 2 +- python/tests/test_load_from_pandas.py | 737 ++++++--- python/tests/test_load_from_parquet.py | 632 ++++++-- raphtory-cypher/src/lib.rs | 2 +- raphtory/src/core/utils/errors.rs | 2 + raphtory/src/disk_graph/mod.rs | 4 +- raphtory/src/io/arrow/df_loaders.rs | 239 +-- raphtory/src/io/arrow/mod.rs | 14 +- raphtory/src/io/arrow/prop_handler.rs | 31 +- raphtory/src/io/parquet_loaders.rs | 106 +- raphtory/src/python/graph/disk_graph.rs | 10 +- raphtory/src/python/graph/graph.rs | 404 ++--- .../src/python/graph/graph_with_deletions.rs | 448 ++---- .../src/python/graph/io/pandas_loaders.rs | 103 +- 24 files changed, 2755 insertions(+), 2323 deletions(-) diff --git a/examples/python/socio-patterns/example.ipynb b/examples/python/socio-patterns/example.ipynb index 833be8e6c8..b727f28446 100644 --- a/examples/python/socio-patterns/example.ipynb +++ b/examples/python/socio-patterns/example.ipynb @@ -99,13 +99,13 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "16de15732c834eafb018c88c0b052c00", + "model_id": "ec2b2e5c92b54503940f06b3c30f184a", "version_major": 2, "version_minor": 0 }, @@ -120,7 +120,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Graph(number_of_nodes=22, number_of_edges=290, number_of_temporal_edges=3196, earliest_time=1560419400000, latest_time=1562756700000)\n" + "Graph(number_of_nodes=22, number_of_edges=290, number_of_temporal_edges=6392, earliest_time=1560419400000, latest_time=1562756700000)\n" ] } ], @@ -130,7 +130,7 @@ " src=\"Actor\",\n", " dst=\"Recipient\",\n", " time=\"DateTime\",\n", - " layer=\"Behavior\",\n", + " layer_col=\"Behavior\",\n", " properties=[\"Weight\"],\n", ")\n", "print(g)" @@ -149,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -159,8 +159,8 @@ "Stats on the graph structure:\n", "Number of nodes (Baboons): 22\n", "Number of unique edges (src,dst,layer): 290\n", - "Total interactions (edge updates): 3196\n", - "Unique layers: ['_default', 'Grooming', 'Resting', 'Presenting', 'Playing with', 'Grunting-Lipsmacking', 'Supplanting', 'Threatening', 'Submission', 'Touching', 'Avoiding', 'Attacking', 
'Carrying', 'Embracing', 'Mounting', 'Copulating', 'Chasing'] \n", + "Total interactions (edge updates): 6392\n", + "Unique layers: ['_default', 'Behavior', 'Grooming', 'Resting', 'Presenting', 'Playing with', 'Grunting-Lipsmacking', 'Supplanting', 'Threatening', 'Submission', 'Touching', 'Avoiding', 'Attacking', 'Carrying', 'Embracing', 'Mounting', 'Copulating', 'Chasing'] \n", "\n", "Stats on the graphs time range:\n", "Earliest datetime: 2019-06-13 09:50:00+00:00\n", @@ -213,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -226,11 +226,11 @@ "\n", "Getting individual nodes and edges:\n", "Node(name=LOME, earliest_time=1560419520000, latest_time=1562756100000)\n", - "Edge(source=LOME, target=NEKKE, earliest_time=1560421080000, latest_time=1562755980000, properties={Weight: 1}) \n", + "Edge(source=LOME, target=NEKKE, earliest_time=1560421080000, latest_time=1562755980000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}) \n", "\n", "Getting iterators over all nodes and edges:\n", "[Node(name=ANGELE, earliest_time=1560419400000, latest_time=1562754600000), Node(name=FELIPE, earliest_time=1560419400000, latest_time=1562756700000), Node(name=LIPS, earliest_time=1560419460000, latest_time=1562756700000), Node(name=NEKKE, earliest_time=1560419520000, latest_time=1562756700000), Node(name=LOME, earliest_time=1560419520000, latest_time=1562756100000)]\n", - "[Edge(source=ANGELE, target=FELIPE, earliest_time=1560419400000, latest_time=1562753640000, properties={Weight: 1}), Edge(source=LOME, target=FEYA, earliest_time=1560421260000, latest_time=1562328420000, properties={Weight: 1}), Edge(source=VIOLETTE, target=LIPS, earliest_time=1560423600000, latest_time=1560423600000, properties={Weight: -1})]\n" + "[Edge(source=ANGELE, target=FELIPE, earliest_time=1560419400000, latest_time=1562753640000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}), Edge(source=FELIPE, target=ANGELE, earliest_time=1560419460000, latest_time=1562754600000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}), Edge(source=FELIPE, target=LIPS, earliest_time=1560419460000, latest_time=1562251080000, properties={Weight: 1, Weight: 1, Weight: 1})]\n" ] } ], @@ -263,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -307,7 +307,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": { "scrolled": true }, @@ -318,7 +318,7 @@ "text": [ "FELIPE has 17 incoming interactions and 18 outgoing interactions.\n", "\n", - "[Edge(source=ANGELE, target=FELIPE, earliest_time=1560419400000, latest_time=1562753640000, properties={Weight: 1}), Edge(source=LIPS, target=FELIPE, earliest_time=1560423600000, latest_time=1562756700000, properties={Weight: 1}), Edge(source=NEKKE, target=FELIPE, earliest_time=1560443040000, latest_time=1562596380000, properties={Weight: 1})]\n", + "[Edge(source=ANGELE, target=FELIPE, earliest_time=1560419400000, latest_time=1562753640000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}), Edge(source=LIPS, target=FELIPE, earliest_time=1560423600000, latest_time=1562756700000, properties={Weight: 1, Weight: 1, Weight: 1, Weight: 1, Weight: 1}), Edge(source=NEKKE, target=FELIPE, earliest_time=1560443040000, latest_time=1562596380000, properties={Weight: 1, Weight: 1, 
Weight: 1, Weight: 1, Weight: 1, Weight: 1})]\n", "[Node(name=ANGELE, earliest_time=1560419400000, latest_time=1562754600000), Node(name=LIPS, earliest_time=1560419460000, latest_time=1562756700000), Node(name=NEKKE, earliest_time=1560419520000, latest_time=1562756700000)] \n", "\n", "FELIPE interacted with the following baboons ['ANGELE', 'LIPS', 'NEKKE', 'LOME', 'BOBO', 'ATMOSPHERE', 'FEYA', 'FANA', 'PIPO', 'MUSE', 'MAKO', 'MALI', 'PETOULETTE', 'ARIELLE', 'HARLEM', 'VIOLETTE', 'EWINE', 'SELF']\n" @@ -361,7 +361,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { "scrolled": true }, @@ -371,18 +371,18 @@ "output_type": "stream", "text": [ "Update history per layer:\n", + "FELIPE interacted with MAKO with the following behaviour 'Behavior' at this times: [1560437400000, 1560437640000, 1560935460000, 1561043280000, 1561043280000, 1561043340000, 1561117620000, 1561373880000, 1561373880000, 1561373940000, 1561373940000, 1561373940000, 1561373940000, 1561373940000, 1561390860000, 1561390860000, 1561390860000, 1561390920000, 1561643580000, 1561717080000, 1561717140000, 1561970760000, 1562148960000, 1562148960000, 1562149020000, 1562149020000, 1562149080000, 1562671020000]\n", "FELIPE interacted with MAKO with the following behaviour 'Grooming' at this times: [1561043280000, 1561043340000]\n", "FELIPE interacted with MAKO with the following behaviour 'Resting' at this times: [1560437400000, 1560437640000, 1560935460000, 1561117620000, 1561373880000, 1561390860000, 1561390860000, 1561390860000, 1561643580000, 1561970760000, 1562149020000, 1562671020000]\n", "FELIPE interacted with MAKO with the following behaviour 'Playing with' at this times: [1561373880000, 1561373940000, 1561373940000, 1561390920000, 1562148960000, 1562148960000, 1562149080000]\n", "FELIPE interacted with MAKO with the following behaviour 'Grunting-Lipsmacking' at this times: [1561373940000, 1561717080000, 1561717140000]\n", - "FELIPE interacted with MAKO with the following behaviour 'Touching' at this times: [1562149020000]\n", "\n", "Individual updates as edges:\n", + "At 2019-06-13 14:50:00+00:00 FELIPE interacted with MAKO in the following manner: 'Behavior'\n", "At 2019-06-13 14:50:00+00:00 FELIPE interacted with MAKO in the following manner: 'Resting'\n", + "At 2019-06-13 14:54:00+00:00 FELIPE interacted with MAKO in the following manner: 'Behavior'\n", "At 2019-06-13 14:54:00+00:00 FELIPE interacted with MAKO in the following manner: 'Resting'\n", - "At 2019-06-19 09:11:00+00:00 FELIPE interacted with MAKO in the following manner: 'Resting'\n", - "At 2019-06-20 15:08:00+00:00 FELIPE interacted with MAKO in the following manner: 'Carrying'\n", - "At 2019-06-20 15:08:00+00:00 FELIPE interacted with MAKO in the following manner: 'Grooming'\n", + "At 2019-06-19 09:11:00+00:00 FELIPE interacted with MAKO in the following manner: 'Behavior'\n", "...\n", "\n", "Individual updates for 'Touching' and 'Carrying:\n", @@ -435,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": { "scrolled": true }, @@ -485,15 +485,15 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Felipe's favourite baboons in descending order are [('NEKKE', 41), ('ANGELE', 31), ('MAKO', 26), ('LOME', 23), ('LIPS', 11), ('HARLEM', 10), ('FANA', 8), ('MALI', 6), ('FEYA', 5), ('ARIELLE', 5), ('EWINE', 5), ('PIPO', 3), ('SELF', 2), ('BOBO', 1), ('ATMOSPHERE', 1), 
('PETOULETTE', 1), ('VIOLETTE', 1), ('MUSE', -1)]\n", - "EXTERNE is the most annoying monkey with an average score of -2.0\n" + "Felipe's favourite baboons in descending order are [('NEKKE', 82), ('ANGELE', 62), ('MAKO', 52), ('LOME', 46), ('LIPS', 22), ('HARLEM', 20), ('FANA', 16), ('MALI', 12), ('FEYA', 10), ('ARIELLE', 10), ('EWINE', 10), ('PIPO', 6), ('SELF', 4), ('BOBO', 2), ('ATMOSPHERE', 2), ('PETOULETTE', 2), ('VIOLETTE', 2), ('MUSE', -2)]\n", + "EXTERNE is the most annoying monkey with an average score of -4.0\n" ] } ], @@ -547,16 +547,16 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Across the full dataset LOME interacted with NEKKE 41 times\n", - "Between None and 2019-06-13 12:17:19+00:00, LOME interacted with NEKKE 8 times\n", - "Window start: 2019-06-13 00:00:00+00:00, First update: 2019-06-13 10:18:00+00:00, Last update: 2019-06-13 15:05:00+00:00, Window End: 2019-06-14 00:00:00+00:00\n" + "Across the full dataset LOME interacted with NEKKE 82 times\n", + "Between None and 2019-06-13 12:17:19+00:00, LOME interacted with NEKKE 16 times\n", + "Window start: 2019-06-12 23:00:00+00:00, First update: 2019-06-13 10:18:00+00:00, Last update: 2019-06-13 15:05:00+00:00, Window End: 2019-06-13 23:00:00+00:00\n" ] } ], @@ -601,7 +601,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -646,14 +646,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Total weight across all edges is 2948.\n", + "Total weight across all edges is 5896.\n", "Total weight across Grooming and Resting is 1685.\n", "Total weight across Grooming and Resting between 2019-06-13 00:00:00 and 2019-06-20 00:00:00 is 403.\n" ] @@ -701,7 +701,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -759,7 +759,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -801,7 +801,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -848,7 +848,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "metadata": {}, "outputs": [ { diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 2bc0a333fd..d2fa738250 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -8,10 +8,8 @@ ############################################################################### class AlgorithmResult: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def get(self, key): """ Returns the value corresponding to the provided key @@ -19,7 +17,6 @@ class AlgorithmResult: Arguments: key: The key of type `H` for which the value is to be retrieved. """ - def get_all(self): """ Returns a Dict containing all the nodes (as keys) and their corresponding values (values) or none. 
@@ -27,10 +24,8 @@ class AlgorithmResult: Returns: A dict of nodes and their values """ - def get_all_values(self): """Returns a a list of all values""" - def get_all_with_names(self): """ Returns a dict with node names and values @@ -38,7 +33,6 @@ class AlgorithmResult: Returns: a dict with node names and values """ - def group_by(self): """ Groups the `AlgorithmResult` by its values. @@ -47,16 +41,12 @@ class AlgorithmResult: A `HashMap` where keys are unique values from the `AlgorithmResult` and values are vectors containing keys of type `H` that share the same value. """ - def max(self): """Returns a tuple of the max result with its key""" - def median(self): """Returns a tuple of the median result with its key""" - def min(self): """Returns a tuple of the min result with its key""" - def sort_by_node(self, reverse=True): """ Sorts by node id in ascending or descending order. @@ -67,7 +57,6 @@ class AlgorithmResult: Returns: A sorted list of tuples containing node names and values. """ - def sort_by_node_name(self, reverse=True): """ The function `sort_by_node_name` sorts a vector of tuples containing a node and an optional @@ -81,7 +70,6 @@ class AlgorithmResult: Returns: The function sort_by_node_name returns a vector of tuples. Each tuple contains a Node and value """ - def sort_by_value(self, reverse=True): """ Sorts the `AlgorithmResult` by its values in ascending or descending order. @@ -92,7 +80,6 @@ class AlgorithmResult: Returns: A sorted vector of tuples containing keys of type `H` and values of type `Y`. """ - def to_df(self): """ Creates a dataframe from the result @@ -100,10 +87,8 @@ class AlgorithmResult: Returns: A `pandas.DataFrame` containing the result """ - def to_string(self): """Returns a formatted string representation of the algorithm.""" - def top_k(self, k, percentage=False, reverse=True): """ Retrieves the top-k elements from the `AlgorithmResult` based on its values. @@ -125,14 +110,12 @@ class ConstProperties: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def as_dict(self): """ as_dict() -> dict[str, Any] convert the properties view to a python dict """ - def get(self, key): """ get(key: str) -> Any | None @@ -142,21 +125,18 @@ class ConstProperties: get property value by key (returns `None` if key does not exist) """ - def items(self): """ items() -> list[tuple[str, Any]] lists the property keys together with the corresponding value """ - def keys(self): """ keys() -> list[str] lists the available property keys """ - def values(self): """ values() -> list[Any] @@ -164,6 +144,34 @@ class ConstProperties: lists the property values """ +class DiskGraphStorage: + def __init__(self): + """Initialize self. See help(type(self)) for accurate signature.""" + def graph_dir(self): ... + @staticmethod + def load_from_dir(graph_dir): ... + @staticmethod + def load_from_pandas(graph_dir, edge_df, time_col, src_col, dst_col): ... + @staticmethod + def load_from_parquets( + graph_dir, + layer_parquet_cols, + node_properties, + chunk_size, + t_props_chunk_size, + read_chunk_size, + concurrent_files, + num_threads, + node_type_col, + ): ... + def merge_by_sorted_gids(self, other, graph_dir): + """ + Merge this graph with another `DiskGraph`. Note that both graphs should have nodes that are + sorted by their global ids or the resulting graph will be nonsense! + """ + def to_events(self): ... + def to_persistent(self): ... + class Edge: """ PyEdge is a Python class that represents an edge in the graph. 
@@ -172,7 +180,6 @@ class Edge: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Edge including all events after `start` (exclusive). @@ -183,7 +190,6 @@ class Edge: Returns: A Edge object. """ - def at(self, time): """ Create a view of the Edge including all events at `time`. @@ -194,7 +200,6 @@ class Edge: Returns: A Edge object. """ - def before(self, end): """ Create a view of the Edge including all events before `end` (exclusive). @@ -205,7 +210,6 @@ class Edge: Returns: A Edge object. """ - @property def date_time(self): """ @@ -214,14 +218,12 @@ class Edge: Returns: (datetime) the datetime of an exploded edge """ - def default_layer(self): """ Return a view of Edge containing only the default edge layer Returns: Edge: The layered view """ - def deletions(self): """ Returns a list of timestamps of when an edge is deleted @@ -229,7 +231,6 @@ class Edge: Returns: A list of unix timestamps """ - def deletions_data_time(self): """ Returns a list of timestamps of when an edge is deleted @@ -237,11 +238,9 @@ class Edge: Returns: A list of DateTime objects """ - @property def dst(self): """Returns the destination node of the edge.""" - @property def earliest_date_time(self): """ @@ -250,7 +249,6 @@ class Edge: Returns: the earliest datetime of an edge """ - @property def earliest_time(self): """ @@ -259,7 +257,6 @@ class Edge: Returns: (int) The earliest time of an edge """ - @property def end(self): """ @@ -268,7 +265,6 @@ class Edge: Returns: The latest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def end_date_time(self): """ @@ -277,7 +273,6 @@ class Edge: Returns: The latest datetime that this Edge is valid or None if the Edge is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -289,7 +284,6 @@ class Edge: Returns: Edge: The layered view """ - def exclude_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -301,7 +295,6 @@ class Edge: Returns: Edge: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -311,7 +304,6 @@ class Edge: Returns: Edge: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -321,7 +313,6 @@ class Edge: Returns: Edge: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -334,16 +325,11 @@ class Edge: Returns: A `WindowSet` object. """ - def explode(self): """Explodes an edge and returns all instances it had been updated as seperate edges""" - - def explode_layers(self): - ... - + def explode_layers(self): ... def has_layer(self, name): - """ Check if Edge has the layer `"name"`""" - + """Check if Edge has the layer `"name"`""" def history(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -352,7 +338,6 @@ class Edge: A list of unix timestamps. """ - def history_date_time(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -361,20 +346,15 @@ class Edge: A list of timestamps. 
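# Editor's sketch of the Edge view/history API documented in the stubs above
# (history, before/after views, explode). The graph, timestamps and the
# "Weight" property are invented for illustration; only the method names come
# from this file.
from raphtory import Graph

g = Graph()
g.add_edge(1, "a", "b", properties={"Weight": 1})
g.add_edge(3, "a", "b", properties={"Weight": 2})

e = g.edge("a", "b")
print(e.history())            # unix timestamps of every update to this edge
print(e.before(3).history())  # view restricted to events strictly before t=3
for ee in e.explode():        # one exploded edge per individual update
    print(ee.time, ee.properties)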
""" - @property def id(self): """The id of the edge.""" - def is_deleted(self): """Check if the edge is currently deleted""" - def is_self_loop(self): """Check if the edge is on the same node""" - def is_valid(self): """Check if the edge is currently valid (i.e., not deleted)""" - @property def latest_date_time(self): """ @@ -383,7 +363,6 @@ class Edge: Returns: (datetime) the latest datetime of an edge """ - @property def latest_time(self): """ @@ -392,7 +371,6 @@ class Edge: Returns: (int) The latest time of an edge """ - def layer(self, name): """ Return a view of Edge containing the layer `"name"` @@ -401,7 +379,6 @@ class Edge: Returns: Edge: The layered view """ - @property def layer_name(self): """ @@ -410,7 +387,6 @@ class Edge: Returns: (List) The name of the layer """ - @property def layer_names(self): """ @@ -419,7 +395,6 @@ class Edge: Returns: (List) The name of the layer """ - def layers(self, names): """ Return a view of Edge containing all layers `names` @@ -431,11 +406,9 @@ class Edge: Returns: Edge: The layered view """ - @property def nbr(self): """Returns the node at the other end of the edge (same as `dst()` for out-edges and `src()` for in-edges)""" - @property def properties(self): """ @@ -444,7 +417,6 @@ class Edge: Returns: Properties on the Edge. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -458,7 +430,6 @@ class Edge: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -468,7 +439,6 @@ class Edge: Returns: A Edge object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -479,7 +449,6 @@ class Edge: Returns: A Edge object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -487,11 +456,9 @@ class Edge: Arguments: """ - @property def src(self): """Returns the source node of the edge.""" - @property def start(self): """ @@ -500,7 +467,6 @@ class Edge: Returns: The earliest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def start_date_time(self): """ @@ -509,7 +475,6 @@ class Edge: Returns: The earliest datetime that this Edge is valid or None if the Edge is valid for all times. """ - @property def time(self): """ @@ -518,7 +483,6 @@ class Edge: Returns: (int) The time of an exploded edge """ - def valid_layers(self, names): """ Return a view of Edge containing all layers `names` @@ -530,7 +494,6 @@ class Edge: Returns: Edge: The layered view """ - def window(self, start, end): """ Create a view of the Edge including all events between `start` (inclusive) and `end` (exclusive) @@ -542,17 +505,15 @@ class Edge: Returns: r A Edge object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Edge""" + """Get the window size (difference between start and end) for this Edge""" class Edges: """A list of edges that can be iterated over.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Edges including all events after `start` (exclusive). @@ -563,7 +524,6 @@ class Edges: Returns: A Edges object. """ - def at(self, time): """ Create a view of the Edges including all events at `time`. 
@@ -574,7 +534,6 @@ class Edges: Returns: A Edges object. """ - def before(self, end): """ Create a view of the Edges including all events before `end` (exclusive). @@ -585,7 +544,6 @@ class Edges: Returns: A Edges object. """ - def collect(self): """ Collect all edges into a list @@ -593,10 +551,8 @@ class Edges: Returns: list[Edge]: the list of edges """ - def count(self): """Returns the number of edges""" - @property def date_time(self): """ @@ -605,14 +561,12 @@ class Edges: Returns: A list of date times. """ - def default_layer(self): """ Return a view of Edges containing only the default edge layer Returns: Edges: The layered view """ - def deletions(self): """ Returns all timestamps of edges where an edge is deleted @@ -620,7 +574,6 @@ class Edges: Returns: A list of lists of unix timestamps """ - def deletions_date_time(self): """ Returns all timestamps of edges where an edge is deleted @@ -628,11 +581,9 @@ class Edges: Returns: A list of lists of DateTime objects """ - @property def dst(self): """Returns the destination node of the edge.""" - @property def earliest_date_time(self): """ @@ -641,7 +592,6 @@ class Edges: Returns: Earliest date time of the edges. """ - @property def earliest_time(self): """ @@ -650,7 +600,6 @@ class Edges: Returns: Earliest time of the edges. """ - @property def end(self): """ @@ -659,7 +608,6 @@ class Edges: Returns: The latest time that this Edges is valid or None if the Edges is valid for all times. """ - @property def end_date_time(self): """ @@ -668,7 +616,6 @@ class Edges: Returns: The latest datetime that this Edges is valid or None if the Edges is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Edges containing all layers except the excluded `name` @@ -680,7 +627,6 @@ class Edges: Returns: Edges: The layered view """ - def exclude_layers(self, names): """ Return a view of Edges containing all layers except the excluded `names` @@ -692,7 +638,6 @@ class Edges: Returns: Edges: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Edges containing all layers except the excluded `name` @@ -702,7 +647,6 @@ class Edges: Returns: Edges: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Edges containing all layers except the excluded `names` @@ -712,7 +656,6 @@ class Edges: Returns: Edges: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -725,16 +668,11 @@ class Edges: Returns: A `WindowSet` object. """ - def explode(self): """Explodes an edge and returns all instances it had been updated as seperate edges""" - - def explode_layers(self): - ... - + def explode_layers(self): ... def has_layer(self, name): - """ Check if Edges has the layer `"name"`""" - + """Check if Edges has the layer `"name"`""" def history(self): """ Returns all timestamps of edges, when an edge is added or change to an edge is made. @@ -743,7 +681,6 @@ class Edges: A list of lists unix timestamps. """ - def history_date_time(self): """ Returns all timestamps of edges, when an edge is added or change to an edge is made. @@ -752,20 +689,15 @@ class Edges: A list of lists of timestamps. """ - @property def id(self): """Returns all ids of the edges.""" - def is_deleted(self): """Check if the edges are deleted""" - def is_self_loop(self): """Check if the edges are on the same node""" - def is_valid(self): """Check if the edges are valid (i.e. 
not deleted)""" - @property def latest_date_time(self): """ @@ -774,7 +706,6 @@ class Edges: Returns: Latest date time of the edges. """ - @property def latest_time(self): """ @@ -783,7 +714,6 @@ class Edges: Returns: Latest time of the edges. """ - def layer(self, name): """ Return a view of Edges containing the layer `"name"` @@ -792,7 +722,6 @@ class Edges: Returns: Edges: The layered view """ - @property def layer_name(self): """ @@ -801,7 +730,6 @@ class Edges: Returns: The name of the layer """ - @property def layer_names(self): """ @@ -810,7 +738,6 @@ class Edges: Returns: A list of layer names """ - def layers(self, names): """ Return a view of Edges containing all layers `names` @@ -822,15 +749,12 @@ class Edges: Returns: Edges: The layered view """ - @property def nbr(self): """Returns the node at the other end of the edge (same as `dst()` for out-edges and `src()` for in-edges)""" - @property def properties(self): """Returns all properties of the edges""" - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -844,7 +768,6 @@ class Edges: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -854,7 +777,6 @@ class Edges: Returns: A Edges object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -865,7 +787,6 @@ class Edges: Returns: A Edges object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -873,11 +794,9 @@ class Edges: Arguments: """ - @property def src(self): """Returns the source node of the edge.""" - @property def start(self): """ @@ -886,7 +805,6 @@ class Edges: Returns: The earliest time that this Edges is valid or None if the Edges is valid for all times. """ - @property def start_date_time(self): """ @@ -895,7 +813,6 @@ class Edges: Returns: The earliest datetime that this Edges is valid or None if the Edges is valid for all times. """ - @property def time(self): """ @@ -904,8 +821,9 @@ class Edges: Returns: Time of edge """ - - def to_df(self, include_property_history=True, convert_datetime=False, explode=False): + def to_df( + self, include_property_history=True, convert_datetime=False, explode=False + ): """ Converts the graph's edges into a Pandas DataFrame. @@ -924,7 +842,6 @@ class Edges: Returns: If successful, this PyObject will be a Pandas DataFrame. """ - def valid_layers(self, names): """ Return a view of Edges containing all layers `names` @@ -936,7 +853,6 @@ class Edges: Returns: Edges: The layered view """ - def window(self, start, end): """ Create a view of the Edges including all events between `start` (inclusive) and `end` (exclusive) @@ -948,17 +864,15 @@ class Edges: Returns: r A Edges object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Edges""" + """Get the window size (difference between start and end) for this Edges""" class Graph: """A temporal graph.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties): """ Adds static properties to the graph. 
@@ -969,7 +883,6 @@ class Graph: Returns: None """ - def add_edge(self, timestamp, src, dst, properties=None, layer=None): """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -984,7 +897,6 @@ class Graph: Returns: None """ - def add_node(self, timestamp, id, properties=None, node_type=None): """ Adds a new node with the given id and properties to the graph. @@ -997,7 +909,6 @@ class Graph: Returns: None """ - def add_property(self, timestamp, properties): """ Adds properties to the graph. @@ -1009,7 +920,6 @@ class Graph: Returns: None """ - def after(self, start): """ Create a view of the GraphView including all events after `start` (exclusive). @@ -1020,7 +930,6 @@ class Graph: Returns: A GraphView object. """ - def at(self, time): """ Create a view of the GraphView including all events at `time`. @@ -1031,7 +940,6 @@ class Graph: Returns: A GraphView object. """ - def before(self, end): """ Create a view of the GraphView including all events before `end` (exclusive). @@ -1042,18 +950,8 @@ class Graph: Returns: A GraphView object. """ - - def cache(self, path): - """ - Write Graph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. If the file already exists its contents are overwritten. - - Arguments: - path (str): The path to the cache file - """ - + def bincode(self): + """Get bincode encoded graph""" def count_edges(self): """ Number of edges in the graph @@ -1061,7 +959,6 @@ class Graph: Returns: the number of edges in the graph """ - def count_nodes(self): """ Number of nodes in the graph @@ -1069,7 +966,6 @@ class Graph: Returns: the number of nodes in the graph """ - def count_temporal_edges(self): """ Number of edges in the graph @@ -1077,26 +973,12 @@ class Graph: Returns: the number of temporal edges in the graph """ - def default_layer(self): """ Return a view of GraphView containing only the default edge layer Returns: GraphView: The layered view """ - - @staticmethod - def deserialise(bytes): - """ - Load Graph from serialised bytes. - - Arguments: - bytes (Bytes): The serialised bytes to decode - - Returns: - Graph - """ - @property def earliest_date_time(self): """ @@ -1105,7 +987,6 @@ class Graph: Returns: the datetime of the earliest activity in the graph """ - @property def earliest_time(self): """ @@ -1114,7 +995,6 @@ class Graph: Returns: the timestamp of the earliest activity in the graph """ - def edge(self, src, dst): """ Gets the edge with the specified source and destination nodes @@ -1126,7 +1006,6 @@ class Graph: Returns: the edge with the specified source and destination nodes, or None if the edge does not exist """ - @property def edges(self): """ @@ -1135,7 +1014,6 @@ class Graph: Returns: the edges in the graph """ - @property def end(self): """ @@ -1144,7 +1022,6 @@ class Graph: Returns: The latest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def end_date_time(self): """ @@ -1153,7 +1030,6 @@ class Graph: Returns: The latest datetime that this GraphView is valid or None if the GraphView is valid for all times. 
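# Editor's sketch of the basic mutation/count calls documented above
# (add_node, add_edge, count_edges, count_temporal_edges). Node ids,
# timestamps, layers and property names are invented for illustration.
from raphtory import Graph

g = Graph()
g.add_node(1, "alice", properties={"score": 10}, node_type="person")
g.add_node(2, "bob")
g.add_edge(2, "alice", "bob", properties={"Weight": 1}, layer="talks")
g.add_edge(5, "alice", "bob", properties={"Weight": 2}, layer="talks")

print(g.count_nodes())           # 2 unique nodes
print(g.count_edges())           # 1 unique (src, dst, layer) edge
print(g.count_temporal_edges())  # 2 individual edge updates
print(g.earliest_time, g.latest_time)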
""" - def exclude_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -1165,7 +1041,6 @@ class Graph: Returns: GraphView: The layered view """ - def exclude_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -1177,7 +1052,6 @@ class Graph: Returns: GraphView: The layered view """ - def exclude_nodes(self, nodes): """ Returns a subgraph given a set of nodes that are excluded from the subgraph @@ -1188,7 +1062,6 @@ class Graph: Returns: GraphView - Returns the subgraph """ - def exclude_valid_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -1198,7 +1071,6 @@ class Graph: Returns: GraphView: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -1208,7 +1080,6 @@ class Graph: Returns: GraphView: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -1221,7 +1092,6 @@ class Graph: Returns: A `WindowSet` object. """ - def find_edges(self, properties_dict): """ Get the edges that match the properties name and value @@ -1230,7 +1100,6 @@ class Graph: Returns: the edges that match the properties name and value """ - def find_nodes(self, properties_dict): """ Get the nodes that match the properties name and value @@ -1239,7 +1108,9 @@ class Graph: Returns: the nodes that match the properties name and value """ - + @staticmethod + def from_bincode(bytes): + """Creates a graph from a bincode encoded graph""" def get_all_node_types(self): """ Returns all the node types in the graph. @@ -1247,7 +1118,6 @@ class Graph: Returns: A list of node types """ - def has_edge(self, src, dst): """ Returns true if the graph contains the specified edge @@ -1259,10 +1129,8 @@ class Graph: Returns: true if the graph contains the specified edge, false otherwise """ - def has_layer(self, name): - """ Check if GraphView has the layer `"name"`""" - + """Check if GraphView has the layer `"name"`""" def has_node(self, id): """ Returns true if the graph contains the specified node @@ -1273,7 +1141,6 @@ class Graph: Returns: true if the graph contains the specified node, false otherwise """ - def import_edge(self, edge, force=False): """ Import a single edge into the graph. @@ -1289,7 +1156,6 @@ class Graph: Returns: Result, GraphError> - A Result object which is Ok if the edge was successfully imported, and Err otherwise. """ - def import_edges(self, edges, force=False): """ Import multiple edges into the graph. @@ -1303,7 +1169,6 @@ class Graph: force (boolean) - An optional boolean flag indicating whether to force the import of the edges. """ - def import_node(self, node, force=False): """ Import a single node into the graph. @@ -1318,7 +1183,6 @@ class Graph: Returns: Result, GraphError> - A Result object which is Ok if the node was successfully imported, and Err otherwise. """ - def import_nodes(self, nodes, force=False): """ Import multiple nodes into the graph. @@ -1332,7 +1196,6 @@ class Graph: force (boolean) - An optional boolean flag indicating whether to force the import of the nodes. """ - def index(self): """ Indexes all node and edge properties. @@ -1342,7 +1205,6 @@ class Graph: Returns: GraphIndex - Returns a GraphIndex """ - def largest_connected_component(self): """ Gives the large connected component of a graph. 
@@ -1354,7 +1216,6 @@ class Graph: A raphtory graph, which essentially is a sub-graph of the graph `g` """ - @property def latest_date_time(self): """ @@ -1363,7 +1224,6 @@ class Graph: Returns: the datetime of the latest activity in the graph """ - @property def latest_time(self): """ @@ -1372,7 +1232,6 @@ class Graph: Returns: the timestamp of the latest activity in the graph """ - def layer(self, name): """ Return a view of GraphView containing the layer `"name"` @@ -1381,7 +1240,6 @@ class Graph: Returns: GraphView: The layered view """ - def layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -1393,23 +1251,16 @@ class Graph: Returns: GraphView: The layered view """ - - @staticmethod - def load_cached(path): - """ - Load Graph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. - - Arguments: - path (str): The path to the cache file - - Returns: - Graph - """ - - def load_edge_props_from_pandas(self, df, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edge_props_from_pandas( + self, + df, + src, + dst, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edge properties from a Pandas DataFrame. @@ -1417,16 +1268,27 @@ class Graph: df (Dataframe): The Pandas DataFrame containing edge information. src (str): The column name for the source node. dst (str): The column name for the destination node. - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): Layer name. Defaults to None. (optional) - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): The edge layer name (optional) Defaults to None. + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_edge_props_from_parquet(self, parquet_path, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + Raises: + GraphError: If the operation fails. + """ + def load_edge_props_from_parquet( + self, + parquet_path, + src, + dst, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edge properties from parquet file @@ -1434,183 +1296,200 @@ class Graph: parquet_path (str): Parquet file or directory of Parquet files path containing edge information. src (str): The column name for the source node. dst (str): The column name for the destination node. - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): Layer name. Defaults to None. 
(optional) - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): The edge layer name (optional) Defaults to None. + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_edges_from_pandas(self, df, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + Raises: + GraphError: If the operation fails. + """ + def load_edges_from_pandas( + self, + df, + time, + src, + dst, + properties=None, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edges from a Pandas DataFrame into the graph. Arguments: df (Dataframe): The Pandas DataFrame containing the edges. + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - properties (List): List of edge property column names. Defaults to None. (optional) - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dateframe or if it should be used directly as the layer for all edges (optional) defaults to True. - - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_edges_from_parquet(self, parquet_path, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + properties (List[str]): List of edge property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edges_from_parquet( + self, + parquet_path, + time, + src, + dst, + properties=None, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edges from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing edges + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. 
- time (str): The column name for the update timestamps. - properties (List): List of edge property column names. Defaults to None. (optional) - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - + properties (List[str]): List of edge property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. + Raises: + GraphError: If the operation fails. + """ @staticmethod - def load_from_file(path): + def load_from_file(path, force=False): """ - Load Graph from a file. + Loads a graph from the given path. Arguments: - path (str): The path to the file. + path (str): The path to the graph. Returns: - Graph - """ - - @staticmethod - def load_from_pandas(edge_df, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_df=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): - """ - Load a graph from a Pandas DataFrame. - - Args: - edge_df (pandas.DataFrame): The DataFrame containing the edges. - edge_src (str): The column name for the source node ids. - edge_dst (str): The column name for the destination node ids. - edge_time (str): The column name for the timestamps. - edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - edge_layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. - node_id (str): The column name for the node ids (optional) Defaults to None. - node_time (str): The column name for the node timestamps (optional) Defaults to None. - node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. 
- node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - - Returns: - Graph: The loaded Graph object. + Graph: The loaded graph. """ - - @staticmethod - def load_from_parquet(edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_parquet_path=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): - """ - Load a graph from Parquet file. - - Args: - edge_parquet_path (str): Parquet file or directory of Parquet files containing the edges. - edge_src (str): The column name for the source node ids. - edge_dst (str): The column name for the destination node ids. - edge_time (str): The column name for the timestamps. - edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - edge_layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. - node_id (str): The column name for the node ids (optional) Defaults to None. - node_time (str): The column name for the node timestamps (optional) Defaults to None. - node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - - Returns: - Graph: The loaded Graph object. - """ - - def load_node_props_from_pandas(self, df, id, const_properties=None, shared_const_properties=None): + def load_node_props_from_pandas( + self, + df, + id, + node_type=None, + node_type_col=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load node properties from a Pandas DataFrame. Arguments: df (Dataframe): The Pandas DataFrame containing node information. id(str): The column name for the node IDs. - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. 
(cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_node_props_from_parquet(self, parquet_path, id, const_properties=None, shared_const_properties=None): + Raises: + GraphError: If the operation fails. + """ + def load_node_props_from_parquet( + self, + parquet_path, + id, + node_type=None, + node_type_col=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load node properties from a parquet file. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing node information. id(str): The column name for the node IDs. - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_nodes_from_pandas(self, df, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_pandas( + self, + df, + time, + id, + node_type=None, + node_type_col=None, + properties=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load nodes from a Pandas DataFrame into the graph. Arguments: df (pandas.DataFrame): The Pandas DataFrame containing the nodes. - id (str): The column name for the node IDs. time (str): The column name for the timestamps. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - properties (List): List of node property column names. Defaults to None. (optional) - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_nodes_from_parquet(self, parquet_path, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + id (str): The column name for the node IDs. + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. 
(cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + properties (List[str]): List of node property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_parquet( + self, + parquet_path, + time, + id, + node_type=None, + node_type_col=None, + properties=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load nodes from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files containing the nodes - id (str): The column name for the node IDs. time (str): The column name for the timestamps. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - properties (List): List of node property column names. Defaults to None. (optional) - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + id (str): The column name for the node IDs. + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + properties (List[str]): List of node property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. + Raises: + GraphError: If the operation fails. + """ def materialize(self): """ Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph @@ -1618,7 +1497,6 @@ class Graph: Returns: GraphView - Returns a graph clone """ - def node(self, id): """ Gets the node with the specified id @@ -1629,7 +1507,6 @@ class Graph: Returns: the node with the specified id, or None if the node does not exist """ - @property def nodes(self): """ @@ -1638,10 +1515,10 @@ class Graph: Returns: the nodes in the graph """ - + def persist_as_disk_graph(self, graph_dir): + """save graph in disk_graph format and memory map the result""" def persistent_graph(self): """Get persistent graph""" - @property def properties(self): """ @@ -1651,7 +1528,6 @@ class Graph: Returns: HashMap - Properties paired with their names """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -1665,23 +1541,16 @@ class Graph: Returns: A `WindowSet` object. 
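# Editor's sketch of the renamed loader arguments introduced by this patch
# (constant_properties, layer/layer_col, node_type/node_type_col, and the
# time-first argument order). The DataFrames and column names below are
# assumptions, not part of this patch.
import pandas as pd
from raphtory import Graph

edges_df = pd.DataFrame(
    {"time": [1, 2], "src": ["a", "b"], "dst": ["b", "c"],
     "weight": [1.0, 2.0], "relation": ["friends", "colleagues"]}
)
nodes_df = pd.DataFrame(
    {"time": [1, 1, 2], "id": ["a", "b", "c"],
     "kind": ["person", "person", "bot"]}
)

g = Graph()
# layer_col reads the layer per row; pass layer="..." instead for one fixed layer
# (the two options cannot be combined).
g.load_edges_from_pandas(edges_df, time="time", src="src", dst="dst",
                         properties=["weight"], layer_col="relation")
# node_type_col reads the type per row; node_type="..." would apply one type to all.
g.load_nodes_from_pandas(nodes_df, time="time", id="id", node_type_col="kind")
print(g)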
""" - def save_to_file(self, path): """ - Saves the Graph to the given path. + Saves the graph to the given path. Arguments: - path (str): The path to the file. - """ - - def serialise(self): - """ - Serialise Graph to bytes. + path (str): The path to the graph. Returns: - Bytes + None """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -1691,7 +1560,6 @@ class Graph: Returns: A GraphView object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -1702,7 +1570,6 @@ class Graph: Returns: A GraphView object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -1710,7 +1577,6 @@ class Graph: Arguments: """ - @property def start(self): """ @@ -1719,7 +1585,6 @@ class Graph: Returns: The earliest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def start_date_time(self): """ @@ -1728,7 +1593,6 @@ class Graph: Returns: The earliest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def subgraph(self, nodes): """ Returns a subgraph given a set of nodes @@ -1739,7 +1603,6 @@ class Graph: Returns: GraphView - Returns the subgraph """ - def subgraph_node_types(self, node_types): """ Returns a subgraph filtered by node types given a set of node types @@ -1750,8 +1613,15 @@ class Graph: Returns: GraphView - Returns the subgraph """ - - def to_networkx(self, explode_edges=False, include_node_properties=True, include_edge_properties=True, include_update_history=True, include_property_history=True): + def to_disk_graph(self, graph_dir): ... + def to_networkx( + self, + explode_edges=False, + include_node_properties=True, + include_edge_properties=True, + include_update_history=True, + include_property_history=True, + ): """ Returns a graph with NetworkX. @@ -1769,8 +1639,18 @@ class Graph: Returns: A Networkx MultiDiGraph. """ - - def to_pyvis(self, explode_edges=False, edge_color="#000000", shape=None, node_image=None, edge_weight=None, edge_label=None, colour_nodes_by_type=False, notebook=False, **kwargs): + def to_pyvis( + self, + explode_edges=False, + edge_color="#000000", + shape=None, + node_image=None, + edge_weight=None, + edge_label=None, + colour_nodes_by_type=False, + notebook=False, + **kwargs, + ): """ Draw a graph with PyVis. Pyvis is a required dependency. If you intend to use this function make sure that you install Pyvis @@ -1794,11 +1674,9 @@ class Graph: Returns: A pyvis network """ - @property def unique_layers(self): """Return all the layer ids in the graph""" - def update_constant_properties(self, properties): """ Updates static properties to the graph. 
@@ -1809,7 +1687,6 @@ class Graph: Returns: None """ - def valid_layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -1821,8 +1698,16 @@ class Graph: Returns: GraphView: The layered view """ - - def vectorise(self, embedding, cache=None, overwrite_cache=False, graph_document=None, node_document=None, edge_document=None, verbose=False): + def vectorise( + self, + embedding, + cache=None, + overwrite_cache=False, + graph_document=None, + node_document=None, + edge_document=None, + verbose=False, + ): """ Create a VectorisedGraph from the current graph @@ -1837,7 +1722,6 @@ class Graph: Returns: A VectorisedGraph with all the documents/embeddings computed and with an initial empty selection """ - def window(self, start, end): """ Create a view of the GraphView including all events between `start` (inclusive) and `end` (exclusive) @@ -1849,13 +1733,9 @@ class Graph: Returns: r A GraphView object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this GraphView""" - - def write_updates(self): - """Persist the new updates by appending them to the cache file.""" + """Get the window size (difference between start and end) for this GraphView""" class GraphIndex: """ @@ -1866,8 +1746,9 @@ class GraphIndex: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - - def fuzzy_search_edges(self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0): + def fuzzy_search_edges( + self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0 + ): """ Searches for edges which match the given query. This uses Tantivy's fuzzy search. @@ -1881,8 +1762,9 @@ class GraphIndex: Returns: A list of edges which match the query. The list will be empty if no edges match the query. """ - - def fuzzy_search_nodes(self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0): + def fuzzy_search_nodes( + self, query, limit=25, offset=0, prefix=False, levenshtein_distance=0 + ): """ Searches for nodes which match the given query. This uses Tantivy's fuzzy search. If you would like to better understand the query syntax, please visit our documentation at https://docs.raphtory.com @@ -1897,7 +1779,6 @@ class GraphIndex: Returns: A list of nodes which match the query. The list will be empty if no nodes match. """ - def search_edges(self, query, limit=25, offset=0): """ Searches for edges which match the given query. This uses Tantivy's exact search. @@ -1910,7 +1791,6 @@ class GraphIndex: Returns: A list of edges which match the query. The list will be empty if no edges match the query. """ - def search_nodes(self, query, limit=25, offset=0): """ Searches for nodes which match the given query. This uses Tantivy's exact search. @@ -1925,10 +1805,8 @@ class GraphIndex: """ class MutableEdge: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties, layer=None): """ Add constant properties to an edge in the graph. @@ -1944,7 +1822,6 @@ class MutableEdge: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def add_updates(self, t, properties=None, layer=None): """ Add updates to an edge in the graph at a specified time. @@ -1959,7 +1836,6 @@ class MutableEdge: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. 
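A sketch of the GraphIndex search methods above; it assumes an index() method on the graph (as documented for PersistentGraph later in this file), and the query field name is illustrative Tantivy syntax:

from raphtory import Graph

g = Graph()
g.add_node(1, "alice", properties={"title": "engineer"})
g.add_node(2, "alicia", properties={"title": "analyst"})

index = g.index()  # indexes all node and edge properties with Tantivy

# Exact search versus fuzzy search tolerating one edit.
exact = index.search_nodes("title:engineer", limit=10)
fuzzy = index.fuzzy_search_nodes("title:enginer", levenshtein_distance=1)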
""" - def after(self, start): """ Create a view of the Edge including all events after `start` (exclusive). @@ -1970,7 +1846,6 @@ class MutableEdge: Returns: A Edge object. """ - def at(self, time): """ Create a view of the Edge including all events at `time`. @@ -1981,7 +1856,6 @@ class MutableEdge: Returns: A Edge object. """ - def before(self, end): """ Create a view of the Edge including all events before `end` (exclusive). @@ -1992,7 +1866,6 @@ class MutableEdge: Returns: A Edge object. """ - @property def date_time(self): """ @@ -2001,14 +1874,12 @@ class MutableEdge: Returns: (datetime) the datetime of an exploded edge """ - def default_layer(self): """ Return a view of Edge containing only the default edge layer Returns: Edge: The layered view """ - def deletions(self): """ Returns a list of timestamps of when an edge is deleted @@ -2016,7 +1887,6 @@ class MutableEdge: Returns: A list of unix timestamps """ - def deletions_data_time(self): """ Returns a list of timestamps of when an edge is deleted @@ -2024,11 +1894,9 @@ class MutableEdge: Returns: A list of DateTime objects """ - @property def dst(self): """Returns the destination node of the edge.""" - @property def earliest_date_time(self): """ @@ -2037,7 +1905,6 @@ class MutableEdge: Returns: the earliest datetime of an edge """ - @property def earliest_time(self): """ @@ -2046,7 +1913,6 @@ class MutableEdge: Returns: (int) The earliest time of an edge """ - @property def end(self): """ @@ -2055,7 +1921,6 @@ class MutableEdge: Returns: The latest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def end_date_time(self): """ @@ -2064,7 +1929,6 @@ class MutableEdge: Returns: The latest datetime that this Edge is valid or None if the Edge is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -2076,7 +1940,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def exclude_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -2088,7 +1951,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Edge containing all layers except the excluded `name` @@ -2098,7 +1960,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Edge containing all layers except the excluded `names` @@ -2108,7 +1969,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2121,16 +1981,11 @@ class MutableEdge: Returns: A `WindowSet` object. """ - def explode(self): """Explodes an edge and returns all instances it had been updated as seperate edges""" - - def explode_layers(self): - ... - + def explode_layers(self): ... def has_layer(self, name): - """ Check if Edge has the layer `"name"`""" - + """Check if Edge has the layer `"name"`""" def history(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -2139,7 +1994,6 @@ class MutableEdge: A list of unix timestamps. """ - def history_date_time(self): """ Returns a list of timestamps of when an edge is added or change to an edge is made. @@ -2148,20 +2002,15 @@ class MutableEdge: A list of timestamps. 
""" - @property def id(self): """The id of the edge.""" - def is_deleted(self): """Check if the edge is currently deleted""" - def is_self_loop(self): """Check if the edge is on the same node""" - def is_valid(self): """Check if the edge is currently valid (i.e., not deleted)""" - @property def latest_date_time(self): """ @@ -2170,7 +2019,6 @@ class MutableEdge: Returns: (datetime) the latest datetime of an edge """ - @property def latest_time(self): """ @@ -2179,7 +2027,6 @@ class MutableEdge: Returns: (int) The latest time of an edge """ - def layer(self, name): """ Return a view of Edge containing the layer `"name"` @@ -2188,7 +2035,6 @@ class MutableEdge: Returns: Edge: The layered view """ - @property def layer_name(self): """ @@ -2197,7 +2043,6 @@ class MutableEdge: Returns: (List) The name of the layer """ - @property def layer_names(self): """ @@ -2206,7 +2051,6 @@ class MutableEdge: Returns: (List) The name of the layer """ - def layers(self, names): """ Return a view of Edge containing all layers `names` @@ -2218,11 +2062,9 @@ class MutableEdge: Returns: Edge: The layered view """ - @property def nbr(self): """Returns the node at the other end of the edge (same as `dst()` for out-edges and `src()` for in-edges)""" - @property def properties(self): """ @@ -2231,7 +2073,6 @@ class MutableEdge: Returns: Properties on the Edge. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -2245,7 +2086,6 @@ class MutableEdge: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -2255,7 +2095,6 @@ class MutableEdge: Returns: A Edge object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -2266,7 +2105,6 @@ class MutableEdge: Returns: A Edge object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -2274,11 +2112,9 @@ class MutableEdge: Arguments: """ - @property def src(self): """Returns the source node of the edge.""" - @property def start(self): """ @@ -2287,7 +2123,6 @@ class MutableEdge: Returns: The earliest time that this Edge is valid or None if the Edge is valid for all times. """ - @property def start_date_time(self): """ @@ -2296,7 +2131,6 @@ class MutableEdge: Returns: The earliest datetime that this Edge is valid or None if the Edge is valid for all times. """ - @property def time(self): """ @@ -2305,7 +2139,6 @@ class MutableEdge: Returns: (int) The time of an exploded edge """ - def update_constant_properties(self, properties, layer=None): """ Update constant properties of an edge in the graph overwriting existing values. @@ -2321,7 +2154,6 @@ class MutableEdge: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def valid_layers(self, names): """ Return a view of Edge containing all layers `names` @@ -2333,7 +2165,6 @@ class MutableEdge: Returns: Edge: The layered view """ - def window(self, start, end): """ Create a view of the Edge including all events between `start` (inclusive) and `end` (exclusive) @@ -2345,16 +2176,13 @@ class MutableEdge: Returns: r A Edge object. 
""" - @property def window_size(self): - """ Get the window size (difference between start and end) for this Edge""" + """Get the window size (difference between start and end) for this Edge""" class MutableNode: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties): """ Add constant properties to a node in the graph. @@ -2369,7 +2197,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def add_updates(self, t, properties=None): """ Add updates to a node in the graph at a specified time. @@ -2384,7 +2211,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def after(self, start): """ Create a view of the Node including all events after `start` (exclusive). @@ -2395,7 +2221,6 @@ class MutableNode: Returns: A Node object. """ - def at(self, time): """ Create a view of the Node including all events at `time`. @@ -2406,7 +2231,6 @@ class MutableNode: Returns: A Node object. """ - def before(self, end): """ Create a view of the Node including all events before `end` (exclusive). @@ -2417,14 +2241,12 @@ class MutableNode: Returns: A Node object. """ - def default_layer(self): """ Return a view of Node containing only the default edge layer Returns: Node: The layered view """ - def degree(self): """ Get the degree of this node (i.e., the number of edges that are incident to it). @@ -2432,7 +2254,6 @@ class MutableNode: Returns The degree of this node. """ - @property def earliest_date_time(self): """ @@ -2441,7 +2262,6 @@ class MutableNode: Returns: The earliest datetime that the node exists as an integer. """ - @property def earliest_time(self): """ @@ -2450,7 +2270,6 @@ class MutableNode: Returns: The earliest time that the node exists as an integer. """ - @property def edges(self): """ @@ -2460,7 +2279,6 @@ class MutableNode: An iterator over the edges that are incident to this node. """ - @property def end(self): """ @@ -2469,7 +2287,6 @@ class MutableNode: Returns: The latest time that this Node is valid or None if the Node is valid for all times. """ - @property def end_date_time(self): """ @@ -2478,7 +2295,6 @@ class MutableNode: Returns: The latest datetime that this Node is valid or None if the Node is valid for all times. """ - def exclude_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2490,7 +2306,6 @@ class MutableNode: Returns: Node: The layered view """ - def exclude_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2502,7 +2317,6 @@ class MutableNode: Returns: Node: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2512,7 +2326,6 @@ class MutableNode: Returns: Node: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2522,7 +2335,6 @@ class MutableNode: Returns: Node: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2535,10 +2347,8 @@ class MutableNode: Returns: A `WindowSet` object. 
""" - def has_layer(self, name): - """ Check if Node has the layer `"name"`""" - + """Check if Node has the layer `"name"`""" def history(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2546,7 +2356,6 @@ class MutableNode: Returns: A list of unix timestamps of the event history of node. """ - def history_date_time(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2555,7 +2364,6 @@ class MutableNode: A list of timestamps of the event history of node. """ - @property def id(self): """ @@ -2565,7 +2373,6 @@ class MutableNode: Returns: The id of the node as an integer. """ - def in_degree(self): """ Get the in-degree of this node (i.e., the number of edges that are incident to it from other nodes). @@ -2573,7 +2380,6 @@ class MutableNode: Returns: The in-degree of this node. """ - @property def in_edges(self): """ @@ -2583,7 +2389,6 @@ class MutableNode: An iterator over the edges that point into this node. """ - @property def in_neighbours(self): """ @@ -2593,7 +2398,6 @@ class MutableNode: An iterator over the neighbours of this node that point into this node. """ - @property def latest_date_time(self): """ @@ -2605,7 +2409,6 @@ class MutableNode: Returns: The latest datetime that the node exists as an integer. """ - @property def latest_time(self): """ @@ -2614,7 +2417,6 @@ class MutableNode: Returns: The latest time that the node exists as an integer. """ - def layer(self, name): """ Return a view of Node containing the layer `"name"` @@ -2623,7 +2425,6 @@ class MutableNode: Returns: Node: The layered view """ - def layers(self, names): """ Return a view of Node containing all layers `names` @@ -2635,7 +2436,6 @@ class MutableNode: Returns: Node: The layered view """ - @property def name(self): """ @@ -2644,7 +2444,6 @@ class MutableNode: Returns: The name of the node as a string. """ - @property def neighbours(self): """ @@ -2654,11 +2453,9 @@ class MutableNode: An iterator over the neighbours of this node. """ - @property def node_type(self): """Returns the type of node""" - def out_degree(self): """ Get the out-degree of this node (i.e., the number of edges that are incident to it from this node). @@ -2666,7 +2463,6 @@ class MutableNode: Returns: The out-degree of this node. """ - @property def out_edges(self): """ @@ -2676,7 +2472,6 @@ class MutableNode: An iterator over the edges that point out of this node. """ - @property def out_neighbours(self): """ @@ -2686,7 +2481,6 @@ class MutableNode: An iterator over the neighbours of this node that point out of this node. """ - @property def properties(self): """ @@ -2695,7 +2489,6 @@ class MutableNode: Returns: A list of properties. """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -2709,7 +2502,6 @@ class MutableNode: Returns: A `WindowSet` object. """ - def set_node_type(self, new_type): """ Set the type on the node. This only works if the type has not been previously set, otherwise will @@ -2721,7 +2513,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -2731,7 +2522,6 @@ class MutableNode: Returns: A Node object. 
""" - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -2742,7 +2532,6 @@ class MutableNode: Returns: A Node object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -2750,7 +2539,6 @@ class MutableNode: Arguments: """ - @property def start(self): """ @@ -2759,7 +2547,6 @@ class MutableNode: Returns: The earliest time that this Node is valid or None if the Node is valid for all times. """ - @property def start_date_time(self): """ @@ -2768,7 +2555,6 @@ class MutableNode: Returns: The earliest datetime that this Node is valid or None if the Node is valid for all times. """ - def update_constant_properties(self, properties): """ Update constant properties of a node in the graph overwriting existing values. @@ -2783,7 +2569,6 @@ class MutableNode: Returns: Result: A result object indicating success or failure. On failure, it contains a GraphError. """ - def valid_layers(self, names): """ Return a view of Node containing all layers `names` @@ -2795,7 +2580,6 @@ class MutableNode: Returns: Node: The layered view """ - def window(self, start, end): """ Create a view of the Node including all events between `start` (inclusive) and `end` (exclusive) @@ -2807,17 +2591,15 @@ class MutableNode: Returns: r A Node object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Node""" + """Get the window size (difference between start and end) for this Node""" class Node: """A node (or node) in the graph.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Node including all events after `start` (exclusive). @@ -2828,7 +2610,6 @@ class Node: Returns: A Node object. """ - def at(self, time): """ Create a view of the Node including all events at `time`. @@ -2839,7 +2620,6 @@ class Node: Returns: A Node object. """ - def before(self, end): """ Create a view of the Node including all events before `end` (exclusive). @@ -2850,14 +2630,12 @@ class Node: Returns: A Node object. """ - def default_layer(self): """ Return a view of Node containing only the default edge layer Returns: Node: The layered view """ - def degree(self): """ Get the degree of this node (i.e., the number of edges that are incident to it). @@ -2865,7 +2643,6 @@ class Node: Returns The degree of this node. """ - @property def earliest_date_time(self): """ @@ -2874,7 +2651,6 @@ class Node: Returns: The earliest datetime that the node exists as an integer. """ - @property def earliest_time(self): """ @@ -2883,7 +2659,6 @@ class Node: Returns: The earliest time that the node exists as an integer. """ - @property def edges(self): """ @@ -2893,7 +2668,6 @@ class Node: An iterator over the edges that are incident to this node. """ - @property def end(self): """ @@ -2902,7 +2676,6 @@ class Node: Returns: The latest time that this Node is valid or None if the Node is valid for all times. """ - @property def end_date_time(self): """ @@ -2911,7 +2684,6 @@ class Node: Returns: The latest datetime that this Node is valid or None if the Node is valid for all times. 
""" - def exclude_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2923,7 +2695,6 @@ class Node: Returns: Node: The layered view """ - def exclude_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2935,7 +2706,6 @@ class Node: Returns: Node: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Node containing all layers except the excluded `name` @@ -2945,7 +2715,6 @@ class Node: Returns: Node: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Node containing all layers except the excluded `names` @@ -2955,7 +2724,6 @@ class Node: Returns: Node: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2968,10 +2736,8 @@ class Node: Returns: A `WindowSet` object. """ - def has_layer(self, name): - """ Check if Node has the layer `"name"`""" - + """Check if Node has the layer `"name"`""" def history(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2979,7 +2745,6 @@ class Node: Returns: A list of unix timestamps of the event history of node. """ - def history_date_time(self): """ Returns the history of a node, including node additions and changes made to node. @@ -2988,7 +2753,6 @@ class Node: A list of timestamps of the event history of node. """ - @property def id(self): """ @@ -2998,7 +2762,6 @@ class Node: Returns: The id of the node as an integer. """ - def in_degree(self): """ Get the in-degree of this node (i.e., the number of edges that are incident to it from other nodes). @@ -3006,7 +2769,6 @@ class Node: Returns: The in-degree of this node. """ - @property def in_edges(self): """ @@ -3016,7 +2778,6 @@ class Node: An iterator over the edges that point into this node. """ - @property def in_neighbours(self): """ @@ -3026,7 +2787,6 @@ class Node: An iterator over the neighbours of this node that point into this node. """ - @property def latest_date_time(self): """ @@ -3038,7 +2798,6 @@ class Node: Returns: The latest datetime that the node exists as an integer. """ - @property def latest_time(self): """ @@ -3047,7 +2806,6 @@ class Node: Returns: The latest time that the node exists as an integer. """ - def layer(self, name): """ Return a view of Node containing the layer `"name"` @@ -3056,7 +2814,6 @@ class Node: Returns: Node: The layered view """ - def layers(self, names): """ Return a view of Node containing all layers `names` @@ -3068,7 +2825,6 @@ class Node: Returns: Node: The layered view """ - @property def name(self): """ @@ -3077,7 +2833,6 @@ class Node: Returns: The name of the node as a string. """ - @property def neighbours(self): """ @@ -3087,11 +2842,9 @@ class Node: An iterator over the neighbours of this node. """ - @property def node_type(self): """Returns the type of node""" - def out_degree(self): """ Get the out-degree of this node (i.e., the number of edges that are incident to it from this node). @@ -3099,7 +2852,6 @@ class Node: Returns: The out-degree of this node. """ - @property def out_edges(self): """ @@ -3109,7 +2861,6 @@ class Node: An iterator over the edges that point out of this node. """ - @property def out_neighbours(self): """ @@ -3119,7 +2870,6 @@ class Node: An iterator over the neighbours of this node that point out of this node. """ - @property def properties(self): """ @@ -3128,7 +2878,6 @@ class Node: Returns: A list of properties. 
""" - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -3142,7 +2891,6 @@ class Node: Returns: A `WindowSet` object. """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -3152,7 +2900,6 @@ class Node: Returns: A Node object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -3163,7 +2910,6 @@ class Node: Returns: A Node object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -3171,7 +2917,6 @@ class Node: Arguments: """ - @property def start(self): """ @@ -3180,7 +2925,6 @@ class Node: Returns: The earliest time that this Node is valid or None if the Node is valid for all times. """ - @property def start_date_time(self): """ @@ -3189,7 +2933,6 @@ class Node: Returns: The earliest datetime that this Node is valid or None if the Node is valid for all times. """ - def valid_layers(self, names): """ Return a view of Node containing all layers `names` @@ -3201,7 +2944,6 @@ class Node: Returns: Node: The layered view """ - def window(self, start, end): """ Create a view of the Node including all events between `start` (inclusive) and `end` (exclusive) @@ -3213,17 +2955,15 @@ class Node: Returns: r A Node object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Node""" + """Get the window size (difference between start and end) for this Node""" class Nodes: """A list of nodes that can be iterated over.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def after(self, start): """ Create a view of the Nodes including all events after `start` (exclusive). @@ -3234,7 +2974,6 @@ class Nodes: Returns: A Nodes object. """ - def at(self, time): """ Create a view of the Nodes including all events at `time`. @@ -3245,7 +2984,6 @@ class Nodes: Returns: A Nodes object. """ - def before(self, end): """ Create a view of the Nodes including all events before `end` (exclusive). @@ -3256,7 +2994,6 @@ class Nodes: Returns: A Nodes object. """ - def collect(self): """ Collect all nodes into a list @@ -3264,14 +3001,12 @@ class Nodes: Returns: list[Node]: the list of nodes """ - def default_layer(self): """ Return a view of Nodes containing only the default edge layer Returns: Nodes: The layered view """ - def degree(self): """ Returns the number of edges of the nodes @@ -3279,7 +3014,6 @@ class Nodes: Returns: An iterator of the number of edges of the nodes """ - @property def earliest_date_time(self): """ @@ -3288,11 +3022,9 @@ class Nodes: Returns: Earliest time of the nodes. """ - @property def earliest_time(self): """Returns an iterator over the nodes earliest time""" - @property def edges(self): """ @@ -3302,7 +3034,6 @@ class Nodes: An iterator over the edges that are incident to this node. """ - @property def end(self): """ @@ -3311,7 +3042,6 @@ class Nodes: Returns: The latest time that this Nodes is valid or None if the Nodes is valid for all times. """ - @property def end_date_time(self): """ @@ -3320,7 +3050,6 @@ class Nodes: Returns: The latest datetime that this Nodes is valid or None if the Nodes is valid for all times. 
""" - def exclude_layer(self, name): """ Return a view of Nodes containing all layers except the excluded `name` @@ -3332,7 +3061,6 @@ class Nodes: Returns: Nodes: The layered view """ - def exclude_layers(self, names): """ Return a view of Nodes containing all layers except the excluded `names` @@ -3344,7 +3072,6 @@ class Nodes: Returns: Nodes: The layered view """ - def exclude_valid_layer(self, name): """ Return a view of Nodes containing all layers except the excluded `name` @@ -3354,7 +3081,6 @@ class Nodes: Returns: Nodes: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of Nodes containing all layers except the excluded `names` @@ -3364,7 +3090,6 @@ class Nodes: Returns: Nodes: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3377,10 +3102,8 @@ class Nodes: Returns: A `WindowSet` object. """ - def has_layer(self, name): - """ Check if Nodes has the layer `"name"`""" - + """Check if Nodes has the layer `"name"`""" def history(self): """ Returns all timestamps of nodes, when an node is added or change to an node is made. @@ -3389,7 +3112,6 @@ class Nodes: A list of unix timestamps. """ - def history_date_time(self): """ Returns all timestamps of nodes, when an node is added or change to an node is made. @@ -3398,11 +3120,9 @@ class Nodes: An list of timestamps. """ - @property def id(self): """Returns an iterator over the nodes ids""" - def in_degree(self): """ Returns the number of in edges of the nodes @@ -3410,7 +3130,6 @@ class Nodes: Returns: An iterator of the number of in edges of the nodes """ - @property def in_edges(self): """ @@ -3420,7 +3139,6 @@ class Nodes: An iterator over the edges that point into this node. """ - @property def in_neighbours(self): """ @@ -3430,7 +3148,6 @@ class Nodes: An iterator over the neighbours of this node that point into this node. """ - @property def latest_date_time(self): """ @@ -3439,11 +3156,9 @@ class Nodes: Returns: Latest date time of the nodes. """ - @property def latest_time(self): """Returns an iterator over the nodes latest time""" - def layer(self, name): """ Return a view of Nodes containing the layer `"name"` @@ -3452,7 +3167,6 @@ class Nodes: Returns: Nodes: The layered view """ - def layers(self, names): """ Return a view of Nodes containing all layers `names` @@ -3464,11 +3178,9 @@ class Nodes: Returns: Nodes: The layered view """ - @property def name(self): """Returns an iterator over the nodes name""" - @property def neighbours(self): """ @@ -3478,11 +3190,9 @@ class Nodes: An iterator over the neighbours of this node. """ - @property def node_type(self): """Returns the type of node""" - def out_degree(self): """ Returns the number of out edges of the nodes @@ -3490,7 +3200,6 @@ class Nodes: Returns: An iterator of the number of out edges of the nodes """ - @property def out_edges(self): """ @@ -3500,7 +3209,6 @@ class Nodes: An iterator over the edges that point out of this node. """ - @property def out_neighbours(self): """ @@ -3510,7 +3218,6 @@ class Nodes: An iterator over the neighbours of this node that point out of this node. """ - @property def properties(self): """ @@ -3519,7 +3226,6 @@ class Nodes: Returns: A List of properties """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -3533,7 +3239,6 @@ class Nodes: Returns: A `WindowSet` object. 
""" - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -3543,7 +3248,6 @@ class Nodes: Returns: A Nodes object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -3554,7 +3258,6 @@ class Nodes: Returns: A Nodes object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -3562,7 +3265,6 @@ class Nodes: Arguments: """ - @property def start(self): """ @@ -3571,7 +3273,6 @@ class Nodes: Returns: The earliest time that this Nodes is valid or None if the Nodes is valid for all times. """ - @property def start_date_time(self): """ @@ -3580,7 +3281,6 @@ class Nodes: Returns: The earliest datetime that this Nodes is valid or None if the Nodes is valid for all times. """ - def to_df(self, include_property_history=False, convert_datetime=False): """ Converts the graph's nodes into a Pandas DataFrame. @@ -3597,10 +3297,7 @@ class Nodes: Returns: If successful, this PyObject will be a Pandas DataFrame. """ - - def type_filter(self, node_types): - ... - + def type_filter(self, node_types): ... def valid_layers(self, names): """ Return a view of Nodes containing all layers `names` @@ -3612,7 +3309,6 @@ class Nodes: Returns: Nodes: The layered view """ - def window(self, start, end): """ Create a view of the Nodes including all events between `start` (inclusive) and `end` (exclusive) @@ -3624,17 +3320,15 @@ class Nodes: Returns: r A Nodes object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this Nodes""" + """Get the window size (difference between start and end) for this Nodes""" class PersistentGraph: """A temporal graph that allows edges and nodes to be deleted.""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def add_constant_properties(self, properties): """ Adds static properties to the graph. @@ -3645,7 +3339,6 @@ class PersistentGraph: Returns: None """ - def add_edge(self, timestamp, src, dst, properties=None, layer=None): """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -3660,7 +3353,6 @@ class PersistentGraph: Returns: None """ - def add_node(self, timestamp, id, properties=None, node_type=None): """ Adds a new node with the given id and properties to the graph. @@ -3674,7 +3366,6 @@ class PersistentGraph: Returns: None """ - def add_property(self, timestamp, properties): """ Adds properties to the graph. @@ -3686,7 +3377,6 @@ class PersistentGraph: Returns: None """ - def after(self, start): """ Create a view of the GraphView including all events after `start` (exclusive). @@ -3697,7 +3387,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def at(self, time): """ Create a view of the GraphView including all events at `time`. @@ -3708,7 +3397,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def before(self, end): """ Create a view of the GraphView including all events before `end` (exclusive). @@ -3719,18 +3407,8 @@ class PersistentGraph: Returns: A GraphView object. """ - - def cache(self, path): - """ - Write PersistentGraph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. If the file already exists its contents are overwritten. 
- - Arguments: - path (str): The path to the cache file - """ - + def bincode(self): + """Get bincode encoded graph""" def count_edges(self): """ Number of edges in the graph @@ -3738,7 +3416,6 @@ class PersistentGraph: Returns: the number of edges in the graph """ - def count_nodes(self): """ Number of nodes in the graph @@ -3746,7 +3423,6 @@ class PersistentGraph: Returns: the number of nodes in the graph """ - def count_temporal_edges(self): """ Number of edges in the graph @@ -3754,14 +3430,12 @@ class PersistentGraph: Returns: the number of temporal edges in the graph """ - def default_layer(self): """ Return a view of GraphView containing only the default edge layer Returns: GraphView: The layered view """ - def delete_edge(self, timestamp, src, dst, layer=None): """ Deletes an edge given the timestamp, src and dst nodes and layer (optional) @@ -3775,19 +3449,6 @@ class PersistentGraph: Returns: None or a GraphError if the edge could not be deleted """ - - @staticmethod - def deserialise(bytes): - """ - Load PersistentGraph from serialised bytes. - - Arguments: - bytes (Bytes): The serialised bytes to decode - - Returns: - PersistentGraph - """ - @property def earliest_date_time(self): """ @@ -3796,7 +3457,6 @@ class PersistentGraph: Returns: the datetime of the earliest activity in the graph """ - @property def earliest_time(self): """ @@ -3805,7 +3465,6 @@ class PersistentGraph: Returns: the timestamp of the earliest activity in the graph """ - def edge(self, src, dst): """ Gets the edge with the specified source and destination nodes @@ -3817,7 +3476,6 @@ class PersistentGraph: Returns: the edge with the specified source and destination nodes, or None if the edge does not exist """ - @property def edges(self): """ @@ -3826,7 +3484,6 @@ class PersistentGraph: Returns: the edges in the graph """ - @property def end(self): """ @@ -3835,7 +3492,6 @@ class PersistentGraph: Returns: The latest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def end_date_time(self): """ @@ -3844,10 +3500,8 @@ class PersistentGraph: Returns: The latest datetime that this GraphView is valid or None if the GraphView is valid for all times. """ - def event_graph(self): """Get event graph""" - def exclude_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -3859,7 +3513,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def exclude_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -3871,7 +3524,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def exclude_nodes(self, nodes): """ Returns a subgraph given a set of nodes that are excluded from the subgraph @@ -3882,7 +3534,6 @@ class PersistentGraph: Returns: GraphView - Returns the subgraph """ - def exclude_valid_layer(self, name): """ Return a view of GraphView containing all layers except the excluded `name` @@ -3892,7 +3543,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def exclude_valid_layers(self, names): """ Return a view of GraphView containing all layers except the excluded `names` @@ -3902,7 +3552,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def expanding(self, step): """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3915,7 +3564,6 @@ class PersistentGraph: Returns: A `WindowSet` object. 
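A sketch of the PersistentGraph deletion semantics referenced above, assuming the default constructor; the edge is valid from its addition until its deletion:

from raphtory import PersistentGraph

pg = PersistentGraph()
pg.add_edge(1, "alice", "bob", properties={"weight": 1.0})
pg.delete_edge(5, "alice", "bob")  # edge now exists over [1, 5)

e = pg.edge("alice", "bob")
print(e.deletions())               # -> [5]
print(pg.count_edges(), pg.count_temporal_edges())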
""" - def find_edges(self, properties_dict): """ Get the edges that match the properties name and value @@ -3924,7 +3572,6 @@ class PersistentGraph: Returns: the edges that match the properties name and value """ - def find_nodes(self, properties_dict): """ Get the nodes that match the properties name and value @@ -3933,7 +3580,9 @@ class PersistentGraph: Returns: the nodes that match the properties name and value """ - + @staticmethod + def from_bincode(bytes): + """Creates a graph from a bincode encoded graph""" def get_all_node_types(self): """ Returns all the node types in the graph. @@ -3941,7 +3590,6 @@ class PersistentGraph: Returns: A list of node types """ - def has_edge(self, src, dst): """ Returns true if the graph contains the specified edge @@ -3953,10 +3601,8 @@ class PersistentGraph: Returns: true if the graph contains the specified edge, false otherwise """ - def has_layer(self, name): - """ Check if GraphView has the layer `"name"`""" - + """Check if GraphView has the layer `"name"`""" def has_node(self, id): """ Returns true if the graph contains the specified node @@ -3967,7 +3613,6 @@ class PersistentGraph: Returns: true if the graph contains the specified node, false otherwise """ - def import_edge(self, edge, force=False): """ Import a single edge into the graph. @@ -3983,7 +3628,6 @@ class PersistentGraph: Returns: Result, GraphError> - A Result object which is Ok if the edge was successfully imported, and Err otherwise. """ - def import_edges(self, edges, force=False): """ Import multiple edges into the graph. @@ -3997,7 +3641,6 @@ class PersistentGraph: force (boolean) - An optional boolean flag indicating whether to force the import of the edges. """ - def import_node(self, node, force=False): """ Import a single node into the graph. @@ -4012,7 +3655,6 @@ class PersistentGraph: Returns: Result, GraphError> - A Result object which is Ok if the node was successfully imported, and Err otherwise. """ - def import_nodes(self, nodes, force=False): """ Import multiple nodes into the graph. @@ -4026,7 +3668,6 @@ class PersistentGraph: force (boolean) - An optional boolean flag indicating whether to force the import of the nodes. """ - def index(self): """ Indexes all node and edge properties. @@ -4036,7 +3677,6 @@ class PersistentGraph: Returns: GraphIndex - Returns a GraphIndex """ - @property def latest_date_time(self): """ @@ -4045,7 +3685,6 @@ class PersistentGraph: Returns: the datetime of the latest activity in the graph """ - @property def latest_time(self): """ @@ -4054,7 +3693,6 @@ class PersistentGraph: Returns: the timestamp of the latest activity in the graph """ - def layer(self, name): """ Return a view of GraphView containing the layer `"name"` @@ -4063,7 +3701,6 @@ class PersistentGraph: Returns: GraphView: The layered view """ - def layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -4075,23 +3712,54 @@ class PersistentGraph: Returns: GraphView: The layered view """ - - @staticmethod - def load_cached(path): + def load_edge_deletions_from_pandas( + self, df, time, src, dst, layer=None, layer_col=None + ): """ - Load PersistentGraph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. + Load edges deletions from a Pandas DataFrame into the graph. Arguments: - path (str): The path to the cache file - + df (Dataframe): The Pandas DataFrame containing the edges. + time (str): The column name for the update timestamps. 
+ src (str): The column name for the source node ids. + dst (str): The column name for the destination node ids. + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) Returns: - PersistentGraph + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edge_deletions_from_parquet( + self, parquet_path, time, src, dst, layer=None, layer_col=None + ): """ + Load edges deletions from a Parquet file into the graph. - def load_edge_props_from_pandas(self, df, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + Arguments: + parquet_path (str): Parquet file or directory of Parquet files path containing node information. + src (str): The column name for the source node ids. + dst (str): The column name for the destination node ids. + time (str): The column name for the update timestamps. + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edge_props_from_pandas( + self, + df, + src, + dst, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edge properties from a Pandas DataFrame. @@ -4099,16 +3767,27 @@ class PersistentGraph: df (Dataframe): The Pandas DataFrame containing edge information. src (str): The column name for the source node. dst (str): The column name for the destination node. - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): Layer name. Defaults to None. (optional) - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): The edge layer name (optional) Defaults to None. + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_edge_props_from_parquet(self, parquet_path, src, dst, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + Raises: + GraphError: If the operation fails. + """ + def load_edge_props_from_parquet( + self, + parquet_path, + src, + dst, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edge properties from parquet file @@ -4116,215 +3795,200 @@ class PersistentGraph: parquet_path (str): Parquet file or directory of Parquet files path containing edge information. src (str): The column name for the source node. 
dst (str): The column name for the destination node. - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): Layer name. Defaults to None. (optional) - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. - - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_edges_deletions_from_pandas(self, df, src, dst, time, layer=None, layer_in_df=True): - """ - Load edges deletions from a Pandas DataFrame into the graph. - - Arguments: - df (Dataframe): The Pandas DataFrame containing the edges. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_edges_deletions_from_parquet(self, parquet_path, src, dst, time, layer=None, layer_in_df=True): + Raises: + GraphError: If the operation fails. """ - Load edges deletions from a Parquet file into the graph. - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files path containing node information. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_edges_from_pandas(self, df, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + def load_edges_from_pandas( + self, + df, + time, + src, + dst, + properties=None, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edges from a Pandas DataFrame into the graph. Arguments: df (Dataframe): The Pandas DataFrame containing the edges. + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - properties (List): List of edge property column names. Defaults to None. (optional) - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. 
Defaults to None. (optional) - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_edges_from_parquet(self, parquet_path, src, dst, time, properties=None, const_properties=None, shared_const_properties=None, layer=None, layer_in_df=True): + properties (List[str]): List of edge property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edges_from_parquet( + self, + parquet_path, + time, + src, + dst, + properties=None, + constant_properties=None, + shared_constant_properties=None, + layer=None, + layer_col=None, + ): """ Load edges from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing edges + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - properties (List): List of edge property column names. Defaults to None. (optional) - const_properties (List): List of constant edge property column names. Defaults to None. (optional) - shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - + properties (List[str]): List of edge property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. + Raises: + GraphError: If the operation fails. + """ @staticmethod - def load_from_file(path): + def load_from_file(path, force=False): """ - Load PersistentGraph from a file. + Loads a graph from the given path. Arguments: - path (str): The path to the file. + path (str): The path to the graph. 
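A minimal sketch of the layer/layer_col split in the edge loaders above; the DataFrame and its column names are illustrative:

import pandas as pd
from raphtory import PersistentGraph

edges_df = pd.DataFrame(
    {
        "time": [1, 2, 3],
        "src": ["a", "b", "c"],
        "dst": ["b", "c", "a"],
        "weight": [1.0, 0.5, 2.0],
        "relation": ["friends", "colleagues", "friends"],
    }
)

pg = PersistentGraph()
pg.load_edges_from_pandas(
    df=edges_df,
    time="time",
    src="src",
    dst="dst",
    properties=["weight"],
    layer_col="relation",  # per-row layers; pass layer="..." instead for one constant layer
)
print(pg.count_temporal_edges())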
Returns: - PersistentGraph - """ - - @staticmethod - def load_from_pandas(edge_df, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_df=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): - """ - Load a graph from a Pandas DataFrame. - - Args: - edge_df (pandas.DataFrame): The DataFrame containing the edges. - edge_src (str): The column name for the source node ids. - edge_dst (str): The column name for the destination node ids. - edge_time (str): The column name for the timestamps. - edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - edge_layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. - node_id (str): The column name for the node ids (optional) Defaults to None. - node_time (str): The column name for the node timestamps (optional) Defaults to None. - node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - - Returns: - Graph: The loaded Graph object. + Graph: The loaded graph. """ - - @staticmethod - def load_from_parquet(edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties=None, edge_const_properties=None, edge_shared_const_properties=None, edge_layer=None, layer_in_df=True, node_parquet_path=None, node_id=None, node_time=None, node_properties=None, node_const_properties=None, node_shared_const_properties=None, node_type=None, node_type_in_df=True): - """ - Load a graph from Parquet file. - - Args: - edge_parquet_path (str): Parquet file or directory of Parquet files containing the edges. - edge_src (str): The column name for the source node ids. - edge_dst (str): The column name for the destination node ids. - edge_time (str): The column name for the timestamps. - edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - edge_layer (str): The edge layer name (optional) Defaults to None. - layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. 
- node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. - node_id (str): The column name for the node ids (optional) Defaults to None. - node_time (str): The column name for the node timestamps (optional) Defaults to None. - node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - - Returns: - Graph: The loaded Graph object. - """ - - def load_node_props_from_pandas(self, df, id, const_properties=None, shared_const_properties=None): + def load_node_props_from_pandas( + self, + df, + id, + node_type=None, + node_type_col=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load node properties from a Pandas DataFrame. Arguments: df (Dataframe): The Pandas DataFrame containing node information. id(str): The column name for the node IDs. - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_node_props_from_parquet(self, parquet_path, id, const_properties=None, shared_const_properties=None): + Raises: + GraphError: If the operation fails. + """ + def load_node_props_from_parquet( + self, + parquet_path, + id, + node_type=None, + node_type_col=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load node properties from a parquet file. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing node information. id(str): The column name for the node IDs. - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. 
(optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. - def load_nodes_from_pandas(self, df, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_pandas( + self, + df, + time, + id, + node_type=None, + node_type_col=None, + properties=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load nodes from a Pandas DataFrame into the graph. Arguments: df (pandas.DataFrame): The Pandas DataFrame containing the nodes. - id (str): The column name for the node IDs. time (str): The column name for the timestamps. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - properties (List): List of node property column names. Defaults to None. (optional) - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) - Returns: - Result<(), GraphError>: Result of the operation. - """ - - def load_nodes_from_parquet(self, parquet_path, id, time, node_type=None, node_type_in_df=True, properties=None, const_properties=None, shared_const_properties=None): + id (str): The column name for the node IDs. + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + properties (List[str]): List of node property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + Returns: + None: If the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_parquet( + self, + parquet_path, + time, + id, + node_type=None, + node_type_col=None, + properties=None, + constant_properties=None, + shared_constant_properties=None, + ): """ Load nodes from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files containing the nodes - id (str): The column name for the node IDs. time (str): The column name for the timestamps. - node_type (str): the column name for the node type - node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - properties (List): List of node property column names. Defaults to None. (optional) - const_properties (List): List of constant node property column names. Defaults to None. (optional) - shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + id (str): The column name for the node IDs. + node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. 
(cannot be used in combination with node_type_col) + node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + properties (List[str]): List of node property column names. Defaults to None. (optional) + constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) Returns: - Result<(), GraphError>: Result of the operation. - """ + None: If the operation is successful. + Raises: + GraphError: If the operation fails. + """ def materialize(self): """ Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph @@ -4332,7 +3996,6 @@ class PersistentGraph: Returns: GraphView - Returns a graph clone """ - def node(self, id): """ Gets the node with the specified id @@ -4343,7 +4006,6 @@ class PersistentGraph: Returns: the node with the specified id, or None if the node does not exist """ - @property def nodes(self): """ @@ -4352,7 +4014,6 @@ class PersistentGraph: Returns: the nodes in the graph """ - @property def properties(self): """ @@ -4362,7 +4023,6 @@ class PersistentGraph: Returns: HashMap - Properties paired with their names """ - def rolling(self, window, step=None): """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. @@ -4376,23 +4036,16 @@ class PersistentGraph: Returns: A `WindowSet` object. """ - def save_to_file(self, path): """ - Saves the PersistentGraph to the given path. + Saves the graph to the given path. Arguments: - path (str): The path to the file. - """ - - def serialise(self): - """ - Serialise PersistentGraph to bytes. + path (str): The path to the graph. Returns: - Bytes + None """ - def shrink_end(self, end): """ Set the end of the window to the smaller of `end` and `self.end()` @@ -4402,7 +4055,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def shrink_start(self, start): """ Set the start of the window to the larger of `start` and `self.start()` @@ -4413,7 +4065,6 @@ class PersistentGraph: Returns: A GraphView object. """ - def shrink_window(self, start, end): """ Shrink both the start and end of the window (same as calling `shrink_start` followed by `shrink_end` but more efficient) @@ -4421,7 +4072,6 @@ class PersistentGraph: Arguments: """ - @property def start(self): """ @@ -4430,7 +4080,6 @@ class PersistentGraph: Returns: The earliest time that this GraphView is valid or None if the GraphView is valid for all times. """ - @property def start_date_time(self): """ @@ -4439,7 +4088,6 @@ class PersistentGraph: Returns: The earliest datetime that this GraphView is valid or None if the GraphView is valid for all times. 
""" - def subgraph(self, nodes): """ Returns a subgraph given a set of nodes @@ -4450,7 +4098,6 @@ class PersistentGraph: Returns: GraphView - Returns the subgraph """ - def subgraph_node_types(self, node_types): """ Returns a subgraph filtered by node types given a set of node types @@ -4461,8 +4108,14 @@ class PersistentGraph: Returns: GraphView - Returns the subgraph """ - - def to_networkx(self, explode_edges=False, include_node_properties=True, include_edge_properties=True, include_update_history=True, include_property_history=True): + def to_networkx( + self, + explode_edges=False, + include_node_properties=True, + include_edge_properties=True, + include_update_history=True, + include_property_history=True, + ): """ Returns a graph with NetworkX. @@ -4480,8 +4133,18 @@ class PersistentGraph: Returns: A Networkx MultiDiGraph. """ - - def to_pyvis(self, explode_edges=False, edge_color="#000000", shape=None, node_image=None, edge_weight=None, edge_label=None, colour_nodes_by_type=False, notebook=False, **kwargs): + def to_pyvis( + self, + explode_edges=False, + edge_color="#000000", + shape=None, + node_image=None, + edge_weight=None, + edge_label=None, + colour_nodes_by_type=False, + notebook=False, + **kwargs, + ): """ Draw a graph with PyVis. Pyvis is a required dependency. If you intend to use this function make sure that you install Pyvis @@ -4505,11 +4168,9 @@ class PersistentGraph: Returns: A pyvis network """ - @property def unique_layers(self): """Return all the layer ids in the graph""" - def update_constant_properties(self, properties): """ Updates static properties to the graph. @@ -4520,7 +4181,6 @@ class PersistentGraph: Returns: None """ - def valid_layers(self, names): """ Return a view of GraphView containing all layers `names` @@ -4532,8 +4192,16 @@ class PersistentGraph: Returns: GraphView: The layered view """ - - def vectorise(self, embedding, cache=None, overwrite_cache=False, graph_document=None, node_document=None, edge_document=None, verbose=False): + def vectorise( + self, + embedding, + cache=None, + overwrite_cache=False, + graph_document=None, + node_document=None, + edge_document=None, + verbose=False, + ): """ Create a VectorisedGraph from the current graph @@ -4548,7 +4216,6 @@ class PersistentGraph: Returns: A VectorisedGraph with all the documents/embeddings computed and with an initial empty selection """ - def window(self, start, end): """ Create a view of the GraphView including all events between `start` (inclusive) and `end` (exclusive) @@ -4560,27 +4227,20 @@ class PersistentGraph: Returns: r A GraphView object. """ - @property def window_size(self): - """ Get the window size (difference between start and end) for this GraphView""" - - def write_updates(self): - """Persist the new updates by appending them to the cache file.""" + """Get the window size (difference between start and end) for this GraphView""" class Properties: """A view of the properties of an entity""" def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def as_dict(self): """Convert properties view to a dict""" - @property def constant(self): """Get a view of the constant properties (meta-data) only.""" - def get(self, key): """ Get property value. @@ -4588,17 +4248,13 @@ class Properties: First searches temporal properties and returns latest value if it exists. If not, it falls back to static properties. 
""" - def items(self): """Get a list of key-value pairs""" - def keys(self): """Get the names for all properties (includes temporal and static properties)""" - @property def temporal(self): """Get a view of the temporal properties only.""" - def values(self): """ Get the values of the properties @@ -4612,12 +4268,9 @@ class PyDirection: def __init__(self, direction): """Initialize self. See help(type(self)) for accurate signature.""" - - def as_str(self): - ... + def as_str(self): ... class PyGraphEncoder: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" @@ -4626,10 +4279,8 @@ class TemporalProp: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def at(self, t): """Get the value of the property at time `t`""" - def average(self): """ Compute the average of all property values. Alias for mean(). @@ -4637,7 +4288,6 @@ class TemporalProp: Returns: Prop: The average of each property values, or None if count is zero. """ - def count(self): """ Count the number of properties. @@ -4645,19 +4295,14 @@ class TemporalProp: Returns: int: The number of properties. """ - def history(self): """Get the timestamps at which the property was updated""" - def history_date_time(self): """Get the timestamps at which the property was updated""" - def items(self): """List update timestamps and corresponding property values""" - def items_date_time(self): """List update timestamps and corresponding property values""" - def max(self): """ Find the maximum property value and its associated time. @@ -4665,7 +4310,6 @@ class TemporalProp: Returns: (i64, Prop): A tuple containing the time and the maximum property value. """ - def mean(self): """ Compute the mean of all property values. Alias for mean(). @@ -4673,7 +4317,6 @@ class TemporalProp: Returns: Prop: The mean of each property values, or None if count is zero. """ - def median(self): """ Compute the median of all property values. @@ -4681,7 +4324,6 @@ class TemporalProp: Returns: (i64, Prop): A tuple containing the time and the median property value, or None if empty """ - def min(self): """ Find the minimum property value and its associated time. @@ -4689,10 +4331,7 @@ class TemporalProp: Returns: (i64, Prop): A tuple containing the time and the minimum property value. """ - - def ordered_dedupe(self, latest_time): - ... - + def ordered_dedupe(self, latest_time): ... def sum(self): """ Compute the sum of all property values. @@ -4700,13 +4339,9 @@ class TemporalProp: Returns: Prop: The sum of all property values. """ - - def unique(self): - ... - + def unique(self): ... def value(self): """Get the latest value of the property""" - def values(self): """Get the property values for each update""" @@ -4715,7 +4350,6 @@ class TemporalProperties: def __init__(self): """Initialize self. 
See help(type(self)) for accurate signature.""" - def get(self, key): """ get(key: str) -> Optional[TemporalProp] @@ -4725,7 +4359,6 @@ class TemporalProperties: Returns: the property view if it exists, otherwise `None` """ - def histories(self): """ Get the histories of all properties @@ -4733,7 +4366,6 @@ class TemporalProperties: Returns: dict[str, list[(int, Any)]]: the mapping of property keys to histories """ - def histories_date_time(self): """ Get the histories of all properties @@ -4741,13 +4373,10 @@ class TemporalProperties: Returns: dict[str, list[(datetime, Any)]]: the mapping of property keys to histories """ - def items(self): """List the property keys together with the corresponding values""" - def keys(self): """List the available property keys""" - def latest(self): """ Get the latest value of all properties @@ -4755,7 +4384,6 @@ class TemporalProperties: Returns: dict[str, Any]: the mapping of property keys to latest values """ - def values(self): """ List the values of the properties diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index 0466b74dde..2f58811862 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -68,9 +68,12 @@ def betweenness_centrality(g, k=None, normalized=True): AlgorithmResult[float]: Returns an `AlgorithmResult` containing the betweenness centrality of each node. """ -def cohesive_fruchterman_reingold(graph, iterations=100, scale=1.0, node_start_size=1.0, cooloff_factor=0.95, dt=0.1): +def cohesive_fruchterman_reingold( + graph, iterations=100, scale=1.0, node_start_size=1.0, cooloff_factor=0.95, dt=0.1 +): """Cohesive version of `fruchterman_reingold` that adds virtual edges between isolated nodes""" +def connected_components(g): ... def degree_centrality(g, threads=None): """ Computes the degree centrality of all nodes in the graph. The values are normalized @@ -85,7 +88,9 @@ def degree_centrality(g, threads=None): AlgorithmResult>: A result containing a mapping of node names to the computed sum of their associated degree centrality. """ -def dijkstra_single_source_shortest_paths(g, source, targets, direction=..., weight=...): +def dijkstra_single_source_shortest_paths( + g, source, targets, direction=..., weight=... +): """ Finds the shortest paths from a single source to multiple targets in a graph. @@ -115,7 +120,9 @@ def directed_graph_density(g): float : Directed graph density of G. 
""" -def fruchterman_reingold(graph, iterations=100, scale=1.0, node_start_size=1.0, cooloff_factor=0.95, dt=0.1): +def fruchterman_reingold( + graph, iterations=100, scale=1.0, node_start_size=1.0, cooloff_factor=0.95, dt=0.1 +): """ Fruchterman Reingold layout algorithm @@ -446,7 +453,15 @@ def strongly_connected_components(g): Vec> : List of strongly connected nodes identified by ids """ -def temporal_SEIR(graph, seeds, infection_prob, initial_infection, recovery_rate=None, incubation_rate=None, rng_seed=None): +def temporal_SEIR( + graph, + seeds, + infection_prob, + initial_infection, + recovery_rate=None, + incubation_rate=None, + rng_seed=None, +): """ Simulate an SEIR dynamic on the network @@ -470,7 +485,7 @@ def temporal_SEIR(graph, seeds, infection_prob, initial_infection, recovery_rate Returns: AlgorithmResult[Infected]: Returns an `Infected` object for each infected node with attributes - + `infected`: the time stamp of the infection event `active`: the time stamp at which the node actively starts spreading the infection (i.e., the end of the incubation period) diff --git a/python/python/raphtory/graph_loader/__init__.pyi b/python/python/raphtory/graph_loader/__init__.pyi index 63b6858373..9232d66a23 100644 --- a/python/python/raphtory/graph_loader/__init__.pyi +++ b/python/python/raphtory/graph_loader/__init__.pyi @@ -54,9 +54,7 @@ def lotr_graph(): A Graph containing the LOTR dataset """ -def neo4j_movie_graph(uri, username, password, database=...): - ... - +def neo4j_movie_graph(uri, username, password, database=...): ... def reddit_hyperlink_graph(timeout_seconds=600): """ Load (a subset of) Reddit hyperlinks dataset into a graph. @@ -96,8 +94,5 @@ def reddit_hyperlink_graph(timeout_seconds=600): A Graph containing the Reddit hyperlinks dataset """ -def reddit_hyperlink_graph_local(file_path): - ... - -def stable_coin_graph(path=None, subset=None): - ... +def reddit_hyperlink_graph_local(file_path): ... +def stable_coin_graph(path=None, subset=None): ... diff --git a/python/python/raphtory/graphql/__init__.pyi b/python/python/raphtory/graphql/__init__.pyi index de8f0d0e21..b082a9c50c 100644 --- a/python/python/raphtory/graphql/__init__.pyi +++ b/python/python/raphtory/graphql/__init__.pyi @@ -15,10 +15,8 @@ class GraphqlGraphs: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def get(self, name): """Return the `VectorisedGraph` with name `name` or `None` if it doesn't exist""" - def search_graph_documents(self, query, limit, window): """ Return the top documents with the smallest cosine distance to `query` @@ -31,7 +29,6 @@ class GraphqlGraphs: # Returns A list of documents """ - def search_graph_documents_with_scores(self, query, limit, window): """Same as `search_graph_documents` but it also returns the scores alongside the documents""" @@ -40,7 +37,6 @@ class RaphtoryClient: def __init__(self, url): """Initialize self. See help(type(self)) for accurate signature.""" - def copy_graph(self, path, new_path): """ Copy graph from a path `path` on the server to a `new_path` on the server @@ -52,7 +48,6 @@ class RaphtoryClient: Returns: Copy status as boolean """ - def delete_graph(self, path): """ Delete graph from a path `path` on the server @@ -63,7 +58,6 @@ class RaphtoryClient: Returns: Delete status as boolean """ - def is_server_online(self): """ Check if the server is online. @@ -71,7 +65,6 @@ class RaphtoryClient: Returns: Returns true if server is online otherwise false. 
""" - def move_graph(self, path, new_path): """ Move graph from a path `path` on the server to a `new_path` on the server @@ -83,7 +76,6 @@ class RaphtoryClient: Returns: Move status as boolean """ - def query(self, query, variables=None): """ Make a graphQL query against the server. @@ -95,7 +87,6 @@ class RaphtoryClient: Returns: The `data` field from the graphQL response. """ - def receive_graph(self, path): """ Receive graph from a path `path` on the server @@ -106,7 +97,6 @@ class RaphtoryClient: Returns: Graph as string """ - def send_graph(self, path, graph, overwrite=False): """ Send a graph to the server @@ -119,7 +109,6 @@ class RaphtoryClient: Returns: The `data` field from the graphQL response after executing the mutation. """ - def upload_graph(self, path, file_path, overwrite=False): """ Upload graph file from a path `file_path` on the client @@ -136,9 +125,15 @@ class RaphtoryClient: class RaphtoryServer: """A class for defining and running a Raphtory GraphQL server""" - def __init__(self, work_dir, cache_capacity=None, cache_tti_seconds=None, log_level=None, config_path=None): + def __init__( + self, + work_dir, + cache_capacity=None, + cache_tti_seconds=None, + log_level=None, + config_path=None, + ): """Initialize self. See help(type(self)) for accurate signature.""" - def run(self, port=1736, timeout_ms=...): """ Run the server until completion. @@ -146,7 +141,6 @@ class RaphtoryServer: Arguments: * `port`: the port to use (defaults to 1736). """ - def start(self, port=1736, timeout_ms=None): """ Start the server and return a handle to it. @@ -155,7 +149,6 @@ class RaphtoryServer: * `port`: the port to use (defaults to 1736). * `timeout_ms`: wait for server to be online (defaults to 5000). The server is stopped if not online within timeout_ms but manages to come online as soon as timeout_ms finishes! """ - def with_document_search_function(self, name, input, function): """ Register a function in the GraphQL schema for document search over a graph. @@ -173,7 +166,6 @@ class RaphtoryServer: Returns: A new server object containing the vectorised graphs. """ - def with_global_search_function(self, name, input, function): """ Register a function in the GraphQL schema for document search among all the graphs. @@ -191,8 +183,15 @@ class RaphtoryServer: Returns: A new server object containing the vectorised graphs. """ - - def with_vectorised(self, cache, graph_names=None, embedding=None, graph_document=None, node_document=None, edge_document=None): + def with_vectorised( + self, + cache, + graph_names=None, + embedding=None, + graph_document=None, + node_document=None, + edge_document=None, + ): """ Vectorise a subset of the graphs of the server. @@ -217,9 +216,6 @@ class RunningRaphtoryServer: def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - - def get_client(self): - ... - + def get_client(self): ... def stop(self): """Stop the server and wait for it to finish""" diff --git a/python/python/raphtory/vectors/__init__.pyi b/python/python/raphtory/vectors/__init__.pyi index 6f42e39246..9e09acbe87 100644 --- a/python/python/raphtory/vectors/__init__.pyi +++ b/python/python/raphtory/vectors/__init__.pyi @@ -8,27 +8,18 @@ ############################################################################### class Document: - def __init__(self, content, life=None): """Initialize self. See help(type(self)) for accurate signature.""" - @property - def content(self): - ... - + def content(self): ... @property - def entity(self): - ... 
- + def entity(self): ... @property - def life(self): - ... + def life(self): ... class VectorisedGraph: - def __init__(self): """Initialize self. See help(type(self)) for accurate signature.""" - def append(self, nodes, edges): """ Add all the documents from `nodes` and `edges` to the current selection @@ -42,7 +33,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_by_similarity(self, query, limit, window=None): """ Add the top `limit` documents to the current selection using `query` @@ -55,7 +45,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_edges(self, edges): """ Add all the documents from `edges` to the current selection @@ -68,7 +57,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_edges_by_similarity(self, query, limit, window=None): """ Add the top `limit` edge documents to the current selection using `query` @@ -81,7 +69,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_nodes(self, nodes): """ Add all the documents from `nodes` to the current selection @@ -94,7 +81,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def append_nodes_by_similarity(self, query, limit, window=None): """ Add the top `limit` node documents to the current selection using `query` @@ -107,10 +93,8 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def edges(self): """Return the edges present in the current selection""" - def expand(self, hops, window=None): """ Add all the documents `hops` hops away to the selection @@ -127,7 +111,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def expand_by_similarity(self, query, limit, window=None): """ Add the top `limit` adjacent documents with higher score for `query` to the selection @@ -148,7 +131,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def expand_edges_by_similarity(self, query, limit, window=None): """ Add the top `limit` adjacent edge documents with higher score for `query` to the selection @@ -163,7 +145,6 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def expand_nodes_by_similarity(self, query, limit, window=None): """ Add the top `limit` adjacent node documents with higher score for `query` to the selection @@ -178,18 +159,13 @@ class VectorisedGraph: Returns: A new vectorised graph containing the updated selection """ - def get_documents(self): """Return the documents present in the current selection""" - def get_documents_with_scores(self): """Return the documents alongside their scores present in the current selection""" - def nodes(self): """Return the nodes present in the current selection""" - def save_embeddings(self, file): """Save the embeddings present in this graph to `file` so they can be further used in a call to `vectorise`""" -def generate_property_list(entity, filter_out=..., force_static=...): - ... +def generate_property_list(entity, filter_out=..., force_static=...): ... 
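For reviewers who want to try the renamed loader API end to end before reading the test changes below, here is a minimal sketch of the keyword layout documented in the stubs above (time/src/dst ordering, layer vs. layer_col, node_type vs. node_type_col, constant_properties and shared_constant_properties). The DataFrame contents and column names are illustrative assumptions for this sketch, not data taken from the repository:

    import pandas as pd
    from raphtory import Graph

    # Toy data; the column names are assumptions for the example only.
    edges_df = pd.DataFrame(
        {
            "time": [1, 2, 3],
            "src": ["a", "b", "c"],
            "dst": ["b", "c", "a"],
            "weight": [1.0, 2.0, 3.0],
            "relation": ["friend", "friend", "coworker"],
        }
    )
    nodes_df = pd.DataFrame(
        {"time": [1, 2, 3], "id": ["a", "b", "c"], "kind": ["person", "person", "bot"]}
    )

    g = Graph()
    g.load_edges_from_pandas(
        edges_df,
        time="time",
        src="src",
        dst="dst",
        properties=["weight"],
        layer_col="relation",  # per-row layer lookup; use layer=... for one constant layer
        shared_constant_properties={"datasource": "toy"},
    )
    g.load_nodes_from_pandas(
        df=nodes_df,
        time="time",
        id="id",
        node_type_col="kind",  # per-row type lookup; use node_type=... for one constant type
    )
    print(g)

As the docstrings above state, layer cannot be combined with layer_col, and node_type cannot be combined with node_type_col; the loaders now return None on success and raise GraphError on failure instead of returning a Result.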
diff --git a/python/tests/test_algorithms.py b/python/tests/test_algorithms.py index 5f671e82c2..5fccc4851e 100644 --- a/python/tests/test_algorithms.py +++ b/python/tests/test_algorithms.py @@ -258,7 +258,11 @@ def test_temporal_reachability(): actual = algorithms.temporally_reachable_nodes(g, 20, 11, [1, 2], [4, 5]) expected = { "1": [(11, "start")], - "2": [(11, "1"), (11, "start"), (12, "1"), ], + "2": [ + (11, "1"), + (11, "start"), + (12, "1"), + ], "3": [], "4": [(12, "2")], "5": [(13, "2")], @@ -321,12 +325,12 @@ def test_single_source_shortest_path(): "4": ["1", "4"], } assert ( - res_two.get_all_with_names() - == {"1": ["1"], "2": ["1", "2"], "3": ["1", "2", "3"], "4": ["1", "4"]} - ) or ( - res_two.get_all_with_names() - == {"1": ["1"], "3": ["1", "4", "3"], "2": ["1", "2"], "4": ["1", "4"]} - ) + res_two.get_all_with_names() + == {"1": ["1"], "2": ["1", "2"], "3": ["1", "2", "3"], "4": ["1", "4"]} + ) or ( + res_two.get_all_with_names() + == {"1": ["1"], "3": ["1", "4", "3"], "2": ["1", "2"], "4": ["1", "4"]} + ) def test_dijsktra_shortest_paths(): diff --git a/python/tests/test_disk_graph.py b/python/tests/test_disk_graph.py index 4b004feb67..37b4e88269 100644 --- a/python/tests/test_disk_graph.py +++ b/python/tests/test_disk_graph.py @@ -38,9 +38,12 @@ # in every test use with to create a temporary directory that will be deleted automatically # after the with block ends + def test_counts(): graph_dir = tempfile.TemporaryDirectory() - graph = DiskGraphStorage.load_from_pandas(graph_dir.name, edges, "src", "dst", "time") + graph = DiskGraphStorage.load_from_pandas( + graph_dir.name, edges, "time", "src", "dst" + ) graph = graph.to_events() assert graph.count_nodes() == 5 assert graph.count_edges() == 20 @@ -169,7 +172,7 @@ def test_disk_graph_type_filter(): read_chunk_size, concurrent_files, num_threads, - "node_type" + "node_type", ).to_events() assert g.count_nodes() == 1619 @@ -187,14 +190,20 @@ def test_disk_graph_type_filter(): assert g.nodes.type_filter([]).name.collect() == [] - neighbor_names = g.nodes.type_filter(["A"]).neighbours.type_filter(["B"]).name.collect() + neighbor_names = ( + g.nodes.type_filter(["A"]).neighbours.type_filter(["B"]).name.collect() + ) total_length = sum(len(names) for names in neighbor_names) assert total_length == 1023 - assert g.node("Comp175846").neighbours.type_filter(["A"]).name.collect() == ["Comp844043"] + assert g.node("Comp175846").neighbours.type_filter(["A"]).name.collect() == [ + "Comp844043" + ] assert g.node("Comp175846").neighbours.type_filter(["B"]).name.collect() == [] assert g.node("Comp175846").neighbours.type_filter([]).name.collect() == [] - assert g.node("Comp175846").neighbours.type_filter(["A", "B"]).name.collect() == ["Comp844043"] + assert g.node("Comp175846").neighbours.type_filter(["A", "B"]).name.collect() == [ + "Comp844043" + ] neighbor_names = g.node("Comp175846").neighbours.neighbours.name.collect() - assert len(neighbor_names) == 193 \ No newline at end of file + assert len(neighbor_names) == 193 diff --git a/python/tests/test_graph_conversions.py b/python/tests/test_graph_conversions.py index 5e24b394ed..c881c42dc7 100644 --- a/python/tests/test_graph_conversions.py +++ b/python/tests/test_graph_conversions.py @@ -21,23 +21,26 @@ def build_graph(): nodes_df["timestamp"] = pd.to_datetime(nodes_df["timestamp"]).astype( "datetime64[ms, UTC]" ) - - return Graph.load_from_pandas( - edge_df=edges_df, - edge_src="source", - edge_dst="destination", - edge_time="timestamp", - edge_properties=["data_size_MB"], - 
edge_layer="transaction_type", - edge_const_properties=["is_encrypted"], - edge_shared_const_properties={"datasource": "data/network_traffic_edges.csv"}, - node_df=nodes_df, - node_id="server_id", - node_time="timestamp", - node_properties=["OS_version", "primary_function", "uptime_days"], - node_const_properties=["server_name", "hardware_type"], - node_shared_const_properties={"datasource": "data/network_traffic_edges.csv"}, + g = Graph() + g.load_edges_from_pandas( + edges_df, + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB"], + layer_col="transaction_type", + constant_properties=["is_encrypted"], + shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, + ) + g.load_nodes_from_pandas( + df=nodes_df, + id="server_id", + time="timestamp", + properties=["OS_version", "primary_function", "uptime_days"], + constant_properties=["server_name", "hardware_type"], + shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, ) + return g def build_graph_without_datetime_type(): @@ -47,22 +50,26 @@ def build_graph_without_datetime_type(): nodes_df = pd.read_csv(base_dir / "data/network_traffic_nodes.csv") nodes_df["timestamp"] = pd.to_datetime(nodes_df["timestamp"]) - return Graph.load_from_pandas( - edge_df=edges_df, - edge_src="source", - edge_dst="destination", - edge_time="timestamp", - edge_properties=["data_size_MB"], - edge_layer="transaction_type", - edge_const_properties=["is_encrypted"], - edge_shared_const_properties={"datasource": "data/network_traffic_edges.csv"}, - node_df=nodes_df, - node_id="server_id", - node_time="timestamp", - node_properties=["OS_version", "primary_function", "uptime_days"], - node_const_properties=["server_name", "hardware_type"], - node_shared_const_properties={"datasource": "data/network_traffic_edges.csv"}, + g = Graph() + g.load_edges_from_pandas( + edges_df, + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB"], + layer_col="transaction_type", + constant_properties=["is_encrypted"], + shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, + ) + g.load_nodes_from_pandas( + df=nodes_df, + id="server_id", + time="timestamp", + properties=["OS_version", "primary_function", "uptime_days"], + constant_properties=["server_name", "hardware_type"], + shared_constant_properties={"datasource": "data/network_traffic_edges.csv"}, ) + return g def test_graph_timestamp_list_properties(): @@ -125,35 +132,35 @@ def test_py_vis(): [ { "color": "#97c2fc", - "id": 'ServerA', + "id": "ServerA", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerA", "shape": "dot", }, { "color": "#97c2fc", - "id": 'ServerB', + "id": "ServerB", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerB", "shape": "dot", }, { "color": "#97c2fc", - "id": 'ServerC', + "id": "ServerC", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerC", "shape": "dot", }, { "color": "#97c2fc", - "id": 'ServerD', + "id": "ServerD", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerD", "shape": "dot", }, { "color": "#97c2fc", - "id": 'ServerE', + "id": "ServerE", "image": "https://cdn-icons-png.flaticon.com/512/7584/7584620.png", "label": "ServerE", "shape": "dot", @@ -168,63 +175,63 @@ def test_py_vis(): "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerA', + "from": "ServerA", "title": "", - "to": 'ServerB', + "to": "ServerB", 
"value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerA', + "from": "ServerA", "title": "", - "to": 'ServerC', + "to": "ServerC", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerB', + "from": "ServerB", "title": "", - "to": 'ServerD', + "to": "ServerD", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerC', + "from": "ServerC", "title": "", - "to": 'ServerA', + "to": "ServerA", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerD', + "from": "ServerD", "title": "", - "to": 'ServerC', + "to": "ServerC", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerD', + "from": "ServerD", "title": "", - "to": 'ServerE', + "to": "ServerE", "value": 0.0, }, { "arrowStrikethrough": False, "arrows": "to", "color": "#000000", - "from": 'ServerE', + "from": "ServerE", "title": "", - "to": 'ServerB', + "to": "ServerB", "value": 0.0, }, ], diff --git a/python/tests/test_graphql.py b/python/tests/test_graphql.py index a1c7e9cea8..bac9f7fb72 100644 --- a/python/tests/test_graphql.py +++ b/python/tests/test_graphql.py @@ -13,7 +13,7 @@ def normalize_path(path): - return path.replace('\\', '/') + return path.replace("\\", "/") def test_failed_server_start_in_time(): @@ -51,7 +51,9 @@ def test_server_start_on_default_port(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -70,7 +72,9 @@ def test_server_start_on_custom_port(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -125,7 +129,14 @@ def test_send_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}, {"name": "shivam"}]} + "nodes": { + "list": [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + {"name": "shivam"}, + ] + } } } @@ -182,7 +193,14 @@ def test_send_graph_succeeds_if_graph_already_exists_at_namespace_with_overwrite query = """{graph(path: "shivam/g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}, {"name": "shivam"}]} + "nodes": { + "list": [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + {"name": "shivam"}, + ] + } } } @@ -192,7 +210,9 @@ def assert_graph_fetch(path): query = f"""{{ graph(path: "{path}") {{ nodes {{ list {{ name }} }} }} }}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -238,17 +258,23 @@ def assert_graph_fetch(path): path = "../shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str(excinfo.value) + assert "References to the parent dir are 
not allowed within the path:" in str( + excinfo.value + ) path = "./shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the current dir are not allowed within the path" in str(excinfo.value) + assert "References to the current dir are not allowed within the path" in str( + excinfo.value + ) path = "shivam/../../../../investigation/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str(excinfo.value) + assert "References to the parent dir are not allowed within the path:" in str( + excinfo.value + ) path = "//shivam/investigation/g" with pytest.raises(Exception) as excinfo: @@ -296,7 +322,9 @@ def test_upload_graph_succeeds_if_no_graph_found_with_same_name(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -346,7 +374,14 @@ def test_upload_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): query = """{graph(path: "g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}, {"name": "shivam"}]} + "nodes": { + "list": [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + {"name": "shivam"}, + ] + } } } @@ -369,7 +404,9 @@ def test_upload_graph_succeeds_if_no_graph_found_with_same_name_at_namespace(): query = """{graph(path: "shivam/g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}]} + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } } } @@ -419,7 +456,14 @@ def test_upload_graph_succeeds_if_graph_already_exists_at_namespace_with_overwri query = """{graph(path: "shivam/g") {nodes {list {name}}}}""" assert client.query(query) == { "graph": { - "nodes": {"list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}, {"name": "shivam"}]} + "nodes": { + "list": [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + {"name": "shivam"}, + ] + } } } @@ -440,7 +484,9 @@ def test_get_graph_fails_if_graph_not_found_at_namespace(): with RaphtoryServer(work_dir).start() as server: client = server.get_client() - query = """{ graph(path: "shivam/g1") { name, path, nodes { list { name } } } }""" + query = ( + """{ graph(path: "shivam/g1") { name, path, nodes { list { name } } } }""" + ) with pytest.raises(Exception) as excinfo: client.query(query) assert "Graph not found" in str(excinfo.value) @@ -461,8 +507,14 @@ def test_get_graph_succeeds_if_graph_found(): query = """{ graph(path: "g1") { name, path, nodes { list { name } } } }""" assert client.query(query) == { - 'graph': {'name': 'g1', 'nodes': {'list': [{'name': 'ben'}, {'name': 'hamza'}, {'name': 'haaroon'}]}, - 'path': 'g1'}} + "graph": { + "name": "g1", + "nodes": { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + }, + "path": "g1", + } + } def test_get_graph_succeeds_if_graph_found_at_namespace(): @@ -478,11 +530,15 @@ def test_get_graph_succeeds_if_graph_found_at_namespace(): os.makedirs(os.path.join(work_dir, "shivam"), exist_ok=True) g.save_to_file(os.path.join(work_dir, "shivam", "g2")) - query = """{ graph(path: "shivam/g2") { name, path, nodes { 
list { name } } } }""" + query = ( + """{ graph(path: "shivam/g2") { name, path, nodes { list { name } } } }""" + ) response = client.query(query) - assert response['graph']['name'] == 'g2' - assert response['graph']['nodes'] == {'list': [{'name': 'ben'}, {'name': 'hamza'}, {'name': 'haaroon'}]} - assert normalize_path(response['graph']['path']) == 'shivam/g2' + assert response["graph"]["name"] == "g2" + assert response["graph"]["nodes"] == { + "list": [{"name": "ben"}, {"name": "hamza"}, {"name": "haaroon"}] + } + assert normalize_path(response["graph"]["path"]) == "shivam/g2" def test_get_graphs_returns_emtpy_list_if_no_graphs_found(): @@ -492,9 +548,7 @@ def test_get_graphs_returns_emtpy_list_if_no_graphs_found(): # Assert if no graphs are discoverable query = """{ graphs { name, path } }""" - assert client.query(query) == { - 'graphs': {'name': [], 'path': []} - } + assert client.query(query) == {"graphs": {"name": [], "path": []}} def test_get_graphs_returns_graph_list_if_graphs_found(): @@ -516,15 +570,15 @@ def test_get_graphs_returns_graph_list_if_graphs_found(): query = """{ graphs { name, path } }""" response = client.query(query) sorted_response = { - 'graphs': { - 'name': sorted(response['graphs']['name']), - 'path': sorted(normalize_path(p) for p in response['graphs']['path']) + "graphs": { + "name": sorted(response["graphs"]["name"]), + "path": sorted(normalize_path(p) for p in response["graphs"]["path"]), } } assert sorted_response == { - 'graphs': { - 'name': ['g1', 'g2', 'g3'], - 'path': ['g1', 'shivam/g2', 'shivam/g3'] + "graphs": { + "name": ["g1", "g2", "g3"], + "path": ["g1", "shivam/g2", "shivam/g3"], } } @@ -553,10 +607,10 @@ def test_receive_graph_succeeds_if_graph_found(): g.save_to_file(os.path.join(work_dir, "g1")) query = """{ receiveGraph(path: "g1") }""" - received_graph = client.query(query)['receiveGraph'] + received_graph = client.query(query)["receiveGraph"] decoded_bytes = base64.b64decode(received_graph) - g = Graph.deserialise(decoded_bytes); + g = Graph.deserialise(decoded_bytes) assert g.nodes.name == ["ben", "hamza", "haaroon"] @@ -600,7 +654,7 @@ def test_receive_graph_succeeds_if_graph_found_at_namespace(): g.save_to_file(os.path.join(work_dir, "shivam", "g2")) query = """{ receiveGraph(path: "shivam/g2") }""" - received_graph = client.query(query)['receiveGraph'] + received_graph = client.query(query)["receiveGraph"] decoded_bytes = base64.b64decode(received_graph) @@ -738,9 +792,18 @@ def test_move_graph_succeeds(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_move_graph_using_client_api_succeeds(): @@ -775,9 +838,18 @@ def test_move_graph_using_client_api_succeeds(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + 
assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_move_graph_succeeds_at_same_namespace_as_graph(): @@ -819,9 +891,18 @@ def test_move_graph_succeeds_at_same_namespace_as_graph(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_move_graph_succeeds_at_diff_namespace_as_graph(): @@ -864,9 +945,18 @@ def test_move_graph_succeeds_at_diff_namespace_as_graph(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_copy_graph_fails_if_graph_not_found(): @@ -985,7 +1075,11 @@ def test_copy_graph_succeeds(): query = """{graph(path: "shivam/g3") { nodes {list {name}} }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] query = """{graph(path: "g4") { nodes {list {name}} @@ -997,8 +1091,14 @@ def test_copy_graph_succeeds(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_copy_graph_using_client_api_succeeds(): @@ -1019,7 +1119,11 @@ def test_copy_graph_using_client_api_succeeds(): query = """{graph(path: "shivam/g3") { nodes {list {name}} }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] query = """{graph(path: "ben/g4") { nodes {list {name}} @@ -1031,8 +1135,14 @@ def test_copy_graph_using_client_api_succeeds(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + 
assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_copy_graph_succeeds_at_same_namespace_as_graph(): @@ -1060,7 +1170,11 @@ def test_copy_graph_succeeds_at_same_namespace_as_graph(): query = """{graph(path: "shivam/g3") { nodes {list {name}} }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] query = """{graph(path: "shivam/g4") { nodes {list {name}} @@ -1072,8 +1186,14 @@ def test_copy_graph_succeeds_at_same_namespace_as_graph(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_copy_graph_succeeds_at_diff_namespace_as_graph(): @@ -1102,7 +1222,11 @@ def test_copy_graph_succeeds_at_diff_namespace_as_graph(): query = """{graph(path: "ben/g3") { nodes {list {name}} }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] query = """{graph(path: "shivam/g4") { nodes {list {name}} @@ -1114,8 +1238,14 @@ def test_copy_graph_succeeds_at_diff_namespace_as_graph(): }}""" result = client.query(query) - assert result['graph']['nodes']['list'] == [{'name': 'ben'}, {"name": "hamza"}, {'name': 'haaroon'}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None + assert result["graph"]["nodes"]["list"] == [ + {"name": "ben"}, + {"name": "hamza"}, + {"name": "haaroon"}, + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) def test_delete_graph_fails_if_graph_not_found(): @@ -1332,16 +1462,35 @@ def test_create_graph_succeeds(): }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, + ] + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert ( + 
result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_create_graph_succeeds_at_namespace(): @@ -1391,16 +1540,35 @@ def test_create_graph_succeeds_at_namespace(): }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} + ] + assert ( + result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 # Update Graph with new graph name tests (save as new graph name) @@ -1524,16 +1692,35 @@ def test_update_graph_with_new_graph_name_succeeds_if_parent_graph_belongs_to_di }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, + ] + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert ( + 
result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_with_new_graph_name_succeeds_if_parent_graph_belongs_to_same_namespace(): @@ -1585,16 +1772,35 @@ def test_update_graph_with_new_graph_name_succeeds_if_parent_graph_belongs_to_sa } }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} + ] + assert ( + result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_with_new_graph_name_succeeds_with_new_node_from_parent_graph_added_to_new_graph(): @@ -1656,16 +1862,33 @@ def test_update_graph_with_new_graph_name_succeeds_with_new_node_from_parent_gra }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'shivam', 'properties': {'temporal': {'get': {'values': ['engineering']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "shivam", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, ] - assert result['graph']['edges']['list'] == [] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [] + assert ( + 
result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_with_new_graph_name_succeeds_with_new_node_removed_from_new_graph(): @@ -1718,16 +1941,35 @@ def test_update_graph_with_new_graph_name_succeeds_with_new_node_removed_from_ne }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['creationTime']['value'] is not None - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} + ] + assert ( + result["graph"]["properties"]["constant"]["creationTime"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 # Update Graph tests (save graph as same graph name) @@ -1821,15 +2063,31 @@ def test_update_graph_succeeds_if_parent_graph_belongs_to_different_namespace(): }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, + ] + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + 
assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_succeeds_if_parent_graph_belongs_to_same_namespace(): @@ -1881,15 +2139,31 @@ def test_update_graph_succeeds_if_parent_graph_belongs_to_same_namespace(): } }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'hamza', 'properties': {'temporal': {'get': {'values': ['director']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "hamza", + "properties": {"temporal": {"get": {"values": ["director"]}}}, + }, ] - assert result['graph']['edges']['list'] == [{'properties': {'temporal': {'get': {'values': ['1']}}}}] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [ + {"properties": {"temporal": {"get": {"values": ["1"]}}}} + ] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_succeeds_with_new_node_from_parent_graph_added_to_new_graph(): @@ -1950,15 +2224,29 @@ def test_update_graph_succeeds_with_new_node_from_parent_graph_added_to_new_grap }""" result = client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, - {'name': 'shivam', 'properties': {'temporal': {'get': {'values': ['engineering']}}}} + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, + { + "name": "shivam", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, ] - assert result['graph']['edges']['list'] == [] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_succeeds_with_new_node_removed_from_new_graph(): @@ -2011,14 +2299,25 @@ def test_update_graph_succeeds_with_new_node_removed_from_new_graph(): }""" result = 
client.query(query) - assert result['graph']['nodes']['list'] == [ - {'name': 'ben', 'properties': {'temporal': {'get': {'values': ['engineering']}}}}, + assert result["graph"]["nodes"]["list"] == [ + { + "name": "ben", + "properties": {"temporal": {"get": {"values": ["engineering"]}}}, + }, ] - assert result['graph']['edges']['list'] == [] - assert result['graph']['properties']['constant']['lastOpened']['value'] is not None - assert result['graph']['properties']['constant']['lastUpdated']['value'] is not None - assert result['graph']['properties']['constant']['uiProps']['value'] == '{ "target": 6 : }' - assert result['graph']['properties']['constant']['isArchive']['value'] == 1 + assert result["graph"]["edges"]["list"] == [] + assert ( + result["graph"]["properties"]["constant"]["lastOpened"]["value"] is not None + ) + assert ( + result["graph"]["properties"]["constant"]["lastUpdated"]["value"] + is not None + ) + assert ( + result["graph"]["properties"]["constant"]["uiProps"]["value"] + == '{ "target": 6 : }' + ) + assert result["graph"]["properties"]["constant"]["isArchive"]["value"] == 1 def test_update_graph_last_opened_fails_if_graph_not_found(): @@ -2060,12 +2359,18 @@ def test_update_graph_last_opened_succeeds(): query_last_opened = """{ graph(path: "g1") { properties { constant { get(key: "lastOpened") { value } } } } }""" mutate_last_opened = """mutation { updateGraphLastOpened(path: "g1") }""" - assert client.query(query_last_opened) == {'graph': {'properties': {'constant': {'get': None}}}} - assert client.query(mutate_last_opened) == {'updateGraphLastOpened': True} - updated_last_opened1 = client.query(query_last_opened)['graph']['properties']['constant']['get']['value'] + assert client.query(query_last_opened) == { + "graph": {"properties": {"constant": {"get": None}}} + } + assert client.query(mutate_last_opened) == {"updateGraphLastOpened": True} + updated_last_opened1 = client.query(query_last_opened)["graph"]["properties"][ + "constant" + ]["get"]["value"] time.sleep(1) - assert client.query(mutate_last_opened) == {'updateGraphLastOpened': True} - updated_last_opened2 = client.query(query_last_opened)['graph']['properties']['constant']['get']['value'] + assert client.query(mutate_last_opened) == {"updateGraphLastOpened": True} + updated_last_opened2 = client.query(query_last_opened)["graph"]["properties"][ + "constant" + ]["get"]["value"] assert updated_last_opened2 > updated_last_opened1 @@ -2085,12 +2390,18 @@ def test_update_graph_last_opened_succeeds_at_namespace(): query_last_opened = """{ graph(path: "shivam/g2") { properties { constant { get(key: "lastOpened") { value } } } } }""" mutate_last_opened = """mutation { updateGraphLastOpened(path: "shivam/g2") }""" - assert client.query(query_last_opened) == {'graph': {'properties': {'constant': {'get': None}}}} - assert client.query(mutate_last_opened) == {'updateGraphLastOpened': True} - updated_last_opened1 = client.query(query_last_opened)['graph']['properties']['constant']['get']['value'] + assert client.query(query_last_opened) == { + "graph": {"properties": {"constant": {"get": None}}} + } + assert client.query(mutate_last_opened) == {"updateGraphLastOpened": True} + updated_last_opened1 = client.query(query_last_opened)["graph"]["properties"][ + "constant" + ]["get"]["value"] time.sleep(1) - assert client.query(mutate_last_opened) == {'updateGraphLastOpened': True} - updated_last_opened2 = client.query(query_last_opened)['graph']['properties']['constant']['get']['value'] + assert client.query(mutate_last_opened) 
== {"updateGraphLastOpened": True} + updated_last_opened2 = client.query(query_last_opened)["graph"]["properties"][ + "constant" + ]["get"]["value"] assert updated_last_opened2 > updated_last_opened1 @@ -2131,13 +2442,25 @@ def test_archive_graph_succeeds(): g.save_to_file(os.path.join(work_dir, "shivam", "g2")) query_is_archive = """{ graph(path: "g1") { properties { constant { get(key: "isArchive") { value } } } } }""" - assert client.query(query_is_archive) == {'graph': {'properties': {'constant': {'get': None}}}} + assert client.query(query_is_archive) == { + "graph": {"properties": {"constant": {"get": None}}} + } update_archive_graph = """mutation { archiveGraph(path: "g1", isArchive: 0) }""" assert client.query(update_archive_graph) == {"archiveGraph": True} - assert client.query(query_is_archive)['graph']['properties']['constant']['get']['value'] == 0 + assert ( + client.query(query_is_archive)["graph"]["properties"]["constant"]["get"][ + "value" + ] + == 0 + ) update_archive_graph = """mutation { archiveGraph(path: "g1", isArchive: 1) }""" assert client.query(update_archive_graph) == {"archiveGraph": True} - assert client.query(query_is_archive)['graph']['properties']['constant']['get']['value'] == 1 + assert ( + client.query(query_is_archive)["graph"]["properties"]["constant"]["get"][ + "value" + ] + == 1 + ) def test_archive_graph_succeeds_at_namespace(): @@ -2155,13 +2478,29 @@ def test_archive_graph_succeeds_at_namespace(): g.save_to_file(os.path.join(work_dir, "shivam", "g2")) query_is_archive = """{ graph(path: "shivam/g2") { properties { constant { get(key: "isArchive") { value } } } } }""" - assert client.query(query_is_archive) == {'graph': {'properties': {'constant': {'get': None}}}} - update_archive_graph = """mutation { archiveGraph(path: "shivam/g2", isArchive: 0) }""" + assert client.query(query_is_archive) == { + "graph": {"properties": {"constant": {"get": None}}} + } + update_archive_graph = ( + """mutation { archiveGraph(path: "shivam/g2", isArchive: 0) }""" + ) assert client.query(update_archive_graph) == {"archiveGraph": True} - assert client.query(query_is_archive)['graph']['properties']['constant']['get']['value'] == 0 - update_archive_graph = """mutation { archiveGraph(path: "shivam/g2", isArchive: 1) }""" + assert ( + client.query(query_is_archive)["graph"]["properties"]["constant"]["get"][ + "value" + ] + == 0 + ) + update_archive_graph = ( + """mutation { archiveGraph(path: "shivam/g2", isArchive: 1) }""" + ) assert client.query(update_archive_graph) == {"archiveGraph": True} - assert client.query(query_is_archive)['graph']['properties']['constant']['get']['value'] == 1 + assert ( + client.query(query_is_archive)["graph"]["properties"]["constant"]["get"][ + "value" + ] + == 1 + ) def test_graph_windows_and_layers_query(): @@ -2348,6 +2687,7 @@ def test_graph_properties_query(): key=lambda x: x["key"], ) + # def test_disk_graph_name(): # import pandas as pd # from raphtory import DiskGraphStorage diff --git a/python/tests/test_iterables.py b/python/tests/test_iterables.py index 8e8874a63e..c1d0edb0cd 100644 --- a/python/tests/test_iterables.py +++ b/python/tests/test_iterables.py @@ -148,7 +148,7 @@ def test_propiterable(): assert sorted(total) == [2, 17, 18, 35, 38] total = dict(zip(g.nodes.id, g.nodes.out_edges.properties.get("value_dec").sum())) - assert total == {'1': 32, '2': 5, '3': 3, '4': 15, '5': None} + assert total == {"1": 32, "2": 5, "3": 3, "4": 15, "5": None} total = g.nodes.out_edges.properties.get("value_dec").sum().sum() assert total == 
55 diff --git a/python/tests/test_load_from_pandas.py b/python/tests/test_load_from_pandas.py index e966d1a21b..6df04d40e9 100644 --- a/python/tests/test_load_from_pandas.py +++ b/python/tests/test_load_from_pandas.py @@ -19,9 +19,9 @@ def test_load_from_pandas(): df = pd.DataFrame( { + "time": [1, 2, 3, 4, 5], "src": [1, 2, 3, 4, 5], "dst": [2, 3, 4, 5, 6], - "time": [1, 2, 3, 4, 5], "weight": [1.0, 2.0, 3.0, 4.0, 5.0], "marbles": ["red", "blue", "green", "yellow", "purple"], } @@ -46,12 +46,12 @@ def assertions(g): assert g.nodes.id.collect() == expected_nodes assert edges == expected_edges - g = Graph.load_from_pandas(df, "src", "dst", "time", ["weight", "marbles"]) + g = Graph() + g.load_edges_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) - g = PersistentGraph.load_from_pandas( - df, "src", "dst", "time", ["weight", "marbles"] - ) + g = PersistentGraph() + g.load_edges_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) @@ -77,13 +77,13 @@ def assertions(exc_info): # Use pytest.raises to expect an exception with pytest.raises(Exception) as exc_info: - g = Graph.load_from_pandas(df, "src", "dst", "time", ["weight", "marbles"]) + g = Graph() + g.load_edges_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(exc_info) with pytest.raises(Exception) as exc_info: - g = PersistentGraph.load_from_pandas( - df, "src", "dst", "time", ["weight", "marbles"] - ) + g = PersistentGraph() + g.load_edges_from_pandas(df, "time", "src", "dst", ["weight", "marbles"]) assertions(exc_info) # Optionally, you can check the exception message or type @@ -148,13 +148,17 @@ def assertions(g): assert nodes == expected_nodes g = Graph() - g.load_nodes_from_pandas(nodes_df, "id", "time", "node_type", properties=["name"]) - g.load_edges_from_pandas(edges_df, "src", "dst", "time", ["weight", "marbles"]) + g.load_nodes_from_pandas( + nodes_df, "time", "id", node_type_col="node_type", properties=["name"] + ) + g.load_edges_from_pandas(edges_df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) g = PersistentGraph() - g.load_nodes_from_pandas(nodes_df, "id", "time", "node_type", properties=["name"]) - g.load_edges_from_pandas(edges_df, "src", "dst", "time", ["weight", "marbles"]) + g.load_nodes_from_pandas( + nodes_df, "time", "id", node_type_col="node_type", properties=["name"] + ) + g.load_edges_from_pandas(edges_df, "time", "src", "dst", ["weight", "marbles"]) assertions(g) @@ -210,32 +214,23 @@ def assertions(g): assert g.nodes.id.collect() == expected_node_ids assert edges == expected_edges - g = Graph.load_from_pandas( - edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_type="node_type", + g = Graph() + g.load_edges_from_pandas( + edges_df, time="time", src="src", dst="dst", properties=["weight", "marbles"] + ) + g.load_nodes_from_pandas( + df=nodes_df, time="time", id="id", properties=["name"], node_type="node_type" ) assertions(g) - g = PersistentGraph.load_from_pandas( - edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_type="node_type", + g = PersistentGraph() + g.load_edges_from_pandas( + edges_df, time="time", src="src", dst="dst", properties=["weight", "marbles"] ) + g.load_nodes_from_pandas( + df=nodes_df, time="time", id="id", 
properties=["name"], node_type="node_type" + ) + assertions(g) @@ -296,22 +291,44 @@ def assertions1(g): g = Graph() g.load_nodes_from_pandas( nodes_df, - "id", "time", - "node_type", + "id", + node_type_col="node_type", properties=["name"], - shared_const_properties={"tag": "test_tag"}, + shared_constant_properties={"tag": "test_tag"}, ) assertions1(g) g = PersistentGraph() g.load_nodes_from_pandas( nodes_df, - "id", "time", - "node_type", + "id", + node_type_col="node_type", properties=["name"], - shared_const_properties={"tag": "test_tag"}, + shared_constant_properties={"tag": "test_tag"}, + ) + assertions1(g) + + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id") + g.load_node_props_from_pandas( + nodes_df, + "id", + node_type_col="node_type", + constant_properties=["name"], + shared_constant_properties={"tag": "test_tag"}, + ) + assertions1(g) + + g = PersistentGraph() + g.load_nodes_from_pandas(nodes_df, "time", "id") + g.load_node_props_from_pandas( + nodes_df, + "id", + node_type_col="node_type", + constant_properties=["name"], + shared_constant_properties={"tag": "test_tag"}, ) assertions1(g) @@ -328,11 +345,11 @@ def assertions2(g): g = Graph() g.load_nodes_from_pandas( nodes_df, - "id", "time", - "node_type", + "id", + node_type_col="node_type", properties=["name"], - const_properties=["type"], + constant_properties=["type"], ) assertions2(g) @@ -341,9 +358,9 @@ def assertions2(g): nodes_df, "id", "time", - "node_type", + node_type_col="node_type", properties=["name"], - const_properties=["type"], + constant_properties=["type"], ) assertions2(g) @@ -375,28 +392,26 @@ def assertions3(g): g = Graph() g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", properties=["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="test_layer", - layer_in_df=False, ) assertions3(g) g = PersistentGraph() g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", properties=["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="test_layer", - layer_in_df=False, ) assertions3(g) @@ -416,13 +431,13 @@ def assertions4(g): g = Graph() g.load_edges_from_pandas( - edges_df, "src", "dst", "time", ["weight", "marbles"], layer="layers" + edges_df, "time", "src", "dst", ["weight", "marbles"], layer_col="layers" ) assertions4(g) g = PersistentGraph() g.load_edges_from_pandas( - edges_df, "src", "dst", "time", ["weight", "marbles"], layer="layers" + edges_df, "time", "src", "dst", ["weight", "marbles"], layer_col="layers" ) assertions4(g) @@ -437,33 +452,37 @@ def assertions5(g): ] assert g.layers(["test_layer"]).edges.src.id.collect() == [1, 2, 3, 4, 5] - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", - edge_layer="test_layer", - layer_in_df=False, - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_shared_const_properties={"type": "Person"}, + layer="test_layer", + ) + g.load_nodes_from_pandas( + df=nodes_df, + time="time", + id="id", + properties=["name"], + shared_constant_properties={"type": "Person"}, ) assertions5(g) - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + 
g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", - edge_layer="test_layer", - layer_in_df=False, - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_shared_const_properties={"type": "Person"}, + layer="test_layer", + ) + g.load_nodes_from_pandas( + df=nodes_df, + time="time", + id="id", + properties=["name"], + shared_constant_properties={"type": "Person"}, ) assertions5(g) @@ -489,31 +508,25 @@ def assertions6(g): 5, ] - g = Graph.load_from_pandas( - edges_df, - "src", - "dst", - "time", - edge_layer="layers", - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_const_properties=["type"], + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="layers") + g.load_nodes_from_pandas( + df=nodes_df, + time="time", + id="id", + properties=["name"], + constant_properties=["type"], ) assertions6(g) - g = PersistentGraph.load_from_pandas( - edges_df, - "src", - "dst", - "time", - edge_layer="layers", - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - node_const_properties=["type"], + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="layers") + g.load_nodes_from_pandas( + df=nodes_df, + time="time", + id="id", + properties=["name"], + constant_properties=["type"], ) assertions6(g) @@ -535,23 +548,21 @@ def assertions7(g): "test_tag", ] - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - edge_layer="layers", + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], + layer_col="layers", ) + g.load_nodes_from_pandas(df=nodes_df, time="time", id="id", properties=["name"]) g.load_node_props_from_pandas( nodes_df, "id", - const_properties=["type"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["type"], + shared_constant_properties={"tag": "test_tag"}, ) assertions7(g) @@ -575,29 +586,28 @@ def assertions8(g): edges_df, "src", "dst", - const_properties=["marbles_const"], - shared_const_properties={"tag": "test_tag"}, - layer="layers", + constant_properties=["marbles_const"], + shared_constant_properties={"tag": "test_tag"}, + layer_col="layers", ) assertions8(g) - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + + g.load_edges_from_pandas( edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], - edge_layer="layers", + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], + layer_col="layers", ) + g.load_nodes_from_pandas(df=nodes_df, time="time", id="id", properties=["name"]) g.load_node_props_from_pandas( nodes_df, "id", - const_properties=["type"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["type"], + shared_constant_properties={"tag": "test_tag"}, ) assertions7(g) @@ -605,9 +615,9 @@ def assertions8(g): edges_df, "src", "dst", - const_properties=["marbles_const"], - shared_const_properties={"tag": "test_tag"}, - layer="layers", + constant_properties=["marbles_const"], + shared_constant_properties={"tag": "test_tag"}, + layer_col="layers", ) assertions8(g) @@ -624,35 +634,35 @@ def assertions_layers_in_df(g): assert g.layers(["layer 
3"]).edges.src.id.collect() == [3] with pytest.raises( Exception, - match=re.escape("Invalid layer: test_layer. Valid layers: _default, layer 1, layer 2, layer 3, layer 4, layer 5"), + match=re.escape( + "Invalid layer: test_layer. Valid layers: _default, layer 1, layer 2, layer 3, layer 4, layer 5" + ), ): g.layers(["test_layer"]) g = Graph() g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", ["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, - layer="layers", - layer_in_df=True, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, + layer_col="layers", ) assertions_layers_in_df(g) g = PersistentGraph() g.load_edges_from_pandas( edges_df, + "time", "src", "dst", - "time", ["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, - layer="layers", - layer_in_df=True, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, + layer_col="layers", ) assertions_layers_in_df(g) @@ -682,11 +692,12 @@ def test_missing_columns(): "columns are not present within the dataframe: not_src, not_dst, not_time" ), ): - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, - edge_src="not_src", - edge_dst="not_dst", - edge_time="not_time", + time="not_time", + src="not_src", + dst="not_dst", ) with pytest.raises( @@ -695,11 +706,12 @@ def test_missing_columns(): "columns are not present within the dataframe: not_src, not_dst, not_time" ), ): - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + g.load_edges_from_pandas( edges_df, - edge_src="not_src", - edge_dst="not_dst", - edge_time="not_time", + time="not_time", + src="not_src", + dst="not_dst", ) with pytest.raises( @@ -708,18 +720,16 @@ def test_missing_columns(): "columns are not present within the dataframe: not_weight, bleep_bloop" ), ): - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["not_weight", "marbles"], - edge_const_properties=["bleep_bloop"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], + time="time", + src="src", + dst="dst", + properties=["not_weight", "marbles"], + constant_properties=["bleep_bloop"], ) + g.load_nodes_from_pandas(df=nodes_df, time="time", id="id", properties=["name"]) with pytest.raises( Exception, @@ -727,18 +737,16 @@ def test_missing_columns(): "columns are not present within the dataframe: not_weight, bleep_bloop" ), ): - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + g.load_edges_from_pandas( edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["not_weight", "marbles"], - edge_const_properties=["bleep_bloop"], - node_df=nodes_df, - node_id="id", - node_time="time", - node_properties=["name"], + time="time", + src="src", + dst="dst", + properties=["not_weight", "marbles"], + constant_properties=["bleep_bloop"], ) + g.load_nodes_from_pandas(df=nodes_df, time="time", id="id", properties=["name"]) with pytest.raises( Exception, @@ -746,16 +754,16 @@ def test_missing_columns(): "columns are not present within the dataframe: not_id, not_time, not_name" ), ): - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], 
- node_df=nodes_df, - node_id="not_id", - node_time="not_time", - node_properties=["not_name"], + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], + ) + g.load_nodes_from_pandas( + df=nodes_df, time="not_time", id="not_id", properties=["not_name"] ) with pytest.raises( @@ -764,16 +772,16 @@ def test_missing_columns(): "columns are not present within the dataframe: not_id, not_time, not_name" ), ): - g = PersistentGraph.load_from_pandas( + g = PersistentGraph() + g.load_edges_from_pandas( edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], - node_df=nodes_df, - node_id="not_id", - node_time="not_time", - node_properties=["not_name"], + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], + ) + g.load_nodes_from_pandas( + df=nodes_df, id="not_id", time="not_time", properties=["not_name"] ) with pytest.raises( @@ -787,7 +795,7 @@ def test_missing_columns(): edges_df, src="sauce", dst="dist", - const_properties=["wait", "marples"], + constant_properties=["wait", "marples"], ) with pytest.raises( @@ -801,7 +809,7 @@ def test_missing_columns(): edges_df, src="sauce", dst="dist", - const_properties=["wait", "marples"], + constant_properties=["wait", "marples"], ) with pytest.raises( @@ -814,7 +822,7 @@ def test_missing_columns(): g.load_node_props_from_pandas( nodes_df, id="sauce", - const_properties=["wait", "marples"], + constant_properties=["wait", "marples"], ) with pytest.raises( @@ -827,7 +835,7 @@ def test_missing_columns(): g.load_node_props_from_pandas( nodes_df, id="sauce", - const_properties=["wait", "marples"], + constant_properties=["wait", "marples"], ) @@ -838,12 +846,13 @@ def test_none_columns_edges(): with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - Graph.load_from_pandas(edges_df, "src", "dst", "time") + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst") with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - PersistentGraph.load_from_pandas(edges_df, "src", "dst", "time") + PersistentGraph().load_edges_from_pandas(edges_df, "time", "src", "dst") edges_df = pd.DataFrame( {"src": [1, 2, 3, 4, 5], "dst": [2, 3, 4, None, 6], "time": [1, 2, 3, 4, 5]} @@ -851,11 +860,11 @@ def test_none_columns_edges(): with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - Graph.load_from_pandas(edges_df, "src", "dst", "time") + Graph().load_edges_from_pandas(edges_df, "time", "src", "dst") with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - PersistentGraph.load_from_pandas(edges_df, "src", "dst", "time") + PersistentGraph().load_edges_from_pandas(edges_df, "time", "src", "dst") edges_df = pd.DataFrame( {"src": [1, 2, 3, 4, 5], "dst": [2, 3, 4, 5, 6], "time": [1, 2, None, 4, 5]} @@ -863,11 +872,11 @@ def test_none_columns_edges(): with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - Graph.load_from_pandas(edges_df, "src", "dst", "time") + Graph().load_edges_from_pandas(edges_df, "time", "src", "dst") with pytest.raises( Exception, match=re.escape("Ensure these contain no NaN, Null or None values.") ): - PersistentGraph.load_from_pandas(edges_df, "src", "dst", "time") + PersistentGraph().load_edges_from_pandas(edges_df, "time", "src", "dst") def test_loading_list_as_properties(): @@ -881,12 +890,13 @@ def 
test_loading_list_as_properties(): } ) - g = Graph.load_from_pandas( + g = Graph() + g.load_edges_from_pandas( df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["marbles"], + time="time", + src="src", + dst="dst", + properties=["marbles"], ) assert g.edge(1, 2).properties["marbles"] == ["red"] @@ -902,8 +912,8 @@ def test_loading_list_as_properties(): g = Graph() g.load_nodes_from_pandas( df=df, - id="id", time="time", + id="id", properties=["marbles"], ) @@ -927,12 +937,12 @@ def test_unparsable_props(): """"Could not convert '2.0' with type str: tried to convert to double", 'Conversion failed for column weight with type object'""" ), ): - Graph.load_from_pandas( + Graph().load_edges_from_pandas( edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight"], + time="time", + src="src", + dst="dst", + properties=["weight"], ) with pytest.raises( Exception, @@ -940,12 +950,12 @@ def test_unparsable_props(): """"Could not convert '2.0' with type str: tried to convert to double", 'Conversion failed for column weight with type object'""" ), ): - PersistentGraph.load_from_pandas( + PersistentGraph().load_edges_from_pandas( edges_df, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight"], + time="time", + src="src", + dst="dst", + properties=["weight"], ) @@ -995,48 +1005,38 @@ def edges_assertions(g): assert g.count_nodes() == 6 g = Graph() - g.load_nodes_from_pandas(nodes_df, "id", "time") + g.load_nodes_from_pandas(nodes_df, "time", "id") nodes_assertions(g) g = PersistentGraph() - g.load_nodes_from_pandas(nodes_df, "id", "time") + g.load_nodes_from_pandas(nodes_df, "time", "id") nodes_assertions(g) g = Graph() - g.load_nodes_from_pandas( - nodes_df, "id", "time", node_type="node_type", node_type_in_df=False - ) + g.load_nodes_from_pandas(nodes_df, "time", "id", node_type="node_type") nodes_assertions2(g) g = PersistentGraph() - g.load_nodes_from_pandas( - nodes_df, "id", "time", node_type="node_type", node_type_in_df=False - ) + g.load_nodes_from_pandas(nodes_df, "time", "id", node_type="node_type") nodes_assertions2(g) g = Graph() - g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type", node_type_in_df=False - ) + g.load_nodes_from_pandas(nodes_df2, "time", "id", node_type="node_type") nodes_assertions2(g) g = PersistentGraph() - g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type", node_type_in_df=False - ) + g.load_nodes_from_pandas(nodes_df2, "time", "id", node_type="node_type") nodes_assertions2(g) g = Graph() - g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type", node_type_in_df=True - ) + g.load_nodes_from_pandas(nodes_df2, "time", "id", node_type_col="node_type") nodes_assertions3(g) g = PersistentGraph() - g.load_nodes_from_pandas( - nodes_df2, "id", "time", node_type="node_type", node_type_in_df=True - ) + g.load_nodes_from_pandas(nodes_df2, "time", "id", node_type_col="node_type") nodes_assertions3(g) - g = Graph.load_from_pandas(edges_df, "src", "dst", "time") + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst") edges_assertions(g) - g = Graph.load_from_pandas(edges_df, "src", "dst", "time") + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst") edges_assertions(g) @@ -1057,7 +1057,278 @@ def test_load_edge_deletions_from_pandas(): ) g = PersistentGraph() - g.load_edges_from_pandas(edges_df, "src", "dst", "time") + g.load_edges_from_pandas(edges_df, "time", "src", "dst") assert 
g.window(10, 12).edges.src.id.collect() == [1, 2, 3, 4, 5] - g.load_edges_deletions_from_pandas(edge_dels_df, "src", "dst", "time") + g.load_edge_deletions_from_pandas(edge_dels_df, "time", "src", "dst") assert g.window(10, 12).edges.src.id.collect() == [1, 2, 5] + + +def test_edge_both_option_failures_pandas(): + edges_df = pd.DataFrame( + { + "src": [1, 2, 3, 4, 5], + "dst": [2, 3, 4, 5, 6], + "time": [1, 2, 3, 4, 5], + "weight": [1.0, 2.0, 3.0, 4.0, 5.0], + "marbles": ["red", "blue", "green", "yellow", "purple"], + } + ) + # CHECK ALL EDGE FUNCTIONS ON GRAPH FAIL WITH BOTH LAYER AND LAYER_COL + g = Graph() + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edges_from_pandas( + edges_df, "time", "src", "dst", layer="blah", layer_col="marbles" + ) + + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer="blah", layer_col="marbles" + ) + + # CHECK IF JUST LAYER WORKS + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer="blah") + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer="blah") + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer="blah", constant_properties=["marbles"] + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + assert g.layer("blah").edges.properties.get("marbles") == [ + "red", + "blue", + "green", + "yellow", + "purple", + ] + + # CHECK IF JUST LAYER_COL WORKS + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="marbles") + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + g = Graph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="marbles") + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer_col="marbles", constant_properties=["marbles"] + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + assert g.edges.properties.get("marbles").collect() == [ + {"red": "red"}, + {"blue": "blue"}, + {"green": "green"}, + {"yellow": "yellow"}, + {"purple": "purple"}, + ] + + g = PersistentGraph() + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edges_from_pandas( + edges_df, "time", "src", "dst", layer="blah", layer_col="marbles" + ) + + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer="blah", layer_col="marbles" + ) + + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)'\)", + ): + g.load_edge_deletions_from_pandas( + edges_df, "time", "src", "dst", layer="blah", layer_col="marbles" + ) + + # CHECK IF JUST LAYER WORKS + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", 
layer="blah") + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer="blah") + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer="blah", constant_properties=["marbles"] + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + assert g.layer("blah").edges.properties.get("marbles") == [ + "red", + "blue", + "green", + "yellow", + "purple", + ] + + g = PersistentGraph() + g.load_edge_deletions_from_pandas(edges_df, "time", "src", "dst", layer="blah") + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + # CHECK IF JUST LAYER_COL WORKS + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="marbles") + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + g = PersistentGraph() + g.load_edges_from_pandas(edges_df, "time", "src", "dst", layer_col="marbles") + g.load_edge_props_from_pandas( + edges_df, "src", "dst", layer_col="marbles", constant_properties=["marbles"] + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + assert g.edges.properties.get("marbles").collect() == [ + {"red": "red"}, + {"blue": "blue"}, + {"green": "green"}, + {"yellow": "yellow"}, + {"purple": "purple"}, + ] + + g = PersistentGraph() + g.load_edge_deletions_from_pandas( + edges_df, "time", "src", "dst", layer_col="marbles" + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + +def test_node_both_option_failures_pandas(): + nodes_df = pd.DataFrame( + { + "id": [1, 2, 3, 4, 5, 6], + "name": ["Alice", "Bob", "Carol", "Dave", "Eve", "Frank"], + "time": [1, 2, 3, 4, 5, 6], + "node_type": ["P1", "P2", "P3", "P4", "P5", "P6"], + } + ) + # CHECK ALL NODE FUNCTIONS ON GRAPH FAIL WITH BOTH NODE_TYPE AND NODE_TYPE_COL + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"node_type\", \"node_type_col\"\)'\)", + ): + g = Graph() + g.load_nodes_from_pandas( + nodes_df, "time", "id", node_type="node_type", node_type_col="node_type" + ) + + with pytest.raises( + Exception, + match=r"GraphLoadException\('WrongNumOfArgs\(\"node_type\", \"node_type_col\"\)'\)", + ): + g = Graph() + g.load_node_props_from_pandas( + nodes_df, "id", node_type="node_type", node_type_col="node_type" + ) + + # CHECK IF JUST NODE_TYPE WORKS + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id", node_type="node_type") + assert g.nodes.node_type.collect() == [ + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + ] + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id") + g.load_node_props_from_pandas(nodes_df, "id", node_type="node_type") + assert g.nodes.node_type.collect() == [ + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + ] + + # CHECK IF JUST 
NODE_TYPE_COL WORKS + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id", node_type_col="node_type") + assert g.nodes.node_type.collect() == ["P1", "P2", "P3", "P4", "P5", "P6"] + g = Graph() + g.load_nodes_from_pandas(nodes_df, "time", "id") + g.load_node_props_from_pandas(nodes_df, "id", node_type_col="node_type") + assert g.nodes.node_type.collect() == ["P1", "P2", "P3", "P4", "P5", "P6"] diff --git a/python/tests/test_load_from_parquet.py b/python/tests/test_load_from_parquet.py index 6785df097a..5b8b62e48a 100644 --- a/python/tests/test_load_from_parquet.py +++ b/python/tests/test_load_from_parquet.py @@ -1,10 +1,10 @@ import os import re -import tempfile - import pyarrow as pa import pyarrow.parquet as pq import pytest +import tempfile +import pandas as pd from raphtory import Graph, PersistentGraph @@ -14,7 +14,9 @@ def parquet_files(): dirname = tempfile.TemporaryDirectory() nodes_parquet_file_path = os.path.join(dirname.name, "parquet", "nodes.parquet") edges_parquet_file_path = os.path.join(dirname.name, "parquet", "edges.parquet") - edge_deletions_parquet_file_path = os.path.join(dirname.name, "parquet", "edges_deletions.parquet") + edge_deletions_parquet_file_path = os.path.join( + dirname.name, "parquet", "edges_deletions.parquet" + ) os.makedirs(os.path.dirname(nodes_parquet_file_path), exist_ok=True) @@ -22,8 +24,15 @@ def parquet_files(): "id": [1, 2, 3, 4, 5, 6], "name": ["Alice", "Bob", "Carol", "Dave", "Eve", "Frank"], "time": [1, 2, 3, 4, 5, 6], - "type": ["Person 1", "Person 2", "Person 3", "Person 4", "Person 5", "Person 6"], - "node_type": ["p", "p", "p", "p", "p", "p"], + "type": [ + "Person 1", + "Person 2", + "Person 3", + "Person 4", + "Person 5", + "Person 6", + ], + "node_type": ["p1", "p2", "p3", "p4", "p5", "p6"], } table = pa.table(data) @@ -52,7 +61,11 @@ def parquet_files(): table = pa.table(data) pq.write_table(table, edge_deletions_parquet_file_path) - print("""Created edges_deletions.parquet at loc = {}""".format(edge_deletions_parquet_file_path)) + print( + """Created edges_deletions.parquet at loc = {}""".format( + edge_deletions_parquet_file_path + ) + ) yield nodes_parquet_file_path, edges_parquet_file_path, edge_deletions_parquet_file_path @@ -96,12 +109,12 @@ def assert_expected_edges(g): def assert_expected_node_types(g): assert g.nodes.node_type == [ - "p", - "p", - "p", - "p", - "p", - "p", + "p1", + "p2", + "p3", + "p4", + "p5", + "p6", ] @@ -139,13 +152,13 @@ def assert_expected_node_property_dept(g): def assert_expected_edge_properties(g): - assert g.layers( - ["layer 1", "layer 2", "layer 3"] - ).edges.properties.constant.get("marbles_const").collect() == [ - {"layer 1": "red"}, - {"layer 2": "blue"}, - {"layer 3": "green"}, - ] + assert g.layers(["layer 1", "layer 2", "layer 3"]).edges.properties.constant.get( + "marbles_const" + ).collect() == [ + {"layer 1": "red"}, + {"layer 2": "blue"}, + {"layer 3": "green"}, + ] assert g.edges.properties.constant.get("tag").collect() == [ {"layer 1": "test_tag"}, {"layer 2": "test_tag"}, @@ -180,14 +193,31 @@ def assert_expected_edge_properties_test_layer(g): def assert_expected_layers(g): - assert g.unique_layers == ["_default", "layer 1", "layer 2", "layer 3", "layer 4", "layer 5"] + assert g.unique_layers == [ + "_default", + "layer 1", + "layer 2", + "layer 3", + "layer 4", + "layer 5", + ] assert g.layers(["layer 1"]).edges.src.id.collect() == [1] assert g.layers(["layer 1", "layer 2"]).edges.src.id.collect() == [1, 2] - assert g.layers(["layer 1", "layer 2", "layer 
3"]).edges.src.id.collect() == [1, 2, 3] - assert g.layers(["layer 1", "layer 4", "layer 5"]).edges.src.id.collect() == [1, 4, 5] + assert g.layers(["layer 1", "layer 2", "layer 3"]).edges.src.id.collect() == [ + 1, + 2, + 3, + ] + assert g.layers(["layer 1", "layer 4", "layer 5"]).edges.src.id.collect() == [ + 1, + 4, + 5, + ] with pytest.raises( - Exception, - match=re.escape("Invalid layer: test_layer. Valid layers: _default, layer 1, layer 2, layer 3, layer 4, layer 5"), + Exception, + match=re.escape( + "Invalid layer: test_layer. Valid layers: _default, layer 1, layer 2, layer 3, layer 4, layer 5" + ), ): g.layers(["test_layer"]) @@ -198,19 +228,26 @@ def assert_expected_test_layer(g): def test_load_from_parquet_graphs(parquet_files): - nodes_parquet_file_path, edges_parquet_file_path, edges_deletions_parquet_file_path = parquet_files - - g = Graph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_type="node_type", + ( + nodes_parquet_file_path, + edges_parquet_file_path, + edges_deletions_parquet_file_path, + ) = parquet_files + + g = Graph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + time="time", + src="src", + dst="dst", + properties=["weight", "marbles"], + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + node_type_col="node_type", ) assert_expected_nodes(g) assert_expected_edges(g) @@ -218,10 +255,10 @@ def test_load_from_parquet_graphs(parquet_files): g = Graph() g.load_nodes_from_parquet( parquet_path=nodes_parquet_file_path, - id="id", time="time", - node_type="node_type", - properties=["name"] + id="id", + node_type_col="node_type", + properties=["name"], ) g.load_edges_from_parquet( parquet_path=edges_parquet_file_path, @@ -229,7 +266,7 @@ def test_load_from_parquet_graphs(parquet_files): dst="dst", time="time", properties=["weight", "marbles"], - layer="layers" + layer_col="layers", ) assert_expected_nodes(g) assert_expected_edges(g) @@ -238,8 +275,8 @@ def test_load_from_parquet_graphs(parquet_files): g.load_node_props_from_parquet( parquet_path=nodes_parquet_file_path, id="id", - const_properties=["type"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["type"], + shared_constant_properties={"tag": "test_tag"}, ) assert_expected_node_property_tag(g) assert_expected_node_property_type(g) @@ -248,9 +285,9 @@ def test_load_from_parquet_graphs(parquet_files): parquet_path=edges_parquet_file_path, src="src", dst="dst", - const_properties=["marbles_const"], - shared_const_properties={"tag": "test_tag"}, - layer="layers", + constant_properties=["marbles_const"], + shared_constant_properties={"tag": "test_tag"}, + layer_col="layers", ) assert_expected_edge_properties(g) assert_expected_layers(g) @@ -260,9 +297,9 @@ def test_load_from_parquet_graphs(parquet_files): parquet_path=nodes_parquet_file_path, id="id", time="time", - node_type="node_type", + node_type_col="node_type", properties=["name"], - shared_const_properties={"tag": "test_tag"}, + shared_constant_properties={"tag": "test_tag"}, ) assert_expected_node_types(g) assert_expected_node_property_tag(g) @@ -274,60 +311,71 @@ def test_load_from_parquet_graphs(parquet_files): dst="dst", time="time", properties=["weight", "marbles"], - const_properties=["marbles_const"], - 
shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="test_layer", - layer_in_df=False, ) assert_expected_edge_properties_test_layer(g) assert_expected_test_layer(g) - g = Graph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_layer="test_layer", - layer_in_df=False, - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_shared_const_properties={"dept": "Sales"}, + g = Graph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + time="time", + src="src", + dst="dst", + layer="test_layer", + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + shared_constant_properties={"dept": "Sales"}, ) assert_expected_test_layer(g) assert_expected_node_property_dept(g) - g = Graph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_layer="layers", - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_const_properties=["type"], + g = Graph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + src="src", + dst="dst", + time="time", + layer_col="layers", + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + constant_properties=["type"], ) assert_expected_node_property_type(g) assert_expected_layers(g) def test_load_from_parquet_persistent_graphs(parquet_files): - nodes_parquet_file_path, edges_parquet_file_path, edges_deletions_parquet_file_path = parquet_files - - g = PersistentGraph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_properties=["weight", "marbles"], - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_type="node_type", + ( + nodes_parquet_file_path, + edges_parquet_file_path, + edges_deletions_parquet_file_path, + ) = parquet_files + + g = PersistentGraph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + src="src", + dst="dst", + time="time", + properties=["weight", "marbles"], + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + node_type_col="node_type", ) assert_expected_nodes(g) assert_expected_edges(g) @@ -338,7 +386,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): id="id", time="time", node_type="node_type", - properties=["name"] + properties=["name"], ) g.load_edges_from_parquet( parquet_path=edges_parquet_file_path, @@ -346,7 +394,7 @@ def test_load_from_parquet_persistent_graphs(parquet_files): dst="dst", time="time", properties=["weight", "marbles"], - layer="layers" + layer_col="layers", ) assert_expected_nodes(g) assert_expected_edges(g) @@ -355,8 +403,8 @@ def test_load_from_parquet_persistent_graphs(parquet_files): g.load_node_props_from_parquet( parquet_path=nodes_parquet_file_path, id="id", - const_properties=["type"], - shared_const_properties={"tag": "test_tag"}, + constant_properties=["type"], + shared_constant_properties={"tag": "test_tag"}, ) assert_expected_node_property_tag(g) assert_expected_node_property_type(g) @@ -365,9 +413,9 @@ def 
test_load_from_parquet_persistent_graphs(parquet_files): parquet_path=edges_parquet_file_path, src="src", dst="dst", - const_properties=["marbles_const"], - shared_const_properties={"tag": "test_tag"}, - layer="layers", + constant_properties=["marbles_const"], + shared_constant_properties={"tag": "test_tag"}, + layer_col="layers", ) assert_expected_edge_properties(g) assert_expected_layers(g) @@ -375,11 +423,11 @@ def test_load_from_parquet_persistent_graphs(parquet_files): g = PersistentGraph() g.load_nodes_from_parquet( parquet_path=nodes_parquet_file_path, - id="id", time="time", - node_type="node_type", + id="id", + node_type_col="node_type", properties=["name"], - shared_const_properties={"tag": "test_tag"}, + shared_constant_properties={"tag": "test_tag"}, ) assert_expected_node_types(g) assert_expected_node_property_tag(g) @@ -387,45 +435,49 @@ def test_load_from_parquet_persistent_graphs(parquet_files): g = PersistentGraph() g.load_edges_from_parquet( parquet_path=edges_parquet_file_path, + time="time", src="src", dst="dst", - time="time", properties=["weight", "marbles"], - const_properties=["marbles_const"], - shared_const_properties={"type": "Edge", "tag": "test_tag"}, + constant_properties=["marbles_const"], + shared_constant_properties={"type": "Edge", "tag": "test_tag"}, layer="test_layer", - layer_in_df=False, ) assert_expected_edge_properties_test_layer(g) assert_expected_test_layer(g) - g = Graph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_layer="test_layer", - layer_in_df=False, - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_shared_const_properties={"dept": "Sales"}, + g = Graph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + src="src", + dst="dst", + time="time", + layer="test_layer", + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + shared_constant_properties={"dept": "Sales"}, ) assert_expected_test_layer(g) assert_expected_node_property_dept(g) - g = PersistentGraph.load_from_parquet( - edge_parquet_path=edges_parquet_file_path, - edge_src="src", - edge_dst="dst", - edge_time="time", - edge_layer="layers", - node_parquet_path=nodes_parquet_file_path, - node_id="id", - node_time="time", - node_properties=["name"], - node_const_properties=["type"], + g = PersistentGraph() + g.load_edges_from_parquet( + parquet_path=edges_parquet_file_path, + src="src", + dst="dst", + time="time", + layer_col="layers", + ) + g.load_nodes_from_parquet( + parquet_path=nodes_parquet_file_path, + time="time", + id="id", + properties=["name"], + constant_properties=["type"], ) assert_expected_node_property_type(g) assert_expected_layers(g) @@ -433,16 +485,342 @@ def test_load_from_parquet_persistent_graphs(parquet_files): g = PersistentGraph() g.load_edges_from_parquet( parquet_path=edges_parquet_file_path, + time="time", src="src", dst="dst", - time="time", ) assert g.window(10, 12).edges.src.id.collect() == [1, 2, 3, 4, 5] - g.load_edges_deletions_from_parquet( + g.load_edge_deletions_from_parquet( parquet_path=edges_deletions_parquet_file_path, + time="time", src="src", dst="dst", - time="time" ) assert g.window(10, 12).edges.src.id.collect() == [1, 2, 5] + +def test_edge_both_option_failures_parquet(parquet_files): + ( + nodes_parquet_file_path, + edges_parquet_file_path, + edges_deletions_parquet_file_path, + ) = parquet_files + # CHECK ALL 
EDGE FUNCTIONS ON GRAPH FAIL WITH BOTH LAYER AND LAYER_COL + g = Graph() + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edges_from_parquet( + edges_parquet_file_path, + "time", + "src", + "dst", + layer="blah", + layer_col="marbles", + ) + + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edge_props_from_parquet( + edges_parquet_file_path, "src", "dst", layer="blah", layer_col="marbles" + ) + + # CHECK IF JUST LAYER WORKS + g = Graph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + g = Graph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + g.load_edge_props_from_parquet( + edges_parquet_file_path, + "src", + "dst", + layer="blah", + constant_properties=["marbles"], + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + assert g.layer("blah").edges.properties.get("marbles") == [ + "red", + "blue", + "green", + "yellow", + "purple", + ] + + # CHECK IF JUST LAYER_COL WORKS + g = Graph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + g = Graph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + g.load_edge_props_from_parquet( + edges_parquet_file_path, + "src", + "dst", + layer_col="marbles", + constant_properties=["marbles"], + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + assert g.edges.properties.get("marbles").collect() == [ + {"red": "red"}, + {"blue": "blue"}, + {"green": "green"}, + {"yellow": "yellow"}, + {"purple": "purple"}, + ] + + g = PersistentGraph() + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edges_from_parquet( + edges_parquet_file_path, + "time", + "src", + "dst", + layer="blah", + layer_col="marbles", + ) + + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edge_props_from_parquet( + edges_parquet_file_path, "src", "dst", layer="blah", layer_col="marbles" + ) + + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"layer_name\", \"layer_col\"\)", + ): + g.load_edge_deletions_from_parquet( + edges_parquet_file_path, + "time", + "src", + "dst", + layer="blah", + layer_col="marbles", + ) + + # CHECK IF JUST LAYER WORKS + g = PersistentGraph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + g = 
PersistentGraph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + g.load_edge_props_from_parquet( + edges_parquet_file_path, + "src", + "dst", + layer="blah", + constant_properties=["marbles"], + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + assert g.layer("blah").edges.properties.get("marbles") == [ + "red", + "blue", + "green", + "yellow", + "purple", + ] + + g = PersistentGraph() + g.load_edge_deletions_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer="blah" + ) + assert g.edges.layer_names.collect() == [ + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ["blah"], + ] + assert g.unique_layers == ["_default", "blah"] + + # CHECK IF JUST LAYER_COL WORKS + g = PersistentGraph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + g = PersistentGraph() + g.load_edges_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + g.load_edge_props_from_parquet( + edges_parquet_file_path, + "src", + "dst", + layer_col="marbles", + constant_properties=["marbles"], + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + assert g.edges.properties.get("marbles").collect() == [ + {"red": "red"}, + {"blue": "blue"}, + {"green": "green"}, + {"yellow": "yellow"}, + {"purple": "purple"}, + ] + + g = PersistentGraph() + g.load_edge_deletions_from_parquet( + edges_parquet_file_path, "time", "src", "dst", layer_col="marbles" + ) + assert g.edges.layer_names.collect() == [ + ["red"], + ["blue"], + ["green"], + ["yellow"], + ["purple"], + ] + assert g.unique_layers == ["_default", "red", "blue", "green", "yellow", "purple"] + + +def test_node_both_option_failures_parquet(parquet_files): + ( + nodes_parquet_file_path, + edges_parquet_file_path, + edges_deletions_parquet_file_path, + ) = parquet_files + + # CHECK ALL NODE FUNCTIONS ON GRAPH FAIL WITH BOTH NODE_TYPE AND NODE_TYPE_COL + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"node_type\", \"node_type_col\"\)", + ): + g = Graph() + g.load_nodes_from_parquet( + nodes_parquet_file_path, + "time", + "id", + node_type="node_type", + node_type_col="node_type", + ) + + with pytest.raises( + Exception, + match=r"Failed to load graph: Failed to load graph WrongNumOfArgs\(\"node_type\", \"node_type_col\"\)", + ): + g = Graph() + g.load_node_props_from_parquet( + nodes_parquet_file_path, + "id", + node_type="node_type", + node_type_col="node_type", + ) + + # CHECK IF JUST NODE_TYPE WORKS + g = Graph() + g.load_nodes_from_parquet( + nodes_parquet_file_path, "time", "id", node_type="node_type" + ) + assert g.nodes.node_type.collect() == [ + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + ] + g = Graph() + g.load_nodes_from_parquet(nodes_parquet_file_path, "time", "id") + g.load_node_props_from_parquet(nodes_parquet_file_path, "id", node_type="node_type") + assert g.nodes.node_type.collect() == [ + "node_type", + "node_type", + "node_type", + "node_type", + "node_type", + 
"node_type", + ] + + # CHECK IF JUST NODE_TYPE_COL WORKS + g = Graph() + g.load_nodes_from_parquet( + nodes_parquet_file_path, "time", "id", node_type_col="node_type" + ) + assert g.nodes.node_type.collect() == ["p1", "p2", "p3", "p4", "p5", "p6"] + g = Graph() + g.load_nodes_from_parquet(nodes_parquet_file_path, "time", "id") + g.load_node_props_from_parquet( + nodes_parquet_file_path, "id", node_type_col="node_type" + ) + assert g.nodes.node_type.collect() == ["p1", "p2", "p3", "p4", "p5", "p6"] diff --git a/raphtory-cypher/src/lib.rs b/raphtory-cypher/src/lib.rs index 6661c86a67..23a1f5fa52 100644 --- a/raphtory-cypher/src/lib.rs +++ b/raphtory-cypher/src/lib.rs @@ -312,7 +312,7 @@ mod cypher { let edge_lists = vec![chunk]; let graph = - DiskGraphStorage::load_from_edge_lists(&edge_lists, 20, 20, graph_dir, 0, 1, 2) + DiskGraphStorage::load_from_edge_lists(&edge_lists, 20, 20, graph_dir, 2, 0, 1) .unwrap(); let df = run_cypher("match ()-[e]->() RETURN *", &graph, true) diff --git a/raphtory/src/core/utils/errors.rs b/raphtory/src/core/utils/errors.rs index 6cdb238df0..4dfcb66f57 100644 --- a/raphtory/src/core/utils/errors.rs +++ b/raphtory/src/core/utils/errors.rs @@ -40,6 +40,8 @@ pub enum InvalidPathReason { #[derive(thiserror::Error, Debug)] pub enum GraphError { + #[error("You cannot set ‘{0}’ and ‘{1}’ at the same time. Please pick one or the other.")] + WrongNumOfArgs(String, String), #[cfg(feature = "arrow")] #[error("Arrow error: {0}")] Arrow(#[from] error::PolarsError), diff --git a/raphtory/src/disk_graph/mod.rs b/raphtory/src/disk_graph/mod.rs index 2bed4f3050..0504307ead 100644 --- a/raphtory/src/disk_graph/mod.rs +++ b/raphtory/src/disk_graph/mod.rs @@ -208,9 +208,9 @@ impl DiskGraphStorage { chunk_size, t_props_chunk_size, graph_dir.as_ref(), + 2, 0, 1, - 2, ) .expect("failed to create graph") } @@ -300,9 +300,9 @@ impl DiskGraphStorage { chunk_size: usize, t_props_chunk_size: usize, graph_dir: impl AsRef + Sync, + time_col_idx: usize, src_col_idx: usize, dst_col_idx: usize, - time_col_idx: usize, ) -> Result { let inner = TemporalGraph::from_sorted_edge_list( graph_dir, diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 9de175c559..a7aeeca5ca 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -36,48 +36,53 @@ pub(crate) fn load_nodes_from_df< G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( df_view: DFView>>, - node_id: &str, time: &str, + node_id: &str, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, node_type: Option<&str>, - node_type_in_df: bool, + node_type_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { let properties = properties.unwrap_or(&[]); - let const_properties = const_properties.unwrap_or(&[]); + let constant_properties = constant_properties.unwrap_or(&[]); let properties_indices = properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; - let const_properties_indices = const_properties + let constant_properties_indices = constant_properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; - let node_type_index = node_type - .filter(|_| node_type_in_df) - .map(|node_type| df_view.get_index(node_type)) - .transpose()?; + let node_type_index = if let Some(node_type_col) = node_type_col { + 
Some(df_view.get_index(node_type_col.as_ref())) + } else { + None + }; + let node_type_index = node_type_index.transpose()?; + let node_id_index = df_view.get_index(node_id)?; let time_index = df_view.get_index(time)?; - let mut pb = build_progress_bar("Loading nodes".to_string(), df_view.num_rows)?; for chunk in df_view.chunks { let df = chunk?; let prop_iter = combine_properties(properties, &properties_indices, &df)?; - let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; - - let node_type: Box>> = match node_type { - Some(node_type) => match node_type_index { - Some(index) => { + let const_prop_iter = + combine_properties(constant_properties, &constant_properties_indices, &df)?; + + let node_type: Result>>, GraphError> = + match (node_type, node_type_index) { + (None, None) => Ok(Box::new(iter::repeat(None))), + (Some(node_type), None) => Ok(Box::new(iter::repeat(Some(node_type)))), + (None, Some(node_type_index)) => { let iter_res: Result>>, GraphError> = - if let Some(node_types) = df.utf8::(index) { + if let Some(node_types) = df.utf8::(node_type_index) { Ok(Box::new(node_types)) - } else if let Some(node_types) = df.utf8::(index) { + } else if let Some(node_types) = df.utf8::(node_type_index) { Ok(Box::new(node_types)) } else { Err(GraphError::LoadFailure( @@ -85,12 +90,14 @@ pub(crate) fn load_nodes_from_df< .to_string(), )) }; - iter_res? + iter_res } - None => Box::new(iter::repeat(Some(node_type))), - }, - None => Box::new(iter::repeat(None)), - }; + _ => Err(GraphError::WrongNumOfArgs( + "node_type".to_string(), + "node_type_col".to_string(), + )), + }; + let node_type = node_type?; if let (Some(node_id), Some(time)) = ( df.iter_col::(node_id_index), @@ -107,7 +114,7 @@ pub(crate) fn load_nodes_from_df< iter, prop_iter, const_prop_iter, - shared_const_properties, + shared_constant_properties, )?; } else if let (Some(node_id), Some(time)) = ( df.iter_col::(node_id_index), @@ -124,7 +131,7 @@ pub(crate) fn load_nodes_from_df< iter, prop_iter, const_prop_iter, - shared_const_properties, + shared_constant_properties, )?; } else if let (Some(node_id), Some(time)) = (df.utf8::(node_id_index), df.time_iter_col(time_index)) @@ -141,7 +148,7 @@ pub(crate) fn load_nodes_from_df< let actual_type = extract_out_default_type(n_t); let v = graph.add_node(time, node_id, props, actual_type)?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } } @@ -162,7 +169,7 @@ pub(crate) fn load_nodes_from_df< if let (Some(node_id), Some(time), n_t) = (node_id, time, actual_type) { let v = graph.add_node(time, node_id, props, n_t)?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = shared_const_properties { + if let Some(shared_const_props) = shared_constant_properties { v.add_constant_properties(shared_const_props)?; } } @@ -182,24 +189,24 @@ pub(crate) fn load_edges_from_df< G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps, >( df_view: DFView>>, + time: &str, src: &str, dst: &str, - time: &str, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, layer: Option<&str>, - layer_in_df: bool, + layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { let properties = properties.unwrap_or(&[]); - 
let const_properties = const_properties.unwrap_or(&[]); + let constant_properties = constant_properties.unwrap_or(&[]); let properties_indices = properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; - let const_properties_indices = const_properties + let constant_properties_indices = constant_properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; @@ -207,19 +214,21 @@ pub(crate) fn load_edges_from_df< let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; let time_index = df_view.get_index(time)?; - let layer_index = layer - .filter(|_| layer_in_df) - .map(|layer| df_view.get_index(layer.as_ref())) - .transpose()?; - + let layer_index = if let Some(layer_col) = layer_col { + Some(df_view.get_index(layer_col.as_ref())) + } else { + None + }; + let layer_index = layer_index.transpose()?; let mut pb = build_progress_bar("Loading edges".to_string(), df_view.num_rows)?; for chunk in df_view.chunks { let df = chunk?; let prop_iter = combine_properties(properties, &properties_indices, &df)?; - let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + let const_prop_iter = + combine_properties(constant_properties, &constant_properties_indices, &df)?; - let layer = lift_layer(layer, layer_index, &df); + let layer = lift_layer(layer, layer_index, &df)?; if let (Some(src), Some(dst), Some(time)) = ( df.iter_col::(src_index), @@ -236,7 +245,7 @@ pub(crate) fn load_edges_from_df< triplets, prop_iter, const_prop_iter, - shared_const_properties, + shared_constant_properties, layer, )?; } else if let (Some(src), Some(dst), Some(time)) = ( @@ -254,7 +263,7 @@ pub(crate) fn load_edges_from_df< triplets, prop_iter, const_prop_iter, - shared_const_properties, + shared_constant_properties, layer, )?; } else if let (Some(src), Some(dst), Some(time)) = ( @@ -270,7 +279,7 @@ pub(crate) fn load_edges_from_df< if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { let e = graph.add_edge(time, src, dst, props, layer.as_deref())?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -288,7 +297,7 @@ pub(crate) fn load_edges_from_df< if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { let e = graph.add_edge(time, src, dst, props, layer.as_deref())?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -304,31 +313,32 @@ pub(crate) fn load_edges_from_df< Ok(()) } -pub(crate) fn load_edges_deletions_from_df< +pub(crate) fn load_edge_deletions_from_df< 'a, G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps + DeletionOps, >( df_view: DFView>>, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, - layer_in_df: bool, + layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; let time_index = df_view.get_index(time)?; - let layer_index = layer - .filter(|_| layer_in_df) - .map(|layer| df_view.get_index(layer.as_ref())) - .transpose()?; - + let layer_index = if let Some(layer_col) = layer_col { + 
Some(df_view.get_index(layer_col.as_ref())) + } else { + None + }; + let layer_index = layer_index.transpose()?; let mut pb = build_progress_bar("Loading edge deletions".to_string(), df_view.num_rows)?; for chunk in df_view.chunks { let df = chunk?; - let layer = lift_layer(layer, layer_index, &df); + let layer = lift_layer(layer, layer_index, &df)?; if let (Some(src), Some(dst), Some(time)) = ( df.iter_col::(src_index), @@ -404,75 +414,120 @@ pub(crate) fn load_node_props_from_df< >( df_view: DFView>>, node_id: &str, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + node_type: Option<&str>, + node_type_col: Option<&str>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, graph: &G, ) -> Result<(), GraphError> { - let const_properties = const_properties.unwrap_or(&[]); - let const_properties_indices = const_properties + let constant_properties = constant_properties.unwrap_or(&[]); + let constant_properties_indices = constant_properties .iter() .map(|name| df_view.get_index(name)) .collect::, GraphError>>()?; let node_id_index = df_view.get_index(node_id)?; + let node_type_index = if let Some(node_type_col) = node_type_col { + Some(df_view.get_index(node_type_col.as_ref())) + } else { + None + }; + let node_type_index = node_type_index.transpose()?; let mut pb = build_progress_bar("Loading node properties".to_string(), df_view.num_rows)?; - for chunk in df_view.chunks { let df = chunk?; - let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + let const_prop_iter = + combine_properties(constant_properties, &constant_properties_indices, &df)?; + + let node_type: Result>>, GraphError> = + match (node_type, node_type_index) { + (None, None) => Ok(Box::new(iter::repeat(None))), + (Some(node_type), None) => Ok(Box::new(iter::repeat(Some(node_type)))), + (None, Some(node_type_index)) => { + let iter_res: Result>>, GraphError> = + if let Some(node_types) = df.utf8::(node_type_index) { + Ok(Box::new(node_types)) + } else if let Some(node_types) = df.utf8::(node_type_index) { + Ok(Box::new(node_types)) + } else { + Err(GraphError::LoadFailure( + "Unable to convert / find node_type column in dataframe." 
+ .to_string(), + )) + }; + iter_res + } + _ => Err(GraphError::WrongNumOfArgs( + "node_type".to_string(), + "node_type_col".to_string(), + )), + }; + let node_type = node_type?; if let Some(node_id) = df.iter_col::(node_id_index) { let iter = node_id.map(|i| i.copied()); - for (node_id, const_props) in iter.zip(const_prop_iter) { + for ((node_id, const_props), node_type) in iter.zip(const_prop_iter).zip(node_type) { if let Some(node_id) = node_id { let v = graph .node(node_id) .ok_or(GraphError::NodeIdError(node_id))?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } + if let Some(node_type) = node_type { + v.set_node_type(node_type)?; + } } let _ = pb.update(1); } } else if let Some(node_id) = df.iter_col::(node_id_index) { let iter = node_id.map(i64_opt_into_u64_opt); - for (node_id, const_props) in iter.zip(const_prop_iter) { + for ((node_id, const_props), node_type) in iter.zip(const_prop_iter).zip(node_type) { if let Some(node_id) = node_id { let v = graph .node(node_id) .ok_or(GraphError::NodeIdError(node_id))?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } + if let Some(node_type) = node_type { + v.set_node_type(node_type)?; + } } let _ = pb.update(1); } } else if let Some(node_id) = df.utf8::(node_id_index) { let iter = node_id.into_iter(); - for (node_id, const_props) in iter.zip(const_prop_iter) { + for ((node_id, const_props), node_type) in iter.zip(const_prop_iter).zip(node_type) { if let Some(node_id) = node_id { let v = graph .node(node_id) .ok_or_else(|| GraphError::NodeNameError(node_id.to_owned()))?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } + if let Some(node_type) = node_type { + v.set_node_type(node_type)?; + } } let _ = pb.update(1); } } else if let Some(node_id) = df.utf8::(node_id_index) { let iter = node_id.into_iter(); - for (node_id, const_props) in iter.zip(const_prop_iter) { + for ((node_id, const_props), node_type) in iter.zip(const_prop_iter).zip(node_type) { if let Some(node_id) = node_id { let v = graph .node(node_id) .ok_or_else(|| GraphError::NodeNameError(node_id.to_owned()))?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } + if let Some(node_type) = node_type { + v.set_node_type(node_type)?; + } } let _ = pb.update(1); } @@ -492,31 +547,33 @@ pub(crate) fn load_edges_props_from_df< df_view: DFView>>, src: &str, dst: &str, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, layer: Option<&str>, - layer_in_df: bool, + layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { - let const_properties = const_properties.unwrap_or(&[]); - let const_properties_indices = const_properties + let constant_properties = constant_properties.unwrap_or(&[]); + let constant_properties_indices = constant_properties .iter() .map(|name| 
df_view.get_index(name)) .collect::, GraphError>>()?; let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; - let layer_index = layer - .filter(|_| layer_in_df) - .map(|layer| df_view.get_index(layer.as_ref())) - .transpose()?; - + let layer_index = if let Some(layer_col) = layer_col { + Some(df_view.get_index(layer_col.as_ref())) + } else { + None + }; + let layer_index = layer_index.transpose()?; let mut pb = build_progress_bar("Loading edge properties".to_string(), df_view.num_rows)?; for chunk in df_view.chunks { let df = chunk?; - let const_prop_iter = combine_properties(const_properties, &const_properties_indices, &df)?; + let const_prop_iter = + combine_properties(constant_properties, &constant_properties_indices, &df)?; - let layer = lift_layer(layer, layer_index, &df); + let layer = lift_layer(layer, layer_index, &df)?; if let (Some(src), Some(dst)) = (df.iter_col::(src_index), df.iter_col::(dst_index)) @@ -529,7 +586,7 @@ pub(crate) fn load_edges_props_from_df< .edge(src, dst) .ok_or(GraphError::EdgeIdError { src, dst })?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -548,7 +605,7 @@ pub(crate) fn load_edges_props_from_df< .edge(src, dst) .ok_or(GraphError::EdgeIdError { src, dst })?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -567,7 +624,7 @@ pub(crate) fn load_edges_props_from_df< dst: dst.to_owned(), })?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -587,7 +644,7 @@ pub(crate) fn load_edges_props_from_df< dst: dst.to_owned(), })?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -619,17 +676,17 @@ fn load_edges_from_num_iter< pb: &mut Bar, edges: I, properties: PI, - const_properties: PI, - shared_const_properties: Option<&HashMap>, + constant_properties: PI, + shared_constant_properties: Option<&HashMap>, layer: IL, ) -> Result<(), GraphError> { for (((((src, dst), time), edge_props), const_props), layer) in - edges.zip(properties).zip(const_properties).zip(layer) + edges.zip(properties).zip(constant_properties).zip(layer) { if let (Some(src), Some(dst), Some(time)) = (src, dst, time) { let e = graph.add_edge(time, src, dst, edge_props, layer.as_deref())?; e.add_constant_properties(const_props, layer.as_deref())?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { e.add_constant_properties(shared_const_props.iter(), layer.as_deref())?; } } @@ -649,11 +706,11 @@ fn load_nodes_from_num_iter< pb: &mut Bar, nodes: I, properties: PI, - const_properties: PI, - shared_const_properties: Option<&HashMap>, + constant_properties: PI, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { 
for (((node, time, node_type), props), const_props) in - nodes.zip(properties).zip(const_properties) + nodes.zip(properties).zip(constant_properties) { if let (Some(v), Some(t), n_t, props, const_props) = (node, time, node_type, props, const_props) @@ -662,7 +719,7 @@ fn load_nodes_from_num_iter< let v = graph.add_node(t, v, props, actual_node_type)?; v.add_constant_properties(const_props)?; - if let Some(shared_const_props) = &shared_const_properties { + if let Some(shared_const_props) = &shared_constant_properties { v.add_constant_properties(shared_const_props.iter())?; } let _ = pb.update(1); diff --git a/raphtory/src/io/arrow/mod.rs b/raphtory/src/io/arrow/mod.rs index 0bd4078ce5..64a1a95e9f 100644 --- a/raphtory/src/io/arrow/mod.rs +++ b/raphtory/src/io/arrow/mod.rs @@ -45,18 +45,18 @@ mod test { num_rows: 3, }; let graph = Graph::new(); - let layer: Option<&str> = None; - let layer_in_df: bool = true; + let layer_name: Option<&str> = None; + let layer_col: Option<&str> = None; load_edges_from_df( df, + "time", "src", "dst", - "time", Some(&*vec!["prop1", "prop2"]), None, None, - layer, - layer_in_df, + layer_name, + layer_col, &graph, ) .expect("failed to load edges from pretend df"); @@ -141,13 +141,13 @@ mod test { load_nodes_from_df( df, - "id", "time", + "id", Some(&*vec!["name"]), None, None, Some("node_type"), - false, + None, &graph, ) .expect("failed to load nodes from pretend df"); diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index acbda631d4..2d91f6e542 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -343,25 +343,26 @@ pub(crate) fn lift_property<'a: 'b, 'b>( } pub(crate) fn lift_layer<'a>( - layer: Option<&str>, + layer_name: Option<&str>, layer_index: Option, df: &'a DFChunk, -) -> Box> + 'a> { - if let Some(layer) = layer { - match layer_index { - Some(index) => { - if let Some(col) = df.utf8::(index) { - Box::new(col.map(|v| v.map(|v| v.to_string()))) - } else if let Some(col) = df.utf8::(index) { - Box::new(col.map(|v| v.map(|v| v.to_string()))) - } else { - Box::new(std::iter::repeat(None)) - } +) -> Result> + 'a>, GraphError> { + match (layer_name, layer_index) { + (None, None) => Ok(Box::new(std::iter::repeat(None))), + (Some(layer_name), None) => Ok(Box::new(std::iter::repeat(Some(layer_name.to_string())))), + (None, Some(layer_index)) => { + if let Some(col) = df.utf8::(layer_index) { + Ok(Box::new(col.map(|v| v.map(|v| v.to_string())))) + } else if let Some(col) = df.utf8::(layer_index) { + Ok(Box::new(col.map(|v| v.map(|v| v.to_string())))) + } else { + Ok(Box::new(std::iter::repeat(None))) } - None => Box::new(std::iter::repeat(Some(layer.to_string()))), } - } else { - Box::new(std::iter::repeat(None)) + _ => Err(GraphError::WrongNumOfArgs( + "layer_name".to_string(), + "layer_col".to_string(), + )), } } diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 1a80e00aad..ee5a828eda 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -28,21 +28,19 @@ pub fn load_nodes_from_parquet< >( graph: &G, parquet_path: &Path, - id: &str, time: &str, + id: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id, 
time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); - if node_type_in_df.unwrap_or(true) { - if let Some(ref node_type) = node_type { - cols_to_check.push(node_type.as_ref()); - } + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); } for path in get_parquet_file_paths(parquet_path)? { @@ -50,13 +48,13 @@ pub fn load_nodes_from_parquet< df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( df_view, - id, time, + id, properties, - const_properties, - shared_const_properties, + constant_properties, + shared_constant_properties, node_type, - node_type_in_df.unwrap_or(true), + node_type_col, graph, ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -70,23 +68,21 @@ pub fn load_edges_from_parquet< >( graph: &G, parquet_path: impl AsRef, + time: &str, src: &str, dst: &str, - time: &str, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { let parquet_path = parquet_path.as_ref(); let mut cols_to_check = vec![src, dst, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); - if layer_in_df.unwrap_or(false) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } for path in get_parquet_file_paths(parquet_path)? { @@ -94,14 +90,14 @@ pub fn load_edges_from_parquet< df_view.check_cols_exist(&cols_to_check)?; load_edges_from_df( df_view, + time, src, dst, - time, properties, - const_properties, - shared_const_properties, + constant_properties, + shared_constant_properties, layer, - layer_in_df.unwrap_or(true), + layer_col, graph, ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -116,11 +112,16 @@ pub fn load_node_props_from_parquet< graph: &G, parquet_path: &Path, id: &str, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + node_type: Option<&str>, + node_type_col: Option<&str>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id]; - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); + } for path in get_parquet_file_paths(parquet_path)? 
{ let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; @@ -129,8 +130,10 @@ pub fn load_node_props_from_parquet< load_node_props_from_df( df_view, id, - const_properties, - shared_const_properties, + node_type, + node_type_col, + constant_properties, + shared_constant_properties, graph, ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -146,18 +149,16 @@ pub fn load_edge_props_from_parquet< parquet_path: &Path, src: &str, dst: &str, - const_properties: Option<&[&str]>, + constant_properties: Option<&[&str]>, shared_const_properties: Option<&HashMap>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst]; - if layer_in_df.unwrap_or(false) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); for path in get_parquet_file_paths(parquet_path)? { let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; @@ -166,10 +167,10 @@ pub fn load_edge_props_from_parquet< df_view, src, dst, - const_properties, + constant_properties, shared_const_properties, layer, - layer_in_df.unwrap_or(true), + layer_col, graph.core_graph(), ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -178,36 +179,27 @@ pub fn load_edge_props_from_parquet< Ok(()) } -pub fn load_edges_deletions_from_parquet< +pub fn load_edge_deletions_from_parquet< G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps + DeletionOps, >( graph: &G, parquet_path: &Path, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst, time]; - if layer_in_df.unwrap_or(true) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } + for path in get_parquet_file_paths(parquet_path)? 
{ let df_view = process_parquet_file_to_df(path.as_path(), &cols_to_check)?; df_view.check_cols_exist(&cols_to_check)?; - load_edges_deletions_from_df( - df_view, - src, - dst, - time, - layer, - layer_in_df.unwrap_or(true), - graph, - ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + load_edge_deletions_from_df(df_view, time, src, dst, layer, layer_col, graph) + .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } Ok(()) } diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs index b67fd91b14..06f662d343 100644 --- a/raphtory/src/python/graph/disk_graph.rs +++ b/raphtory/src/python/graph/disk_graph.rs @@ -129,13 +129,13 @@ impl PyDiskGraph { } #[staticmethod] - #[pyo3(signature = (graph_dir, edge_df, src_col, dst_col, time_col))] + #[pyo3(signature = (graph_dir, edge_df, time_col, src_col, dst_col))] pub fn load_from_pandas( graph_dir: &str, edge_df: &PyAny, + time_col: &str, src_col: &str, dst_col: &str, - time_col: &str, ) -> Result { let graph: Result = Python::with_gil(|py| { let cols_to_check = vec![src_col, dst_col, time_col]; @@ -145,7 +145,7 @@ impl PyDiskGraph { let df_view = process_pandas_py_df(edge_df, py, df_columns)?; df_view.check_cols_exist(&cols_to_check)?; - let graph = Self::from_pandas(graph_dir, df_view, src_col, dst_col, time_col)?; + let graph = Self::from_pandas(graph_dir, df_view, time_col, src_col, dst_col)?; Ok::<_, GraphError>(graph) }); @@ -222,9 +222,9 @@ impl PyDiskGraph { fn from_pandas( graph_dir: &str, df_view: DFView>>, + time: &str, src: &str, dst: &str, - time: &str, ) -> Result { let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; @@ -264,9 +264,9 @@ impl PyDiskGraph { chunk_size, chunk_size, graph_dir, + time_index, src_index, dst_index, - time_index, ) .map_err(|err| GraphError::LoadFailure(format!("Failed to load graph {err:?}"))) } diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index a5643f624f..45e464cb88 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -378,201 +378,46 @@ impl PyGraph { PyPersistentGraph::py_from_db_graph(self.graph.persistent_graph()) } - /// Load a graph from a Pandas DataFrame. - /// - /// Args: - /// edge_df (pandas.DataFrame): The DataFrame containing the edges. - /// edge_src (str): The column name for the source node ids. - /// edge_dst (str): The column name for the destination node ids. - /// edge_time (str): The column name for the timestamps. - /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// edge_layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - /// node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. - /// node_id (str): The column name for the node ids (optional) Defaults to None. - /// node_time (str): The column name for the node timestamps (optional) Defaults to None. 
- /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - /// - /// Returns: - /// Graph: The loaded Graph object. - #[staticmethod] - #[pyo3( - signature = (edge_df, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, - edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true) - )] - fn load_from_pandas( - edge_df: &PyAny, - edge_src: &str, - edge_dst: &str, - edge_time: &str, - edge_properties: Option>, - edge_const_properties: Option>, - edge_shared_const_properties: Option>, - edge_layer: Option<&str>, - layer_in_df: Option, - node_df: Option<&PyAny>, - node_id: Option<&str>, - node_time: Option<&str>, - node_properties: Option>, - node_const_properties: Option>, - node_shared_const_properties: Option>, - node_type: Option<&str>, - node_type_in_df: Option, - ) -> Result { - let graph = Graph::new(); - if let (Some(node_df), Some(node_id), Some(node_time)) = (node_df, node_id, node_time) { - load_nodes_from_pandas( - &graph.core_graph(), - node_df, - node_id, - node_time, - node_type, - node_type_in_df, - node_properties.as_ref().map(|props| props.as_ref()), - node_const_properties.as_ref().map(|props| props.as_ref()), - node_shared_const_properties.as_ref(), - )?; - } - load_edges_from_pandas( - &graph.core_graph(), - edge_df, - edge_src, - edge_dst, - edge_time, - edge_properties.as_ref().map(|props| props.as_ref()), - edge_const_properties.as_ref().map(|props| props.as_ref()), - edge_shared_const_properties.as_ref(), - edge_layer, - layer_in_df, - )?; - Ok(graph) - } - - /// Load a graph from Parquet file. - /// - /// Args: - /// edge_parquet_path (str): Parquet file or directory of Parquet files containing the edges. - /// edge_src (str): The column name for the source node ids. - /// edge_dst (str): The column name for the destination node ids. - /// edge_time (str): The column name for the timestamps. - /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// edge_layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - /// node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. - /// node_id (str): The column name for the node ids (optional) Defaults to None. - /// node_time (str): The column name for the node timestamps (optional) Defaults to None. 
- /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - /// - /// Returns: - /// Graph: The loaded Graph object. - #[staticmethod] - #[pyo3( - signature = (edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, - edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true) - )] - fn load_from_parquet( - edge_parquet_path: PathBuf, - edge_src: &str, - edge_dst: &str, - edge_time: &str, - edge_properties: Option>, - edge_const_properties: Option>, - edge_shared_const_properties: Option>, - edge_layer: Option<&str>, - layer_in_df: Option, - node_parquet_path: Option, - node_id: Option<&str>, - node_time: Option<&str>, - node_properties: Option>, - node_const_properties: Option>, - node_shared_const_properties: Option>, - node_type: Option<&str>, - node_type_in_df: Option, - ) -> Result { - let graph = Graph::new(); - - if let (Some(node_parquet_path), Some(node_id), Some(node_time)) = - (node_parquet_path, node_id, node_time) - { - load_nodes_from_parquet( - &graph, - &node_parquet_path, - node_id, - node_time, - node_type, - node_type_in_df, - node_properties.as_ref().map(|props| props.as_ref()), - node_const_properties.as_ref().map(|props| props.as_ref()), - node_shared_const_properties.as_ref(), - )?; - } - load_edges_from_parquet( - &graph, - edge_parquet_path, - edge_src, - edge_dst, - edge_time, - edge_properties.as_ref().map(|props| props.as_ref()), - edge_const_properties.as_ref().map(|props| props.as_ref()), - edge_shared_const_properties.as_ref(), - edge_layer, - layer_in_df, - )?; - - Ok(graph) - } - /// Load nodes from a Pandas DataFrame into the graph. /// /// Arguments: /// df (pandas.DataFrame): The Pandas DataFrame containing the nodes. - /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - /// properties (List): List of node property column names. Defaults to None. (optional) - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// id (str): The column name for the node IDs. + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. 
(cannot be used in combination with node_type) + /// properties (List[str]): List of node property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. #[pyo3( - signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None) + signature = (df,time, id, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None) )] fn load_nodes_from_pandas( &self, df: &PyAny, - id: &str, time: &str, + id: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_nodes_from_pandas( self.graph.core_graph(), df, - id, time, + id, node_type, - node_type_in_df, + node_type_col, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -580,39 +425,42 @@ impl PyGraph { /// /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes - /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - /// properties (List): List of node property column names. Defaults to None. (optional) - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// id (str): The column name for the node IDs. + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// properties (List[str]): List of node property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. 
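+    ///
+    /// Illustrative sketch (placeholder file path and column names) of the time-first argument order and the node_type_col option:
+    ///
+    ///     g = Graph()
+    ///     g.load_nodes_from_parquet("nodes.parquet", time="time", id="id", node_type_col="node_type")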
#[pyo3( - signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None) + signature = (parquet_path, time, id, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None) )] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, - id: &str, time: &str, + id: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_nodes_from_parquet( &self.graph, parquet_path.as_path(), - id, time, + id, node_type, - node_type_in_df, + node_type_col, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -620,43 +468,45 @@ impl PyGraph { /// /// Arguments: /// df (Dataframe): The Pandas DataFrame containing the edges. + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. - /// properties (List): List of edge property column names. Defaults to None. (optional) - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dateframe or if it should be used directly as the layer for all edges (optional) defaults to True. - /// + /// properties (List[str]): List of edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. 
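+    ///
+    /// Illustrative sketch (placeholder DataFrame and column names); note that layer and layer_col are mutually exclusive, and passing both raises a GraphError:
+    ///
+    ///     g = Graph()
+    ///     g.load_edges_from_pandas(df, time="time", src="src", dst="dst", properties=["weight"], layer_col="layers")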
#[pyo3( - signature = (df, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None) )] fn load_edges_from_pandas( &self, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_pandas( self.graph.core_graph(), df, + time, src, dst, - time, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, - layer_in_df, + layer_col, ) } @@ -664,43 +514,45 @@ impl PyGraph { /// /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing edges + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. - /// properties (List): List of edge property column names. Defaults to None. (optional) - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - /// + /// properties (List[str]): List of edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. 
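+    ///
+    /// Illustrative sketch (placeholder path and column names); here layer is a constant layer name applied to every edge:
+    ///
+    ///     g = Graph()
+    ///     g.load_edges_from_parquet("edges.parquet", time="time", src="src", dst="dst", layer="interactions")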
#[pyo3( - signature = (parquet_path, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None) )] fn load_edges_from_parquet( &self, parquet_path: PathBuf, + time: &str, src: &str, dst: &str, - time: &str, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_parquet( &self.graph, parquet_path.as_path(), + time, src, dst, - time, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, - layer_in_df, + layer_col, ) } @@ -709,25 +561,34 @@ impl PyGraph { /// Arguments: /// df (Dataframe): The Pandas DataFrame containing node information. /// id(str): The column name for the node IDs. - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, const_properties = None, shared_const_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, id, node_type=None, node_type_col=None, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_pandas( &self, df: &PyAny, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + node_type: Option<&str>, + node_type_col: Option<&str>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_node_props_from_pandas( self.graph.core_graph(), df, id, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + node_type, + node_type_col, + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -736,25 +597,34 @@ impl PyGraph { /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. /// id(str): The column name for the node IDs. - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. 
(cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, const_properties = None, shared_const_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, id, node_type=None,node_type_col=None, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_parquet( &self, parquet_path: PathBuf, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + node_type: Option<&str>, + node_type_col: Option<&str>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_node_props_from_parquet( &self.graph, parquet_path.as_path(), id, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + node_type, + node_type_col, + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -764,35 +634,38 @@ impl PyGraph { /// df (Dataframe): The Pandas DataFrame containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): Layer name. Defaults to None. (optional) - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// layer (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. 
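Editor's note: the node constant-property loader above now takes `node_type`/`node_type_col` alongside the renamed `constant_properties`/`shared_constant_properties` arguments. A hedged sketch of the expected call, with illustrative column names and the nodes created beforehand so the properties have something to attach to:

```python
import pandas as pd
from raphtory import Graph

node_props_df = pd.DataFrame(
    {
        "id": ["a", "b", "c"],
        "kind": ["person", "person", "bot"],  # per-node type column
        "country": ["UK", "DE", "US"],        # constant property column
    }
)

g = Graph()
for node_id in ["a", "b", "c"]:
    g.add_node(timestamp=1, id=node_id)

# Attach node types and constant properties to the existing nodes; `node_type_col`
# reads the type per row, while `node_type` would set one type for all rows.
g.load_node_props_from_pandas(
    df=node_props_df,
    id="id",
    node_type_col="kind",
    constant_properties=["country"],
    shared_constant_properties={"source": "csv-import"},
)
```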
#[pyo3( - signature = (df, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None) )] fn load_edge_props_from_pandas( &self, df: &PyAny, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_pandas( self.graph.core_graph(), df, src, dst, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, - layer_in_df, + layer_col, ) } @@ -802,35 +675,38 @@ impl PyGraph { /// parquet_path (str): Parquet file or directory of Parquet files path containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): Layer name. Defaults to None. (optional) - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// layer (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: - /// Result<(), GraphError>: Result of the operation. + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. #[pyo3( - signature = (parquet_path, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true) + signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None) )] fn load_edge_props_from_parquet( &self, parquet_path: PathBuf, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_parquet( &self.graph, parquet_path.as_path(), src, dst, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, - layer_in_df, + layer_col, ) } } diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index b0518110fc..654350ce80 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -339,193 +339,44 @@ impl PyPersistentGraph { PyGraph::py_from_db_graph(self.graph.event_graph()) } - /// Load a graph from a Pandas DataFrame. 
- /// - /// Args: - /// edge_df (pandas.DataFrame): The DataFrame containing the edges. - /// edge_src (str): The column name for the source node ids. - /// edge_dst (str): The column name for the destination node ids. - /// edge_time (str): The column name for the timestamps. - /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// edge_layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - /// node_df (pandas.DataFrame): The DataFrame containing the nodes (optional) Defaults to None. - /// node_id (str): The column name for the node ids (optional) Defaults to None. - /// node_time (str): The column name for the node timestamps (optional) Defaults to None. - /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - /// - /// Returns: - /// Graph: The loaded Graph object. - #[staticmethod] - #[pyo3(signature = (edge_df, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, - edge_layer = None, layer_in_df = true, node_df = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true))] - fn load_from_pandas( - edge_df: &PyAny, - edge_src: &str, - edge_dst: &str, - edge_time: &str, - edge_properties: Option>, - edge_const_properties: Option>, - edge_shared_const_properties: Option>, - edge_layer: Option<&str>, - layer_in_df: Option, - node_df: Option<&PyAny>, - node_id: Option<&str>, - node_time: Option<&str>, - node_properties: Option>, - node_const_properties: Option>, - node_shared_const_properties: Option>, - node_type: Option<&str>, - node_type_in_df: Option, - ) -> Result { - let graph = PyPersistentGraph { - graph: PersistentGraph::new(), - }; - graph.load_edges_from_pandas( - edge_df, - edge_src, - edge_dst, - edge_time, - edge_properties, - edge_const_properties, - edge_shared_const_properties, - edge_layer, - layer_in_df, - )?; - if let (Some(node_df), Some(node_id), Some(node_time)) = (node_df, node_id, node_time) { - graph.load_nodes_from_pandas( - node_df, - node_id, - node_time, - node_type, - node_type_in_df, - node_properties, - node_const_properties, - node_shared_const_properties, - )?; - } - Ok(graph.graph) - } - - /// Load a graph from Parquet file. - /// - /// Args: - /// edge_parquet_path (str): Parquet file or directory of Parquet files containing the edges. - /// edge_src (str): The column name for the source node ids. 
- /// edge_dst (str): The column name for the destination node ids. - /// edge_time (str): The column name for the timestamps. - /// edge_properties (list): The column names for the temporal properties (optional) Defaults to None. - /// edge_const_properties (list): The column names for the constant properties (optional) Defaults to None. - /// edge_shared_const_properties (dict): A dictionary of constant properties that will be added to every edge (optional) Defaults to None. - /// edge_layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the edge_df or if it should be used directly as the layer for all edges (optional) defaults to True. - /// node_parquet_path (str): Parquet file or directory of Parquet files containing the nodes (optional) Defaults to None. - /// node_id (str): The column name for the node ids (optional) Defaults to None. - /// node_time (str): The column name for the node timestamps (optional) Defaults to None. - /// node_properties (list): The column names for the node temporal properties (optional) Defaults to None. - /// node_const_properties (list): The column names for the node constant properties (optional) Defaults to None. - /// node_shared_const_properties (dict): A dictionary of constant properties that will be added to every node (optional) Defaults to None. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - /// - /// Returns: - /// Graph: The loaded Graph object. - #[staticmethod] - #[pyo3(signature = (edge_parquet_path, edge_src, edge_dst, edge_time, edge_properties = None, edge_const_properties = None, edge_shared_const_properties = None, - edge_layer = None, layer_in_df = true, node_parquet_path = None, node_id = None, node_time = None, node_properties = None, - node_const_properties = None, node_shared_const_properties = None, node_type = None, node_type_in_df = true))] - fn load_from_parquet( - edge_parquet_path: PathBuf, - edge_src: &str, - edge_dst: &str, - edge_time: &str, - edge_properties: Option>, - edge_const_properties: Option>, - edge_shared_const_properties: Option>, - edge_layer: Option<&str>, - layer_in_df: Option, - node_parquet_path: Option, - node_id: Option<&str>, - node_time: Option<&str>, - node_properties: Option>, - node_const_properties: Option>, - node_shared_const_properties: Option>, - node_type: Option<&str>, - node_type_in_df: Option, - ) -> Result { - let graph = PyPersistentGraph { - graph: PersistentGraph::new(), - }; - if let (Some(node_parquet_file_path), Some(node_id), Some(node_time)) = - (node_parquet_path, node_id, node_time) - { - graph.load_nodes_from_parquet( - node_parquet_file_path, - node_id, - node_time, - node_type, - node_type_in_df, - node_properties, - node_const_properties, - node_shared_const_properties, - )?; - } - graph.load_edges_from_parquet( - edge_parquet_path, - edge_src, - edge_dst, - edge_time, - edge_properties, - edge_const_properties, - edge_shared_const_properties, - edge_layer, - layer_in_df, - )?; - Ok(graph.graph) - } - /// Load nodes from a Pandas DataFrame into the graph. /// /// Arguments: /// df (pandas.DataFrame): The Pandas DataFrame containing the nodes. - /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. 
- /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - /// properties (List): List of node property column names. Defaults to None. (optional) - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// id (str): The column name for the node IDs. + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// properties (List[str]): List of node property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df,time,id, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None))] fn load_nodes_from_pandas( &self, df: &PyAny, - id: &str, time: &str, + id: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_nodes_from_pandas( self.graph.core_graph(), df, - id, time, + id, node_type, - node_type_in_df, + node_type_col, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -533,37 +384,40 @@ impl PyPersistentGraph { /// /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes - /// id (str): The column name for the node IDs. /// time (str): The column name for the timestamps. - /// node_type (str): the column name for the node type - /// node_type_in_df (bool): whether the node type should be used to look up the values in a column of the df or if it should be used directly as the node type - /// properties (List): List of node property column names. Defaults to None. (optional) - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (Dictionary/Hashmap of properties): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// id (str): The column name for the node IDs. + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. 
(cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// properties (List[str]): List of node property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, time, node_type = None, node_type_in_df = true, properties = None, const_properties = None, shared_const_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, time,id, node_type = None, node_type_col = None, properties = None, constant_properties = None, shared_constant_properties = None))] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, - id: &str, time: &str, + id: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_nodes_from_parquet( &self.graph, parquet_path.as_path(), - id, time, + id, node_type, - node_type_in_df, + node_type_col, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -571,41 +425,43 @@ impl PyPersistentGraph { /// /// Arguments: /// df (Dataframe): The Pandas DataFrame containing the edges. + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. - /// properties (List): List of edge property column names. Defaults to None. (optional) - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - /// + /// properties (List[str]): List of edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. 
- #[pyo3(signature = (df, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None))] fn load_edges_from_pandas( &self, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_pandas( self.graph.core_graph(), df, + time, src, dst, - time, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, - layer_in_df, + layer_col, ) } @@ -613,41 +469,43 @@ impl PyPersistentGraph { /// /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing edges + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. - /// properties (List): List of edge property column names. Defaults to None. (optional) - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - /// + /// properties (List[str]): List of edge property column names. Defaults to None. (optional) + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, src, dst, time, properties = None, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. 
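Editor's note: with the `load_from_pandas` convenience constructor removed above, a persistent graph is now assembled by calling the individual loaders on a `PersistentGraph` instance. A hedged sketch under the new time-first argument order (dataframes and column names are illustrative):

```python
import pandas as pd
from raphtory import PersistentGraph

nodes_df = pd.DataFrame(
    {
        "time": [1, 1, 2],
        "id": ["a", "b", "c"],
        "kind": ["person", "person", "bot"],
    }
)
edges_df = pd.DataFrame(
    {
        "time": [2, 3],
        "src": ["a", "b"],
        "dst": ["b", "c"],
        "weight": [1.0, 2.0],
    }
)

g = PersistentGraph()
# Nodes: note the new argument order (time before id) and `node_type_col`.
g.load_nodes_from_pandas(df=nodes_df, time="time", id="id", node_type_col="kind")
# Edges: a constant `layer` applied to every row (mutually exclusive with `layer_col`).
g.load_edges_from_pandas(
    df=edges_df,
    time="time",
    src="src",
    dst="dst",
    properties=["weight"],
    layer="interactions",
)
```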
+ #[pyo3(signature = (parquet_path, time, src, dst, properties = None, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None))] fn load_edges_from_parquet( &self, parquet_path: PathBuf, + time: &str, src: &str, dst: &str, - time: &str, properties: Option>, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edges_from_parquet( &self.graph, parquet_path.as_path(), + time, src, dst, - time, properties.as_ref().map(|props| props.as_ref()), - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, - layer_in_df, + layer_col, ) } @@ -655,32 +513,34 @@ impl PyPersistentGraph { /// /// Arguments: /// df (Dataframe): The Pandas DataFrame containing the edges. + /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - /// + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, src, dst, time, layer = None, layer_in_df = true))] - fn load_edges_deletions_from_pandas( + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, time, src, dst, layer = None, layer_col = None))] + fn load_edge_deletions_from_pandas( &self, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { - load_edges_deletions_from_pandas( + load_edge_deletions_from_pandas( self.graph.core_graph(), df, + time, src, dst, - time, layer, - layer_in_df, + layer_col, ) } @@ -691,29 +551,31 @@ impl PyPersistentGraph { /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. /// time (str): The column name for the update timestamps. - /// layer (str): The edge layer name (optional) Defaults to None. - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the dataframe or if it should be used directly as the layer for all edges (optional) defaults to True. - /// + /// layer (str): A constant value to use as the layer for all edges (optional) Defaults to None. (cannot be used in combination with layer_col) + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. (cannot be used in combination with layer) /// Returns: - /// Result<(), GraphError>: Result of the operation. 
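Editor's note: the deletion loader is renamed from `load_edges_deletions_from_pandas` to `load_edge_deletions_from_pandas` and follows the same time-first, `layer`/`layer_col` convention. A hedged usage sketch (column names are illustrative):

```python
import pandas as pd
from raphtory import PersistentGraph

edges_df = pd.DataFrame({"time": [1, 2], "src": ["a", "b"], "dst": ["b", "c"]})
deletions_df = pd.DataFrame({"time": [5, 6], "src": ["a", "b"], "dst": ["b", "c"]})

g = PersistentGraph()
g.load_edges_from_pandas(df=edges_df, time="time", src="src", dst="dst")

# Mark the same edges as deleted at a later time; with neither `layer` nor
# `layer_col` given, the deletions are expected to target the default layer.
g.load_edge_deletions_from_pandas(df=deletions_df, time="time", src="src", dst="dst")
```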
- #[pyo3(signature = (parquet_path, src, dst, time, layer = None, layer_in_df = true))] - fn load_edges_deletions_from_parquet( + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, time, src, dst, layer = None, layer_col = None))] + fn load_edge_deletions_from_parquet( &self, parquet_path: PathBuf, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { - load_edges_deletions_from_parquet( + load_edge_deletions_from_parquet( &self.graph, parquet_path.as_path(), + time, src, dst, - time, layer, - layer_in_df, + layer_col, ) } @@ -722,25 +584,34 @@ impl PyPersistentGraph { /// Arguments: /// df (Dataframe): The Pandas DataFrame containing node information. /// id(str): The column name for the node IDs. - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, id, const_properties = None, shared_const_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, id, node_type=None, node_type_col=None, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_pandas( &self, df: &PyAny, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + node_type: Option<&str>, + node_type_col: Option<&str>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_node_props_from_pandas( self.graph.core_graph(), df, id, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + node_type, + node_type_col, + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -749,25 +620,34 @@ impl PyPersistentGraph { /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. /// id(str): The column name for the node IDs. - /// const_properties (List): List of constant node property column names. Defaults to None. (optional) - /// shared_const_properties (>): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) + /// node_type (str): A constant value to use as the node type for all nodes (optional). Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str): The node type col name in dataframe (optional) Defaults to None. (cannot be used in combination with node_type) + /// constant_properties (List[str]): List of constant node property column names. Defaults to None. 
(optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every node. Defaults to None. (optional) /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, id, const_properties = None, shared_const_properties = None))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, id, node_type = None, node_type_col=None, constant_properties = None, shared_constant_properties = None))] fn load_node_props_from_parquet( &self, parquet_path: PathBuf, id: &str, - const_properties: Option>, - shared_const_properties: Option>, + node_type: Option<&str>, + node_type_col: Option<&str>, + constant_properties: Option>, + shared_constant_properties: Option>, ) -> Result<(), GraphError> { load_node_props_from_parquet( &self.graph, parquet_path.as_path(), id, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + node_type, + node_type_col, + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), ) } @@ -777,33 +657,36 @@ impl PyPersistentGraph { /// df (Dataframe): The Pandas DataFrame containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): Layer name. Defaults to None. (optional) - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// layer (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (df, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (df, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None))] fn load_edge_props_from_pandas( &self, df: &PyAny, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_pandas( self.graph.core_graph(), df, src, dst, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, - layer_in_df, + layer_col, ) } @@ -813,33 +696,36 @@ impl PyPersistentGraph { /// parquet_path (str): Parquet file or directory of Parquet files path containing edge information. 
/// src (str): The column name for the source node. /// dst (str): The column name for the destination node. - /// const_properties (List): List of constant edge property column names. Defaults to None. (optional) - /// shared_const_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) - /// layer (str): Layer name. Defaults to None. (optional) - /// layer_in_df (bool): Whether the layer name should be used to look up the values in a column of the data frame or if it should be used directly as the layer for all edges (optional) defaults to True. + /// constant_properties (List[str]): List of constant edge property column names. Defaults to None. (optional) + /// shared_constant_properties (dict): A dictionary of constant properties that will be added to every edge. Defaults to None. (optional) + /// layer (str): The edge layer name (optional) Defaults to None. + /// layer_col (str): The edge layer col name in dataframe (optional) Defaults to None. /// /// Returns: - /// Result<(), GraphError>: Result of the operation. - #[pyo3(signature = (parquet_path, src, dst, const_properties = None, shared_const_properties = None, layer = None, layer_in_df = true))] + /// None: If the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (parquet_path, src, dst, constant_properties = None, shared_constant_properties = None, layer = None, layer_col = None))] fn load_edge_props_from_parquet( &self, parquet_path: PathBuf, src: &str, dst: &str, - const_properties: Option>, - shared_const_properties: Option>, + constant_properties: Option>, + shared_constant_properties: Option>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { load_edge_props_from_parquet( &self.graph, parquet_path.as_path(), src, dst, - const_properties.as_ref().map(|props| props.as_ref()), - shared_const_properties.as_ref(), + constant_properties.as_ref().map(|props| props.as_ref()), + shared_constant_properties.as_ref(), layer, - layer_in_df, + layer_col, ) } } diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index e165391c70..38c37af890 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -15,35 +15,33 @@ use std::collections::HashMap; pub fn load_nodes_from_pandas( graph: &GraphStorage, df: &PyAny, - id: &str, time: &str, + id: &str, node_type: Option<&str>, - node_type_in_df: Option, + node_type_col: Option<&str>, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let mut cols_to_check = vec![id, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); - if node_type_in_df.unwrap_or(true) { - if let Some(ref node_type) = node_type { - cols_to_check.push(node_type.as_ref()); - } + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); } let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( df_view, - id, time, + id, properties, - const_properties, - shared_const_properties, + constant_properties, 
+ shared_constant_properties, node_type, - node_type_in_df.unwrap_or(true), + node_type_col, graph, ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; @@ -56,37 +54,35 @@ pub fn load_nodes_from_pandas( pub fn load_edges_from_pandas( graph: &GraphStorage, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, properties: Option<&[&str]>, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let mut cols_to_check = vec![src, dst, time]; cols_to_check.extend(properties.unwrap_or(&Vec::new())); - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); - if layer_in_df.unwrap_or(false) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_edges_from_df( df_view, + time, src, dst, - time, properties, - const_properties, - shared_const_properties, + constant_properties, + shared_constant_properties, layer, - layer_in_df.unwrap_or(true), + layer_col, graph, ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; @@ -100,19 +96,26 @@ pub fn load_node_props_from_pandas( graph: &GraphStorage, df: &PyAny, id: &str, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + node_type: Option<&str>, + node_type_col: Option<&str>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let mut cols_to_check = vec![id]; - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); + } let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_node_props_from_df( df_view, id, - const_properties, - shared_const_properties, + node_type, + node_type_col, + constant_properties, + shared_constant_properties, graph, ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; @@ -127,29 +130,27 @@ pub fn load_edge_props_from_pandas( df: &PyAny, src: &str, dst: &str, - const_properties: Option<&[&str]>, - shared_const_properties: Option<&HashMap>, + constant_properties: Option<&[&str]>, + shared_constant_properties: Option<&HashMap>, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let mut cols_to_check = vec![src, dst]; - if layer_in_df.unwrap_or(false) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } - cols_to_check.extend(const_properties.unwrap_or(&Vec::new())); + cols_to_check.extend(constant_properties.unwrap_or(&Vec::new())); let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_edges_props_from_df( df_view, src, dst, - const_properties, - shared_const_properties, + constant_properties, + shared_constant_properties, layer, - layer_in_df.unwrap_or(true), + 
layer_col, graph, ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?; @@ -159,32 +160,30 @@ pub fn load_edge_props_from_pandas( Ok(()) } -pub fn load_edges_deletions_from_pandas( +pub fn load_edge_deletions_from_pandas( graph: &GraphStorage, df: &PyAny, + time: &str, src: &str, dst: &str, - time: &str, layer: Option<&str>, - layer_in_df: Option, + layer_col: Option<&str>, ) -> Result<(), GraphError> { Python::with_gil(|py| { let mut cols_to_check = vec![src, dst, time]; - if layer_in_df.unwrap_or(true) { - if let Some(ref layer) = layer { - cols_to_check.push(layer.as_ref()); - } + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); } let df_view = process_pandas_py_df(df, py, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; - load_edges_deletions_from_df( + load_edge_deletions_from_df( df_view, + time, src, dst, - time, layer, - layer_in_df.unwrap_or(true), + layer_col, graph.core_graph(), ) .map_err(|e| GraphLoadException::new_err(format!("{:?}", e)))?;
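Editor's note: the pandas loader shims above now validate `layer_col`/`node_type_col` as dataframe columns before delegating to the df loaders. From the Python side, the edge constant-property path is expected to be used roughly as follows; this is a sketch with illustrative column names, and it assumes the edges have already been loaded into the graph:

```python
import pandas as pd
from raphtory import Graph

edges_df = pd.DataFrame(
    {
        "time": [1, 2],
        "src": ["a", "b"],
        "dst": ["b", "c"],
        "relationship": ["follows", "blocks"],
    }
)
props_df = pd.DataFrame(
    {
        "src": ["a", "b"],
        "dst": ["b", "c"],
        "relationship": ["follows", "blocks"],
        "verified": [True, False],
    }
)

g = Graph()
g.load_edges_from_pandas(
    df=edges_df, time="time", src="src", dst="dst", layer_col="relationship"
)

# Attach constant properties to the edges loaded above; passing `layer_col` here
# as well keeps each row pointing at the same layered edge that was created.
g.load_edge_props_from_pandas(
    df=props_df,
    src="src",
    dst="dst",
    constant_properties=["verified"],
    layer_col="relationship",
)
```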