Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change the windowing semantics to not enforce nesting by default #1445

Merged
merged 14 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 118 additions & 85 deletions python/tests/test_graphdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
from raphtory import graph_loader
import tempfile
from math import isclose
import datetime
from datetime import datetime, timezone
import string

edges = [(1, 1, 2), (2, 1, 3), (-1, 2, 1), (0, 1, 1), (7, 3, 2), (1, 1, 1)]

utc = timezone.utc

def create_graph():
g = Graph()
Expand Down Expand Up @@ -248,8 +248,6 @@ def test_getitem():


def test_entity_history_date_time():
import datetime

g = Graph()
g.add_node(0, 1)
g.add_node(1, 1)
Expand All @@ -261,22 +259,22 @@ def test_entity_history_date_time():
e = g.add_edge(3, 1, 2)

full_history_1 = [
datetime.datetime(1970, 1, 1, 0, 0),
datetime.datetime(1970, 1, 1, 0, 0, 0, 1000),
datetime.datetime(1970, 1, 1, 0, 0, 0, 2000),
datetime.datetime(1970, 1, 1, 0, 0, 0, 3000),
datetime(1970, 1, 1, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 1000, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 2000, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 3000, tzinfo=utc),
]

full_history_2 = [
datetime.datetime(1970, 1, 1, 0, 0, 0, 4000),
datetime.datetime(1970, 1, 1, 0, 0, 0, 5000),
datetime.datetime(1970, 1, 1, 0, 0, 0, 6000),
datetime.datetime(1970, 1, 1, 0, 0, 0, 7000),
datetime(1970, 1, 1, 0, 0, 0, 4000, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 5000, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 6000, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 7000, tzinfo=utc),
]

windowed_history = [
datetime.datetime(1970, 1, 1, 0, 0),
datetime.datetime(1970, 1, 1, 0, 0, 0, 1000),
datetime(1970, 1, 1, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 1000, tzinfo=utc),
]

assert v.history_date_time() == full_history_1
Expand All @@ -297,32 +295,32 @@ def test_entity_history_date_time():
]

assert g.nodes.earliest_date_time == [
datetime.datetime(1970, 1, 1, 0, 0),
datetime.datetime(1970, 1, 1, 0, 0),
datetime.datetime(1970, 1, 1, 0, 0, 0, 4000),
datetime(1970, 1, 1, tzinfo=utc),
datetime(1970, 1, 1, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 4000, tzinfo=utc),
]
assert g.nodes.latest_date_time == [
datetime.datetime(1970, 1, 1, 0, 0, 0, 7000),
datetime.datetime(1970, 1, 1, 0, 0, 0, 3000),
datetime.datetime(1970, 1, 1, 0, 0, 0, 7000),
datetime(1970, 1, 1, 0, 0, 0, 7000, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 3000, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 7000, tzinfo=utc),
]

assert g.nodes.neighbours.latest_date_time.collect() == [
[
datetime.datetime(1970, 1, 1, 0, 0, 0, 3000),
datetime.datetime(1970, 1, 1, 0, 0, 0, 7000),
datetime(1970, 1, 1, 0, 0, 0, 3000, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 7000, tzinfo=utc),
],
[datetime.datetime(1970, 1, 1, 0, 0, 0, 7000)],
[datetime.datetime(1970, 1, 1, 0, 0, 0, 7000)],
[datetime(1970, 1, 1, 0, 0, 0, 7000, tzinfo=utc)],
[datetime(1970, 1, 1, 0, 0, 0, 7000, tzinfo=utc)],
]

assert g.nodes.neighbours.earliest_date_time.collect() == [
[
datetime.datetime(1970, 1, 1, 0, 0),
datetime.datetime(1970, 1, 1, 0, 0, 0, 4000),
datetime(1970, 1, 1, tzinfo=utc),
datetime(1970, 1, 1, 0, 0, 0, 4000, tzinfo=utc),
],
[datetime.datetime(1970, 1, 1, 0, 0)],
[datetime.datetime(1970, 1, 1, 0, 0)],
[datetime(1970, 1, 1, tzinfo=utc)],
[datetime(1970, 1, 1, tzinfo=utc)],
]


Expand Down Expand Up @@ -1026,14 +1024,14 @@ def test_all_degrees_window():
view = g.before(5)
v = view.node(2)
assert v.window(0, 4).in_degree() == 3
assert v.window(start=2).in_degree() == 2
assert v.window(end=3).in_degree() == 2
assert v.after(1).in_degree() == 2
assert v.before(3).in_degree() == 2
assert v.window(0, 4).out_degree() == 1
assert v.window(start=2).out_degree() == 1
assert v.window(end=3).out_degree() == 1
assert v.after(1).out_degree() == 1
assert v.before(end=3).out_degree() == 1
assert v.window(0, 4).degree() == 3
assert v.window(start=2).degree() == 2
assert v.window(end=3).degree() == 2
assert v.after(1).degree() == 2
assert v.before(end=3).degree() == 2


def test_all_edge_window():
Expand All @@ -1049,24 +1047,24 @@ def test_all_edge_window():
view = g.before(5)
v = view.node(2)
assert sorted(v.window(0, 4).in_edges.src.id) == [1, 3, 4]
assert sorted(v.window(end=4).in_edges.src.id) == [1, 3, 4]
assert sorted(v.window(start=2).in_edges.src.id) == [3, 4]
assert sorted(v.before(end=4).in_edges.src.id) == [1, 3, 4]
assert sorted(v.after(start=1).in_edges.src.id) == [3, 4]
assert sorted(v.window(0, 4).out_edges.dst.id) == [3]
assert sorted(v.window(end=3).out_edges.dst.id) == [3]
assert sorted(v.window(start=2).out_edges.dst.id) == [4]
assert sorted(v.before(end=3).out_edges.dst.id) == [3]
assert sorted(v.after(start=1).out_edges.dst.id) == [4]
assert sorted((e.src.id, e.dst.id) for e in v.window(0, 4).edges) == [
(1, 2),
(2, 3),
(3, 2),
(4, 2),
]
assert sorted((e.src.id, e.dst.id) for e in v.window(end=4).edges) == [
assert sorted((e.src.id, e.dst.id) for e in v.before(end=4).edges) == [
(1, 2),
(2, 3),
(3, 2),
(4, 2),
]
assert sorted((e.src.id, e.dst.id) for e in v.window(start=1).edges) == [
assert sorted((e.src.id, e.dst.id) for e in v.after(start=0).edges) == [
(1, 2),
(2, 3),
(2, 4),
Expand Down Expand Up @@ -1410,7 +1408,9 @@ def test_window_size():
g.add_node(1, 1)
g.add_node(4, 4)

assert g.window_size == 4
assert g.window_size is None
assert g.window(1, 5).window_size == 4



def test_time_index():
Expand All @@ -1420,8 +1420,8 @@ def test_time_index():
rolling = w.rolling("1 day")
time_index = rolling.time_index()
assert list(time_index) == [
datetime.datetime(2020, 1, 1, 23, 59, 59, 999000),
datetime.datetime(2020, 1, 2, 23, 59, 59, 999000),
datetime(2020, 1, 1, 23, 59, 59, 999000, tzinfo=utc),
datetime(2020, 1, 2, 23, 59, 59, 999000, tzinfo=utc),
]

w = g.window(1, 3)
Expand All @@ -1437,11 +1437,11 @@ def test_time_index():

def test_datetime_props():
g = Graph()
dt1 = datetime.datetime(2020, 1, 1, 23, 59, 59, 999000)
dt1 = datetime(2020, 1, 1, 23, 59, 59, 999000)
g.add_node(0, 0, {"time": dt1})
assert g.node(0).properties.get("time") == dt1

dt2 = datetime.datetime(2020, 1, 1, 23, 59, 59, 999999)
dt2 = datetime(2020, 1, 1, 23, 59, 59, 999999)
g.add_node(0, 1, {"time": dt2})
assert g.node(1).properties.get("time") == dt2

Expand All @@ -1454,19 +1454,19 @@ def test_date_time():
g.add_edge("2014-02-04", 1, 4)
g.add_edge("2014-02-05", 1, 2)

assert g.earliest_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert g.latest_date_time == datetime.datetime(2014, 2, 5, 0, 0)
assert g.earliest_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert g.latest_date_time == datetime(2014, 2, 5, 0, 0, tzinfo=utc)

e = g.edge(1, 3)
exploded_edges = []
for edge in e.explode():
exploded_edges.append(edge.date_time)
assert exploded_edges == [datetime.datetime(2014, 2, 3)]
assert g.edge(1, 2).earliest_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert g.edge(1, 2).latest_date_time == datetime.datetime(2014, 2, 5, 0, 0)
assert exploded_edges == [datetime(2014, 2, 3, tzinfo=utc)]
assert g.edge(1, 2).earliest_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert g.edge(1, 2).latest_date_time == datetime(2014, 2, 5, 0, 0, tzinfo=utc)

assert g.node(1).earliest_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert g.node(1).latest_date_time == datetime.datetime(2014, 2, 5, 0, 0)
assert g.node(1).earliest_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert g.node(1).latest_date_time == datetime(2014, 2, 5, 0, 0, tzinfo=utc)


def test_date_time_window():
Expand All @@ -1481,51 +1481,50 @@ def test_date_time_window():
view = g.window("2014-02-02", "2014-02-04")
view2 = g.window("2014-02-02", "2014-02-05")

assert view.start_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert view.end_date_time == datetime.datetime(2014, 2, 4, 0, 0)
assert view.start_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert view.end_date_time == datetime(2014, 2, 4, 0, 0, tzinfo=utc)

assert view.earliest_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert view.latest_date_time == datetime.datetime(2014, 2, 3, 0, 0)
assert view.earliest_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert view.latest_date_time == datetime(2014, 2, 3, 0, 0, tzinfo=utc)

assert view2.edge(1, 2).start_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert view2.edge(1, 2).end_date_time == datetime.datetime(2014, 2, 5, 0, 0)
assert view2.edge(1, 2).start_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert view2.edge(1, 2).end_date_time == datetime(2014, 2, 5, 0, 0, tzinfo=utc)

assert view.node(1).earliest_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert view.node(1).latest_date_time == datetime.datetime(2014, 2, 3, 0, 0)
assert view.node(1).earliest_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert view.node(1).latest_date_time == datetime(2014, 2, 3, 0, 0, tzinfo=utc)

e = view.edge(1, 2)
exploded_edges = []
for edge in e.explode():
exploded_edges.append(edge.date_time)
assert exploded_edges == [datetime.datetime(2014, 2, 2)]
assert exploded_edges == [datetime(2014, 2, 2, tzinfo=utc)]


def test_datetime_add_node():
g = Graph()
g.add_node(datetime.datetime(2014, 2, 2), 1)
g.add_node(datetime.datetime(2014, 2, 3), 2)
g.add_node(datetime.datetime(2014, 2, 4), 2)
g.add_node(datetime.datetime(2014, 2, 5), 4)
g.add_node(datetime.datetime(2014, 2, 6), 5)
g.add_node(datetime(2014, 2, 2), 1)
g.add_node(datetime(2014, 2, 3), 2)
g.add_node(datetime(2014, 2, 4), 2)
g.add_node(datetime(2014, 2, 5), 4)
g.add_node(datetime(2014, 2, 6), 5)

view = g.window("2014-02-02", "2014-02-04")
view2 = g.window("2014-02-02", "2014-02-05")

assert view.start_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert view.end_date_time == datetime.datetime(2014, 2, 4, 0, 0)
assert view.start_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert view.end_date_time == datetime(2014, 2, 4, 0, 0, tzinfo=utc)

assert view2.earliest_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert view2.latest_date_time == datetime.datetime(2014, 2, 4, 0, 0)
assert view2.earliest_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert view2.latest_date_time == datetime(2014, 2, 4, 0, 0, tzinfo=utc)

assert view2.node(1).start_date_time == datetime.datetime(2014, 2, 2, 0, 0)
assert view2.node(1).end_date_time == datetime.datetime(2014, 2, 5, 0, 0)
assert view2.node(1).start_date_time == datetime(2014, 2, 2, 0, 0, tzinfo=utc)
assert view2.node(1).end_date_time == datetime(2014, 2, 5, 0, 0, tzinfo=utc)

assert view.node(2).earliest_date_time == datetime.datetime(2014, 2, 3, 0, 0)
assert view.node(2).latest_date_time == datetime.datetime(2014, 2, 3, 0, 0)
assert view.node(2).earliest_date_time == datetime(2014, 2, 3, 0, 0, tzinfo=utc)
assert view.node(2).latest_date_time == datetime(2014, 2, 3, 0, 0, tzinfo=utc)


def test_datetime_with_timezone():
from datetime import datetime
from raphtory import Graph
import pytz

Expand All @@ -1540,12 +1539,12 @@ def test_datetime_with_timezone():
"Africa/Johannesburg",
]
results = [
datetime(2024, 1, 5, 1, 0),
datetime(2024, 1, 5, 6, 30),
datetime(2024, 1, 5, 10, 0),
datetime(2024, 1, 5, 12, 0),
datetime(2024, 1, 5, 17, 0),
datetime(2024, 1, 5, 18, 0),
datetime(2024, 1, 5, 1, 0, tzinfo=utc),
datetime(2024, 1, 5, 6, 30, tzinfo=utc),
datetime(2024, 1, 5, 10, 0, tzinfo=utc),
datetime(2024, 1, 5, 12, 0, tzinfo=utc),
datetime(2024, 1, 5, 17, 0, tzinfo=utc),
datetime(2024, 1, 5, 18, 0, tzinfo=utc),
]

for tz in timezones:
Expand Down Expand Up @@ -1649,14 +1648,48 @@ def test_materialize_graph():

def test_deletions():
g = create_graph_with_deletions()
deleted_edge = g.edge(edges[0][1], edges[0][2])
for e in edges:
assert g.at(e[0]).has_edge(e[1], e[2])

assert not g.window(start=11).has_edge(edges[0][1], edges[0][2])
assert g.after(e[0]).has_edge(e[1], e[2])

for e in edges[:-1]:
# last update is an existing edge
assert not g.before(e[0]).has_edge(e[1], e[2])

# deleted at window start
assert deleted_edge.window(10, 20).is_deleted()
assert not deleted_edge.window(10, 20).is_valid()
assert deleted_edge.window(10, 20).earliest_time is None
assert deleted_edge.window(10, 20).latest_time is None

# deleted before window start
assert deleted_edge.window(15, 20).is_deleted()
assert not deleted_edge.window(15, 20).is_valid()
assert deleted_edge.window(15, 20).earliest_time is None
assert deleted_edge.window(15, 20).latest_time is None

# deleted in window
assert deleted_edge.window(5, 20).is_deleted()
assert not deleted_edge.window(5, 20).is_valid()
assert deleted_edge.window(5, 20).earliest_time == 5
assert deleted_edge.window(5, 20).latest_time == 10

# check deleted edge is gone at 10
assert not g.after(start=10).has_edge(edges[0][1], edges[0][2])
assert not g.at(10).has_edge(edges[0][1], edges[0][2])
assert g.before(10).has_edge(edges[0][1], edges[0][2])

# check not deleted edges are still there
for e in edges[1:]:
assert g.window(start=11).has_edge(e[1], e[2])
assert g.after(start=10).has_edge(e[1], e[2])

assert list(deleted_edge.explode().latest_time) == [10]
assert list(deleted_edge.explode().earliest_time) == [edges[0][0]]

assert list(g.edge(edges[0][1], edges[0][2]).explode().latest_time) == [10]
# check rolling and expanding behaviour
assert not list(g.before(1).node(1).after(1).rolling(1))
assert not list(g.after(0).edge(1, 1).before(1).expanding(1))


def test_edge_layer():
Expand Down
4 changes: 2 additions & 2 deletions raphtory-benchmark/benches/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ pub fn base(c: &mut Criterion) {
);
graph_window_group_100.finish();
let mut graph_window_group_10 = c.benchmark_group("lotr_graph_window_10");
let latest = graph.end().expect("non-empty graph");
let earliest = graph.start().expect("non-empty graph");
let latest = graph.latest_time().expect("non-empty graph");
let earliest = graph.earliest_time().expect("non-empty graph");
let start = latest - (latest - earliest) / 10;
graph_window_group_10.sample_size(10);
run_analysis_benchmarks(
Expand Down
4 changes: 2 additions & 2 deletions raphtory-benchmark/benches/graph_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ pub fn graph(c: &mut Criterion) {
);
graph_window_group_100.finish();
let mut graph_window_group_10 = c.benchmark_group("analysis_graph_window_10");
let latest = graph.end().expect("non-empty graph");
let earliest = graph.start().expect("non-empty graph");
let latest = graph.latest_time().expect("non-empty graph");
let earliest = graph.earliest_time().expect("non-empty graph");
let start = latest - (latest - earliest) / 10;
graph_window_group_10.sample_size(10);
run_analysis_benchmarks(
Expand Down
Loading
Loading