Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Document template fixes #1894

Merged
merged 17 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/reference/core/properties.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Properties
:private-members:
:inherited-members:

.. autoclass:: raphtory.ConstProperties
.. autoclass:: raphtory.ConstantProperties
:autosummary:
:members:
:undoc-members:
Expand Down
5 changes: 3 additions & 2 deletions python/tests/graphql/misc/test_graphql_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ def setup_server(work_dir):
server = server.set_embeddings(
cache="/tmp/graph-cache",
embedding=embedding,
node_template="{{ name }}",
graph_template="{{ props.name }}",
nodes="{{ name }}",
graphs="{{ properties.name }}",
edges=False,
)
return server

Expand Down
20 changes: 19 additions & 1 deletion python/tests/test_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def create_graph() -> VectorisedGraph:
g.add_edge(4, "node3", "node4", {"name": "edge3"})

vg = g.vectorise(
embedding, node_template="{{ name }}", edge_template="{{ props.name }}"
embedding, nodes="{{ name }}", edges="{{ properties.name }}", graph=False
)

return vg
Expand Down Expand Up @@ -217,6 +217,24 @@ def test_filtering_by_entity_type():
contents = [doc.content for doc in selection.get_documents()]
assert contents == ["edge1", "edge2", "edge3"]

def constant_embedding(texts):
    """Embedding stub that maps every input text to the same vector.

    Args:
        texts: an iterable of documents to embed; the contents are ignored.

    Returns:
        A list with one ``[1, 0, 0]`` vector per element of ``texts``.
    """
    # The values are irrelevant to the caller; only the count has to match.
    return [[1, 0, 0] for _ in texts]

def test_default_template():
    """Check the documents produced when ``vectorise`` gets no explicit templates.

    Builds a graph with a single node and a self-loop edge, vectorises it
    passing only the embedding function, and asserts the exact text of the
    one node document and the one edge document that come back.
    """
    g = Graph()
    g.add_node(1, "node1")
    g.add_edge(2, "node1", "node1")

    # No nodes/edges/graph arguments are passed here, only the embedding.
    vg = g.vectorise(constant_embedding)

    # The query text is arbitrary: constant_embedding returns the same vector
    # for every input, so the assertions below only depend on document content.
    node_docs = vg.nodes_by_similarity(query="whatever", limit=10).get_documents()
    assert len(node_docs) == 1
    assert node_docs[0].content == "Node node1 has the following properties:\n"

    edge_docs = vg.edges_by_similarity(query="whatever", limit=10).get_documents()
    assert len(edge_docs) == 1
    assert edge_docs[0].content == "There is an edge from node1 to node1 with events at:\n- Jan 1 1970 00:00\n"


### MULTI-DOCUMENT VERSION TO BE RE-ENABLED

Expand Down
8 changes: 6 additions & 2 deletions raphtory-graphql/src/model/graph/property.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,12 @@ fn prop_to_gql(prop: &Prop) -> GqlValue {
Prop::I64(u) => GqlValue::Number(Number::from(*u)),
Prop::U32(u) => GqlValue::Number(Number::from(*u)),
Prop::U64(u) => GqlValue::Number(Number::from(*u)),
Prop::F32(u) => GqlValue::Number(Number::from_f64(*u as f64).unwrap()),
Prop::F64(u) => GqlValue::Number(Number::from_f64(*u).unwrap()),
Prop::F32(u) => Number::from_f64(*u as f64)
.map(|number| GqlValue::Number(number))
.unwrap_or(GqlValue::Null),
Prop::F64(u) => Number::from_f64(*u as f64)
.map(|number| GqlValue::Number(number))
.unwrap_or(GqlValue::Null),
Prop::Bool(b) => GqlValue::Boolean(*b),
Prop::List(l) => GqlValue::List(l.iter().map(|pp| prop_to_gql(pp)).collect()),
Prop::Map(m) => GqlValue::Object(
Expand Down
2 changes: 1 addition & 1 deletion raphtory-graphql/src/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use raphtory::{
serialise::GraphFolder,
};

#[derive(Clone)]
#[derive(Clone, Debug)]
pub struct ExistingGraphFolder {
folder: ValidGraphFolder,
}
Expand Down
87 changes: 40 additions & 47 deletions raphtory-graphql/src/python/server/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,14 @@ use pyo3::{
IntoPyObjectExt,
};
use raphtory::{
python::types::wrappers::document::PyDocument,
vectors::{embeddings::openai_embedding, template::DocumentTemplate, EmbeddingFunction},
python::{packages::vectors::TemplateConfig, types::wrappers::document::PyDocument},
vectors::{
embeddings::openai_embedding,
template::{
DocumentTemplate, DEFAULT_EDGE_TEMPLATE, DEFAULT_GRAPH_TEMPLATE, DEFAULT_NODE_TEMPLATE,
},
EmbeddingFunction,
},
};
use std::{collections::HashMap, path::PathBuf, sync::Arc, thread};

Expand All @@ -43,17 +49,17 @@ impl<'py> IntoPyObject<'py> for GraphServer {
}

fn template_from_python(
graph_template: Option<String>,
node_template: Option<String>,
edge_template: Option<String>,
graphs: TemplateConfig,
nodes: TemplateConfig,
edges: TemplateConfig,
) -> Option<DocumentTemplate> {
if graph_template.is_none() && node_template.is_none() && edge_template.is_none() {
if graphs.is_disabled() && nodes.is_disabled() && edges.is_disabled() {
None
} else {
Some(DocumentTemplate {
graph_template,
node_template,
edge_template,
graph_template: graphs.get_template_or(DEFAULT_GRAPH_TEMPLATE),
node_template: nodes.get_template_or(DEFAULT_NODE_TEMPLATE),
edge_template: edges.get_template_or(DEFAULT_EDGE_TEMPLATE),
})
}
}
Expand All @@ -67,11 +73,11 @@ impl PyGraphServer {
slf: PyRefMut<Self>,
cache: String,
embedding: F,
graph_template: Option<String>,
node_template: Option<String>,
edge_template: Option<String>,
graphs: TemplateConfig,
nodes: TemplateConfig,
edges: TemplateConfig,
) -> PyResult<GraphServer> {
let global_template = template_from_python(graph_template, node_template, edge_template);
let global_template = template_from_python(graphs, nodes, edges);
let server = take_server_ownership(slf)?;
let cache = PathBuf::from(cache);
Ok(server.set_embeddings(embedding, &cache, global_template))
Expand Down Expand Up @@ -208,72 +214,59 @@ impl PyGraphServer {
/// Arguments:
/// cache (str): the directory to use as cache for the embeddings.
/// embedding (Callable, optional): the embedding function to translate documents to embeddings.
/// graph_template (str, optional): the template to use for graphs.
/// node_template (str, optional): the template to use for nodes.
/// edge_template (str, optional): the template to use for edges.
/// graphs (bool | str): whether to embed graphs (bool), or the custom template to use for them (str). Defaults to True.
/// nodes (bool | str): whether to embed nodes (bool), or the custom template to use for them (str). Defaults to True.
/// edges (bool | str): whether to embed edges (bool), or the custom template to use for them (str). Defaults to True.
///
/// Returns:
/// GraphServer: A new server object with embeddings setup.
#[pyo3(
signature = (cache, embedding = None, graph_template = None, node_template = None, edge_template = None)
signature = (cache, embedding = None, graphs = TemplateConfig::Bool(true), nodes = TemplateConfig::Bool(true), edges = TemplateConfig::Bool(true))
)]
fn set_embeddings(
slf: PyRefMut<Self>,
cache: String,
embedding: Option<Py<PyFunction>>,
graph_template: Option<String>,
node_template: Option<String>,
edge_template: Option<String>,
graphs: TemplateConfig,
nodes: TemplateConfig,
edges: TemplateConfig,
) -> PyResult<GraphServer> {
match embedding {
Some(embedding) => {
let embedding: Arc<dyn EmbeddingFunction> = Arc::new(embedding);
Self::set_generic_embeddings(
slf,
cache,
embedding,
graph_template,
node_template,
edge_template,
)
Self::set_generic_embeddings(slf, cache, embedding, graphs, nodes, edges)
}
None => {
Self::set_generic_embeddings(slf, cache, openai_embedding, graphs, nodes, edges)
}
None => Self::set_generic_embeddings(
slf,
cache,
openai_embedding,
graph_template,
node_template,
edge_template,
),
}
}

/// Vectorise a subset of the graphs of the server.
///
/// Arguments:
/// graph_names (list[str]): the names of the graphs to vectorise. All by default.
/// graph_template (str, optional): the template to use for graphs.
/// node_template (str, optional): the template to use for nodes.
/// edge_template (str, optional): the template to use for edges.
/// graphs (bool | str): whether to embed graphs (bool), or the custom template to use for them (str). Defaults to True.
/// nodes (bool | str): whether to embed nodes (bool), or the custom template to use for them (str). Defaults to True.
/// edges (bool | str): whether to embed edges (bool), or the custom template to use for them (str). Defaults to True.
///
/// Returns:
/// GraphServer: A new server object containing the vectorised graphs.
#[pyo3(
signature = (graph_names, graph_template = None, node_template = None, edge_template = None)
signature = (graph_names, graphs = TemplateConfig::Bool(true), nodes = TemplateConfig::Bool(true), edges = TemplateConfig::Bool(true))
)]
fn with_vectorised_graphs(
slf: PyRefMut<Self>,
graph_names: Vec<String>,
// TODO: support more models by just providing a string, e.g. "openai", here and in the VectorisedGraph API
graph_template: Option<String>,
node_template: Option<String>,
edge_template: Option<String>,
graphs: TemplateConfig,
nodes: TemplateConfig,
edges: TemplateConfig,
) -> PyResult<GraphServer> {
let template = template_from_python(graph_template, node_template, edge_template).ok_or(
PyAttributeError::new_err(
let template =
template_from_python(graphs, nodes, edges).ok_or(PyAttributeError::new_err(
"some of graph_template, node_template, edge_template has to be set",
),
)?;
))?;
let server = take_server_ownership(slf)?;
Ok(server.with_vectorised_graphs(graph_names, template))
}
Expand Down
10 changes: 5 additions & 5 deletions raphtory/src/db/api/properties/constant_props.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ use crate::{
use raphtory_api::core::storage::arc_str::ArcStr;
use std::collections::HashMap;

pub struct ConstProperties<'a, P: ConstPropertiesOps> {
pub struct ConstantProperties<'a, P: ConstPropertiesOps> {
pub(crate) props: P,
_marker: std::marker::PhantomData<&'a P>,
}

impl<'a, P: ConstPropertiesOps + Sync> ConstProperties<'a, P> {
impl<'a, P: ConstPropertiesOps + Sync> ConstantProperties<'a, P> {
pub(crate) fn new(props: P) -> Self {
Self {
props,
Expand Down Expand Up @@ -47,7 +47,7 @@ impl<'a, P: ConstPropertiesOps + Sync> ConstProperties<'a, P> {
}
}

impl<'a, P: ConstPropertiesOps + Sync + 'a> IntoIterator for ConstProperties<'a, P> {
impl<'a, P: ConstPropertiesOps + Sync + 'a> IntoIterator for ConstantProperties<'a, P> {
type Item = (ArcStr, Prop);
type IntoIter = BoxedLIter<'a, Self::Item>;

Expand All @@ -60,7 +60,7 @@ impl<'a, P: ConstPropertiesOps + Sync + 'a> IntoIterator for ConstProperties<'a,
}
}

impl<'a, P: ConstPropertiesOps + Sync> IntoIterator for &'a ConstProperties<'a, P> {
impl<'a, P: ConstPropertiesOps + Sync> IntoIterator for &'a ConstantProperties<'a, P> {
type Item = (ArcStr, Prop);
type IntoIter = Box<dyn Iterator<Item = (ArcStr, Prop)> + 'a>;

Expand All @@ -71,7 +71,7 @@ impl<'a, P: ConstPropertiesOps + Sync> IntoIterator for &'a ConstProperties<'a,
}
}

impl<'a, P: ConstPropertiesOps + Sync> PartialEq for ConstProperties<'a, P> {
impl<'a, P: ConstPropertiesOps + Sync> PartialEq for ConstantProperties<'a, P> {
fn eq(&self, other: &Self) -> bool {
self.as_map() == other.as_map()
}
Expand Down
10 changes: 5 additions & 5 deletions raphtory/src/db/api/properties/dyn_props.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::db::api::{
properties::{
internal::{InheritPropertiesOps, PropertiesOps},
ConstProperties, Properties, TemporalProperties, TemporalPropertyView,
ConstantProperties, Properties, TemporalProperties, TemporalPropertyView,
},
view::{internal::Static, DynamicGraph},
};
Expand All @@ -26,13 +26,13 @@ impl From<Properties<DynamicGraph>> for DynProperties {
}
}

pub type DynConstProperties = ConstProperties<'static, DynProps>;
pub type DynConstProperties = ConstantProperties<'static, DynProps>;

impl<P: PropertiesOps + Send + Sync + Static + 'static> From<ConstProperties<'static, P>>
impl<P: PropertiesOps + Send + Sync + Static + 'static> From<ConstantProperties<'static, P>>
for DynConstProperties
{
fn from(value: ConstProperties<P>) -> Self {
ConstProperties::new(Arc::new(value.props))
fn from(value: ConstantProperties<P>) -> Self {
ConstantProperties::new(Arc::new(value.props))
}
}

Expand Down
6 changes: 3 additions & 3 deletions raphtory/src/db/api/properties/props.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
core::Prop,
db::api::properties::{
constant_props::ConstProperties, internal::*, temporal_props::TemporalProperties,
constant_props::ConstantProperties, internal::*, temporal_props::TemporalProperties,
},
};
use raphtory_api::core::storage::arc_str::ArcStr;
Expand Down Expand Up @@ -69,8 +69,8 @@ impl<P: PropertiesOps + Clone> Properties<P> {
}

/// Get a view of the constant properties (meta-data) only.
pub fn constant<'a>(&self) -> ConstProperties<'a, P> {
ConstProperties::new(self.props.clone())
pub fn constant<'a>(&self) -> ConstantProperties<'a, P> {
ConstantProperties::new(self.props.clone())
}

/// Collect properties into vector
Expand Down
6 changes: 3 additions & 3 deletions raphtory/src/db/graph/edge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,15 +277,15 @@ impl<G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps>
///
/// # Arguments
///
/// * `props` - Property key-value pairs to add
/// * `properties` - Property key-value pairs to add
/// * `layer` - The layer to which properties should be added. If the edge view is restricted to a
/// single layer, 'None' will add the properties to that layer and 'Some("name")'
/// fails unless the layer matches the edge view. If the edge view is not restricted
/// to a single layer, 'None' sets the properties on the default layer and 'Some("name")'
/// sets the properties on layer '"name"' and fails if that layer doesn't exist.
pub fn add_constant_properties<C: CollectProperties>(
&self,
props: C,
properties: C,
layer: Option<&str>,
) -> Result<(), GraphError> {
let input_layer_id = self.resolve_layer(layer, false)?;
Expand All @@ -300,7 +300,7 @@ impl<G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps>
dst: self.dst().name(),
});
}
let properties: Vec<(usize, Prop)> = props.collect_properties(|name, dtype| {
let properties: Vec<(usize, Prop)> = properties.collect_properties(|name, dtype| {
Ok(self.graph.resolve_edge_property(name, dtype, true)?.inner())
})?;

Expand Down
4 changes: 2 additions & 2 deletions raphtory/src/db/graph/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,9 @@ impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> BaseNodeViewOps<
impl<G: StaticGraphViewOps + InternalPropertyAdditionOps + InternalAdditionOps> NodeView<G, G> {
pub fn add_constant_properties<C: CollectProperties>(
&self,
props: C,
properties: C,
) -> Result<(), GraphError> {
let properties: Vec<(usize, Prop)> = props.collect_properties(|name, dtype| {
let properties: Vec<(usize, Prop)> = properties.collect_properties(|name, dtype| {
Ok(self.graph.resolve_node_property(name, dtype, true)?.inner())
})?;
self.graph
Expand Down
3 changes: 2 additions & 1 deletion raphtory/src/db/graph/views/node_subgraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ impl<'graph, G: GraphViewOps<'graph>> NodeSubgraph<G> {
nodes.collect()
};
nodes.sort();
nodes.dedup();
ricopinazo marked this conversation as resolved.
Show resolved Hide resolved
let nodes = Index::new(nodes, graph.unfiltered_num_nodes());
Self { graph, nodes }
}
Expand Down Expand Up @@ -132,7 +133,7 @@ mod subgraph_tests {
graph.add_node(2, 2, NO_PROPS, None).unwrap();

test_storage!(&graph, |graph| {
let sg = graph.subgraph([1, 2]);
let sg = graph.subgraph([1, 2, 1]); // <- duplicated nodes should have no effect

let actual = sg.materialize().unwrap().into_events().unwrap();
assert_graph_equal(&actual, &sg);
Expand Down
Loading
Loading