-
Notifications
You must be signed in to change notification settings - Fork 6
/
create_knowledge_graph.py
69 lines (59 loc) · 2.05 KB
/
create_knowledge_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from biocypher import BioCypher, Resource
from template_package.adapters.example_adapter import (
ExampleAdapter,
ExampleAdapterNodeType,
ExampleAdapterEdgeType,
ExampleAdapterProteinField,
ExampleAdapterDiseaseField,
)
# Set up the BioCypher interface. Settings are taken from
# `config/biocypher_config.yaml`, or can be passed as parameters here.
bc = BioCypher()

# Describe the remote resource to fetch and cache (change the cache directory
# in the options if needed).
urls = (
    "https://file-examples.com/wp-content/storage/2017/02/file_example_CSV_5000.csv"
)
resource = Resource(
    # Name of the resource
    name="Example resource",
    # URL to the resource(s)
    url_s=urls,
    # Cache lifetime: seven days
    lifetime=7,
)

# Download the resource (goes to '.cache' by default) and show where it landed.
paths = bc.download(resource)
print(paths)
# The returned list of paths can be used to read the resource into your
# adapter.
# Node types to include in the knowledge graph. The available types are
# declared in the adapter module (`template_package.adapters.example_adapter`).
node_types = [ExampleAdapterNodeType.PROTEIN, ExampleAdapterNodeType.DISEASE]

# Node fields (properties) to include in the knowledge graph, grouped by the
# node type they belong to. These are declared in the same adapter module.
node_fields = [
    # Protein fields
    ExampleAdapterProteinField.ID,
    ExampleAdapterProteinField.SEQUENCE,
    ExampleAdapterProteinField.DESCRIPTION,
    ExampleAdapterProteinField.TAXON,
    # Disease fields
    ExampleAdapterDiseaseField.ID,
    ExampleAdapterDiseaseField.NAME,
    ExampleAdapterDiseaseField.DESCRIPTION,
]

# Edge types to include in the knowledge graph.
edge_types = [
    ExampleAdapterEdgeType.PROTEIN_PROTEIN_INTERACTION,
    ExampleAdapterEdgeType.PROTEIN_DISEASE_ASSOCIATION,
]
# Build the example adapter from the selections made above. Edge fields are
# deliberately not passed: leaving them empty defaults to all fields in the
# adapter.
adapter = ExampleAdapter(
    node_types=node_types,
    node_fields=node_fields,
    edge_types=edge_types,
)

# Create the knowledge graph from the adapter: nodes first, then edges.
bc.write_nodes(adapter.get_nodes())
bc.write_edges(adapter.get_edges())

# Write the admin import statement for the files written above.
bc.write_import_call()

# Print a summary of the run.
bc.summary()