-
Notifications
You must be signed in to change notification settings - Fork 5
/
Q3-RDFframes.py
45 lines (28 loc) · 1.65 KB
/
Q3-RDFframes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
''' Get a list of American actors available in both DBpedia and YAGO graphs. '''
from rdfframes.knowledge_graph import KnowledgeGraph
from rdfframes.utils.constants import JoinType
from rdfframes.client.http_client import HttpClientDataFormat, HttpClient
from time import time
# DBpedia graph handle: constructed from its name alone.
graph1 = KnowledgeGraph(graph_name='dbpedia')

# Prefix -> namespace mapping handed to the YAGO graph handle.
_YAGO_PREFIXES = {
    'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'yago': 'http://yago-knowledge.org/resource/',
    'yagoinfo': 'http://yago-knowledge.org/resource/infobox/en/',
}

# YAGO graph handle: constructed with an explicit graph URI and the prefixes above.
graph2 = KnowledgeGraph(
    graph_name='yago',
    graph_uri='http://yago-knowledge.org/',
    prefixes=_YAGO_PREFIXES,
)
def join(join_type, endpoint=None):
    """Join American actors from DBpedia with American actors from YAGO by name.

    Two datasets are built:

    * from ``graph1`` (DBpedia): the ``dbpp:starring`` relation, expanded with
      each actor's ``dbpp:birthPlace`` and ``dbpp:name``, keeping rows whose
      birth place string matches the regex ``"USA"``;
    * from ``graph2`` (YAGO): the ``yago:actedIn`` relation, expanded with each
      actor's ``yago:isCitizenOf`` and ``yagoinfo:name``, keeping rows whose
      citizenship equals ``yago:United_States``.

    The two datasets are then joined on their shared ``name`` column.

    Args:
        join_type: The ``JoinType`` member forwarded to the dataset ``join``
            call.
        endpoint: URL of the SPARQL endpoint to execute the query against.
            When left as ``None`` the query is not executed and only the
            result of ``actors.to_sparql()`` is printed.

    Returns:
        None. When an endpoint is given, the shape of the resulting pandas
        DataFrame is printed; the generated SPARQL is printed in all cases.
    """
    dbpedia_actors = (
        graph1.feature_domain_range('dbpp:starring', 'film1', 'actor1')
        .expand('actor1', [('dbpp:birthPlace', 'actor_country1'),
                           ('dbpp:name', 'name')])
        .filter({'actor_country1': ['regex(str(?actor_country1), "USA")']})
    )
    yago_actors = (
        graph2.feature_domain_range('yago:actedIn', 'actor2', 'film2')
        .expand('actor2', [('yago:isCitizenOf', 'actor_country2'),
                           ('yagoinfo:name', 'name')])
        .filter({'actor_country2': ['= yago:United_States']})
    )
    actors = dbpedia_actors.join(yago_actors, 'name', join_type=join_type)

    # The endpoint used to be read from a module-level name `endpoint` that is
    # never defined in this file, so every call died with NameError before
    # anything was printed. It is now an explicit, optional argument; without
    # it the remote execution step is skipped instead of crashing.
    if endpoint is not None:
        output_format = HttpClientDataFormat.PANDAS_DF
        client = HttpClient(endpoint_url=endpoint, return_format=output_format)
        df = actors.execute(client, return_format=output_format)
        print(df.shape)

    print(actors.to_sparql())
# Script entry point: run the DBpedia/YAGO actor join as an inner join.
join(join_type=JoinType.InnerJoin)