Package node2vec as an installable Python module.

- Move src/node2vec.py to node2vec/node2vec.py and add node2vec/__init__.py.
- Move read_graph() and learn_embeddings() from the command-line script into
  the library, taking explicit arguments instead of the global `args`.
- Move src/main.py to scripts/run_node2vec.py and guard its entry point.
- Add setup.py and a smoke test (tests/test_node2vec.py).

diff --git a/node2vec/__init__.py b/node2vec/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/node2vec.py b/node2vec/node2vec.py
similarity index 79%
rename from src/node2vec.py
rename to node2vec/node2vec.py
index 0293411..10da36e 100644
--- a/src/node2vec.py
+++ b/node2vec/node2vec.py
@@ -1,7 +1,7 @@
 import numpy as np
 import networkx as nx
 import random
-
+from gensim.models import Word2Vec
 
 class Graph():
 	def __init__(self, nx_G, is_directed, p, q):
@@ -43,9 +43,9 @@ def simulate_walks(self, num_walks, walk_length):
 		G = self.G
 		walks = []
 		nodes = list(G.nodes())
-		print 'Walk iteration:'
+		print('Walk iteration:')
 		for walk_iter in range(num_walks):
-			print str(walk_iter+1), '/', str(num_walks)
+			print('{0} / {1}'.format(walk_iter+1, num_walks))
 			random.shuffle(nodes)
 			for node in nodes:
 				walks.append(self.node2vec_walk(walk_length=walk_length, start_node=node))
@@ -146,4 +146,41 @@ def alias_draw(J, q):
 	if np.random.rand() < q[kk]:
 		return kk
 	else:
-		return J[kk]
\ No newline at end of file
+		return J[kk]
+
+def read_graph(input,weighted=False,directed=False):
+	'''
+	Read an edge list from `input` into a networkx graph.
+
+	Node ids are parsed as ints. When `weighted` is true the edge data is
+	read as a float 'weight' attribute; otherwise every edge gets
+	weight 1. Unless `directed` is true, the graph is converted to an
+	undirected graph before being returned.
+	'''
+	if weighted:
+		G = nx.read_edgelist(input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph())
+	else:
+		G = nx.read_edgelist(input, nodetype=int, create_using=nx.DiGraph())
+		for edge in G.edges():
+			G[edge[0]][edge[1]]['weight'] = 1
+
+	if not directed:
+		G = G.to_undirected()
+
+	return G
+
+def learn_embeddings(walks,output=None,dimensions=128,window_size=10,workers=8,iter=1):
+	'''
+	Learn embeddings by optimizing the Skipgram objective using SGD.
+
+	Each walk is converted to a list of string tokens and used to train a
+	gensim Word2Vec model (sg=1, min_count=0). If `output` is given, the
+	vectors are also saved there in word2vec format. Returns the tuple
+	(model, output).
+	'''
+	walks = [[str(node) for node in walk] for walk in walks]
+	model = Word2Vec(walks, size=dimensions, window=window_size, min_count=0, sg=1, workers=workers, iter=iter)
+	if output:
+		model.save_word2vec_format(output)
+
+	return model,output
diff --git a/src/main.py b/scripts/run_node2vec.py
similarity index 73%
rename from src/main.py
rename to scripts/run_node2vec.py
index 7caa4e5..a46778d 100644
--- a/src/main.py
+++ b/scripts/run_node2vec.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 Reference implementation of node2vec.
 
@@ -10,10 +11,8 @@
 '''
 
 import argparse
-import numpy as np
-import networkx as nx
-import node2vec
-from gensim.models import Word2Vec
+from node2vec import node2vec
+from node2vec.node2vec import read_graph,learn_embeddings
 
 def parse_args():
 	'''
@@ -63,44 +62,21 @@ def parse_args():
 
 	return parser.parse_args()
 
-def read_graph():
-	'''
-	Reads the input network in networkx.
-	'''
-	if args.weighted:
-		G = nx.read_edgelist(args.input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph())
-	else:
-		G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph())
-		for edge in G.edges():
-			G[edge[0]][edge[1]]['weight'] = 1
-
-	if not args.directed:
-		G = G.to_undirected()
-
-	return G
-
-def learn_embeddings(walks):
-	'''
-	Learn embeddings by optimizing the Skipgram objective using SGD.
-	'''
-	walks = [map(str, walk) for walk in walks]
-	model = Word2Vec(walks, size=args.dimensions, window=args.window_size, min_count=0, sg=1, workers=args.workers, iter=args.iter)
-	model.save_word2vec_format(args.output)
-
-	return
 
 def main(args):
 	'''
 	Pipeline for representational learning for all nodes in a graph.
 	'''
-	nx_G = read_graph()
+	nx_G = read_graph(args.input,args.weighted,args.directed)
 	G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
 	G.preprocess_transition_probs()
 	walks = G.simulate_walks(args.num_walks, args.walk_length)
-	learn_embeddings(walks)
+	learn_embeddings(walks,args.output,args.dimensions,
+		args.window_size,args.workers,args.iter)
 
-args = parse_args()
-main(args)
+if __name__=='__main__':
+	args = parse_args()
+	main(args)
 
 
 
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..ed56ca1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,50 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2016 Russell Poldrack
+# some portions borrowed from https://github.com/mwaskom/lyman/blob/master/setup.py
+
+
+descr = """node2vec: algorithm for learning continuous representations for nodes in any (un)directed, (un)weighted graph"""
+
+import os
+from setuptools import setup
+from sys import version_info
+
+if version_info >= (2, 8):
+    raise Exception('Currently only works in Python 2.7')
+
+DISTNAME="node2vec"
+DESCRIPTION=descr
+MAINTAINER='node2vec team'
+LICENSE='MIT'
+URL='http://snap.stanford.edu/node2vec/'
+DOWNLOAD_URL='https://github.com/aditya-grover/node2vec'
+VERSION='0.1'
+
+if __name__ == "__main__":
+
+    if os.path.exists('MANIFEST'):
+        os.remove('MANIFEST')
+
+    import sys
+
+    setup(name=DISTNAME,
+        maintainer=MAINTAINER,
+        description=DESCRIPTION,
+        include_package_data=True,
+        package_data={'node2vec.tests':['emb/karate.emb','graph/karate.edgelist']},
+        license=LICENSE,
+        version=VERSION,
+        url=URL,
+        download_url=DOWNLOAD_URL,
+        install_requires=['gensim','networkx','numpy'],
+        packages=['node2vec'],
+        scripts=['scripts/run_node2vec.py'],
+        classifiers=[
+            'Intended Audience :: Science/Research',
+            'Programming Language :: Python :: 2.7',
+            'License :: OSI Approved :: MIT License',
+            'Operating System :: POSIX',
+            'Operating System :: Unix',
+            'Operating System :: MacOS'],
+        )
diff --git a/tests/test_node2vec.py b/tests/test_node2vec.py
new file mode 100644
index 0000000..eda37ce
--- /dev/null
+++ b/tests/test_node2vec.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+tests for node2vec
+"""
+
+import os
+from node2vec import node2vec
+from node2vec.node2vec import read_graph,learn_embeddings
+import tempfile
+import numpy
+
+datafile = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'graph', 'karate.edgelist')
+outfile = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'emb', 'karate.emb')
+
+def test_node2vec_datafile():
+    print('datafile:%s'%datafile)
+    assert os.path.exists(datafile)
+    print('outfile:%s'%outfile)
+    assert os.path.exists(outfile)
+
+def test_node2vec_run():
+    # use defaults from main script
+    weighted=False
+    directed=False
+    p=1
+    q=1
+    dimensions=128
+    window_size=10
+    workers=8
+    iter=1
+    num_walks=10
+    walk_length=10
+    test_outfile=os.path.join(tempfile.gettempdir(),'node2vec_test.txt')
+    nx_G = read_graph(datafile,weighted,directed)
+    G = node2vec.Graph(nx_G, directed, p, q)
+    G.preprocess_transition_probs()
+    walks = G.simulate_walks(num_walks, walk_length)
+    learn_embeddings(walks,test_outfile,dimensions,
+                     window_size,workers,iter)