From 3e27ad5ea3f9102b4df97fbb951d57a48652ff72 Mon Sep 17 00:00:00 2001 From: poldrack Date: Thu, 25 Aug 2016 11:35:28 -0700 Subject: [PATCH 1/9] added setup.py and reformatted to make into package --- {src => node2vec}/node2vec.py | 0 src/main.py => scripts/run_node2vec.py | 0 setup.py | 68 ++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) rename {src => node2vec}/node2vec.py (100%) rename src/main.py => scripts/run_node2vec.py (100%) create mode 100644 setup.py diff --git a/src/node2vec.py b/node2vec/node2vec.py similarity index 100% rename from src/node2vec.py rename to node2vec/node2vec.py diff --git a/src/main.py b/scripts/run_node2vec.py similarity index 100% rename from src/main.py rename to scripts/run_node2vec.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..9e2d8d5 --- /dev/null +++ b/setup.py @@ -0,0 +1,68 @@ +#! /usr/bin/env python +# +# Copyright (C) 2016 Russell Poldrack +# some portions borrowed from https://github.com/mwaskom/lyman/blob/master/setup.py + + +descr = """node2vec: algorithm for learning continuous representations for nodes in any (un)directed, (un)weighted graph""" + +import os +from setuptools import setup + +DISTNAME="node2vec" +DESCRIPTION=descr +MAINTAINER='Russ Poldrack' +MAINTAINER_EMAIL='poldrack@stanford.edu' +LICENSE='MIT' +URL='http://snap.stanford.edu/node2vec/' +DOWNLOAD_URL='https://github.com/aditya-grover/node2vec' +VERSION='0.1' + +def check_dependencies(): + + # Just make sure dependencies exist, I haven't rigorously + # tested what the minimal versions that will work are + needed_deps = ["gensim", "numpy", "networkx"] + missing_deps = [] + for dep in needed_deps: + try: + __import__(dep) + except ImportError: + missing_deps.append(dep) + + if missing_deps: + missing = (", ".join(missing_deps) + .replace("sklearn", "scikit-learn")) + raise ImportError("Missing dependencies: %s" % missing) + +if __name__ == "__main__": + + if os.path.exists('MANIFEST'): + os.remove('MANIFEST') + + import 
sys + if not (len(sys.argv) >= 2 and ('--help' in sys.argv[1:] or + sys.argv[1] in ('--help-commands', + '--version', + 'egg_info', + 'clean'))): + check_dependencies() + + setup(name=DISTNAME, + maintainer=MAINTAINER, + maintainer_email=MAINTAINER_EMAIL, + description=DESCRIPTION, + license=LICENSE, + version=VERSION, + url=URL, + download_url=DOWNLOAD_URL, + packages=['node2vec'], + scripts=['scripts/run_node2vec.py'], + classifiers=[ + 'Intended Audience :: Science/Research', + 'Programming Language :: Python :: 2.7', + 'License :: OSI Approved :: BSD License', + 'Operating System :: POSIX', + 'Operating System :: Unix', + 'Operating System :: MacOS'], + ) From 6461f963451ba7e18c98a3f0057bd7eb834611f9 Mon Sep 17 00:00:00 2001 From: poldrack Date: Thu, 25 Aug 2016 11:41:39 -0700 Subject: [PATCH 2/9] added hashbang --- scripts/run_node2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_node2vec.py b/scripts/run_node2vec.py index 7caa4e5..715fe21 100644 --- a/scripts/run_node2vec.py +++ b/scripts/run_node2vec.py @@ -12,7 +12,7 @@ import argparse import numpy as np import networkx as nx -import node2vec +from node2vec import node2vec from gensim.models import Word2Vec def parse_args(): From bd4f8c6a91654f284a3013c7a96716bb03d5a79e Mon Sep 17 00:00:00 2001 From: poldrack Date: Thu, 25 Aug 2016 14:47:30 -0700 Subject: [PATCH 3/9] moved read_graph and learn_embeddings into here, for more portable use --- node2vec/node2vec.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/node2vec/node2vec.py b/node2vec/node2vec.py index 0293411..671fd18 100644 --- a/node2vec/node2vec.py +++ b/node2vec/node2vec.py @@ -1,7 +1,7 @@ import numpy as np import networkx as nx import random - +from gensim.models import Word2Vec class Graph(): def __init__(self, nx_G, is_directed, p, q): @@ -146,4 +146,30 @@ def alias_draw(J, q): if np.random.rand() < q[kk]: return kk else: - return J[kk] \ No newline at end of 
file + return J[kk] + +def read_graph(input,weighted=False,directed=False): + ''' + Reads the input network in networkx. + ''' + if weighted: + G = nx.read_edgelist(input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph()) + else: + G = nx.read_edgelist(input, nodetype=int, create_using=nx.DiGraph()) + for edge in G.edges(): + G[edge[0]][edge[1]]['weight'] = 1 + + if not directed: + G = G.to_undirected() + + return G + +def learn_embeddings(walks,output,dimensions=128,window_size=10,workers=8,iter=1): + ''' + Learn embeddings by optimizing the Skipgram objective using SGD. + ''' + walks = [map(str, walk) for walk in walks] + model = Word2Vec(walks, size=dimensions, window=window_size, min_count=0, sg=1, workers=workers, iter=iter) + model.save_word2vec_format(output) + + return From 9e550adbaf17c6dbf8a58c8e2d56b0c4da021671 Mon Sep 17 00:00:00 2001 From: poldrack Date: Thu, 25 Aug 2016 14:48:12 -0700 Subject: [PATCH 4/9] moved helper functions to main node2vec.py file, added test for __main__ --- scripts/run_node2vec.py | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/scripts/run_node2vec.py b/scripts/run_node2vec.py index 715fe21..a46778d 100644 --- a/scripts/run_node2vec.py +++ b/scripts/run_node2vec.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python ''' Reference implementation of node2vec. @@ -10,10 +11,8 @@ ''' import argparse -import numpy as np -import networkx as nx from node2vec import node2vec -from gensim.models import Word2Vec +from node2vec.node2vec import read_graph,learn_embeddings def parse_args(): ''' @@ -63,44 +62,21 @@ def parse_args(): return parser.parse_args() -def read_graph(): - ''' - Reads the input network in networkx. 
- ''' - if args.weighted: - G = nx.read_edgelist(args.input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph()) - else: - G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph()) - for edge in G.edges(): - G[edge[0]][edge[1]]['weight'] = 1 - - if not args.directed: - G = G.to_undirected() - - return G - -def learn_embeddings(walks): - ''' - Learn embeddings by optimizing the Skipgram objective using SGD. - ''' - walks = [map(str, walk) for walk in walks] - model = Word2Vec(walks, size=args.dimensions, window=args.window_size, min_count=0, sg=1, workers=args.workers, iter=args.iter) - model.save_word2vec_format(args.output) - - return def main(args): ''' Pipeline for representational learning for all nodes in a graph. ''' - nx_G = read_graph() + nx_G = read_graph(args.input,args.weighted,args.directed) G = node2vec.Graph(nx_G, args.directed, args.p, args.q) G.preprocess_transition_probs() walks = G.simulate_walks(args.num_walks, args.walk_length) - learn_embeddings(walks) + learn_embeddings(walks,args.output,args.dimensions, + args.window_size,args.workers,args.iter) -args = parse_args() -main(args) +if __name__=='__main__': + args = parse_args() + main(args) From c41af127e6985d30c90b7e8c6df8d511d9754225 Mon Sep 17 00:00:00 2001 From: poldrack Date: Thu, 25 Aug 2016 14:49:08 -0700 Subject: [PATCH 5/9] leave email blank --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 9e2d8d5..aa0a59e 100644 --- a/setup.py +++ b/setup.py @@ -11,8 +11,7 @@ DISTNAME="node2vec" DESCRIPTION=descr -MAINTAINER='Russ Poldrack' -MAINTAINER_EMAIL='poldrack@stanford.edu' +MAINTAINER='node2vec team' LICENSE='MIT' URL='http://snap.stanford.edu/node2vec/' DOWNLOAD_URL='https://github.com/aditya-grover/node2vec' @@ -50,8 +49,9 @@ def check_dependencies(): setup(name=DISTNAME, maintainer=MAINTAINER, - maintainer_email=MAINTAINER_EMAIL, description=DESCRIPTION, + include_package_data=True, + 
package_data={'node2vec.tests':['emb/karate.emb','graph/karate.edgelist']}, license=LICENSE, version=VERSION, url=URL, From d3733af044ba27f0cd080fb134f5eadbb2d33a62 Mon Sep 17 00:00:00 2001 From: poldrack Date: Thu, 25 Aug 2016 14:49:34 -0700 Subject: [PATCH 6/9] initial add --- node2vec/__init__.py | 0 tests/test_node2vec.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 node2vec/__init__.py create mode 100644 tests/test_node2vec.py diff --git a/node2vec/__init__.py b/node2vec/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_node2vec.py b/tests/test_node2vec.py new file mode 100644 index 0000000..eda37ce --- /dev/null +++ b/tests/test_node2vec.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- +""" +tests for node2vec +""" + +import os +from node2vec import node2vec +from node2vec.node2vec import read_graph,learn_embeddings +import tempfile +import numpy + +datafile = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'graph', 'karate.edgelist') +outfile = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'emb', 'karate.emb') + +def test_node2vec_datafile(): + print('datafile:%s'%datafile) + assert os.path.exists(datafile) + print('outfile:%s'%outfile) + assert os.path.exists(outfile) + +def test_node2vec_run(): + # use defaults from main script + weighted=False + directed=False + p=1 + q=1 + dimensions=128 + window_size=10 + workers=8 + iter=1 + num_walks=10 + walk_length=10 + test_outfile='/tmp/node2vec_test.txt' + nx_G = read_graph(datafile,weighted,directed) + G = node2vec.Graph(nx_G, directed, p, q) + G.preprocess_transition_probs() + walks = G.simulate_walks(num_walks, walk_length) + learn_embeddings(walks,test_outfile,dimensions, + window_size,workers,iter) From 35b5d60e28d351d37bf23ee9efe9241089271b43 Mon Sep 17 00:00:00 2001 From: poldrack Date: Thu, 25 Aug 2016 14:59:05 -0700 Subject: [PATCH 7/9] added exception for python 3 --- setup.py | 28 
+++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/setup.py b/setup.py index aa0a59e..ed56ca1 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,10 @@ import os from setuptools import setup +from sys import version + +if version > '2.8.0': + raise Exception('Currently only works in Python 2.7') DISTNAME="node2vec" DESCRIPTION=descr @@ -17,35 +21,12 @@ DOWNLOAD_URL='https://github.com/aditya-grover/node2vec' VERSION='0.1' -def check_dependencies(): - - # Just make sure dependencies exist, I haven't rigorously - # tested what the minimal versions that will work are - needed_deps = ["gensim", "numpy", "networkx"] - missing_deps = [] - for dep in needed_deps: - try: - __import__(dep) - except ImportError: - missing_deps.append(dep) - - if missing_deps: - missing = (", ".join(missing_deps) - .replace("sklearn", "scikit-learn")) - raise ImportError("Missing dependencies: %s" % missing) - if __name__ == "__main__": if os.path.exists('MANIFEST'): os.remove('MANIFEST') import sys - if not (len(sys.argv) >= 2 and ('--help' in sys.argv[1:] or - sys.argv[1] in ('--help-commands', - '--version', - 'egg_info', - 'clean'))): - check_dependencies() setup(name=DISTNAME, maintainer=MAINTAINER, @@ -56,6 +37,7 @@ def check_dependencies(): version=VERSION, url=URL, download_url=DOWNLOAD_URL, + install_requires=['gensim','networkx'], packages=['node2vec'], scripts=['scripts/run_node2vec.py'], classifiers=[ From a87f524e28771eb56fa0af4c9eac80d134fc78cc Mon Sep 17 00:00:00 2001 From: poldrack Date: Thu, 25 Aug 2016 14:59:23 -0700 Subject: [PATCH 8/9] fixed print statements for py3 - though it's still not working --- node2vec/node2vec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node2vec/node2vec.py b/node2vec/node2vec.py index 671fd18..cf4e319 100644 --- a/node2vec/node2vec.py +++ b/node2vec/node2vec.py @@ -43,9 +43,9 @@ def simulate_walks(self, num_walks, walk_length): G = self.G walks = [] nodes = list(G.nodes()) - print 
'Walk iteration:' + print('Walk iteration:') for walk_iter in range(num_walks): - print str(walk_iter+1), '/', str(num_walks) + print(str(walk_iter+1), '/', str(num_walks)) random.shuffle(nodes) for node in nodes: walks.append(self.node2vec_walk(walk_length=walk_length, start_node=node)) From ce8fcc204d14f2a01139680b3248c1131677e4cd Mon Sep 17 00:00:00 2001 From: poldrack Date: Thu, 25 Aug 2016 15:39:40 -0700 Subject: [PATCH 9/9] added option to not save output from learn_embeddings, and return model and output from fn --- node2vec/node2vec.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/node2vec/node2vec.py b/node2vec/node2vec.py index cf4e319..10da36e 100644 --- a/node2vec/node2vec.py +++ b/node2vec/node2vec.py @@ -164,12 +164,13 @@ def read_graph(input,weighted=False,directed=False): return G -def learn_embeddings(walks,output,dimensions=128,window_size=10,workers=8,iter=1): +def learn_embeddings(walks,output=None,dimensions=128,window_size=10,workers=8,iter=1): ''' Learn embeddings by optimizing the Skipgram objective using SGD. ''' walks = [map(str, walk) for walk in walks] model = Word2Vec(walks, size=dimensions, window=window_size, min_count=0, sg=1, workers=workers, iter=iter) - model.save_word2vec_format(output) + if output: + model.save_word2vec_format(output) - return + return model,output