Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

significant reorganization #1

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Empty file added node2vec/__init__.py
Empty file.
35 changes: 31 additions & 4 deletions src/node2vec.py → node2vec/node2vec.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import networkx as nx
import random

from gensim.models import Word2Vec

class Graph():
def __init__(self, nx_G, is_directed, p, q):
Expand Down Expand Up @@ -43,9 +43,9 @@ def simulate_walks(self, num_walks, walk_length):
G = self.G
walks = []
nodes = list(G.nodes())
print 'Walk iteration:'
print('Walk iteration:')
for walk_iter in range(num_walks):
print str(walk_iter+1), '/', str(num_walks)
print(str(walk_iter+1), '/', str(num_walks))
random.shuffle(nodes)
for node in nodes:
walks.append(self.node2vec_walk(walk_length=walk_length, start_node=node))
Expand Down Expand Up @@ -146,4 +146,31 @@ def alias_draw(J, q):
if np.random.rand() < q[kk]:
return kk
else:
return J[kk]
return J[kk]

def read_graph(input,weighted=False,directed=False):
    '''
    Load an edge list with networkx, making sure every edge has a 'weight'.

    input: handed unchanged to nx.read_edgelist as the edge-list source.
    weighted: when true, a float 'weight' column is parsed from the file;
        otherwise every edge is assigned weight 1 after loading.
    directed: when false, the loaded DiGraph is converted with
        to_undirected() before being returned.
    '''
    if weighted:
        graph = nx.read_edgelist(input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph())
    else:
        graph = nx.read_edgelist(input, nodetype=int, create_using=nx.DiGraph())
        # No weight column was parsed, so give every edge a unit weight.
        for src, dst in graph.edges():
            graph[src][dst]['weight'] = 1

    return graph if directed else graph.to_undirected()

def learn_embeddings(walks,output=None,dimensions=128,window_size=10,workers=8,iter=1):
    '''
    Learn embeddings by optimizing the Skipgram objective using SGD.

    walks: iterable of walks, each an iterable of nodes; every node is
        converted to str before training.
    output: when truthy, the trained vectors are written there with
        save_word2vec_format.
    dimensions, window_size, workers, iter: forwarded to Word2Vec as
        size, window, workers and iter respectively.

    Returns the tuple (model, output).
    '''
    # Build real lists rather than map objects: on Python 3 map() yields a
    # one-shot iterator, so each walk would be empty after its first pass.
    walks = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(walks, size=dimensions, window=window_size, min_count=0, sg=1, workers=workers, iter=iter)
    if output:
        model.save_word2vec_format(output)

    return model,output
42 changes: 9 additions & 33 deletions src/main.py → scripts/run_node2vec.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python
'''
Reference implementation of node2vec.

Expand All @@ -10,10 +11,8 @@
'''

import argparse
import numpy as np
import networkx as nx
import node2vec
from gensim.models import Word2Vec
from node2vec import node2vec
from node2vec.node2vec import read_graph,learn_embeddings

def parse_args():
'''
Expand Down Expand Up @@ -63,44 +62,21 @@ def parse_args():

return parser.parse_args()

def read_graph():
    '''
    Load the edge list named by the module-level args with networkx.

    Reads args.input; args.weighted selects whether a float 'weight' column
    is parsed (otherwise every edge gets weight 1), and args.directed
    decides whether the DiGraph is converted with to_undirected().
    '''
    if args.weighted:
        graph = nx.read_edgelist(args.input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph())
    else:
        graph = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph())
        # No weight column was parsed, so give every edge a unit weight.
        for src, dst in graph.edges():
            graph[src][dst]['weight'] = 1

    return graph if args.directed else graph.to_undirected()

def learn_embeddings(walks):
    '''
    Learn embeddings by optimizing the Skipgram objective using SGD.

    walks: iterable of walks, each an iterable of nodes; every node is
        converted to str before training.

    Hyper-parameters (dimensions, window_size, workers, iter) and the
    output location are read from the module-level args. The vectors are
    written with save_word2vec_format and nothing is returned.
    '''
    # Build real lists rather than map objects: on Python 3 map() yields a
    # one-shot iterator, so each walk would be empty after its first pass.
    walks = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(walks, size=args.dimensions, window=args.window_size, min_count=0, sg=1, workers=args.workers, iter=args.iter)
    model.save_word2vec_format(args.output)

    return

def main(args):
'''
Pipeline for representational learning for all nodes in a graph.

In order: read the graph, wrap it in node2vec.Graph with args.directed,
args.p and args.q, call preprocess_transition_probs(), generate walks with
simulate_walks(args.num_walks, args.walk_length), then call learn_embeddings.
'''
# NOTE(review): read_graph is called twice with different signatures. The
# zero-argument form matches a definition that reads the global args; the
# three-argument form matches the parameterised read_graph(input, weighted,
# directed). These look like the before/after sides of one diff hunk --
# confirm which call is meant to remain.
nx_G = read_graph()
nx_G = read_graph(args.input,args.weighted,args.directed)
G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
G.preprocess_transition_probs()
walks = G.simulate_walks(args.num_walks, args.walk_length)
# NOTE(review): same pattern for learn_embeddings -- the one-argument call
# relies on global args, the longer call passes every setting explicitly.
learn_embeddings(walks)
learn_embeddings(walks,args.output,args.dimensions,
args.window_size,args.workers,args.iter)

# NOTE(review): the first two statements run the pipeline unconditionally at
# import time, and the guarded block below repeats them only when the file is
# executed as a script. These look like the before/after sides of one diff
# hunk -- confirm that only the guarded form is meant to remain.
args = parse_args()
main(args)
if __name__=='__main__':
args = parse_args()
main(args)



Expand Down
50 changes: 50 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#! /usr/bin/env python
#
# Copyright (C) 2016 Russell Poldrack <[email protected]>
# some portions borrowed from https://github.com/mwaskom/lyman/blob/master/setup.py


descr = """node2vec: algorithm for learning continuous representations for nodes in any (un)directed, (un)weighted graph"""

import os
from setuptools import setup
from sys import version

import sys

# Gate on the numeric version tuple rather than the free-form sys.version
# string, whose lexicographic ordering is not a reliable version comparison.
if sys.version_info >= (2, 8):
    raise Exception('Currently only works in Python 2.7')

DISTNAME="node2vec"
DESCRIPTION=descr
MAINTAINER='node2vec team'
LICENSE='MIT'
URL='http://snap.stanford.edu/node2vec/'
DOWNLOAD_URL='https://github.com/aditya-grover/node2vec'
VERSION='0.1'

if __name__ == "__main__":

    # Drop any existing MANIFEST file before calling setup().
    if os.path.exists('MANIFEST'):
        os.remove('MANIFEST')

    setup(name=DISTNAME,
          maintainer=MAINTAINER,
          description=DESCRIPTION,
          include_package_data=True,
          # NOTE(review): 'node2vec.tests' is not listed in ``packages``
          # below -- confirm that this package_data entry has any effect.
          package_data={'node2vec.tests':['emb/karate.emb','graph/karate.edgelist']},
          license=LICENSE,
          version=VERSION,
          url=URL,
          download_url=DOWNLOAD_URL,
          # numpy is imported directly by node2vec/node2vec.py, so declare it.
          install_requires=['gensim','networkx','numpy'],
          packages=['node2vec'],
          scripts=['scripts/run_node2vec.py'],
          classifiers=[
              'Intended Audience :: Science/Research',
              'Programming Language :: Python :: 2.7',
              # Kept in agreement with LICENSE above.
              'License :: OSI Approved :: MIT License',
              'Operating System :: POSIX',
              'Operating System :: Unix',
              'Operating System :: MacOS'],
          )
40 changes: 40 additions & 0 deletions tests/test_node2vec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
tests for node2vec
"""

import os
from node2vec import node2vec
from node2vec.node2vec import read_graph,learn_embeddings
import tempfile
import numpy

# Anchor on the absolute location of this file: with a relative __file__
# (e.g. 'test_node2vec.py') the double dirname() would collapse to '' and the
# fixture paths would be resolved against the current working directory.
_repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
datafile = os.path.join(_repo_root, 'graph', 'karate.edgelist')
outfile = os.path.join(_repo_root, 'emb', 'karate.emb')

def test_node2vec_datafile():
    """Check that the input edge list and the embedding file both exist on disk."""
    print('datafile:%s'%datafile)
    graph_present = os.path.exists(datafile)
    assert graph_present
    print('outfile:%s'%outfile)
    embedding_present = os.path.exists(outfile)
    assert embedding_present

def test_node2vec_run():
    """Run the whole pipeline on the sample graph and check that embeddings are written."""
    # use defaults from main script
    weighted=False
    directed=False
    p=1
    q=1
    dimensions=128
    window_size=10
    workers=8
    n_iter=1
    num_walks=10
    walk_length=10
    # Use a unique temporary file instead of a fixed /tmp path so the test is
    # portable and does not collide with other runs.
    fd, test_outfile = tempfile.mkstemp(prefix='node2vec_test_', suffix='.txt')
    os.close(fd)
    try:
        nx_G = read_graph(datafile,weighted,directed)
        G = node2vec.Graph(nx_G, directed, p, q)
        G.preprocess_transition_probs()
        walks = G.simulate_walks(num_walks, walk_length)
        learn_embeddings(walks,test_outfile,dimensions,
                         window_size,workers,n_iter)
        # mkstemp creates an empty file, so a non-zero size shows that
        # learn_embeddings actually wrote to it.
        assert os.path.getsize(test_outfile) > 0
    finally:
        os.remove(test_outfile)