Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aho-Corasick algorithm implementation in Python #436

Merged
merged 13 commits into from
Dec 16, 2020
65 changes: 65 additions & 0 deletions Aho-Corasick Algorithm/aho_corasick1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Python implementation of Aho-Corasick string matching
from collections import deque

# Sentinel kept for backward compatibility with code that imports it;
# it denotes "no transition" in the automaton.
FAIL = -1


def aho_corasick(string, keywords):
    """Find every occurrence of each keyword in *string*.

    Builds an Aho-Corasick automaton (trie + failure links) from
    *keywords* and runs *string* through it in a single pass.

    Args:
        string: The text to search.
        keywords: Iterable of patterns to look for. Empty patterns are
            ignored; duplicate patterns are reported once per occurrence.

    Returns:
        A list of ``(start_index, keyword)`` tuples ordered by the index
        at which each match *ends*. Matches ending at the same index are
        listed longest first.

    >>> aho_corasick("ushers", ["he", "she", "his", "hers"])
    [(1, 'she'), (2, 'he'), (2, 'hers')]
    >>> aho_corasick("abc", ["abc", "ab"])
    [(0, 'ab'), (0, 'abc')]
    """
    transitions = {}  # (state, char) -> next state (trie edges only)
    children = {}     # state -> [(char, child_state), ...] in insertion order
    outputs = {}      # state -> keywords that end when this state is reached
    fails = {}        # state -> failure-link target

    # Phase 1: build the trie. State 0 is the root.
    new_state = 0
    for keyword in keywords:
        if not keyword:
            # An empty pattern has no trie path; skip it instead of
            # attaching an output to the root.
            continue

        state = 0
        for char in keyword:
            nxt = transitions.get((state, char))
            if nxt is None:
                # Only create an edge when one is actually missing, so a
                # keyword that is a prefix of (or equal to) an earlier
                # keyword reuses the existing path.
                new_state += 1
                nxt = new_state
                transitions[(state, char)] = nxt
                children.setdefault(state, []).append((char, nxt))
            state = nxt

        outputs[state] = [keyword]

    # Phase 2: compute failure links breadth-first, so that every state's
    # failure target (which is always shallower) is finished before it is
    # used.
    queue = deque()
    for _, child in children.get(0, ()):
        fails[child] = 0
        queue.append(child)

    while queue:
        parent = queue.popleft()
        for char, child in children.get(parent, ()):
            queue.append(child)

            # Follow the parent's failure chain until a state with an
            # outgoing edge on `char` is found, or the root is reached.
            state = fails[parent]
            while state and (state, char) not in transitions:
                state = fails[state]

            failure = transitions.get((state, char), 0)
            fails[child] = failure

            # Inherit the keywords recognised by the failure state: they
            # are proper suffixes of the path leading to `child`.
            if failure in outputs:
                outputs.setdefault(child, []).extend(outputs[failure])

    # Phase 3: scan the text.
    state = 0
    results = []
    for i, char in enumerate(string):
        while state and (state, char) not in transitions:
            state = fails[state]
        # The root implicitly loops to itself on any unknown character.
        state = transitions.get((state, char), 0)

        for match in outputs.get(state, ()):
            results.append((i - len(match) + 1, match))

    return results
192 changes: 192 additions & 0 deletions Aho-Corasick Algorithm/requirements
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
alabaster==0.7.12
asn1crypto==0.24.0
astroid==2.2.5
astropy==3.2.1
attrs==19.1.0
Babel==2.7.0
backcall==0.1.0
backports.functools-lru-cache==1.5
backports.os==0.1.1
backports.shutil-get-terminal-size==1.0.0
backports.tempfile==1.0
bitarray==0.9.3
bkcharts==0.2
bleach==3.1.0
bokeh==1.2.0
boto==2.49.0
Bottleneck==1.2.1
certifi==2019.6.16
cffi==1.12.3
chardet==3.0.4
Click==7.0
cloudpickle==1.2.1
clyent==1.2.2
colorama==0.4.1
comtypes==1.1.7
contextlib2==0.5.5
cycler==0.10.0
Cython==0.29.12
cytoolz==0.10.0
dask==2.1.0
decorator==4.4.0
defusedxml==0.6.0
distributed==2.1.0
docutils==0.14
entrypoints==0.3
et-xmlfile==1.0.1
fastcache==1.1.0
filelock==3.0.12
Flask==1.1.1
future==0.17.1
gevent==1.4.0
glob2==0.7
greenlet==0.4.15
h5py==2.9.0
heapdict==1.0.0
idna==2.8
imageio==2.5.0
imagesize==1.1.0
importlib-metadata==0.17
ipykernel==5.1.1
ipython==7.6.1
ipython-genutils==0.2.0
ipywidgets==7.5.0
isort==4.3.21
itsdangerous==1.1.0
jdcal==1.4.1
jedi==0.13.3
Jinja2==2.10.1
joblib==0.13.2
json5==0.8.4
jsonschema==3.0.1
jupyter==1.0.0
jupyter-client==5.3.1
jupyter-console==6.0.0
jupyter-core==4.5.0
jupyterlab==1.0.2
jupyterlab-server==1.0.0
keyring==18.0.0
kiwisolver==1.1.0
lazy-object-proxy==1.4.1
libarchive-c==2.8
llvmlite==0.29.0
locket==0.2.0
lxml==4.3.4
MarkupSafe==1.1.1
matplotlib==3.1.0
mccabe==0.6.1
menuinst==1.4.16
mistune==0.8.4
mkl-fft==1.0.12
mkl-random==1.0.2
mkl-service==2.0.2
mock==3.0.5
more-itertools==7.0.0
mpmath==1.1.0
msgpack==0.6.1
multipledispatch==0.6.0
navigator-updater==0.2.1
nbconvert==5.5.0
nbformat==4.4.0
networkx==2.3
nltk==3.4.4
nose==1.3.7
notebook==6.0.0
numba==0.44.1
numexpr==2.6.9
numpy==1.16.4
numpydoc==0.9.1
olefile==0.46
openpyxl==2.6.2
packaging==19.0
pandas==0.24.2
pandocfilters==1.4.2
parso==0.5.0
partd==1.0.0
path.py==12.0.1
pathlib2==2.3.4
patsy==0.5.1
pep8==1.7.1
pickleshare==0.7.5
Pillow==6.1.0
pkginfo==1.5.0.1
pluggy==0.12.0
ply==3.11
prometheus-client==0.7.1
prompt-toolkit==2.0.9
psutil==5.6.3
py==1.8.0
pycodestyle==2.5.0
pycosat==0.6.3
pycparser==2.19
pycrypto==2.6.1
pycurl==7.43.0.3
pyflakes==2.1.1
Pygments==2.4.2
pylint==2.3.1
pyodbc==4.0.26
pyOpenSSL==19.0.0
pyparsing==2.4.0
pyreadline==2.1
pyrsistent==0.14.11
PySocks==1.7.0
pytest==5.0.1
pytest-arraydiff==0.3
pytest-astropy==0.5.0
pytest-doctestplus==0.3.0
pytest-openfiles==0.3.2
pytest-remotedata==0.3.1
python-dateutil==2.8.0
pytz==2019.1
PyWavelets==1.0.3
pywin32==223
pywinpty==0.5.5
PyYAML==5.1.1
pyzmq==18.0.0
QtAwesome==0.5.7
qtconsole==4.5.1
QtPy==1.8.0
requests==2.22.0
rope==0.14.0
ruamel-yaml==0.15.46
scikit-image==0.15.0
scikit-learn==0.21.2
scipy==1.2.1
seaborn==0.9.0
Send2Trash==1.5.0
simplegeneric==0.8.1
singledispatch==3.4.0.3
six==1.12.0
snowballstemmer==1.9.0
sortedcollections==1.1.2
sortedcontainers==2.1.0
soupsieve==1.8
spyder-kernels==0.5.1
SQLAlchemy==1.3.5
statsmodels==0.10.0
stopwords==0.1.3
sympy==1.4
tables==3.5.2
tblib==1.4.0
terminado==0.8.2
testpath==0.4.2
toolz==0.10.0
tornado==6.0.3
tqdm==4.32.1
traitlets==4.3.2
unicodecsv==0.14.1
urllib3==1.24.2
wcwidth==0.1.7
webencodings==0.5.1
Werkzeug==0.15.4
widgetsnbextension==3.5.0
win-inet-pton==1.1.0
win-unicode-console==0.5
wincertstore==0.2
wrapt==1.11.2
xlrd==1.2.0
XlsxWriter==1.1.8
xlwings==0.15.8
xlwt==1.3.0
zict==1.0.0
zipp==0.5.1
Loading