diff --git a/contexts/installed/.coveragerc b/contexts/installed/.coveragerc
deleted file mode 100644
index 335a156755..0000000000
--- a/contexts/installed/.coveragerc
+++ /dev/null
@@ -1,13 +0,0 @@
-[run]
-source =
-    dffml_contexts_installed
-    tests
-branch = True
-
-[report]
-exclude_lines =
-    no cov
-    no qa
-    noqa
-    pragma: no cover
-    if __name__ == .__main__.:
diff --git a/contexts/installed/.gitignore b/contexts/installed/.gitignore
deleted file mode 100644
index 3af0b3e081..0000000000
--- a/contexts/installed/.gitignore
+++ /dev/null
@@ -1,21 +0,0 @@
-*.log
-*.pyc
-.cache/
-.coverage
-.idea/
-.vscode/
-*.egg-info/
-build/
-dist/
-docs/build/
-venv/
-wheelhouse/
-*.egss
-.mypy_cache/
-*.swp
-.venv/
-.eggs/
-*.modeldir
-*.db
-htmlcov/
-built_html_docs/
diff --git a/contexts/installed/LICENSE b/contexts/installed/LICENSE
deleted file mode 100644
index 26902189c2..0000000000
--- a/contexts/installed/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-Copyright (c) 2021 pdxjohnny
-
-MIT License
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/contexts/installed/MANIFEST.in b/contexts/installed/MANIFEST.in
deleted file mode 100644
index dac1426fc1..0000000000
--- a/contexts/installed/MANIFEST.in
+++ /dev/null
@@ -1,3 +0,0 @@
-include README.rst
-include LICENSE
-recursive-include dffml_contexts_installed *
diff --git a/contexts/installed/README.rst b/contexts/installed/README.rst
deleted file mode 100644
index f687c62017..0000000000
--- a/contexts/installed/README.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-Package Name
-============
-
-Package description
-
-Install from pip
-
-.. code-block:: console
-
-    $ pip install package
diff --git a/contexts/installed/dffml_contexts_installed/__init__.py b/contexts/installed/dffml_contexts_installed/__init__.py
deleted file mode 100644
index 5f7cf45a59..0000000000
--- a/contexts/installed/dffml_contexts_installed/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .generate_namespace import *
diff --git a/contexts/installed/dffml_contexts_installed/generate_namespace.py b/contexts/installed/dffml_contexts_installed/generate_namespace.py
deleted file mode 100644
index 6fae220e21..0000000000
--- a/contexts/installed/dffml_contexts_installed/generate_namespace.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import sys
-
-import dffml
-
-DEFAULT_DEPLOYMENT: str = "python.native"
-
-for sysctx in dffml.SystemContext.load():
-    # Ideally we would have load not setting properties on the loaded classes.
- # TODO for name, sysctx in SystemContext.load_dict().items(): - setattr( - sys.modules[__name__], - sysctx.ENTRY_POINT_LABEL, - # TODO(alice) Should probably set origin / use origin as python.caller - # or something like that. - sysctx.deployment(deployment_environment=DEFAULT_DEPLOYMENT), - ) - -delattr(sys.modules[__name__], "dffml") -delattr(sys.modules[__name__], "sys") - -# **system_contexts/__init__.py** -# **wonderland/async.py** - -# from wonderland import Alice, alice -# from wonderland.async import Alice - -# async with AliceSystemContext() as alice: -# async with alice() as alice_ctx: -# async for thought in alice_ctx.thoughts(): -# # async for thought in alice_ctx(): # .thoughts is the default - -# async with Alice() as alice: -# async for thought in alice: - -# for thought in alice: -# print(thought) - -# TODO Pick this work back up later when we have more of an idea about how the -# CLI is working and how we do overlays on an entity to create a different -# version / evolution of that entity. - -# alice = Alice() -# print(alice) -# breakpoint() -# for thought in alice: -# print(thought) diff --git a/contexts/installed/dffml_contexts_installed/version.py b/contexts/installed/dffml_contexts_installed/version.py deleted file mode 100644 index 901e5110b2..0000000000 --- a/contexts/installed/dffml_contexts_installed/version.py +++ /dev/null @@ -1 +0,0 @@ -VERSION = "0.0.1" diff --git a/contexts/installed/pyproject.toml b/contexts/installed/pyproject.toml deleted file mode 100644 index f68b321a55..0000000000 --- a/contexts/installed/pyproject.toml +++ /dev/null @@ -1,22 +0,0 @@ -requires = ["setuptools>=44", "wheel", "setuptools_scm[toml]>=3.4.3"] -build-backend = "setuptools.build_meta" - -# [tool.setuptools_scm] - -[tool.black] -exclude = ''' -( - /( - \.eggs # exclude a few common directories in the - | \.git # root of the project - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | buck-out - | build - | dist - ) -) -''' diff --git a/contexts/installed/setup.cfg b/contexts/installed/setup.cfg deleted file mode 100644 index b3791da12e..0000000000 --- a/contexts/installed/setup.cfg +++ /dev/null @@ -1,40 +0,0 @@ -[metadata] -name = dffml-contexts-installed -version = attr: dffml_contexts_installed.version.VERSION -description = dffml.sysctx entrypoints within the global namespace as Python objects via deployment python.native -long_description = file: README.rst -author = John Andersen -author_email = johnandersenpdx@gmail.com -maintainer = John Andersen -maintainer_email = johnandersenpdx@gmail.com -url = https://github.com/dffml/dffml-contexts-installed -license = MIT -keywords = dffml -classifiers = - Development Status :: 3 - Alpha - Intended Audience :: Developers - License :: OSI Approved :: MIT License - Natural Language :: English - Operating System :: OS Independent - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.7 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - -[options] -packages = find: -entry_points = file: entry_points.txt -setup_requires = - setuptools_scm[toml]>=3.4.3 -install_requires = - dffml>=0.4.0 - -[options.extras_require] -dev = - coverage - codecov - sphinx - twine - setuptools_scm[toml]>=3.4.3 - black==19.10b0 - importlib_metadata>=4.8.1;python_version<"3.8" diff --git a/contexts/installed/setup.py b/contexts/installed/setup.py deleted file mode 100644 index 17542f4d0e..0000000000 --- a/contexts/installed/setup.py +++ /dev/null @@ 
-1,8 +0,0 @@ -import sys -import site -import setuptools - -# See https://github.com/pypa/pip/issues/7953 -site.ENABLE_USER_SITE = "--user" in sys.argv[1:] - -setuptools.setup() diff --git a/contexts/installed/tests/__init__.py b/contexts/installed/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/operations/data/.coveragerc b/operations/data/.coveragerc deleted file mode 100644 index e86d09d1a2..0000000000 --- a/operations/data/.coveragerc +++ /dev/null @@ -1,13 +0,0 @@ -[run] -source = - dffml_operations_data - tests -branch = True - -[report] -exclude_lines = - no cov - no qa - noqa - pragma: no cover - if __name__ == .__main__.: diff --git a/operations/data/.gitignore b/operations/data/.gitignore deleted file mode 100644 index 070ee81c83..0000000000 --- a/operations/data/.gitignore +++ /dev/null @@ -1,20 +0,0 @@ -*.log -*.pyc -.cache/ -.coverage -.idea/ -.vscode/ -*.egg-info/ -build/ -dist/ -docs/build/ -venv/ -wheelhouse/ -*.egss -.mypy_cache/ -*.swp -.venv/ -.eggs/ -*.modeldir -*.db -htmlcov/ diff --git a/operations/data/Dockerfile b/operations/data/Dockerfile deleted file mode 100644 index b7e990ac99..0000000000 --- a/operations/data/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Usage -# docker build -t gitpod/dffml_operations_data . -# docker run --rm -ti -p 80:8080 gitpod/dffml_operations_data -insecure -log debug -# -# curl -v http://127.0.0.1:80/list/sources -FROM ubuntu:22.04@sha256:6042500cf4b44023ea1894effe7890666b0c5c7871ed83a97c36c76ae560bb9b - -RUN apt-get update && \ - apt-get install -y \ - gcc \ - python3-dev \ - python3-pip \ - python3 \ - ca-certificates && \ - python3 -m pip install -U pip && \ - python3 -m pip install dffml-service-http && \ - apt-get purge -y \ - gcc \ - python3-dev && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /usr/src/app -COPY . /usr/src/app - -RUN python3 -m pip install -e . - -ENTRYPOINT ["python3", "-m", "dffml", "service", "http", "server", "-addr", "0.0.0.0"] -CMD ["-mc-config", "dffml_operations_data/deploy"] diff --git a/operations/data/LICENSE b/operations/data/LICENSE deleted file mode 100644 index 91bb615394..0000000000 --- a/operations/data/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2019 Intel, Sudhanshu - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
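The contexts/installed package removed above builds its public namespace dynamically: generate_namespace.py iterates over dffml.SystemContext.load() and, for each loaded system context, attaches a deployment callable to the module via setattr on sys.modules. The same setattr-on-module pattern shows up again below in dffml_operations_data/definitions.py. A minimal, self-contained sketch of that pattern, using SimpleNamespace stand-ins rather than dffml's actual SystemContext API:

    import sys
    from types import SimpleNamespace

    # Stand-ins for the objects discovered at import time; in the deleted
    # code these come from dffml.SystemContext.load(), and the exported
    # value is sysctx.deployment(deployment_environment="python.native").
    loaded = [
        SimpleNamespace(ENTRY_POINT_LABEL="alice", deployment=lambda: "alice thought"),
        SimpleNamespace(ENTRY_POINT_LABEL="bob", deployment=lambda: "bob thought"),
    ]

    for sysctx in loaded:
        # Export each discovered object as a module-level attribute, so
        # other modules can do: from this_module import alice
        setattr(sys.modules[__name__], sysctx.ENTRY_POINT_LABEL, sysctx.deployment())

Because the exports only exist after the loop runs, generate_namespace.py also delattr's its own `dffml` and `sys` attributes afterwards, presumably so that `from .generate_namespace import *` re-exports only the loaded contexts.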
diff --git a/operations/data/MANIFEST.in b/operations/data/MANIFEST.in
deleted file mode 100644
index 19f3196490..0000000000
--- a/operations/data/MANIFEST.in
+++ /dev/null
@@ -1,3 +0,0 @@
-include README.md
-include LICENSE
-include setup_common.py
diff --git a/operations/data/README.md b/operations/data/README.md
deleted file mode 100644
index f90edac6a5..0000000000
--- a/operations/data/README.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# DFFML dffml-operations-data Operations
-
-dffml-operations-data description.
-
-## Usage
-
-Example usage
-
-```console
-```
-
-## License
-
-DFFML dffml-operations-data is distributed under the [MIT License](LICENSE).
diff --git a/operations/data/dffml_operations_data/__init__.py b/operations/data/dffml_operations_data/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/operations/data/dffml_operations_data/definitions.py b/operations/data/dffml_operations_data/definitions.py
deleted file mode 100644
index 2d32697f93..0000000000
--- a/operations/data/dffml_operations_data/definitions.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import sys
-from dffml.df.types import Definition
-
-definitions = [
-    Definition(name="input_data", primitive="List[List[int]]"),
-    Definition(name="output_data", primitive="List[List[int]]"),
-    Definition(name="n_components", primitive="int"),
-    Definition(name="n_iter", primitive="int"),
-    Definition(name="random_state", primitive="int"),
-    Definition(name="missing_values", primitive="Any"),
-    Definition(name="strategy", primitive="str"),
-    Definition(name="categories", primitive="List[List[Any]]"),
-]
-
-for definition in definitions:
-    setattr(sys.modules[__name__], definition.name, definition)
diff --git a/operations/data/dffml_operations_data/operations.py b/operations/data/dffml_operations_data/operations.py
deleted file mode 100644
index 1d0cede0d5..0000000000
--- a/operations/data/dffml_operations_data/operations.py
+++ /dev/null
@@ -1,213 +0,0 @@
-import numpy as np
-from sklearn.decomposition import PCA, TruncatedSVD
-from sklearn.preprocessing import OneHotEncoder, StandardScaler
-from sklearn.impute import SimpleImputer
-
-from dffml.df.base import op
-
-from .definitions import (
-    n_iter,
-    strategy,
-    input_data,
-    categories,
-    output_data,
-    random_state,
-    n_components,
-    missing_values,
-)
-
-
-@op(
-    inputs={"data": input_data, "n_components": n_components},
-    outputs={"result": output_data},
-)
-async def principal_component_analysis(
-    data, n_components=None,
-):
-    """
-    Decomposes the data into (n_samples, n_components)
-    using the PCA method
-
-    Parameters
-    ----------
-    data : List[List[int]]
-        data to be decomposed.
-
-    n_components : int
-        number of columns the data should have after decomposition.
-
-    Returns
-    -------
-    result: Data having dimensions (n_samples, n_components)
-    """
-    pca = PCA(n_components=n_components)
-    new_data = pca.fit_transform(data)
-    return {"result": new_data}
-
-
-@op(
-    inputs={
-        "data": input_data,
-        "n_components": n_components,
-        "n_iter": n_iter,
-        "random_state": random_state,
-    },
-    outputs={"result": output_data},
-)
-async def singular_value_decomposition(
-    data, n_components=2, n_iter=5, random_state=None,
-):
-    """
-    Decomposes the data into (n_samples, n_components)
-    using the SVD method.
-
-    Parameters
-    ----------
-    data : List[List[int]]
-        data to be decomposed.
-
-    n_components : int
-        number of columns the data should have after decomposition.
-
-    Returns
-    -------
-    result: Data having dimensions (n_samples, n_components)
-    """
-    svd = TruncatedSVD(
-        n_components=n_components, n_iter=n_iter, random_state=random_state
-    )
-    new_data = svd.fit_transform(data)
-    return {"result": new_data}
-
-
-@op(
-    inputs={
-        "data": input_data,
-        "missing_values": missing_values,
-        "strategy": strategy,
-    },
-    outputs={"result": output_data},
-)
-async def simple_imputer(data, missing_values=np.nan, strategy="mean"):
-    """
-    Imputation method for missing values
-
-    Parameters
-    ----------
-    data : List[List[int]]
-        data in which missing values are present
-
-    missing_values : Any str, int, float, None default = np.nan
-        The value present in place of missing value
-
-    strategy : str "mean", "median", "constant", "most_frequent" default = "mean"
-        The imputation strategy to use
-
-    Returns
-    -------
-    result: Dataset having missing values imputed with the strategy
-    """
-    if not (missing_values is None or isinstance(missing_values, (str, int, float))):
-        raise Exception(
-            f"Missing values should be one of: str, float, int, None, np.nan, got {missing_values}"
-        )
-
-    if strategy not in ("mean", "median", "constant", "most_frequent"):
-        raise Exception(
-            f"Strategy should be one of mean, median, constant, most_frequent, got {strategy}"
-        )
-
-    imp = SimpleImputer(missing_values=missing_values, strategy=strategy)
-    new_data = imp.fit_transform(data)
-    return {"result": new_data}
-
-
-@op(
-    inputs={"data": input_data, "categories": categories},
-    outputs={"result": output_data},
-)
-async def one_hot_encoder(data, categories):
-    """
-    One hot encoding for categorical data columns
-
-    Parameters
-    ----------
-    data : List[List[int]]
-        data to be encoded.
-
-    categories : List[List[str]]
-        Categorical values which need to be encoded
-
-    Returns
-    -------
-    result: Encoded data for categorical values
-    """
-    enc = OneHotEncoder(categories=categories)
-    enc.fit(data)
-    new_data = enc.transform(data).toarray()
-    return {"result": new_data}
-
-
-@op(inputs={"data": input_data}, outputs={"result": output_data})
-async def standard_scaler(data):
-    """
-    Standardize features by removing the mean and
-    scaling to unit variance.
-
-    Parameters
-    ----------
-    data: List[List[int]]
-        data that needs to be standardized
-
-    Returns
-    -------
-    result: Standardized data
-    """
-    scaler = StandardScaler()
-    new_data = scaler.fit_transform(data)
-    return {"result": new_data.tolist()}
-
-
-@op(
-    inputs={"data": input_data}, outputs={"result": output_data},
-)
-async def remove_whitespaces(data):
-    """
-    Removes white-spaces from the dataset
-
-    Parameters
-    ----------
-    data : List[List[int]]
-        dataset.
-
-    Returns
-    -------
-    result: dataset having whitespaces removed
-    """
-    new_data = np.char.strip(data)
-    return {"result": new_data}
-
-
-@op(
-    inputs={"data": input_data}, outputs={"result": output_data},
-)
-async def ordinal_encoder(data):
-    """
-    One hot encoding for categorical data columns
-
-    Parameters
-    ----------
-    data : List[List[int]]
-        data to be encoded.
-
-    Note: despite the name, this operation is currently implemented
-    with OneHotEncoder, so categories are inferred from the data.
-
-    Returns
-    -------
-    result: Encoded data for categorical values
-    """
-    enc = OneHotEncoder()
-    enc.fit(data)
-    new_data = enc.transform(data).toarray()
-    return {"result": new_data}
diff --git a/operations/data/dffml_operations_data/version.py b/operations/data/dffml_operations_data/version.py
deleted file mode 100644
index 901e5110b2..0000000000
--- a/operations/data/dffml_operations_data/version.py
+++ /dev/null
@@ -1 +0,0 @@
-VERSION = "0.0.1"
diff --git a/operations/data/entry_points.txt b/operations/data/entry_points.txt
deleted file mode 100644
index 1027d8d62f..0000000000
--- a/operations/data/entry_points.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-[dffml.operation]
-principal_component_analysis = dffml_operations_data.operations:principal_component_analysis
-singular_value_decomposition = dffml_operations_data.operations:singular_value_decomposition
-simple_imputer = dffml_operations_data.operations:simple_imputer
-one_hot_encoder = dffml_operations_data.operations:one_hot_encoder
-standard_scaler = dffml_operations_data.operations:standard_scaler
-remove_whitespaces = dffml_operations_data.operations:remove_whitespaces
-ordinal_encoder = dffml_operations_data.operations:ordinal_encoder
\ No newline at end of file
diff --git a/operations/data/pyproject.toml b/operations/data/pyproject.toml
deleted file mode 100644
index 2630964232..0000000000
--- a/operations/data/pyproject.toml
+++ /dev/null
@@ -1,24 +0,0 @@
-[build-system]
-requires = ["setuptools", "wheel"]
-build-backend = "setuptools.build_meta"
-
-[tool.black]
-line-length = 79
-target-version = ['py37']
-
-exclude = '''
-(
-  /(
-      \.eggs          # exclude a few common directories in the
-    | \.git           # root of the project
-    | \.hg
-    | \.mypy_cache
-    | \.tox
-    | \.venv
-    | _build
-    | buck-out
-    | build
-    | dist
-  )
-)
-'''
diff --git a/operations/data/setup.cfg b/operations/data/setup.cfg
deleted file mode 100644
index 7981bcb7e3..0000000000
--- a/operations/data/setup.cfg
+++ /dev/null
@@ -1,33 +0,0 @@
-[metadata]
-name = dffml-operations-data
-version = attr: dffml_operations_data.version.VERSION
-description = DFFML operations dffml-operations-data
-long_description = file: README.md
-long_description_content_type = text/markdown
-author = "Sudhanshu kumar "
-author_email = sudhanshukumar5459@gmail.com
-maintainer = "Sudhanshu kumar "
-maintainer_email = sudhanshukumar5459@gmail.com
-url = https://github.com/gitpod/dffml-operations-data
-license = MIT
-keywords = dffml
-classifiers =
-    Development Status :: 3 - Alpha
-    Intended Audience :: Developers
-    License :: OSI Approved :: MIT License
-    Natural Language :: English
-    Operating System :: OS Independent
-    Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.7
-    Programming Language :: Python :: Implementation :: CPython
-    Programming Language :: Python :: Implementation :: PyPy
-
-[options]
-zip_safe = False
-include_package_data = True
-packages = find:
-entry_points = file: entry_points.txt
-install_requires =
-    dffml>=0.4.0
-    scikit-learn>=0.21.2
-    numpy>=1.19.2
diff --git a/operations/data/setup.py b/operations/data/setup.py
deleted file mode 100644
index 17542f4d0e..0000000000
--- a/operations/data/setup.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import sys
-import site
-import setuptools
-
-# See https://github.com/pypa/pip/issues/7953
-site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
-
-setuptools.setup()
diff --git a/operations/data/tests/__init__.py
b/operations/data/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/operations/data/tests/test_operations.py b/operations/data/tests/test_operations.py deleted file mode 100644 index 5db5fbbd9a..0000000000 --- a/operations/data/tests/test_operations.py +++ /dev/null @@ -1,227 +0,0 @@ -import numpy as np -from sklearn.datasets import make_classification - -from dffml.df.types import Input, DataFlow -from dffml.operation.output import GetSingle -from dffml.df.memory import MemoryOrchestrator -from dffml.util.asynctestcase import AsyncTestCase - -from dffml_operations_data.operations import * - - -class TestOperations(AsyncTestCase): - async def test_principal_component_analysis(self): - input_data, _ = make_classification( - n_samples=10, - n_features=10, - n_informative=8, - n_redundant=2, - random_state=7, - ) - async for ctx, results in MemoryOrchestrator.run( - DataFlow.auto(principal_component_analysis, GetSingle), - [ - Input( - value=[ - principal_component_analysis.op.outputs["result"].name - ], - definition=GetSingle.op.inputs["spec"], - ), - Input( - value=input_data, - definition=principal_component_analysis.op.inputs["data"], - ), - Input( - value=8, - definition=principal_component_analysis.op.inputs[ - "n_components" - ], - ), - ], - ): - self.assertTrue( - (10, 8) - == results[ - principal_component_analysis.op.outputs["result"].name - ].shape - ) - - async def test_singular_value_decomposition(self): - input_data, _ = make_classification( - n_samples=10, - n_features=10, - n_informative=8, - n_redundant=2, - random_state=7, - ) - async for ctx, results in MemoryOrchestrator.run( - DataFlow.auto(singular_value_decomposition, GetSingle), - [ - Input( - value=[ - singular_value_decomposition.op.outputs["result"].name - ], - definition=GetSingle.op.inputs["spec"], - ), - Input( - value=input_data, - definition=singular_value_decomposition.op.inputs["data"], - ), - Input( - value=8, - definition=singular_value_decomposition.op.inputs[ - "n_components" - ], - ), - Input( - value=1, - definition=singular_value_decomposition.op.inputs[ - "n_iter" - ], - ), - Input( - value=7, - definition=singular_value_decomposition.op.inputs[ - "random_state" - ], - ), - ], - ): - self.assertTrue( - (10, 8) - == results[ - singular_value_decomposition.op.outputs["result"].name - ].shape, - ) - - async def test_simple_imputer(self): - input_data = [[np.nan, 2], [6, np.nan], [7, 6]] - output_data = [[6.5, 2], [6, 4], [7, 6]] - async for ctx, results in MemoryOrchestrator.run( - DataFlow.auto(simple_imputer, GetSingle), - [ - Input( - value=[simple_imputer.op.outputs["result"].name], - definition=GetSingle.op.inputs["spec"], - ), - Input( - value=input_data, - definition=simple_imputer.op.inputs["data"], - ), - Input( - value=np.nan, - definition=simple_imputer.op.inputs["missing_values"], - ), - Input( - value="mean", - definition=simple_imputer.op.inputs["strategy"], - ), - ], - ): - self.assertTrue( - ( - results[simple_imputer.op.outputs["result"].name] - == output_data - ).all() - ) - - async def test_one_hot_encoder(self): - categories = [["Male", "Female"], [1, 2, 3]] - input_data = [["Female", 1], ["Male", 3]] - output_data = [[0.0, 1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0, 1.0]] - async for ctx, results in MemoryOrchestrator.run( - DataFlow.auto(one_hot_encoder, GetSingle), - [ - Input( - value=[one_hot_encoder.op.outputs["result"].name], - definition=GetSingle.op.inputs["spec"], - ), - Input( - value=input_data, - 
definition=one_hot_encoder.op.inputs["data"], - ), - Input( - value=categories, - definition=one_hot_encoder.op.inputs["categories"], - ), - ], - ): - self.assertTrue( - ( - results[one_hot_encoder.op.outputs["result"].name] - == output_data - ).all() - ) - - async def test_standard_scaler(self): - input_data = [[0, 0], [0, 0], [1, 1], [1, 1]] - output_data = [[-1, -1], [-1, -1], [1, 1], [1, 1]] - async for ctx, results in MemoryOrchestrator.run( - DataFlow.auto(standard_scaler, GetSingle), - [ - Input( - value=[standard_scaler.op.outputs["result"].name], - definition=GetSingle.op.inputs["spec"], - ), - Input( - value=input_data, - definition=standard_scaler.op.inputs["data"], - ), - ], - ): - self.assertTrue( - ( - results[standard_scaler.op.outputs["result"].name] - == output_data - ) - ) - - async def test_remove_whitespaces(self): - input_data = [[" ABC ", "XYD "], [" ABC", " XYD "]] - output_data = [["ABC", "XYD"], ["ABC", "XYD"]] - async for ctx, results in MemoryOrchestrator.run( - DataFlow.auto(remove_whitespaces, GetSingle), - [ - Input( - value=[remove_whitespaces.op.outputs["result"].name], - definition=GetSingle.op.inputs["spec"], - ), - Input( - value=input_data, - definition=remove_whitespaces.op.inputs["data"], - ), - ], - ): - self.assertTrue( - ( - results[remove_whitespaces.op.outputs["result"].name] - == output_data - ).all() - ) - - async def test_ordinal_encoder(self): - input_data = [["x", "a"], ["x", "b"], ["y", "a"]] - output_data = [ - [1.0, 0.0, 1.0, 0.0], - [1.0, 0.0, 0.0, 1.0], - [0.0, 1.0, 1.0, 0.0], - ] - async for ctx, results in MemoryOrchestrator.run( - DataFlow.auto(ordinal_encoder, GetSingle), - [ - Input( - value=[ordinal_encoder.op.outputs["result"].name], - definition=GetSingle.op.inputs["spec"], - ), - Input( - value=input_data, - definition=ordinal_encoder.op.inputs["data"], - ), - ], - ): - self.assertTrue( - ( - results[ordinal_encoder.op.outputs["result"].name] - == output_data - ).all() - ) diff --git a/operations/innersource/.coveragerc b/operations/innersource/.coveragerc deleted file mode 100644 index 6c2f46b02b..0000000000 --- a/operations/innersource/.coveragerc +++ /dev/null @@ -1,13 +0,0 @@ -[run] -source = - dffml_operations_innersource - tests -branch = True - -[report] -exclude_lines = - no cov - no qa - noqa - pragma: no cover - if __name__ == .__main__.: diff --git a/operations/innersource/.gitignore b/operations/innersource/.gitignore deleted file mode 100644 index 0edd3d2ad4..0000000000 --- a/operations/innersource/.gitignore +++ /dev/null @@ -1,22 +0,0 @@ -*.log -*.pyc -.cache/ -.coverage -.idea/ -.vscode/ -*.egg-info/ -build/ -dist/ -docs/build/ -venv/ -wheelhouse/ -*.egss -.mypy_cache/ -*.swp -.venv/ -.eggs/ -*.modeldir -*.db -htmlcov/ -built_html_docs/ -.tools/ diff --git a/operations/innersource/LICENSE b/operations/innersource/LICENSE deleted file mode 100644 index 61731767c7..0000000000 --- a/operations/innersource/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2021 johnsa1 - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or 
substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/operations/innersource/MANIFEST.in b/operations/innersource/MANIFEST.in deleted file mode 100644 index 4a485c3c31..0000000000 --- a/operations/innersource/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include README.rst -include LICENSE -recursive-include dffml_operations_innersource * diff --git a/operations/innersource/README.rst b/operations/innersource/README.rst deleted file mode 100644 index f6eeba643b..0000000000 --- a/operations/innersource/README.rst +++ /dev/null @@ -1 +0,0 @@ -../common/README.rst \ No newline at end of file diff --git a/operations/innersource/dffml_operations_innersource/__init__.py b/operations/innersource/dffml_operations_innersource/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/operations/innersource/dffml_operations_innersource/actions_validator.py b/operations/innersource/dffml_operations_innersource/actions_validator.py deleted file mode 100644 index 8cca11790d..0000000000 --- a/operations/innersource/dffml_operations_innersource/actions_validator.py +++ /dev/null @@ -1,86 +0,0 @@ -import logging -from typing import NewType - -import dffml - -# from .operations import ( -from dffml_operations_innersource.operations import ( - RepoDirectory, - ActionYAMLFileWorkflowUnixStylePath, -) - - -ActionsValidatorBinary = NewType("ActionsValidatorBinary", str) -ActionsValidatorResult = NewType("ActionsValidatorResult", dict) - - -@dffml.op -async def actions_validator( - repo_directory: RepoDirectory, - yaml_file_path: ActionYAMLFileWorkflowUnixStylePath, - actions_validator_binary: ActionsValidatorBinary, - *, - logger: logging.Logger = None, -) -> ActionsValidatorResult: - """ - >>> import asyncio - >>> import pathlib - >>> import tempfile - >>> - >>> import dffml - >>> from dffml_operations_innersource.actions_validator import ( - ... actions_validator, - ... ) - >>> from alice_test.shouldi.contribute.actions_validator import ( - ... ensure_actions_validator, - ... ) - >>> - >>> async def main(): - ... with tempfile.TemporaryDirectory() as tempdir: - ... yaml_file_path = pathlib.Path(tempdir).joinpath("action.yml") - ... await dffml.cached_download( - ... "https://raw.githubusercontent.com/mpalmer/action-validator/dd49fc0db4fc423b32704cc70ad80564d285ded7/test/002_basic_action/action.yml", - ... yaml_file_path, - ... "fddbaceb0c2d1779438f149db76896764c45a1adea3221b92e481c7a6a72c5ece33ccbb4ef42afc8d03d23b83d02ada9", - ... ) - ... actions_validator_binary = await ensure_actions_validator() - ... return await actions_validator( - ... tempdir, - ... yaml_file_path, - ... actions_validator_binary, - ... 
) - >>> - >>> print(asyncio.run(main())) - True - """ - exit_code = -1 - stderr = "" - items = None - async for event, result in dffml.run_command_events( - [ - str(actions_validator_binary), - str(yaml_file_path), - ], - cwd=repo_directory, - logger=logger, - events=[ - dffml.Subprocess.STDOUT, - dffml.Subprocess.STDERR, - dffml.Subprocess.COMPLETED, - ], - raise_on_failure=False, - ): - if event is dffml.Subprocess.STDOUT and logger: - logger.debug("Passed validation: %s", result.decode()) - elif event is dffml.Subprocess.STDERR and logger: - stderr = result.decode() - logger.debug("Failed validation: %s", stderr) - # TODO Parse output into dict or data model - items = stderr - elif event is dffml.Subprocess.COMPLETED: - exit_code = result - return { - "pass": bool(exit_code == 0), - "exit_code": exit_code, - "items": items, - } diff --git a/operations/innersource/dffml_operations_innersource/cli.py b/operations/innersource/dffml_operations_innersource/cli.py deleted file mode 100644 index 7e215f6a8a..0000000000 --- a/operations/innersource/dffml_operations_innersource/cli.py +++ /dev/null @@ -1,232 +0,0 @@ -import sys -import json -import pathlib -import tempfile -import platform -import itertools -from typing import Dict, NewType - -import dffml - -import dffml_feature_git.feature.definitions -import dffml_feature_git.feature.operations - -from . import operations - - -@dffml.config -class EnsureTokeiConfig: - cache_dir: pathlib.Path = dffml.field("Cache directory to store downloads in",) - platform_urls: Dict[str, Dict[str, str]] = dffml.field( - "Mapping of platform.system() return values to tokei download URLs with hashes", - default_factory=lambda: { - "Linux": { - "url": "https://github.com/XAMPPRocky/tokei/releases/download/v10.1.1/tokei-v10.1.1-x86_64-unknown-linux-gnu.tar.gz", - "expected_hash": "b54fa0959e7a3a8935bd5cd86795b92e14d0a7b2cb6fb8f362b7b48198ce83e6dedc35a87e7c8fa405328f19d0ea6c47", - }, - "Darwin": { - "url": "https://github.com/XAMPPRocky/tokei/releases/download/v10.1.1/tokei-v10.1.1-x86_64-apple-darwin.tar.gz", - "expected_hash": "8c8a1d8d8dd4d8bef93dabf5d2f6e27023777f8553393e269765d7ece85e68837cba4374a2615d83f071dfae22ba40e2", - }, - }, - ) - - -import contextlib - - -@dffml.op( - config_cls=EnsureTokeiConfig, imp_enter={"stack": contextlib.AsyncExitStack,}, -) -async def ensure_tokei(self) -> str: - tokei = await dffml.cached_download_unpack_archive( - **{ - "file_path": self.parent.config.cache_dir.joinpath("tokei.tar.gz"), - "directory_path": self.parent.config.cache_dir.joinpath("tokei-download"), - # Use whatever values are appropriate for the system we are on - **self.parent.config.platform_urls[platform.system()], - } - ) - self.parent.stack.enter_context(dffml.prepend_to_path(tokei)) - return tokei.joinpath("tokei") - - -GitHubRepoID = NewType("GitHubRepoID", str) - - -@dffml.op -async def github_repo_id_to_clone_url( - self, repo_id: GitHubRepoID, -) -> dffml_feature_git.feature.definitions.URLType: - """ - Convert GitHub Integer Repository ID to Clonable URL. 
- """ - with tempfile.TemporaryDirectory() as tempdir: - # Write out the API query response to a file - api_response_contents_path = pathlib.Path(tempdir, "contents") - with open(api_response_contents_path , "wb") as stdout: - await dffml.run_command( - ["gh", "api", f"https://api.github.com/repositories/{repo_id}"], - stdout=stdout, - ) - stdout.seek(0) - # Parse in the response body as JSON - repository = json.loads(api_response_contents_path.read_text()) - return repository["clone_url"] - - -LocalRepoDirectory = NewType("LocalRepoDirectory", str) - - -@dffml.op( - inputs={ - "directory": LocalRepoDirectory, - }, - outputs={"repo": dffml_feature_git.feature.definitions.git_repository}, -) -async def local_repo_resolver( - self, directory: LocalRepoDirectory, -) -> dffml_feature_git.feature.definitions.git_repository.spec: - async for event, result in dffml.run_command_events( - ["git", "remote", "get-url", "origin"], - cwd=directory, - logger=self.logger, - raise_on_failure=False, - events=[dffml.Subprocess.STDOUT, dffml.Subprocess.COMPLETED], - ): - if event is dffml.Subprocess.STDOUT: - url = result.decode().strip() - if url.endswith(".git"): - url = url[:-4] - elif event is dffml.Subprocess.COMPLETED and result != 0: - raise RuntimeError("Failed to get local directory remote URL") - return {"repo": {"URL": url, "directory": directory}} - - -COLLECTOR_DATAFLOW = dffml.DataFlow( - dffml.GroupBy, - *dffml.opimp_in(dffml_feature_git.feature.operations), - *dffml.opimp_in(operations), - *dffml.opimp_in(sys.modules[__name__]), - # TODO(alice) Update to use the real overlay infra within run() - *itertools.chain( - *[ - dffml.object_to_operations(cls) - for cls in dffml.Overlay.load( - entrypoint="dffml.overlays.alice.shouldi.contribute", - ) - ], - ), - configs={ - ensure_tokei.op.name: EnsureTokeiConfig( - cache_dir=pathlib.Path( - ".tools", "open-architecture", "innersource", ".cache", "tokei", - ) - ), - }, -) -COLLECTOR_DATAFLOW.seed = [ - dffml.Input(value=1, definition=COLLECTOR_DATAFLOW.definitions["quarters"]), - dffml.Input( - value=True, definition=COLLECTOR_DATAFLOW.definitions["no_git_branch_given"], - ), - dffml.Input( - value=dict( - itertools.chain( - *[ - [ - (output.name, { - "group": output.name, - "by": "quarter", - "nostrict": True, - }) - for output in operation.outputs.values() - ] - for operation in COLLECTOR_DATAFLOW.operations.values() - ] - ) - ), - definition=COLLECTOR_DATAFLOW.definitions["group_by_spec"], - ), -] -COLLECTOR_DATAFLOW.operations[ - COLLECTOR_DATAFLOW.operations["lines_of_code_by_language"].name -] = COLLECTOR_DATAFLOW.operations[ - COLLECTOR_DATAFLOW.operations["lines_of_code_by_language"].name -]._replace( - conditions=[ensure_tokei.op.outputs["result"]] -) -COLLECTOR_DATAFLOW.update(auto_flow=True) -# Operations which should take inputs from other operations in flow and seed -# MUST have their input flow modified to add the seed origin to the allowlist. 
-for operation_name, (input_name, origins) in [
-    (dffml_feature_git.feature.operations.clone_git_repo.op.name, ("URL", ["seed",])),
-    (dffml_feature_git.feature.operations.check_if_valid_git_repository_URL.op.name, ("URL", ["seed",])),
-]:
-    COLLECTOR_DATAFLOW.flow[operation_name].inputs[input_name].extend(origins)
-COLLECTOR_DATAFLOW.update_by_origin()
-
-
-import copy
-import dataclasses
-import dffml.cli.dataflow
-
-
-DEFAULT_SOURCE = dffml.JSONSource(
-    filename=pathlib.Path(".tools", "open-architecture", "innersource", "repos.json",),
-    readwrite=True,
-    allowempty=True,
-    mkdirs=True,
-)
-
-
-# NOTE When CLI and operations are merged: All this is the same stuff that will
-# happen to Operation config_cls structures. We need a more ergonomic API to
-# obscure the complexity dataclasses introduce when modifying fields/defaults
-# within subclasses.
-for dffml_cli_class_name, field_modifications in {
-    "RunAllRecords": {
-        # metadata setting could be less awkward
-        "dataflow": {"default": COLLECTOR_DATAFLOW},
-        "record_def": {"default": COLLECTOR_DATAFLOW.definitions["URL"].name},
-        "sources": {"default_factory": lambda: dffml.Sources(DEFAULT_SOURCE)},
-    },
-    "RunRecordSet": {
-        "dataflow": {"default": COLLECTOR_DATAFLOW},
-        "record_def": {"default": COLLECTOR_DATAFLOW.definitions["URL"].name},
-        "sources": {"default_factory": lambda: dffml.Sources(DEFAULT_SOURCE)},
-    },
-    "Diagram": {"dataflow": {"default": COLLECTOR_DATAFLOW,},},
-}.items():
-    # Create the class and config names by prepending InnerSource
-    new_class_name = "InnerSource" + dffml_cli_class_name
-    # Create a derived class
-    new_class = getattr(dffml.cli.dataflow, dffml_cli_class_name).subclass(
-        new_class_name, field_modifications,
-    )
-    # Add our new class to the global namespace
-    setattr(
-        sys.modules[__name__], new_class.CONFIG.__qualname__, new_class.CONFIG,
-    )
-    setattr(
-        sys.modules[__name__], new_class.__qualname__, new_class,
-    )
-
-
-class InnerSourceRunRecords(dffml.CMD):
-    """Run DataFlow and assign output to a record"""
-
-    _set = InnerSourceRunRecordSet
-    _all = InnerSourceRunAllRecords
-
-
-class InnerSourceRun(dffml.CMD):
-    """Run dataflow"""
-
-    records = InnerSourceRunRecords
-
-
-class InnerSourceCLI(dffml.CMD):
-
-    run = InnerSourceRun
-    diagram = InnerSourceDiagram
diff --git a/operations/innersource/dffml_operations_innersource/npm_groovy_lint.py b/operations/innersource/dffml_operations_innersource/npm_groovy_lint.py
deleted file mode 100644
index eb6a49f228..0000000000
--- a/operations/innersource/dffml_operations_innersource/npm_groovy_lint.py
+++ /dev/null
@@ -1,183 +0,0 @@
-import json
-import signal
-import asyncio
-import pathlib
-import logging
-import contextlib
-from typing import NewType
-
-import dffml
-
-# from .operations import (
-from dffml_operations_innersource.operations import (
-    RepoDirectory,
-    GroovyFileWorkflowUnixStylePaths,
-)
-
-
-NPMGroovyLintCMD = NewType("NPMGroovyLintCMD", list[str])
-NPMGroovyLintResult = NewType("NPMGroovyLintResult", str)
-JavaBinary = NewType("JavaBinary", str)
-CodeNarcServerProc = NewType("CodeNarcServerProc", object)
-CodeNarcServerReturnCode = NewType("CodeNarcServerReturnCode", int)
-
-
-class CouldNotResolvePathToNPMGroovyLintInstallError(Exception):
-    pass
-
-
-class CodeNarcServerUnknownFailure(Exception):
-    pass
-
-
-@contextlib.asynccontextmanager
-async def code_narc_server(
-    java_binary: JavaBinary,
-    npm_groovy_lint_cmd: NPMGroovyLintCMD,
-    *,
-    env: dict = None,
-    logger: logging.Logger = None,
-) -> CodeNarcServerProc:
-    #
Path to compiled CodeNarcServer within released package - npm_groovy_lint_path = npm_groovy_lint_cmd[-1] - if isinstance(npm_groovy_lint_path, str): - npm_groovy_lint_path = pathlib.Path(npm_groovy_lint_path) - if not npm_groovy_lint_path.exists(): - npm_groovy_lint_path = dffml.which(npm_groovy_lint_path.name) - if not isinstance(npm_groovy_lint_path, pathlib.Path): - raise CouldNotResolvePathToNPMGroovyLintInstallError(npm_groovy_lint_cmd) - java_lib_path = npm_groovy_lint_path.resolve().parents[1].joinpath( - "lib", "java", - ) - # Run the server - proc = None - # TODO Port is currently hardcoded, recompile? src/ files in npm-groovy-lint - async for event, result in dffml.run_command_events( - [ - - java_binary, - "-Djava.net.useSystemProxies=true", - "-Xms256m", - "-Xmx2048m", - "-cp", - ( - str(java_lib_path.joinpath("CodeNarcServer.jar").resolve()) - + ":" - + str(java_lib_path.joinpath("*").resolve()) - ), - "com.nvuillam.CodeNarcServer", - "--server", - r"includes='{}/.groovy'", - ], - env=env, - logger=logger, - events=[ - dffml.Subprocess.CREATED, - dffml.Subprocess.COMPLETED, - ], - raise_on_failure=False, - ): - if event is dffml.Subprocess.CREATED: - proc = result - # TODO Ask for STDOUT_READLINE and wait to yield until we know we - # can hit the HTTP server? - try: - yield proc - finally: - # Send Ctrl-C to exit cleanly - with contextlib.suppress(ProcessLookupError): - proc.send_signal(signal.SIGINT) - elif event is dffml.Subprocess.COMPLETED: - # Clean exit triggered by Ctrl-C will have a return code as follows - if result not in (130, -2): - raise CodeNarcServerUnknownFailure(f"Exit code: {result}") - - -@dffml.op -async def start_code_narc_server( - java_binary: JavaBinary, - npm_groovy_lint_cmd: NPMGroovyLintCMD, - *, - env: dict = None, - logger: logging.Logger = None, -) -> CodeNarcServerProc: - proc_context_manager = code_narc_server( - java_binary, - npm_groovy_lint_cmd, - env=env, - logger=logger, - ) - proc_context_manager.proc = await proc_context_manager.__aenter__() - return proc_context_manager - - -@dffml.op( - stage=dffml.Stage.CLEANUP, -) -async def stop_code_narc_server( - proc: CodeNarcServerProc, - *, - env: dict = None, - logger: logging.Logger = None, -) -> CodeNarcServerReturnCode: - await proc.__aexit__(None, None, None) - return proc.proc.returncode - - -@dffml.op -async def npm_groovy_lint( - repo_directory: RepoDirectory, - java_binary: JavaBinary, - # TODO Port for code narc is currently hardcoded, upstream fix and use here. 
- _code_narc_proc: CodeNarcServerProc, - npm_groovy_lint_cmd: NPMGroovyLintCMD, - groovy_paths: GroovyFileWorkflowUnixStylePaths, - *, - env: dict = None, - logger: logging.Logger = None, -) -> NPMGroovyLintResult: - if not groovy_paths: - return - # Check for config file - config_args = [] - npmgroovylintrc_paths = list(pathlib.Path(repo_directory).rglob(".groovylintrc.json")) - if npmgroovylintrc_paths: - if logger and len(npmgroovylintrc_paths) > 1: - logger.warning("Choosing first config file of multiple found: %r", npmgroovylintrc_paths) - config_args = ["--config", npmgroovylintrc_paths[0]] - cmd = [ - *npm_groovy_lint_cmd, - *config_args, - "--noserver", - # It will try to install java unless we give it one - "--javaexecutable", - java_binary, - "--output", - "json", - "--", - *groovy_paths, - ] - if logger: - logger.debug("cmd: %r", cmd) - proc = await asyncio.create_subprocess_exec( - *cmd, - cwd=repo_directory, - env=env, - stdout=asyncio.subprocess.PIPE, - ) - work = { - asyncio.create_task(proc.wait()): "wait", - asyncio.create_task(proc.communicate()): "communicate", - } - async for event, result in dffml.concurrently(work): - if event == "communicate": - parsed_result = json.loads(result[0]) - return { - **parsed_result, - **{ - "files": { - str(pathlib.Path(path).relative_to(repo_directory)): value - for path, value in parsed_result.get("files", {}).items() - } - } - } diff --git a/operations/innersource/dffml_operations_innersource/operations.py b/operations/innersource/dffml_operations_innersource/operations.py deleted file mode 100644 index 586cc1a4e3..0000000000 --- a/operations/innersource/dffml_operations_innersource/operations.py +++ /dev/null @@ -1,317 +0,0 @@ -import pathlib -import logging -import datetime -import itertools -from typing import List, NewType - -import dffml -from dffml_feature_git.feature.definitions import ( - git_repository_checked_out, - quarter_start_date, -) - - -GitHubActionsWorkflowUnixStylePath = NewType("GitHubActionsWorkflowUnixStylePath", str) -JenkinsfileWorkflowUnixStylePath = NewType("JenkinsfileWorkflowUnixStylePath", str) -GroovyFileWorkflowUnixStylePath = NewType("GroovyFileWorkflowUnixStylePath", str) -GroovyFileWorkflowUnixStylePaths = NewType("GroovyFileWorkflowUnixStylePaths", list[GroovyFileWorkflowUnixStylePath ]) -ActionYAMLFileWorkflowUnixStylePath = NewType("ActionYAMLFileWorkflowUnixStylePath", str) -ActionYAMLFileWorkflowUnixStylePaths = NewType("ActionYAMLFileWorkflowUnixStylePaths", list[ActionYAMLFileWorkflowUnixStylePath]) -IsGitHubAction = NewType("IsGitHubAction", bool) -IsJenkinsLibrary = NewType("IsJenkinsLibrary", bool) - - -def relative_paths( - directory: str, - paths: List[str], -): - return [ - path.relative_to(directory) - for path in paths - ] - - -@dffml.op( - inputs={"repo": git_repository_checked_out,}, - outputs={"result": GitHubActionsWorkflowUnixStylePath}, - expand=["result"], -) -def github_workflows(self, repo: git_repository_checked_out.spec) -> dict: - return { - "result": map( - str, - relative_paths( - repo.directory, - pathlib.Path(repo.directory, ".github", "workflows").glob("*.yml"), - ), - ), - } - - -@dffml.op( - inputs={"repo": git_repository_checked_out,}, - outputs={"result": JenkinsfileWorkflowUnixStylePath}, - expand=["result"], -) -def jenkinsfiles(self, repo: git_repository_checked_out.spec) -> dict: - return { - "result": map( - str, - relative_paths( - repo.directory, - pathlib.Path(repo.directory).rglob("**/*Jenkinsfile") - ), - ), - } - - -@dffml.op( - inputs={"repo": 
git_repository_checked_out,},
-    outputs={
-        "is_jenkins_library": IsJenkinsLibrary,
-        "groovy_files": GroovyFileWorkflowUnixStylePaths,
-        "groovy_file": GroovyFileWorkflowUnixStylePath,
-    },
-    expand=["groovy_file"],
-)
-def groovy_files(self, repo: git_repository_checked_out.spec) -> dict:
-    list_of_groovy_files = list(
-        map(
-            str,
-            relative_paths(
-                repo.directory,
-                [
-                    *pathlib.Path(repo.directory).rglob("vars/*.groovy"),
-                    *pathlib.Path(repo.directory).rglob("src/**/*.groovy"),
-                ],
-            ),
-        ),
-    )
-    return {
-        "is_jenkins_library": bool(list_of_groovy_files),
-        "groovy_files": list_of_groovy_files,
-        "groovy_file": list_of_groovy_files,
-    }
-
-@dffml.op(
-    inputs={"repo": git_repository_checked_out,},
-    outputs={
-        "actions": ActionYAMLFileWorkflowUnixStylePaths,
-        "action": ActionYAMLFileWorkflowUnixStylePath,
-        "is_github_action": IsGitHubAction,
-    },
-    expand=["action"],
-)
-def action_yml_files(self, repo: git_repository_checked_out.spec) -> dict:
-    list_of_action_yml_files = list(
-        pathlib.Path(repo.directory).rglob("**/action.yml")
-    )
-    # Remove YAML files that are not GitHub Actions (for example if someone
-    # named a workflow action.yml).
-    remove_paths = set()
-    for action_path in list_of_action_yml_files:
-        action_text = action_path.read_text(errors="backslashreplace")
-        action_text = action_text.replace("\r", "")
-        # Look for runs: at top level
-        if "runs:" not in action_text.split("\n"):
-            remove_paths.add(action_path)
-    for remove_path in remove_paths:
-        list_of_action_yml_files.remove(remove_path)
-    # Convert to repo relative paths
-    list_of_action_yml_files = list(
-        map(
-            str,
-            relative_paths(
-                repo.directory,
-                list_of_action_yml_files,
-            ),
-        ),
-    )
-    return {
-        "is_github_action": bool(list_of_action_yml_files),
-        "actions": list_of_action_yml_files,
-        "action": list_of_action_yml_files,
-    }
-
-
-FileReadmePresent = NewType("FileReadmePresent", bool)
-FileContributingPresent = NewType("FileContributingPresent", bool)
-FileCodeOfConductPresent = NewType("FileCodeOfConductPresent", bool)
-FileSecurityPresent = NewType("FileSecurityPresent", bool)
-FileSupportPresent = NewType("FileSupportPresent", bool)
-
-
-@dffml.op(inputs={"repo": git_repository_checked_out,},)
-def readme_present(self, repo: git_repository_checked_out.spec) -> FileReadmePresent:
-    return any(
-        [
-            path
-            for path in pathlib.Path(repo.directory).iterdir()
-            if "readme" == path.stem.lower()
-        ]
-    )
-
-
-@dffml.op(inputs={"repo": git_repository_checked_out,},)
-def contributing_present(self, repo: git_repository_checked_out.spec) -> FileContributingPresent:
-    return any(
-        [
-            pathlib.Path(repo.directory, "CONTRIBUTING.md").is_file(),
-            pathlib.Path(repo.directory, "CONTRIBUTING.rst").is_file()
-        ]
-    )
-
-
-# TODO Check compliance with RFC 9116
-@dffml.op(inputs={"repo": git_repository_checked_out,},)
-def security_present(self, repo: git_repository_checked_out.spec) -> FileSecurityPresent:
-    return any(
-        [
-            pathlib.Path(repo.directory, "SECURITY.md").is_file(),
-            pathlib.Path(repo.directory, "SECURITY.rst").is_file(),
-            pathlib.Path(repo.directory, "SECURITY.txt").is_file(),
-            pathlib.Path(repo.directory, "security.txt").is_file(),
-        ]
-    )
-
-
-@dffml.op(inputs={"repo": git_repository_checked_out,},)
-def support_present(self, repo: git_repository_checked_out.spec) -> FileSupportPresent:
-    return any(
-        [
-            pathlib.Path(repo.directory, "SUPPORT.md").is_file(),
-            pathlib.Path(repo.directory, "SUPPORT.rst").is_file(),
-        ]
-    )
-
-
-@dffml.op(inputs={"repo": git_repository_checked_out,},)
-def code_of_conduct_present(self, repo: git_repository_checked_out.spec) -> FileCodeOfConductPresent:
-    return any(
-        [
-            pathlib.Path(repo.directory, "CODE_OF_CONDUCT.md").is_file(),
-            pathlib.Path(repo.directory, "CODE_OF_CONDUCT.rst").is_file(),
-        ]
-    )
-
-
-# TODO Auto definition code which is about to undergo refactor will fix up this
-# oddness with typing and half ability to have auto inputs with types.
-@dffml.op(inputs={}, outputs={"result": quarter_start_date})
-def get_current_datetime_as_git_date():
-    return {
-        "result": datetime.datetime.now().strftime("%Y-%m-%d %H:%M"),
-    }
-
-
-@dffml.config
-class MaintainedConfig:
-    commits: int = dffml.field(
-        "Equal or greater to this number of commits in the last quarter results in a return value of True",
-        default=1,
-    )
-
-
-@dffml.op(
-    inputs={
-        "results": dffml.GroupBy.op.outputs["output"],
-    },
-    config_cls=MaintainedConfig,
-    stage=dffml.Stage.OUTPUT,
-)
-def maintained(self, results: dict) -> bool:
-    # As an example, if there is one commit in the last period (quarter), return
-    # maintained (True for the maintained operation for this input data).
-    if results["commits"][-1] >= self.config.commits:
-        return True
-
-
-@dffml.config
-class UnmaintainedConfig:
-    commits: int = dffml.field(
-        "Any less than this number of commits in the last quarter results in a return value of True",
-        default=1,
-    )
-
-
-@dffml.op(
-    inputs={
-        "results": dffml.GroupBy.op.outputs["output"],
-    },
-    stage=dffml.Stage.OUTPUT,
-    config_cls=UnmaintainedConfig,
-)
-def unmaintained(self, results: dict) -> bool:
-    # As an example, if there are no commits in the last quarter, return
-    # unmaintained (True for the unmaintained operation for this input data).
-    if results["commits"][-1] < self.config.commits:
-        return True
-
-
-# TODO We may not need stage anymore, need to see if we should deprecate
-@dffml.op(
-    stage=dffml.Stage.OUTPUT, conditions=[maintained.op.outputs["result"]],
-)
-def badge_maintained() -> str:
-    return "https://img.shields.io/badge/Maintainance-Active-green"
-
-
-@dffml.op(
-    stage=dffml.Stage.OUTPUT, conditions=[unmaintained.op.outputs["result"]],
-)
-def badge_unmaintained() -> str:
-    return "https://img.shields.io/badge/Maintainance-Inactive-red"
-
-
-RepoDirectory = NewType("RepoDirectory", str)
-
-
-@dffml.op(
-    inputs={"repo": git_repository_checked_out,},
-    outputs={"result": RepoDirectory},
-)
-def repo_directory(self, repo: git_repository_checked_out.spec) -> RepoDirectory:
-    # How did this not exist? I think it does somewhere else, another branch
-    return {"result": repo.directory}
-
-
-RepoURL = NewType("RepoURL", str)
-
-
-@dffml.op(
-    inputs={"repo": git_repository_checked_out,},
-    outputs={"result": RepoURL},
-)
-def repo_url(self, repo: git_repository_checked_out.spec) -> RepoURL:
-    """
-    Helper operation to expose repo URL of checked out repo object.
-
-    TODO Remove this in favor of some kind of mapping extract style on objects
-    ref engineering logs for more notes on @op.mapping.extract style decorator.
-    """
-    return {"result": repo.URL}
-
-
-HasDocs = NewType("HasDocs", dict)
-
-
-@dffml.op
-def has_docs(
-    repo_directory: RepoDirectory,
-    readme_present: FileReadmePresent,
-    *,
-    logger: logging.Logger = None,
-) -> HasDocs:
-    # TODO Refactor this, ideally support regex and/or open policy agent
-    check_files_or_strings = ("support", "usage", "example", "known issues")
-    output = dict(zip(["readme_present", *check_files_or_strings], [False] * 5))
-    for path in filter(pathlib.Path.is_file, pathlib.Path(repo_directory).iterdir()):
-        if "readme" == path.stem.lower():
-            output["readme_present"] = True
-            for check in check_files_or_strings:
-                if check in path.read_text(errors='backslashreplace').lower():
-                    output[check] = True
-        for check in check_files_or_strings:
-            if check.replace(" ", "_") == path.stem.lower():
-                output[check] = True
-    return output
diff --git a/operations/innersource/dffml_operations_innersource/version.py b/operations/innersource/dffml_operations_innersource/version.py
deleted file mode 100644
index 901e5110b2..0000000000
--- a/operations/innersource/dffml_operations_innersource/version.py
+++ /dev/null
@@ -1 +0,0 @@
-VERSION = "0.0.1"
diff --git a/operations/innersource/entry_points.txt b/operations/innersource/entry_points.txt
deleted file mode 100644
index 6843b583b5..0000000000
--- a/operations/innersource/entry_points.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-[dffml.service.cli]
-innersource = dffml_operations_innersource.cli:InnerSourceCLI
diff --git a/operations/innersource/pyproject.toml b/operations/innersource/pyproject.toml
deleted file mode 100644
index 17b1235941..0000000000
--- a/operations/innersource/pyproject.toml
+++ /dev/null
@@ -1,22 +0,0 @@
-requires = ["setuptools>=44", "wheel", "setuptools_scm[toml]>=3.4.3"]
-build-backend = "setuptools.build_meta"
-
-[tool.setuptools_scm]
-
-[tool.black]
-exclude = '''
-(
-  /(
-      \.eggs          # exclude a few common directories in the
-    | \.git           # root of the project
-    | \.hg
-    | \.mypy_cache
-    | \.tox
-    | \.venv
-    | _build
-    | buck-out
-    | build
-    | dist
-  )
-)
-'''
diff --git a/operations/innersource/setup.cfg b/operations/innersource/setup.cfg
deleted file mode 100644
index bbe2596188..0000000000
--- a/operations/innersource/setup.cfg
+++ /dev/null
@@ -1,40 +0,0 @@
-[metadata]
-name = dffml-operations-innersource
-version = attr: dffml_operations_innersource.version.VERSION
-description = DFFML Operations for measuring org health
-long_description = file: README.rst
-author = John Andersen
-author_email = johnandersenpdx@gmail.com
-maintainer = John Andersen
-maintainer_email = johnandersenpdx@gmail.com
-url = https://github.com/intel/dffml/blob/master/operations/innersource
-license = MIT
-keywords = dffml
-classifiers =
-    Development Status :: 3 - Alpha
-    Intended Audience :: Developers
-    License :: OSI Approved :: MIT License
-    Natural Language :: English
-    Operating System :: OS Independent
-    Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.7
-    Programming Language :: Python :: Implementation :: CPython
-    Programming Language :: Python :: Implementation :: PyPy
-
-[options]
-zip_safe = False
-include_package_data = True
-packages = find:
-entry_points = file: entry_points.txt
-install_requires =
-    dffml>=0.4.0
-    pyyaml>=6.0
-
-[options.extras_require]
-dev =
-    coverage
-    codecov
-    sphinx
-    twine
-    black==19.10b0
-    importlib_metadata>=4.8.1;python_version<"3.8"
diff --git a/operations/innersource/setup.py b/operations/innersource/setup.py
deleted file mode 100644
index 17542f4d0e..0000000000
---
a/operations/innersource/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -import sys -import site -import setuptools - -# See https://github.com/pypa/pip/issues/7953 -site.ENABLE_USER_SITE = "--user" in sys.argv[1:] - -setuptools.setup() diff --git a/operations/innersource/tests/__init__.py b/operations/innersource/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/operations/innersource/tests/test_operations.py b/operations/innersource/tests/test_operations.py deleted file mode 100644 index cfd08b006f..0000000000 --- a/operations/innersource/tests/test_operations.py +++ /dev/null @@ -1,101 +0,0 @@ -import sys -import copy -import pathlib -import itertools - -from dffml.df.types import Input, DataFlow -from dffml.df.base import opimp_in -from dffml.df.memory import MemoryOrchestrator -from dffml.operation.output import GetSingle -from dffml.util.asynctestcase import AsyncTestCase - -from dffml_operations_innersource.operations import * -from dffml_feature_git.feature.operations import ( - check_if_valid_git_repository_URL, - clone_git_repo, - cleanup_git_repo, -) - -OPIMPS = opimp_in(sys.modules[__name__]) - -DFFML_ROOT_DIR = pathlib.Path(__file__).parents[3] - -DATAFLOW = DataFlow.auto( - *OPIMPS, -) - - -class TestOperations(AsyncTestCase): - async def test_run(self): - dataflow = copy.deepcopy(DATAFLOW) - # Tell the dataflow to accept repo inputs with an origin of seed (the - # default origin for when inputs are added on dataflow start). Where the - # input definition name is the name of the repo definition. - dataflow.flow[github_workflow_present.op.name].inputs["repo"] += [ - {"seed": [github_workflow_present.op.inputs["repo"].name]}, - ] - # Update flow mappings - dataflow.update() - await self.assertRunDataFlow(dataflow, { - "dffml": ( - [ - Input( - value=github_workflow_present.op.inputs["repo"].spec( - directory=DFFML_ROOT_DIR, - ), - definition=github_workflow_present.op.inputs["repo"], - ), - Input( - value=list(itertools.chain(*[ - [ - definition.name - for definition in opimp.op.outputs.values() - ] - for opimp in OPIMPS - # The operations we don't care to compare outputs - if opimp.op.name not in [ - GetSingle.op.name, - clone_git_repo.op.name, - check_if_valid_git_repository_URL.op.name, - ] - ])), - definition=GetSingle.op.inputs["spec"], - ), - ], - { - github_workflow_present.op.outputs["result"].name: True - }, - ) - }) - - async def test_on_repos(self): - dataflow = copy.deepcopy(DATAFLOW) - await self.assertRunDataFlow(dataflow, { - "dffml": ( - [ - Input( - value="https://github.com/pdxjohnny/httptest", - definition=clone_git_repo.op.inputs["URL"], - ), - Input( - value=list(itertools.chain(*[ - [ - definition.name - for definition in opimp.op.outputs.values() - ] - for opimp in OPIMPS - # The operations we don't care to compare outputs - if opimp.op.name not in [ - GetSingle.op.name, - clone_git_repo.op.name, - check_if_valid_git_repository_URL.op.name, - ] - ])), - definition=GetSingle.op.inputs["spec"], - ), - ], - { - github_workflow_present.op.outputs["result"].name: True, - }, - ) - }) diff --git a/operations/neo4j/.coveragerc b/operations/neo4j/.coveragerc deleted file mode 100644 index 86b204bec4..0000000000 --- a/operations/neo4j/.coveragerc +++ /dev/null @@ -1,13 +0,0 @@ -[run] -source = - dffml_operations_neo4j - tests -branch = True - -[report] -exclude_lines = - no cov - no qa - noqa - pragma: no cover - if __name__ == .__main__.: diff --git a/operations/neo4j/.gitignore b/operations/neo4j/.gitignore deleted file mode 100644 
index 3af0b3e081..0000000000 --- a/operations/neo4j/.gitignore +++ /dev/null @@ -1,21 +0,0 @@ -*.log -*.pyc -.cache/ -.coverage -.idea/ -.vscode/ -*.egg-info/ -build/ -dist/ -docs/build/ -venv/ -wheelhouse/ -*.egss -.mypy_cache/ -*.swp -.venv/ -.eggs/ -*.modeldir -*.db -htmlcov/ -built_html_docs/ diff --git a/operations/neo4j/Dockerfile b/operations/neo4j/Dockerfile deleted file mode 100644 index 448f3b5b4b..0000000000 --- a/operations/neo4j/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Usage -# docker build -t pdxjohnny/dffml_operations_neo4j . -# docker run --rm -ti -p 80:8080 pdxjohnny/dffml_operations_neo4j -insecure -log debug -# -# curl -v http://127.0.0.1:80/list/sources -FROM ubuntu:22.04@sha256:6042500cf4b44023ea1894effe7890666b0c5c7871ed83a97c36c76ae560bb9b - -RUN apt-get update && \ - apt-get install -y \ - gcc \ - python3-dev \ - python3-pip \ - python3 \ - ca-certificates && \ - python3 -m pip install -U pip && \ - python3 -m pip install dffml-service-http && \ - apt-get purge -y \ - gcc \ - python3-dev && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /usr/src/app -COPY . /usr/src/app - -RUN python3 -m pip install -e .[dev] - -ENTRYPOINT ["python3", "-m", "dffml", "service", "http", "server", "-addr", "0.0.0.0"] -CMD ["-mc-config", "dffml_operations_neo4j/deploy"] diff --git a/operations/neo4j/LICENSE b/operations/neo4j/LICENSE deleted file mode 100644 index ebaa2d3d08..0000000000 --- a/operations/neo4j/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2023 Intel - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/operations/neo4j/MANIFEST.in b/operations/neo4j/MANIFEST.in deleted file mode 100644 index 09138e7daf..0000000000 --- a/operations/neo4j/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include README.rst -include LICENSE -recursive-include dffml_operations_neo4j * diff --git a/operations/neo4j/README.rst b/operations/neo4j/README.rst deleted file mode 100644 index f687c62017..0000000000 --- a/operations/neo4j/README.rst +++ /dev/null @@ -1,10 +0,0 @@ -Package Name -============ - -Package description - -Install from pip - -.. 
code-block:: console - - $ pip install package diff --git a/operations/neo4j/dffml_operations_neo4j/__init__.py b/operations/neo4j/dffml_operations_neo4j/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/operations/neo4j/dffml_operations_neo4j/operations.py b/operations/neo4j/dffml_operations_neo4j/operations.py deleted file mode 100644 index a4089f487f..0000000000 --- a/operations/neo4j/dffml_operations_neo4j/operations.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import List - -from dffml.df.base import op - -from .definitions import calc_string, is_add, is_mult, numbers, result - - -@op(inputs={"numbers": numbers}, outputs={"sum": result}, conditions=[is_add]) -async def calc_add(numbers: List[int]): - """ - Sum of a list of numbers - """ - return {"sum": sum(numbers)} - - -@op( - inputs={"numbers": numbers}, - outputs={"product": result}, - conditions=[is_mult], -) -async def calc_mult(numbers: List[int]): - """ - Multiply a list of numbers together - """ - product = 1 - for number in numbers: - product *= number - return {"product": product} - - -@op( - inputs={"line": calc_string}, - outputs={"add": is_add, "mult": is_mult, "numbers": numbers}, -) -async def calc_parse_line(line: str): - """ - Parse a line which holds the English form of a math calculation to be done - """ - return { - "add": "add" in line, - "mult": "mult" in line, - "numbers": [int(item) for item in line.split() if item.isdigit()], - } diff --git a/operations/neo4j/entry_points.txt b/operations/neo4j/entry_points.txt deleted file mode 100644 index 4312c791e5..0000000000 --- a/operations/neo4j/entry_points.txt +++ /dev/null @@ -1,4 +0,0 @@ -[dffml.operation] -calc_add = dffml_operations_neo4j.operations:calc_add -calc_mult = dffml_operations_neo4j.operations:calc_mult -calc_parse_line = dffml_operations_neo4j.operations:calc_parse_line diff --git a/operations/neo4j/pyproject.toml b/operations/neo4j/pyproject.toml deleted file mode 100644 index 17b1235941..0000000000 --- a/operations/neo4j/pyproject.toml +++ /dev/null @@ -1,22 +0,0 @@ -requires = ["setuptools>=44", "wheel", "setuptools_scm[toml]>=3.4.3"] -build-backend = "setuptools.build_meta" - -[tool.setuptools_scm] - -[tool.black] -exclude = ''' -( - /( - \.eggs # exclude a few common directories in the - | \.git # root of the project - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | buck-out - | build - | dist - ) -) -''' diff --git a/operations/neo4j/setup.cfg b/operations/neo4j/setup.cfg deleted file mode 100644 index 92fd20cbfb..0000000000 --- a/operations/neo4j/setup.cfg +++ /dev/null @@ -1,41 +0,0 @@ -[metadata] -name = dffml-operations-neo4j -description = DFFML operations dffml-operations-neo4j -long_description = file: README.rst -author = Unknown -author_email = unknown@example.com -maintainer = Unknown -maintainer_email = unknown@example.com -url = https://github.com/pdxjohnny/dffml-operations-neo4j -license = MIT -keywords = dffml -classifiers = - Development Status :: 3 - Alpha - Intended Audience :: Developers - License :: OSI Approved :: MIT License - Natural Language :: English - Operating System :: OS Independent - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.7 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - -[options] -zip_safe = False -include_package_data = True -packages = find: -entry_points = file: entry_points.txt -setup_requires = - setuptools_scm[toml]>=3.4.3 -install_requires = - 
dffml>=0.4.0 - -[options.extras_require] -dev = - coverage - codecov - sphinx - twine - setuptools_scm[toml]>=3.4.3 - black==23.3.0 - importlib_metadata>=4.8.1;python_version<"3.8" diff --git a/operations/neo4j/setup.py b/operations/neo4j/setup.py deleted file mode 100644 index cc5beb58f6..0000000000 --- a/operations/neo4j/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -import sys -import site -import setuptools - -# See https://github.com/pypa/pip/issues/7953 -site.ENABLE_USER_SITE = "--user" in sys.argv[1:] - -setuptools.setup(use_scm_version=True) diff --git a/operations/neo4j/tests/__init__.py b/operations/neo4j/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/service/activitypub-webhook-relay/.coveragerc b/service/activitypub-webhook-relay/.coveragerc deleted file mode 100644 index 4d91a6367d..0000000000 --- a/service/activitypub-webhook-relay/.coveragerc +++ /dev/null @@ -1,13 +0,0 @@ -[run] -source = - activitypub_webhook_relay - tests -branch = True - -[report] -exclude_lines = - no cov - no qa - noqa - pragma: no cover - if __name__ == .__main__.: diff --git a/service/activitypub-webhook-relay/.gitignore b/service/activitypub-webhook-relay/.gitignore deleted file mode 100644 index 3af0b3e081..0000000000 --- a/service/activitypub-webhook-relay/.gitignore +++ /dev/null @@ -1,21 +0,0 @@ -*.log -*.pyc -.cache/ -.coverage -.idea/ -.vscode/ -*.egg-info/ -build/ -dist/ -docs/build/ -venv/ -wheelhouse/ -*.egss -.mypy_cache/ -*.swp -.venv/ -.eggs/ -*.modeldir -*.db -htmlcov/ -built_html_docs/ diff --git a/service/activitypub-webhook-relay/CONTRIBUTING.rst b/service/activitypub-webhook-relay/CONTRIBUTING.rst deleted file mode 100644 index 4297b798a7..0000000000 --- a/service/activitypub-webhook-relay/CONTRIBUTING.rst +++ /dev/null @@ -1,15 +0,0 @@ -CONTRIBUTING -============ - -Create new virtual environment - -.. code-block:: console - - $ python -m venv .venv - $ . .venv/bin/activate - -Install in development mode - -.. code-block:: console - - $ pip install -e .[dev] ../../ diff --git a/service/activitypub-webhook-relay/LICENSE b/service/activitypub-webhook-relay/LICENSE deleted file mode 100644 index ebaa2d3d08..0000000000 --- a/service/activitypub-webhook-relay/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2023 Intel - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
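The entry_points.txt files above register plugins under setuptools entry point groups such as ``dffml.operation``, which is how DFFML discovers installed operations. A minimal stand-alone sketch of how such registrations can be discovered at runtime with only the standard library; the group and entry names are taken from the neo4j package above, but this loader is illustrative, not DFFML's own loading mechanism:

.. code-block:: python

    # Discover objects registered under an entry point group using only
    # the standard library (Python 3.10+ signature of entry_points()).
    # The "dffml.operation" group name comes from entry_points.txt above;
    # everything else here is an illustrative assumption.
    from importlib.metadata import entry_points

    def load_registered_operations(group="dffml.operation"):
        # Returns {entry name: loaded object}, importing each target module
        return {ep.name: ep.load() for ep in entry_points(group=group)}

Once the package is installed, ``load_registered_operations()["calc_add"]`` would import and return ``dffml_operations_neo4j.operations:calc_add``.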
diff --git a/service/activitypub-webhook-relay/MANIFEST.in b/service/activitypub-webhook-relay/MANIFEST.in deleted file mode 100644 index 318a248f9c..0000000000 --- a/service/activitypub-webhook-relay/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include README.rst -include LICENSE -recursive-include activitypub_webhook_relay * diff --git a/service/activitypub-webhook-relay/README.rst b/service/activitypub-webhook-relay/README.rst deleted file mode 100644 index c23beadf01..0000000000 --- a/service/activitypub-webhook-relay/README.rst +++ /dev/null @@ -1,15 +0,0 @@ -ActivityPub Webhook Relay -========================= - -Relays webhooks into the federated event space (ActivityPub). - -Install from pip - -.. code-block:: console - - $ pip install activitypub-webhook-relay - -References -********** - -- https://github.com/intel/dffml/issues/1315 diff --git a/service/activitypub-webhook-relay/activitypub_webhook_relay/__init__.py b/service/activitypub-webhook-relay/activitypub_webhook_relay/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/service/activitypub-webhook-relay/activitypub_webhook_relay/version.py b/service/activitypub-webhook-relay/activitypub_webhook_relay/version.py deleted file mode 100644 index 7723ca46a5..0000000000 --- a/service/activitypub-webhook-relay/activitypub_webhook_relay/version.py +++ /dev/null @@ -1 +0,0 @@ -VERSION = "0.0.0" diff --git a/service/activitypub-webhook-relay/pyproject.toml b/service/activitypub-webhook-relay/pyproject.toml deleted file mode 100644 index 3ba7736eac..0000000000 --- a/service/activitypub-webhook-relay/pyproject.toml +++ /dev/null @@ -1,20 +0,0 @@ -requires = ["setuptools>=44", "wheel", "setuptools_scm[toml]>=3.4.3"] -build-backend = "setuptools.build_meta" - -[tool.black] -exclude = ''' -( - /( - \.eggs # exclude a few common directories in the - | \.git # root of the project - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | buck-out - | build - | dist - ) -) -''' diff --git a/service/activitypub-webhook-relay/setup.cfg b/service/activitypub-webhook-relay/setup.cfg deleted file mode 100644 index 5bafd8fb65..0000000000 --- a/service/activitypub-webhook-relay/setup.cfg +++ /dev/null @@ -1,43 +0,0 @@ -[metadata] -name = activitypub-webhook-relay -description = DFFML service activitypub-webhook-relay -version = attr: activitypub_webhook_relay.version.VERSION -long_description = file: README.rst -author = Unknown -author_email = unknown@example.com -maintainer = Unknown -maintainer_email = unknown@example.com -url = https://github.com/intel/dffml/tree/main/service/activitypub-webhook-relay -license = MIT -# keywords = dffml -classifiers = - Development Status :: 3 - Alpha - Intended Audience :: Developers - License :: OSI Approved :: MIT License - Natural Language :: English - Operating System :: OS Independent - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.7 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - -[options] -zip_safe = False -include_package_data = True -packages = find: -# entry_points = file: entry_points.txt -install_requires = - quart - tomli_w - bovine -# dffml>=0.4.0 - -[options.extras_require] -dev = - coverage - codecov - sphinx - twine - setuptools_scm[toml]>=3.4.3 - black==23.7.0 - importlib_metadata>=4.8.1;python_version<"3.8" diff --git a/service/activitypub-webhook-relay/setup.py b/service/activitypub-webhook-relay/setup.py deleted file mode 100644 index 
cc5beb58f6..0000000000 --- a/service/activitypub-webhook-relay/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -import sys -import site -import setuptools - -# See https://github.com/pypa/pip/issues/7953 -site.ENABLE_USER_SITE = "--user" in sys.argv[1:] - -setuptools.setup(use_scm_version=True) diff --git a/service/activitypub-webhook-relay/tests/__init__.py b/service/activitypub-webhook-relay/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/service/ossse/.coveragerc b/service/ossse/.coveragerc deleted file mode 100644 index e244a1983b..0000000000 --- a/service/ossse/.coveragerc +++ /dev/null @@ -1,15 +0,0 @@ -[run] -source = - ossse - tests -branch = True -omit = - ossse/cli.py - -[report] -exclude_lines = - no cov - no qa - noqa - pragma: no cover - if __name__ == .__main__.: diff --git a/service/ossse/.gitattributes b/service/ossse/.gitattributes deleted file mode 100644 index dfe0770424..0000000000 --- a/service/ossse/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -# Auto detect text files and perform LF normalization -* text=auto diff --git a/service/ossse/.gitignore b/service/ossse/.gitignore deleted file mode 100644 index 9df07fedf8..0000000000 --- a/service/ossse/.gitignore +++ /dev/null @@ -1,22 +0,0 @@ -*.log -*.pyc -.cache/ -.coverage -.idea/ -.vscode/ -*.egg-info/ -build/ -dist/ -docs/build/ -venv/ -wheelhouse/ -*.egss -.mypy_cache/ -*.swp -.venv/ -.eggs/ -*.modeldir -*.db -*test_cache/ -htmlcov/ -*.json diff --git a/service/ossse/LICENSE b/service/ossse/LICENSE deleted file mode 100644 index acd0d6d786..0000000000 --- a/service/ossse/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2017-2022 Intel - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/service/ossse/MANIFEST.in b/service/ossse/MANIFEST.in deleted file mode 100644 index a5021c60e3..0000000000 --- a/service/ossse/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include README.rst -include LICENSE diff --git a/service/ossse/README.rst b/service/ossse/README.rst deleted file mode 100644 index 070f083b97..0000000000 --- a/service/ossse/README.rst +++ /dev/null @@ -1,6 +0,0 @@ -Open Source Software Security Evaluation -======================================== - -.. 
code-block:: console - - ossse server -addr 0.0.0.0 -log debug -admin json=/home/johnsa1/.config/ossse.json diff --git a/service/ossse/ossse/__init__.py b/service/ossse/ossse/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/service/ossse/ossse/cli.py b/service/ossse/ossse/cli.py deleted file mode 100644 index d1fdd383ad..0000000000 --- a/service/ossse/ossse/cli.py +++ /dev/null @@ -1,464 +0,0 @@ -''' -Command line interface evaluates packages given their source URLs -''' -import os -import re -import glob -import json -import asyncio -import pathlib -import datetime -import traceback -import pkg_resources -from functools import partial -from typing import Union - -import motor.motor_asyncio -from aiohttp import web, WSMsgType - -import dffml -# TODO, is this still here? -# from dffml.util.monitor import Monitor, Task - -from cvemap.cvedb import CVEDB, Client -from cvemap.cvemap import CVEMap - -from .log import LOGGER - -LOGGER = LOGGER.getChild('cli') - -class DB(object): - - def __init__(self, uri=os.environ.get("DATABASE_CONNECTION_STRING", 'mongodb://localhost:27017')): - self.client = motor.motor_asyncio.AsyncIOMotorClient(uri) - self.conn = self.client['wl']['items'] - - async def total(self): - return await self.conn.count_documents({}) - - async def page(self, page_size, page_num): - skips = page_size * (page_num - 1) - cursor = self.conn.find() - if skips > 0: - cursor.skip(skips) - cursor.limit(page_size) - async for document in cursor: - yield document - - -@dffml.config -class DBImportConfig: - sources: dffml.Sources = dffml.field( - "Sources to import from into mongodb", - ) - - -class DBImport(dffml.CMD): - CONFIG = DBImportConfig - - async def run(self): - self.db = DB() - async for record in dffml.load(self.sources): - valid = dffml.export(record.features()) - if 'features' in valid \ - and 'crypto' in valid['features'] \ - and 'evidence' in valid['features']['crypto']: - del valid['features']['crypto']['evidence'] - valid['_id'] = valid['src_url'] - await self.db.conn.insert_one(valid) - - -@dffml.config -class EvaluationServerConfig: - port: int = dffml.field( - 'Port to bind to', - default=5000, - ) - addr: str = dffml.field( - 'Address to bind to', - default='127.0.0.1', - ) - sources: dffml.Sources = dffml.field( - "Sources to import from into mongodb", - default=dffml.Sources(), - ) - - -class EvaluationServer(dffml.CMD): - CONFIG = EvaluationServerConfig - - def asset_path(self, *args): - return pkg_resources.resource_filename(__name__, - os.path.join('html', *args)) - - async def configure(self): - self.assets_path = self.asset_path('dist') - - async def sync(self, request): - ws = web.WebSocketResponse() - await ws.prepare(request) - task = request.app.loop.create_task(self.sync_sub(request, ws)) - try: - async for msg in ws: - if msg.type == WSMsgType.CLOSE: - await ws.close() - elif msg.type != WSMsgType.TEXT: - continue - LOGGER.debug('Got ws message: %r', msg.data) - try: - data = msg.json() - except Exception as error: - LOGGER.warning('JSON decode error: %r: %s', msg, error) - continue - try: - request.app.loop.create_task(self.sync_pub(request, ws, - **data)) - except Exception as error: - LOGGER.warning('sync_pub error: %r: %s: %s', msg, error, - traceback.format_exc()) - continue - finally: - task.cancel() - return ws - - async def sync_pub(self, request, ws, *, method: str = '', name: str = '', - value = None, meta = None, **kwargs): - if meta is None: - meta = {} - if method == 'get': - getter = getattr(self, 'sync_get_%s' % 
(meta.get('__type', ''),), - self.sync_get) - await getter(request, ws, name, meta) - elif method == 'set' and value is not None: - setter = getattr(self, 'sync_set_%s' % (meta.get('__type', ''),), - self.sync_set) - await setter(request, ws, name, value, meta) - - async def sync_get_record(self, request, ws, name, meta): - # Dataflow as class / Metrics as running output operations over cached flow or - # wait until fulfilled - record = await request.app['sources'].record(name) - # if record.features(): - # await ws.send_json(dict(name=name, method='got', data=record.export())) - # return - key, task, started = await self.evaluate_start(request.app, name) - async for event, msg in task.events(): - if event == 'set': - await ws.send_json(dict(name='%s.log' % (name,), method='got', - data=msg)) - else: - await ws.send_json({'event': event, 'msg': msg}) - results = await self.evaluate_finish(request.app, key, task, started) - record.evaluated(results) - await ws.send_json(dict(name=name, method='got', data=record.export())) - - async def sync_get(self, request, ws, name, meta): - pass - - async def sync_set(self, request, ws, name, data, meta): - pass - - async def sync_sub(self, request, ws): - # await ws.send_json({'event': event, 'msg': msg}) - pass - - async def index(self, request): - with open(self.asset_path('dist', 'index.html')) as fd: - return web.Response(text=fd.read(), content_type='text/html') - - async def setup(self, **kwargs): - await self.configure() - if 'monitor' not in kwargs: - kwargs['monitor'] = dffml.Monitor() - self.app = web.Application() - # http://docs.aiohttp.org/en/stable/faq.html#where-do-i-put-my-database-connection-so-handlers-can-access-it - self.app.update(kwargs) - self.app.add_routes([ - web.get('/', self.index), - web.get('/sync/', self.sync), - ]) - self.app.router.add_static('/', self.assets_path) - self.runner = web.AppRunner(self.app, access_log=None) - await self.runner.setup() - - async def start(self): - site = web.TCPSite(self.runner, self.addr, self.port) - await site.start() - LOGGER.info('Serving on %s:%d', self.addr, self.port) - - async def run(self): - ''' - Binds to port and starts HTTP server - ''' - async with self.sources as sources: - await self.setup(sources=sources) - await self.start() - while True: - await asyncio.sleep(60) - - async def _evaluate(self, app, key, task=None): - # This class is the basic flow with no database caching - # return await app['features'].evaluate(key, task=task) - # Run the collection dataflow - # TODO This is very similar to the HTTP API, in fact it's the first - # iteration. - # The Task stuff was kind of like the dataflow context stuff - - # Gross, hardcoded inputs and definitions. - # TODO Convert this service to make it run via dataflows run - # from the HTTP service once the HTTP service is refactored. - async for ctx, results in dffml.run( - self.dataflow, - [ - dffml.Input( - value=key, - definition=self.dataflow.definitions["URL"], - ), - dffml.Input( - # "$(date +'%Y-%m-%d %H:%M')=quarter_start_date" \ - value=datetime.datetime.now().strftime("%Y-%m-%d %H:%M"), - definition=self.dataflow.definitions["quarter_start_date"], - ), - - ], - ): - # TODO Add events and publish changes to clients via data.set as we - # iterate over data moving between operations here and run output - # operations as soon as their dependency trees are satisfied.
- if task is not None: - for key, value in results.items(): - await task.data.set(key, value) - return results - - async def evaluate_start(self, app, key): - task = await app['monitor'].task(key) - if not task is None: - return key, task, False - data = await app['monitor'].start(partial(self._evaluate, app, key), - key=key) - return key, data, True - - async def evaluate_finish(self, app, key, data, started): - results = await data.complete() - if not started: - return results - await app['sources'].update(Record(key, data={'features': results})) - return results - - async def evaluate(self, app, key): - return await self.evaluate_finish(app, - *(await self.evaluate_start(app, key))) - - -DEFAULT_ADMIN_SOURCE = dffml.JSONSource( - filename=pathlib.Path( - ".tools", - "open-architecture", - "shouldi", - "server", - "sources", - "admin.json", - ), - readwrite=True, - allowempty=True, -) - - -import dffml_feature_git.feature.operations - - -DATAFLOW = dffml.DataFlow( - dffml.GroupBy, - dffml_feature_git.feature.operations.make_quarters, - dffml_feature_git.feature.operations.quarters_back_to_date, - dffml_feature_git.feature.operations.check_if_valid_git_repository_URL, - dffml_feature_git.feature.operations.clone_git_repo, - dffml_feature_git.feature.operations.git_repo_default_branch, - dffml_feature_git.feature.operations.git_repo_commit_from_date, - dffml_feature_git.feature.operations.git_repo_author_lines_for_dates, - dffml_feature_git.feature.operations.work, - dffml_feature_git.feature.operations.git_commits, - dffml_feature_git.feature.operations.count_authors, - dffml_feature_git.feature.operations.cleanup_git_repo, -) -DATAFLOW.seed = [ - dffml.Input( - value=10, - definition=DATAFLOW.definitions['quarters'], - ), - dffml.Input( - value=True, - definition=DATAFLOW.definitions['no_git_branch_given'], - ), - dffml.Input( - value={ - "authors": { - "group": "author_count", - "by": "quarter", - }, - "commits": { - "group": "commit_count", - "by": "quarter", - }, - "work": { - "group": "work_spread", - "by": "quarter", - }, - }, - definition=DATAFLOW.definitions['group_by_spec'], - ), -] - - -@dffml.config -class ServerConfig(EvaluationServerConfig): - dataflow: Union[str, dffml.DataFlow] = dffml.field( - "File containing exported DataFlow or dataflow itself", - default=DATAFLOW, - ) - admin: dffml.Sources = dffml.field( - "Admin sources", - default=dffml.Sources(DEFAULT_ADMIN_SOURCE), - ) - configloader: dffml.BaseConfigLoader = dffml.field( - "ConfigLoader to use for importing DataFlow", default=None, - ) - - -class Server(EvaluationServer): - CONFIG = ServerConfig - - async def run(self): - ''' - Binds to port and starts HTTP server - cvedb_server = os.getenv('CVEDB', default=None) - if not cvedb_server is None: - self.cvemap = CVEMap(Client(server=cvedb_server)) - else: - self.cvemap = CVEMap(CVEDB()) - ''' - self.db = DB() - # Create directories for default source if not exists - if self.admin and self.admin[0] is DEFAULT_ADMIN_SOURCE: - if not self.admin[0].config.filename.parent.is_dir(): - self.admin[0].config.filename.parent.mkdir(parents=True) - # We removed metrics in favor of features in favor of dataflows - # We need to update to calling dataflows. - # There was previously a Monitor for Monitoring execution of metrics - # We might want to re-apply that to our dataflow context watching. 
- async with self.sources as sources, self.admin as admin: - # Have to match new double context entry - async with sources() as sctx, admin() as actx: - await self.setup(sources=sctx, - admin=actx, - db=self.db) - await self.start() - while True: - await asyncio.sleep(60) - - async def hasaccess(self, request, name, meta): - # TODO - return True - - async def sync_get_admin(self, request, ws, name, meta): - if not await self.hasaccess(request, name, meta): - return - record = await request.app['admin'].record(name) - record = record.export() - data = record.get('features', {}) - data.update(record.get('extra', {})) - if data: - await ws.send_json(dict(name=name, method='got', data=data, - type='admin')) - - async def sync_set_admin(self, request, ws, name, value, meta): - if not await self.hasaccess(request, name, meta): - return - record = await request.app['admin'].record(name) - record.evaluated(value) - await request.app['admin'].update(record) - - async def sync_get_cves(self, request, ws, name, meta): - return - async for cveid, cve in request.app['cvemap'].cves(name): - await ws.send_json(dict(name=name, method='got', data={cveid: cve}, - type='cves')) - - async def sync_get_total(self, request, ws, name, meta): - await ws.send_json(dict(name=name, method='got', - data=await request.app['db'].total(), type='total')) - - async def sync_get_list(self, request, ws, name, meta): - async for document in request.app['db'].page( - meta.get('page_size', 5), meta.get('page_num', 0)): - await ws.send_json(dict(name=name, method='got', data=document, - type='list')) - - async def sync_get_record(self, request, ws, name, meta): - record = await request.app['sources'].record(name) - if record.data.prediction: - await ws.send_json(dict(name=name, method='got', data=record.export(), - type='record')) - return - key, task, started = await self.evaluate_start(request.app, name) - async for event, msg in task.events(): - if event == 'set': - await ws.send_json(dict(name=name, method='got', data=msg, - type='log')) - elif event != 'done': - await ws.send_json({'event': event, 'msg': msg}) - record = await self.evaluate_finish(request.app, key, task, started) - await ws.send_json(dict(name=name, method='got', data=record.export(), - type='record')) - - async def sync_set(self, request, ws, name, data, meta): - if not await self.hasaccess(request, name, meta): - return - - async def _evaluate(self, app, key, task = None): - # Grab any existing data - record = await app['sources'].record(key) - # Run the collection dataflow - results = await super()._evaluate(app, key, task=task) - # Update the results in the DB - record.evaluated(results) - await app['sources'].update(record) - return record - # Models were previously called prophets - async for record, cl, cf in app['model'].predict(record.asyncgen(), - app['features'], app['classifications']): - # Predicted took classification and confidence in classification - # Think it's still the same, Hashim has an open PR I believe - record.predicted(cl, cf) - return record - - async def evaluate_finish(self, app, key, data, started): - record = await data.complete() - if not started: - return record - await app['sources'].update(record) - return record - - async def __aenter__(self): - await super().__aenter__() - if not isinstance(self.dataflow, dffml.DataFlow): - dataflow_path = pathlib.Path(self.dataflow) - config_cls = self.configloader - if config_cls is None: - config_type = dataflow_path.suffix.replace(".", "") - config_cls = 
dffml.BaseConfigLoader.load(config_type) - async with config_cls.withconfig( - self.extra_config - ) as configloader: - async with configloader() as loader: - exported = await loader.loadb(dataflow_path.read_bytes()) - self.dataflow = dffml.DataFlow._fromdict(**exported) - return self - -class OSSSECLI(dffml.CMD): - ''' - CLI interface for wllearn; expands upon dffml - ''' - - server = Server - _import = DBImport diff --git a/service/ossse/ossse/log.py b/service/ossse/ossse/log.py deleted file mode 100644 index 283f375316..0000000000 --- a/service/ossse/ossse/log.py +++ /dev/null @@ -1,3 +0,0 @@ -'''Logging''' -import logging -LOGGER = logging.getLogger(__package__) diff --git a/service/ossse/ossse/metric.py b/service/ossse/ossse/metric.py deleted file mode 100644 index 40602ceacf..0000000000 --- a/service/ossse/ossse/metric.py +++ /dev/null @@ -1,157 +0,0 @@ -''' -Unorganized metrics -''' -import os -import asyncio - -from dffml.metric import Metric, Data - -from dffml_metric_git.metric.git import GitMetric -from dffml_metric_git.util.proc import check_output, create, stop - -class MetricStub(Metric): - - def dtype(self): - return bool - - def length(self): - return 1 - -class Progress(MetricStub): - - NAME: str = 'progress' - - async def parse(self, data: Data): - for i in range(0, 20): - await asyncio.sleep(0.05) - await data.log('Hi %2.5f', i * 0.05) - await data.data.set('progress', True) - - async def calc(self, data: Data) -> bool: - return await data.data.get('progress') - -class Grader(MetricStub): - - NAME: str = 'grade' - - async def calc(self, data: Data) -> str: - return 'A+' - -class CoverageMetric(GitMetric): - ''' - Requirements: - pip install coverage - ''' - - NAME: str = 'unit tests' - - def dtype(self): - return bool - - def length(self): - return 1 - - async def applicable(self, data: Data) -> bool: - if not await super().applicable(data): - return False - await super().fetch(data) - if not os.path.isfile(os.path.join(data.git.cwd, 'setup.py')): - return False - await data.log('has setup.py') - return True - # await data.log('running pip install') - - async def git_parse(self, data: Data): - try: - proc = await create('coverage', 'run', 'setup.py', 'test', - cwd=data.git.cwd) - while proc.returncode is None: - done, pending = await asyncio.wait( - [proc.stdout.readline(), proc.stderr.readline()], - timeout=1, return_when=asyncio.FIRST_COMPLETED) - [fut.cancel() for fut in pending] - stream = ''.join([fut.result().decode(errors='ignore') \ - for fut in done]) - await data.log('unittest run: %s', stream.strip()) - exit_code, proc = await stop(proc) - await data.log('unittest exit code: %r', exit_code) - await data.data.set('unittest', exit_code) - report = await check_output('coverage', 'report', '-m', - cwd=data.git.cwd) - await data.log('coverage report: %s', report) - await data.data.set('coverage_report', report) - await check_output('coverage', 'html', cwd=data.git.cwd) - # TODO - # shutil.make_archive(archive_name, 'gztar', root_dir) - except RuntimeError as err: - await data.log('Error in git_parse: %r', err) - raise - - async def calc(self, data: Data): - return { - 'unittest': await data.data.get('unittest', 0), - 'report': await data.data.get('coverage_report', 0), - } - -class YarnTestMetric(GitMetric): - ''' - Requirements: - yarn add - ''' - - NAME: str = 'unit tests' - - def dtype(self): - return bool - - def length(self): - return 1 - - async def applicable(self, data: Data) -> bool: - if not await super().applicable(data): - return False - await 
super().fetch(data) - if not os.path.isfile(os.path.join(data.git.cwd, 'package.json')): - return False - await data.log('has package.json') - await data.log('running yarn install') - try: - proc = await create('yarn', 'install', cwd=data.git.cwd) - while proc.returncode is None: - done, pending = await asyncio.wait( - [proc.stdout.readline(), - proc.stderr.readline()], - timeout=1, - return_when=asyncio.FIRST_COMPLETED) - [fut.cancel() for fut in pending] - stream = ''.join([fut.result().decode(errors='ignore') \ - for fut in done]) - await data.log('yarn install: %s', stream.strip()) - await stop(proc) - except RuntimeError as err: - await data.log('Error in applicable: %r', err) - raise - return True - - async def git_parse(self, data: Data): - try: - proc = await create('yarn', 'run', 'test', cwd=data.git.cwd) - while proc.returncode is None: - done, pending = await asyncio.wait( - [proc.stdout.readline(), - proc.stderr.readline()], - timeout=1, - return_when=asyncio.FIRST_COMPLETED) - [fut.cancel() for fut in pending] - stream = ''.join([fut.result().decode(errors='ignore') \ - for fut in done]) - await data.log('yarn test: %s', stream.strip()) - exit_code, proc = await stop(proc) - await data.data.set('yarn_test', exit_code) - await data.log('yarn test exit code: %r', exit_code) - except RuntimeError as err: - await data.log('Error in applicable: %r', err) - raise - - async def calc(self, data: Data): - return await data.data.get('yarn_test', 0) diff --git a/service/ossse/ossse/service/__init__.py b/service/ossse/ossse/service/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/service/ossse/ossse/version.py b/service/ossse/ossse/version.py deleted file mode 100644 index a4e55ec0ac..0000000000 --- a/service/ossse/ossse/version.py +++ /dev/null @@ -1 +0,0 @@ -VERSION = '0.0.1' diff --git a/service/ossse/requirements.txt b/service/ossse/requirements.txt deleted file mode 100644 index 84af0e08f8..0000000000 --- a/service/ossse/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -aiohttp>=3.4.4 -motor>=2.0.0 -git+https://github.com/pdxjohnny/dffml@manifest#egg=dffml -git+https://github.com/pdxjohnny/dffml@manifest#egg=dffml-feature-git&subdirectory=feature/git -# Need CVEMap diff --git a/service/ossse/setup.py b/service/ossse/setup.py deleted file mode 100644 index 1f66cdffc6..0000000000 --- a/service/ossse/setup.py +++ /dev/null @@ -1,54 +0,0 @@ -import ast -from io import open - -from setuptools import find_packages, setup - -with open('ossse/version.py', 'r') as f: - for line in f: - if line.startswith('VERSION'): - version = ast.literal_eval(line.strip().split('=')[-1].strip()) - break - -with open('README.rst', 'r', encoding='utf-8') as f: - readme = f.read() - -with open('requirements.txt', 'r', encoding='utf-8') as f: - INSTALL_REQUIRES = [line for line in f] - -setup( - name='ossse', - version=version, - description='Open Source Software Data Collection', - long_description=readme, - author='John Andersen', - author_email='johnandersenpdx@gmail.com', - url='https://github.com/intel/dffml', - license='', - - keywords=[ - '', - ], - - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Natural Language :: English', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming 
Language :: Python :: Implementation :: PyPy', - ], - - install_requires=INSTALL_REQUIRES, - tests_require=[], - - packages=find_packages(), - entry_points={ - 'console_scripts': [ - 'ossse = ossse.cli:OSSSECLI.main', - ], - }, -) diff --git a/service/ossse/tests/__init__.py b/service/ossse/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/service/ossse/tests/test_metric_yarn.py b/service/ossse/tests/test_metric_yarn.py deleted file mode 100644 index 2170b2b27d..0000000000 --- a/service/ossse/tests/test_metric_yarn.py +++ /dev/null @@ -1,21 +0,0 @@ -import asyncio -import unittest - -from dffml.metric import Data, Metrics -from dffml.util.asynctestcase import AsyncTestCase - -from dffml_metric_git.metric.git import GitMetric - -from ossse.cli import YarnTestMetric - -class TestYarnTest(AsyncTestCase): - - def setUp(self): - self.url = 'https://github.com/pillarjs/csrf' - self.yarn_test = YarnTestMetric() - self.metrics = Metrics(self.yarn_test) - - async def test_applicable(self): - async with self.metrics: - applicable = await self.metrics.applicable(Data(self.url)) - self.assertIn(self.yarn_test, applicable) diff --git a/source/mongodb/.coveragerc b/source/mongodb/.coveragerc deleted file mode 100644 index 66a98aa5ab..0000000000 --- a/source/mongodb/.coveragerc +++ /dev/null @@ -1,13 +0,0 @@ -[run] -source = - dffml_source_mongodb - tests -branch = True - -[report] -exclude_lines = - no cov - no qa - noqa - pragma: no cover - if __name__ == .__main__.: diff --git a/source/mongodb/.gitignore b/source/mongodb/.gitignore deleted file mode 100644 index 3af0b3e081..0000000000 --- a/source/mongodb/.gitignore +++ /dev/null @@ -1,21 +0,0 @@ -*.log -*.pyc -.cache/ -.coverage -.idea/ -.vscode/ -*.egg-info/ -build/ -dist/ -docs/build/ -venv/ -wheelhouse/ -*.egss -.mypy_cache/ -*.swp -.venv/ -.eggs/ -*.modeldir -*.db -htmlcov/ -built_html_docs/ diff --git a/source/mongodb/LICENSE b/source/mongodb/LICENSE deleted file mode 100644 index 276b9945c4..0000000000 --- a/source/mongodb/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (c) 2021 Intel - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
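The ossse setup.py above extracts ``VERSION`` by scanning version.py with ``ast.literal_eval`` rather than importing the package, which avoids triggering import-time side effects during a build. A minimal stand-alone sketch of the same idiom; the path is assumed from the layout above:

.. code-block:: python

    # Parse VERSION out of a version.py without importing the package.
    # Mirrors the idiom in the ossse setup.py above; the path is assumed.
    import ast

    def read_version(path="ossse/version.py"):
        with open(path, "r") as f:
            for line in f:
                if line.startswith("VERSION"):
                    # VERSION = '0.0.1' -> evaluate the right-hand side literal
                    return ast.literal_eval(line.strip().split("=")[-1].strip())
        raise ValueError(f"VERSION not found in {path}")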
diff --git a/source/mongodb/MANIFEST.in b/source/mongodb/MANIFEST.in deleted file mode 100644 index 6d6d7abb8f..0000000000 --- a/source/mongodb/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include README.rst -include LICENSE -recursive-include dffml_source_mongodb * diff --git a/source/mongodb/README.rst b/source/mongodb/README.rst deleted file mode 100644 index 5118d8716d..0000000000 --- a/source/mongodb/README.rst +++ /dev/null @@ -1,2 +0,0 @@ -DFFML Source for MongoDB -======================== diff --git a/source/mongodb/dffml_source_mongodb/__init__.py b/source/mongodb/dffml_source_mongodb/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/source/mongodb/dffml_source_mongodb/source.py b/source/mongodb/dffml_source_mongodb/source.py deleted file mode 100644 index 01621851e7..0000000000 --- a/source/mongodb/dffml_source_mongodb/source.py +++ /dev/null @@ -1,93 +0,0 @@ -import urllib.parse -from typing import AsyncIterator, Dict, List - -from dffml.base import BaseConfig -from dffml.record import Record -from dffml.source.source import BaseSourceContext, BaseSource -from dffml.util.cli.arg import Arg -from dffml.util.entrypoint import entrypoint -from dffml.base import config - - -import motor.motor_asyncio - - -@config -class MongoDBSourceConfig: - uri: str - db: str = None - collection: str = None - tlsInsecure: bool = False - log_collection_names: bool = False - - def __post_init__(self): - uri = urllib.parse.urlparse(self.uri) - if uri.path: - self.db = uri.path[1:] - - -# TODO Investigate use of -# https://pymongo.readthedocs.io/en/3.12.0/api/pymongo/client_session.html#pymongo.client_session.ClientSession -# for Context. -class MongoDBSourceContext(BaseSourceContext): - async def update(self, record): - self.logger.debug("update: %s: %r", record.key, record.export()) - await self.parent.collection.replace_one( - {"_id": record.key}, - {"_id": record.key, **record.export()}, - upsert=True, - ) - - def document_to_record(self, document, key=None): - self.logger.debug("document: %r", document) - if document is None: - if key is None: - raise ValueError("Cannot create empty record with no key") - return Record(key) - if "key" in document: - key = document["key"] - del document["key"] - else: - key = document["_id"] - del document["_id"] - if "features" in document: - return Record(key, data=document) - else: - return Record(key, data={"features": document}) - - async def records(self) -> AsyncIterator[Record]: - async for document in self.parent.collection.find(): - yield self.document_to_record(document) - - async def record(self, key: str) -> Record: - document = await self.parent.collection.find_one({"_id": key}) - return self.document_to_record(document, key=key) - - -@entrypoint("mongodb") -class MongoDBSource(BaseSource): - """ - Stores records ... somewhere! (skeleton template is in memory) - """ - - CONFIG = MongoDBSourceConfig - CONTEXT = MongoDBSourceContext - - def __init__(self, config: BaseConfig) -> None: - super().__init__(config) - self.client = None - - async def __aenter__(self): - self.client = motor.motor_asyncio.AsyncIOMotorClient(self.config.uri, - tlsInsecure=self.config.tlsInsecure) - self.db = self.client[self.config.db] - # Thought: Plugins as dataflows. Is a method call an event? Is it an - # input? 
- if self.config.log_collection_names: - self.logger.info("Collection names: %r", await self.db.list_collection_names()) - self.collection = self.db[self.config.collection] - self.logger.info("Collection options: %r", await self.collection.options()) - return self - - async def __aexit__(self, _exc_type, _exc_value, _traceback): - self.client = None diff --git a/source/mongodb/dffml_source_mongodb/util/__init__.py b/source/mongodb/dffml_source_mongodb/util/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/source/mongodb/dffml_source_mongodb/util/mongodb_docker.py b/source/mongodb/dffml_source_mongodb/util/mongodb_docker.py deleted file mode 100644 index dbaa796922..0000000000 --- a/source/mongodb/dffml_source_mongodb/util/mongodb_docker.py +++ /dev/null @@ -1,161 +0,0 @@ -import os -import json -import time -import atexit -import socket -import pathlib -import logging -import tempfile -import unittest -import subprocess -from contextlib import contextmanager -from typing import Optional - -import docker - -LOGGER = logging.getLogger(__package__) - -logging.basicConfig(level=logging.DEBUG) - -DOCKER_IMAGE = "mongo:4" -# MongoDB daemon's default listening port -DEFAULT_PORT = 27017 -# Environment variables passed to MongoDB container -DOCKER_ENV = { - "MONGO_INITDB_ROOT_USERNAME": "mongoadmin", - "MONGO_INITDB_ROOT_PASSWORD": "secret", -} -DOCKER_NA: str = "Failed to connect to docker daemon" -DOCKER_AVAILABLE: bool = False -try: - DOCKER_CLIENT: docker.DockerClient = docker.from_env() - DOCKER_AVAILABLE = DOCKER_CLIENT.ping() - DOCKER_CLIENT.close() -except: - pass - - -class MongoDBFailedToStart(Exception): - pass # pragma: no cov - - -def check_connection(addr: str, port: int, *, timeout: float = 0.1) -> bool: - """ - Attempt to make a TCP connection. Return True if a connection was made - within ``timeout`` seconds, False otherwise. - """ - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.settimeout(float(timeout)) - try: - s.connect((addr, port)) - except Exception: - return False - return True - - -def mkcleanup(docker_client, container): - """ - Create a function which will remove the temporary file and stop the - container. The function will register itself with the :py:mod:`atexit` - module to ensure that the container is stopped before Python exits. It will - unregister itself whenever it is called. - """ - func = None - - def cleanup(): - atexit.unregister(func) - try: - container.stop() - container.wait() - except: - pass - docker_client.close() - - func = cleanup - atexit.register(func) - return cleanup - - -@contextmanager -def mongodb(*, js_setup: Optional[str] = None): - """ - Start a MongoDB container and yield the IP of the container once ready for - connections. ``js_setup`` should be the .js file used to initialize the - database.
- """ - if not DOCKER_AVAILABLE: - raise unittest.SkipTest("Need docker to run MongoDB") - - docker_client: docker.DockerClient = docker.from_env() - with tempfile.TemporaryDirectory() as tempdir: - # Volumes to mount - volumes = {} - # Dump out JavaScript initialization file - if js_setup is not None: - js_setup_path = pathlib.Path(tempdir, "dump.js") - js_setup_path.write_text(js_setup) - js_setup_path.chmod(0o555) - volumes[js_setup_path.resolve()] = { - "bind": "/docker-entrypoint-initdb.d/dump.js" - } - # Tell the docker daemon to start MongoDB - LOGGER.debug("Starting MongoDB...") - container = docker_client.containers.run( - DOCKER_IMAGE, - environment=DOCKER_ENV, - detach=True, - auto_remove=True, - volumes=volumes, - ) - # Sometimes very bad things happen, this ensures that the container will - # be cleaned up on process exit no matter what - cleanup = mkcleanup(docker_client, container) - try: - # Get the IP from the docker daemon - inspect = docker_client.api.inspect_container(container.id) - container_ip = inspect["NetworkSettings"]["IPAddress"] - # Wait until MongoDB reports it's ready for connections - container_start_time = time.clock_gettime(time.CLOCK_MONOTONIC_RAW) - ready = False - for line in container.logs(stream=True, follow=True): - now_time = time.clock_gettime(time.CLOCK_MONOTONIC_RAW) - LOGGER.debug( - "MongoDB log (%0.02f seconds): %s", - (now_time - container_start_time), - line.decode(errors="ignore").strip(), - ) - if not line.startswith(b"{"): - continue - log_entry = json.loads(line.decode()) - if ( - log_entry["c"] == "NETWORK" - and log_entry["ctx"] == "listener" - and log_entry["msg"] == "Waiting for connections" - ): - ready = True - break - if not ready: - raise MongoDBFailedToStart( - 'Never saw "Waiting for connections"' - ) - # Ensure that we can make a connection - start_time = time.clock_gettime(time.CLOCK_MONOTONIC_RAW) - max_timeout = float(os.getenv("MONGODB_START_TIMEOUT", "600")) - LOGGER.debug( - "Attempting to connect to MongoDB: Timeout of %d seconds", - max_timeout, - ) - while not check_connection(container_ip, DEFAULT_PORT): - end_time = time.clock_gettime(time.CLOCK_MONOTONIC_RAW) - if (end_time - start_time) >= max_timeout: - raise MongoDBFailedToStart("Timed out waiting for MongoDB") - end_time = time.clock_gettime(time.CLOCK_MONOTONIC_RAW) - LOGGER.debug( - "MongoDB running: Took %0.02f seconds", - end_time - container_start_time, - ) - # Yield IP of container to caller - yield container_ip - finally: - cleanup() diff --git a/source/mongodb/dffml_source_mongodb/version.py b/source/mongodb/dffml_source_mongodb/version.py deleted file mode 100644 index 901e5110b2..0000000000 --- a/source/mongodb/dffml_source_mongodb/version.py +++ /dev/null @@ -1 +0,0 @@ -VERSION = "0.0.1" diff --git a/source/mongodb/entry_points.txt b/source/mongodb/entry_points.txt deleted file mode 100644 index bda54d210d..0000000000 --- a/source/mongodb/entry_points.txt +++ /dev/null @@ -1,2 +0,0 @@ -[dffml.source] -mongodb = dffml_source_mongodb.source:MongoDBSource diff --git a/source/mongodb/pyproject.toml b/source/mongodb/pyproject.toml deleted file mode 100644 index 17b1235941..0000000000 --- a/source/mongodb/pyproject.toml +++ /dev/null @@ -1,22 +0,0 @@ -requires = ["setuptools>=44", "wheel", "setuptools_scm[toml]>=3.4.3"] -build-backend = "setuptools.build_meta" - -[tool.setuptools_scm] - -[tool.black] -exclude = ''' -( - /( - \.eggs # exclude a few common directories in the - | \.git # root of the project - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | 
_build - | buck-out - | build - | dist - ) -) -''' diff --git a/source/mongodb/setup.cfg b/source/mongodb/setup.cfg deleted file mode 100644 index 609ea01a63..0000000000 --- a/source/mongodb/setup.cfg +++ /dev/null @@ -1,41 +0,0 @@ -[metadata] -name = dffml-source-mongodb -description = DFFML source dffml-source-mongodb -version = attr: dffml_source_mongodb.version.VERSION -long_description = file: README.rst -author = John Andersen -author_email = johnandersenpdx@gmail.com -maintainer = John Andersen -maintainer_email = johnandersenpdx@gmail.com -url = https://github.com/dffml/dffml-source-mongodb -license = MIT -keywords = dffml -classifiers = - Development Status :: 3 - Alpha - Intended Audience :: Developers - License :: OSI Approved :: MIT License - Natural Language :: English - Operating System :: OS Independent - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.7 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - -[options] -zip_safe = False -include_package_data = True -packages = find: -entry_points = file: entry_points.txt -install_requires = - dffml>=0.4.0 - motor>=2.5.1 - -[options.extras_require] -dev = - coverage - codecov - sphinx - twine - black==19.10b0 - importlib_metadata>=4.8.1;python_version<"3.8" - docker>=4.0.2 diff --git a/source/mongodb/setup.py b/source/mongodb/setup.py deleted file mode 100644 index 17542f4d0e..0000000000 --- a/source/mongodb/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -import sys -import site -import setuptools - -# See https://github.com/pypa/pip/issues/7953 -site.ENABLE_USER_SITE = "--user" in sys.argv[1:] - -setuptools.setup() diff --git a/source/mongodb/tests/__init__.py b/source/mongodb/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/source/mongodb/tests/test_source.py b/source/mongodb/tests/test_source.py deleted file mode 100644 index c39f17f395..0000000000 --- a/source/mongodb/tests/test_source.py +++ /dev/null @@ -1,56 +0,0 @@ -import socket -import inspect -import contextlib -from unittest.mock import patch - -from dffml.util.testing.source import SourceTest -from dffml.util.asynctestcase import AsyncTestCase - -from dffml_source_mongodb.source import MongoDBSourceConfig, MongoDBSource - -from dffml_source_mongodb.util.mongodb_docker import ( - mongodb, - DOCKER_ENV, - DEFAULT_PORT, -) - - -class TestMongoDBSource(AsyncTestCase, SourceTest): - - JS_SETUP = """""" - - @classmethod - def setUpClass(cls): - super().setUpClass() - cls._exit_stack = contextlib.ExitStack() - cls.exit_stack = cls._exit_stack.__enter__() - cls.container_ip = cls.exit_stack.enter_context(mongodb()) - cls.source_config = MongoDBSourceConfig( - uri=f'mongodb://{DOCKER_ENV["MONGO_INITDB_ROOT_USERNAME"]}:{DOCKER_ENV["MONGO_INITDB_ROOT_PASSWORD"]}@mongodb.unittest:{DEFAULT_PORT}/', - db="mydb", - collection="mycollection", - ) - # Make it so that when the client tries to connect to mongodb.unittest the - # address it gets back is the one for the container - cls.exit_stack.enter_context( - patch( - "socket.getaddrinfo", - return_value=[ - ( - socket.AF_INET, - socket.SOCK_STREAM, - 6, - "", - (cls.container_ip, DEFAULT_PORT), - ) - ], - ) - ) - - @classmethod - def tearDownClass(cls): - super().tearDownClass() - cls._exit_stack.__exit__(None, None, None) - - async def setUpSource(self): - return MongoDBSource(self.source_config)
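The test above points the driver at ``mongodb.unittest`` and then patches ``socket.getaddrinfo`` so that the hostname resolves to the container's IP, sidestepping DNS entirely. The trick generalizes to any client that resolves hostnames through ``getaddrinfo``; a minimal sketch with illustrative hostname and address:

.. code-block:: python

    # Redirect a hostname to a known IP for the duration of a test by
    # patching socket.getaddrinfo, as the MongoDB source tests above do.
    # The hostname and IP below are illustrative.
    import socket
    from unittest.mock import patch

    CONTAINER_IP, PORT = "172.17.0.2", 27017

    with patch(
        "socket.getaddrinfo",
        return_value=[
            (socket.AF_INET, socket.SOCK_STREAM, 6, "", (CONTAINER_IP, PORT))
        ],
    ):
        # Any resolver that goes through getaddrinfo now gets CONTAINER_IP,
        # no matter which hostname was requested.
        family, type_, proto, canonname, sockaddr = socket.getaddrinfo(
            "mongodb.unittest", PORT
        )[0]
        assert sockaddr == (CONTAINER_IP, PORT)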