Skip to content

Commit

Permalink
Uploading major changes for 0.0.74
Browse files Browse the repository at this point in the history
  • Loading branch information
Renan Souza committed Aug 28, 2020
1 parent 87b92a1 commit a8f1779
Show file tree
Hide file tree
Showing 13 changed files with 685 additions and 18 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
# Multi-Data Lineage System
# ProvLake Lib

IBM Research Multi-Data Lineage System is a provenance data management system capable of capturing, integrating, and querying provenance data generated across multiple, distributed services, programs, databases, and computational workflows.
This is part of ProvLake Project. See [ibm.biz/provlake](http://ibm.biz/provlake) for more details.

**For more information on the project, including participants and publications, please see [ibm.biz/provlake](http://ibm.biz/provlake).**
### A Python lib to capture multiworkflow provenance data from Python Scripts

This repository contains the Python library that captures provenance data in Python applications and sends it to the Multi-Data Lineage Manager, which is responsible for integrating the data in a provenance database stored as a knowledge graph (semantic database),
then allowing users to run queries over the data.
Use this library for code instrumentation to collect provenance data of function calls in a script. Input arguments or output values from functions can come from distributed data storages, including file systems and database systems.

It supports Python>=3.6
Python 3.6


### Very simple utilization example
Expand All @@ -17,7 +16,7 @@ from provlake.prov_lake import ProvLake
from provlake.prov_task import ProvTask

"""
Very simple example to show how this library is used to instrument a simple python script for provenance data management.
Very simple example to show how ProvLake is used to instrument a simple python script for provenance data management.
"""


Expand All @@ -41,4 +40,5 @@ with ProvTask(prov, "factorial_number", in_args) as prov_task:
prov_task.output(out_args)

prov.close()

```
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@


setup(name='provlake',
version='0.0.72',
version='0.0.74',
description='A Python lib to capture multiworkflow provenance data from Python Scripts',
url='http://ibm.biz/provlake',
url='https://github.ibm.com/provlake/ProvLakePy',
author='IBM Research',
license='Apache 2.0',
license='Internal use only / IBM only',
install_requires=requires,
package_dir={'': 'src'},
packages=find_packages(where='src'),
Expand Down
7 changes: 3 additions & 4 deletions src/provlake/prov_lake.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self,
prospective_provenance_dict: dict = None,
storage_configuration_path: str=None,
storage_configuration_dict: dict=None,
dataflow_name: str=None,
workflow_name: str=None,
context: str=None,
insert_prospective=False,
with_validation: bool=False,
Expand Down Expand Up @@ -71,10 +71,10 @@ def __init__(self,
self.last_task_id = 0
self.wf_start_time = time()

self.df_name = dataflow_name or self.df_structure.get("dataflow_name", "NI")
self.df_name = workflow_name or self.df_structure.get("dataflow_name", "NI")

self.tasks = dict()
self.wf_execution = "wfexec_" + str(self.wf_start_time)
self.wf_execution = self.wf_start_time
self.wf_obj = {
"wf_execution": self.wf_execution,
"startTime": self.wf_start_time
Expand All @@ -96,7 +96,6 @@ def __init__(self,
if log_level == "NONE":
log_level = "ERROR"
log_lvl = getattr(logging, log_level.upper())
#logging.getLogger().setLevel(log_lvl)
logger.setLevel(log_lvl)

self.prov_persister = _ProvPersister(self.df_name, service_url=service_url, context=context, bag_size=bag_size,
Expand Down
3 changes: 2 additions & 1 deletion src/provlake/stateless/cycle.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import traceback
import logging
from time import time
from provlake import prov_utils
from provlake.utils import prov_utils

logger = logging.getLogger('PROV')


Expand Down
3 changes: 2 additions & 1 deletion src/provlake/stateless/task.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import traceback
import logging
from time import time
from provlake import prov_utils
from provlake.utils import prov_utils

logger = logging.getLogger('PROV')


Expand Down
2 changes: 1 addition & 1 deletion src/provlake/stateless/workflow.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from time import time
from provlake import prov_utils
from provlake.utils import prov_utils
import os


Expand Down
Empty file added src/provlake/utils/__init__.py
Empty file.
36 changes: 36 additions & 0 deletions src/provlake/utils/args_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
def get_dict(_dict: dict) -> dict:
    """Wrap a dictionary in a ``{"type": "dict", "values": ...}`` envelope.

    ``None`` is passed through as ``None`` and an empty input yields a new
    empty dict (no envelope). Otherwise the input object itself, not a copy,
    is stored under the ``"values"`` key.
    """
    if _dict is not None:
        is_empty = len(_dict) == 0
        return {} if is_empty else dict(type="dict", values=_dict)
    return None

def get_list(_list: list) -> dict:
    """Wrap a list in a ``{"type": "list", "values": ...}`` envelope.

    ``None`` is passed through as ``None`` and an empty input yields a new
    empty list (no envelope). Otherwise the input object itself, not a copy,
    is stored under the ``"values"`` key.
    """
    if _list is None:
        return None
    size = len(_list)
    if size != 0:
        envelope = {"type": "list"}
        envelope["values"] = _list
        return envelope
    return []

def get_recursive_dicts(_dict: dict) -> dict:
    """Recursively wrap a dictionary and every nested dictionary value.

    Each dictionary encountered is replaced by an envelope of the form
    ``{"type": "dict", "values": {...}}``; values that are not dictionaries
    are kept unchanged. The input is not mutated: a new ``values`` mapping is
    built at every level.

    Args:
        _dict: The (possibly nested) dictionary to wrap, or ``None``.

    Returns:
        ``None`` when ``_dict`` is ``None``, an empty dict when ``_dict`` is
        empty (this also applies to empty nested dictionaries), otherwise the
        envelope described above.
    """
    if _dict is None:
        return None
    if not _dict:
        return {}
    # isinstance (rather than an exact type comparison) so that dict
    # subclasses such as OrderedDict or defaultdict are wrapped as well.
    values = {
        key: get_recursive_dicts(value) if isinstance(value, dict) else value
        for key, value in _dict.items()
    }
    return {
        "type": "dict",
        "values": values
    }
Loading

0 comments on commit a8f1779

Please sign in to comment.