diff --git a/pm4py/objects/conversion/log/variants/to_nx.py b/pm4py/objects/conversion/log/variants/to_nx.py
index 171336437..d7a2d18c7 100644
--- a/pm4py/objects/conversion/log/variants/to_nx.py
+++ b/pm4py/objects/conversion/log/variants/to_nx.py
@@ -63,6 +63,8 @@ def apply(log_obj: Union[EventLog, EventStream, pd.DataFrame], parameters: Optio
     case_id_attribute = exec_utils.get_param_value(Parameters.CASE_ID_ATTRIBUTE, parameters, "concept:name")
     other_case_attributes_as_nodes = exec_utils.get_param_value(Parameters.OTHER_CASE_ATTRIBUTES_AS_NODES, parameters, None)
     event_attributes_as_nodes = exec_utils.get_param_value(Parameters.EVENT_ATTRIBUTES_AS_NODES, parameters, None)
+
+    parameters["stream_postprocessing"] = True
     log_obj = to_event_log.apply(log_obj, parameters=parameters)
 
     if event_attributes_as_nodes is None:
diff --git a/pm4py/objects/conversion/ocel/variants/ocel_to_nx.py b/pm4py/objects/conversion/ocel/variants/ocel_to_nx.py
index e8637ac43..bf9ef6bc6 100644
--- a/pm4py/objects/conversion/ocel/variants/ocel_to_nx.py
+++ b/pm4py/objects/conversion/ocel/variants/ocel_to_nx.py
@@ -20,6 +20,8 @@
 import networkx as nx
 from typing import Optional, Dict, Any
 from pm4py.util import exec_utils
+from pm4py.objects.conversion.log.variants import to_event_stream
+from copy import copy
 
 
 class Parameters(Enum):
@@ -57,13 +59,15 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> nx.DiGraph
     G = nx.DiGraph()
 
     stream = ocel.events.to_dict("records")
+    stream = to_event_stream.__postprocess_stream(stream)
     for ev in stream:
-        ev["type"] = "event"
+        ev["type"] = "EVENT"
         G.add_node(ev[ocel.event_id_column], attr=ev)
 
     stream = ocel.objects.to_dict("records")
+    stream = to_event_stream.__postprocess_stream(stream)
     for obj in stream:
-        obj["type"] = "object"
+        obj["type"] = "OBJECT"
         G.add_node(obj[ocel.object_id_column], attr=obj)
 
     rel_cols = {ocel.event_id_column, ocel.object_id_column, ocel.qualifier}
@@ -96,7 +100,9 @@ def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> nx.DiGraph
         object_changes = ocel.object_changes.to_dict("records")
         for i in range(len(object_changes)):
             change_id = "@@change##%d" % i
-            G.add_node(change_id, attr=object_changes[i])
-            G.add_edge(object_changes[i][ocel.object_id_column], change_id)
+            change_dict = copy(object_changes[i])
+            change_dict["type"] = "CHANGE"
+            G.add_node(change_id, attr=change_dict)
+            G.add_edge(change_id, object_changes[i][ocel.object_id_column])
 
     return G
diff --git a/pm4py/util/pandas_utils.py b/pm4py/util/pandas_utils.py
index 4811638a6..59695c863 100644
--- a/pm4py/util/pandas_utils.py
+++ b/pm4py/util/pandas_utils.py
@@ -258,7 +258,7 @@ def check_is_pandas_dataframe(log):
     return type(log) is pd.DataFrame
 
 
-def check_pandas_dataframe_columns(df, activity_key=None, case_id_key=None, timestamp_key=None):
+def check_pandas_dataframe_columns(df, activity_key=None, case_id_key=None, timestamp_key=None, start_timestamp_key=None):
     """
     Checks if the dataframe contains all the required columns.
     If not, raise an exception
@@ -310,6 +310,16 @@ def check_pandas_dataframe_columns(df, activity_key=None, case_id_key=None, time
         if df[timestamp_key].isnull().values.any():
             raise Exception("the timestamp column should not contain any empty value.")
 
+    if start_timestamp_key is not None:
+        if start_timestamp_key not in df.columns:
+            raise Exception("the specified start timestamp column is not contained in the dataframe. Available columns: "+str(sorted(list(df.columns))))
+
+        if start_timestamp_key not in timest_columns:
+            raise Exception("the start timestamp column should be of type datetime. Use the function pandas.to_datetime")
+
+        if df[start_timestamp_key].isnull().values.any():
+            raise Exception("the start timestamp column should not contain any empty value.")
+
     """if len(set(df.columns).intersection(
             set([constants.CASE_CONCEPT_NAME, xes_constants.DEFAULT_NAME_KEY,
                  xes_constants.DEFAULT_TIMESTAMP_KEY]))) < 3:
diff --git a/pm4py/utils.py b/pm4py/utils.py
index be84fc07a..87eb39859 100644
--- a/pm4py/utils.py
+++ b/pm4py/utils.py
@@ -293,13 +293,14 @@ def deserialize(ser_obj: Tuple[str, bytes]) -> Any:
         return dfg_importer.deserialize(ser_obj[1])
 
 
-def get_properties(log, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name", resource_key: str = "org:resource", group_key: Optional[str] = None, **kwargs):
+def get_properties(log, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name", resource_key: str = "org:resource", group_key: Optional[str] = None, start_timestamp_key: Optional[str] = None, **kwargs):
     """
     Gets the properties from a log object
 
     :param log: Log object
     :param activity_key: attribute to be used for the activity
     :param timestamp_key: attribute to be used for the timestamp
+    :param start_timestamp_key: (optional) attribute to be used for the start timestamp
     :param case_id_key: attribute to be used as case identifier
     :param resource_key: (if provided) attribute to be used as resource
     :param group_key: (if provided) attribute to be used as group identifier
@@ -320,6 +321,9 @@ def get_properties(log, activity_key: str = "concept:name", timestamp_key: str =
     if timestamp_key is not None:
         parameters[constants.PARAMETER_CONSTANT_TIMESTAMP_KEY] = timestamp_key
 
+    if start_timestamp_key is not None:
+        parameters[constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY] = start_timestamp_key
+
     if case_id_key is not None:
         parameters[constants.PARAMETER_CONSTANT_CASEID_KEY] = case_id_key
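
As a quick sanity check of the retyped OCEL graph, here is a minimal sketch; the log path "example.jsonocel" is a placeholder for any object-centric event log, and the loop simply prints the new uppercase discriminators:

    import pm4py
    from pm4py.objects.conversion.ocel.variants import ocel_to_nx

    # "example.jsonocel" is a placeholder path for any object-centric event log
    ocel = pm4py.read_ocel("example.jsonocel")
    G = ocel_to_nx.apply(ocel)

    # after this patch, every node built from the events/objects/changes tables
    # carries an uppercase "type" discriminator: EVENT, OBJECT, or CHANGE
    for node, data in G.nodes(data=True):
        print(node, data.get("attr", {}).get("type"))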
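
Similarly, a sketch of the new start-timestamp plumbing; the "start_timestamp" column name and the toy values are illustrative, not part of the patch:

    import pandas as pd
    from pm4py.util import pandas_utils
    from pm4py.utils import get_properties

    # toy single-case dataframe; "start_timestamp" is a hypothetical column name
    df = pd.DataFrame({
        "case:concept:name": ["1", "1"],
        "concept:name": ["register", "approve"],
        "start_timestamp": pd.to_datetime(["2024-01-01 08:00", "2024-01-01 09:00"]),
        "time:timestamp": pd.to_datetime(["2024-01-01 08:30", "2024-01-01 09:45"]),
    })

    # raises if the column is missing, not datetime-typed, or contains empty values
    pandas_utils.check_pandas_dataframe_columns(df, start_timestamp_key="start_timestamp")

    # the parameters dictionary now also carries PARAMETER_CONSTANT_START_TIMESTAMP_KEY
    parameters = get_properties(df, start_timestamp_key="start_timestamp")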