Skip to content

Commit

Permalink
Merge branch 'PMPY-2090' into 'integration'
Browse files Browse the repository at this point in the history
PMPY-2090 Consistency checks when importing/exporting OCELs

Closes PMPY-2090

See merge request process-mining/pm4py/pm4py-core!988
  • Loading branch information
fit-alessandro-berti committed Apr 22, 2023
2 parents 4d3d97e + d9b3433 commit 45dcc3d
Show file tree
Hide file tree
Showing 14 changed files with 91 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
### Added

### Changed
* 047452b36bf0df0fb6f59caa321296c4dc544a8f
* consistency (IDs, types) checks when importing/exporting OCELs

### Deprecated

Expand Down
3 changes: 3 additions & 0 deletions pm4py/objects/ocel/exporter/csv/variants/pandas.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pm4py.objects.ocel.obj import OCEL
from typing import Optional, Dict, Any
from pm4py.objects.ocel.util import ocel_consistency


def apply(ocel: OCEL, output_path: str, objects_path=None, parameters: Optional[Dict[Any, Any]] = None):
Expand All @@ -20,6 +21,8 @@ def apply(ocel: OCEL, output_path: str, objects_path=None, parameters: Optional[
if parameters is None:
parameters = {}

ocel = ocel_consistency.apply(ocel, parameters=parameters)

ocel.get_extended_table().to_csv(output_path, index=False, na_rep="")

if objects_path is not None:
Expand Down
3 changes: 3 additions & 0 deletions pm4py/objects/ocel/exporter/jsonocel/variants/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pm4py.objects.ocel.util import attributes_names
from pm4py.objects.ocel.util import related_objects
from pm4py.util import exec_utils
from pm4py.objects.ocel.util import ocel_consistency


class Parameters(Enum):
Expand Down Expand Up @@ -41,6 +42,8 @@ def apply(ocel: OCEL, target_path: str, parameters: Optional[Dict[Any, Any]] = N
object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters, ocel.object_id_column)
object_type = exec_utils.get_param_value(Parameters.OBJECT_TYPE, parameters, ocel.object_type_column)

ocel = ocel_consistency.apply(ocel, parameters=parameters)

all_object_types = list(ocel.objects[object_type].unique())
all_attribute_names = attributes_names.get_attribute_names(ocel, parameters=parameters)
global_event_items = ocel.globals[
Expand Down
6 changes: 6 additions & 0 deletions pm4py/objects/ocel/exporter/sqlite/variants/ocel20.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pm4py.objects.ocel.util import names_stripping
from enum import Enum
from pm4py.util import exec_utils
from pm4py.objects.ocel.util import ocel_consistency


class Parameters(Enum):
Expand All @@ -23,6 +24,8 @@ def apply(ocel: OCEL, file_path: str, parameters: Optional[Dict[Any, Any]] = Non
if os.path.exists(file_path):
os.remove(file_path)

ocel = ocel_consistency.apply(ocel, parameters=parameters)

event_id = ocel.event_id_column
event_activity = ocel.event_activity
event_timestamp = ocel.event_timestamp
Expand Down Expand Up @@ -67,6 +70,7 @@ def apply(ocel: OCEL, file_path: str, parameters: Optional[Dict[Any, Any]] = Non
df = ocel.events[ocel.events[event_activity] == act].dropna(how="all", axis="columns")
del df[event_activity]
df = df.rename(columns={event_id: "ocel_id", event_timestamp: "ocel_time"})
df["ocel_id"] = df["ocel_id"].astype("string")

act_red = names_stripping.apply(act) if enable_names_stripping else act

Expand All @@ -85,6 +89,8 @@ def apply(ocel: OCEL, file_path: str, parameters: Optional[Dict[Any, Any]] = Non
df2 = df2.rename(columns={object_id: "ocel_id", event_timestamp: "ocel_time", changed_field: "ocel_changed_field"})
df = pd.concat([df, df2], axis=0)

df["ocel_id"] = df["ocel_id"].astype("string")

ot_red = names_stripping.apply(ot) if enable_names_stripping else ot

df.to_sql("object_"+ot_red, conn, index=False)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from pm4py.objects.ocel.obj import OCEL
from typing import Dict, Any, Optional
from pm4py.objects.ocel.util import ocel_consistency
import os


def apply(ocel: OCEL, target_path: str, parameters: Optional[Dict[Any, Any]] = None):
Expand All @@ -20,6 +22,11 @@ def apply(ocel: OCEL, target_path: str, parameters: Optional[Dict[Any, Any]] = N

import sqlite3

if os.path.exists(target_path):
os.remove(target_path)

ocel = ocel_consistency.apply(ocel, parameters=parameters)

conn = sqlite3.connect(target_path)

ocel.events.to_sql("EVENTS", conn, index=False)
Expand Down
3 changes: 3 additions & 0 deletions pm4py/objects/ocel/exporter/xmlocel/variants/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pm4py.objects.ocel.util import attributes_names
from pm4py.objects.ocel.util import related_objects
from pm4py.util import exec_utils
from pm4py.objects.ocel.util import ocel_consistency


class Parameters(Enum):
Expand Down Expand Up @@ -52,6 +53,8 @@ def apply(ocel: OCEL, target_path: str, parameters: Optional[Dict[Any, Any]] = N
object_id = exec_utils.get_param_value(Parameters.OBJECT_ID, parameters, ocel.object_id_column)
object_type = exec_utils.get_param_value(Parameters.OBJECT_TYPE, parameters, ocel.object_type_column)

ocel = ocel_consistency.apply(ocel, parameters=parameters)

all_object_types = list(ocel.objects[object_type].unique())
all_attribute_names = attributes_names.get_attribute_names(ocel, parameters=parameters)
global_event_items = ocel.globals[
Expand Down
3 changes: 3 additions & 0 deletions pm4py/objects/ocel/exporter/xmlocel/variants/ocel20.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pm4py.objects.ocel.util import attributes_names
from pm4py.objects.ocel.util import related_objects
from pm4py.util import exec_utils
from pm4py.objects.ocel.util import ocel_consistency


class Parameters(Enum):
Expand All @@ -34,6 +35,8 @@ def apply(ocel: OCEL, target_path: str, parameters: Optional[Dict[Any, Any]] = N
qualifier_column = exec_utils.get_param_value(Parameters.QUALIFIER, parameters, ocel.qualifier)
changed_field_column = exec_utils.get_param_value(Parameters.CHANGED_FIELD, parameters, ocel.changed_field)

ocel = ocel_consistency.apply(ocel, parameters=parameters)

ets = {k: {x: str(v[x].dtype) for x in v.dropna(axis="columns", how="all").columns if not x.startswith("ocel:")} for k, v in ocel.events.groupby(event_activity_column)}
ots = {k: {x: str(v[x].dtype) for x in v.dropna(axis="columns", how="all").columns if not x.startswith("ocel:")} for k, v in ocel.objects.groupby(object_type_column)}
ots2 = {k: {x: str(v[x].dtype) for x in v.dropna(axis="columns", how="all").columns if not x.startswith("ocel:")} for k, v in ocel.object_changes.groupby(object_type_column)}
Expand Down
6 changes: 5 additions & 1 deletion pm4py/objects/ocel/importer/csv/variants/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from pm4py.objects.ocel.obj import OCEL
from pm4py.objects.ocel.util import extended_table
from pm4py.objects.ocel.util import ocel_consistency


def apply(file_path: str, objects_path: str = None, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
Expand Down Expand Up @@ -33,4 +34,7 @@ def apply(file_path: str, objects_path: str = None, parameters: Optional[Dict[An
if objects_path is not None:
objects = pd.read_csv(objects_path, index_col=False)

return extended_table.get_ocel_from_extended_table(table, objects, parameters=parameters)
ocel = extended_table.get_ocel_from_extended_table(table, objects, parameters=parameters)
ocel = ocel_consistency.apply(ocel, parameters=parameters)

return ocel
4 changes: 3 additions & 1 deletion pm4py/objects/ocel/importer/jsonocel/variants/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pm4py.objects.ocel import constants
from pm4py.objects.ocel.obj import OCEL
from pm4py.objects.ocel.util import filtering_utils
from pm4py.objects.ocel.util import ocel_consistency
from pm4py.util import exec_utils, dt_parsing


Expand Down Expand Up @@ -101,6 +102,7 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
globals[constants.OCEL_GLOBAL_OBJECT] = ocel[constants.OCEL_GLOBAL_OBJECT]

ocel = OCEL(events=events, objects=objects, relations=relations, globals=globals, parameters=parameters)
ocel = filtering_utils.propagate_relations_filtering(ocel)
ocel = ocel_consistency.apply(ocel, parameters=parameters)
ocel = filtering_utils.propagate_relations_filtering(ocel, parameters=parameters)

return ocel
4 changes: 4 additions & 0 deletions pm4py/objects/ocel/importer/sqlite/variants/ocel20.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from pm4py.objects.ocel.obj import OCEL
from pm4py.util import exec_utils
import pandas as pd
from pm4py.objects.ocel.util import ocel_consistency
from pm4py.objects.ocel.util import filtering_utils


class Parameters(Enum):
Expand Down Expand Up @@ -120,5 +122,7 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
del object_changes[internal_index]

ocel = OCEL(events=event_types_coll, objects=objects, relations=E2O, object_changes=object_changes, o2o=O2O, parameters=parameters)
ocel = ocel_consistency.apply(ocel, parameters=parameters)
ocel = filtering_utils.propagate_relations_filtering(ocel, parameters=parameters)

return ocel
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from pm4py.objects.ocel.obj import OCEL
from typing import Dict, Any, Optional
import pandas as pd
from pm4py.objects.ocel.util import ocel_consistency
from pm4py.objects.ocel.util import filtering_utils


def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
Expand Down Expand Up @@ -30,4 +32,8 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
objects = pd.read_sql("SELECT * FROM OBJECTS", conn)
relations = pd.read_sql("SELECT * FROM RELATIONS", conn)

return OCEL(events=events, objects=objects, relations=relations, parameters=parameters)
ocel = OCEL(events=events, objects=objects, relations=relations, parameters=parameters)
ocel = ocel_consistency.apply(ocel, parameters=parameters)
ocel = filtering_utils.propagate_relations_filtering(ocel, parameters=parameters)

return ocel
2 changes: 2 additions & 0 deletions pm4py/objects/ocel/importer/xmlocel/variants/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pm4py.objects.ocel.obj import OCEL
from pm4py.objects.ocel.util import filtering_utils
from pm4py.util import exec_utils, dt_parsing
from pm4py.objects.ocel.util import ocel_consistency


class Parameters(Enum):
Expand Down Expand Up @@ -170,6 +171,7 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
globals = {}

ocel = OCEL(events=events, objects=objects, relations=relations, globals=globals, o2o=o2o, object_changes=object_changes, parameters=parameters)
ocel = ocel_consistency.apply(ocel, parameters=parameters)
ocel = filtering_utils.propagate_relations_filtering(ocel)

return ocel
2 changes: 2 additions & 0 deletions pm4py/objects/ocel/importer/xmlocel/variants/ocel20.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pm4py.objects.ocel.obj import OCEL
from pm4py.objects.ocel.util import filtering_utils
from pm4py.util import exec_utils, dt_parsing
from pm4py.objects.ocel.util import ocel_consistency


class Parameters(Enum):
Expand Down Expand Up @@ -166,6 +167,7 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
del relations_list[internal_index_column]

ocel = OCEL(events=events_list, objects=objects_list, relations=relations_list, globals=globals, o2o=o2o_list, object_changes=object_changes_list, parameters=parameters)
ocel = ocel_consistency.apply(ocel, parameters=parameters)
ocel = filtering_utils.propagate_relations_filtering(ocel)

return ocel
41 changes: 41 additions & 0 deletions pm4py/objects/ocel/util/ocel_consistency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from pm4py.objects.ocel.obj import OCEL
from typing import Optional, Dict, Any


def apply(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Forces the consistency of the OCEL, ensuring that the event/object identifier,
    event/object type are of type string and non-empty.

    For every table of the OCEL (events, objects, relations, o2o, e2e,
    object_changes), the rows having a missing or empty value in one of the
    identifier/type columns are removed, the columns are converted to the
    pandas "string" dtype, and the cleaned table is stored back on the OCEL.

    Parameters
    --------------
    ocel
        OCEL
    parameters
        Possible parameters of the method (currently not used)

    Returns
    --------------
    ocel
        Consistent OCEL (the same object received as input, with its tables replaced
        by the cleaned ones)
    """
    if parameters is None:
        parameters = {}

    # NOTE: the column names are the default OCEL ones; a log using customized
    # column names is not covered by these checks.
    fields = {
        "events": ["ocel:eid", "ocel:activity"],
        "objects": ["ocel:oid", "ocel:type"],
        "relations": ["ocel:eid", "ocel:oid", "ocel:activity", "ocel:type"],
        "o2o": ["ocel:oid", "ocel:oid_2"],
        "e2e": ["ocel:eid", "ocel:eid_2"],
        "object_changes": ["ocel:oid"]
    }

    for tab, columns in fields.items():
        df = getattr(ocel, tab)
        for col in columns:
            # rows without a value cannot be given a meaningful string identifier
            df = df.dropna(subset=[col], how="any")
            # assign() returns a new dataframe, so a filtered slice is never modified in place
            df = df.assign(**{col: df[col].astype("string")})
            # empty strings are not valid identifiers/types either
            df = df[df[col].str.len() > 0]
        # write the cleaned table back: without this the filtering above would be
        # lost, since it only rebinds the local variable
        setattr(ocel, tab, df)

    return ocel

0 comments on commit 45dcc3d

Please sign in to comment.