Skip to content

Commit

Permalink
Merge branch 'integration' into 147-refactoring-log_to_fea_descr
Browse files Browse the repository at this point in the history
  • Loading branch information
fit-alessandro-berti committed Oct 6, 2023
2 parents 84af656 + f4e53a8 commit 034fe7f
Show file tree
Hide file tree
Showing 48 changed files with 236 additions and 285 deletions.
38 changes: 38 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,43 @@
# Changelog of pm4py

## pm4py 2.7.8 (2023.10.06)

### Added

### Changed
* 634b6a5ac1b40963baa76a42a10c3c22176aaf84
* f6993293d665e2f7b69c27ce0f09d2df4e889b0b
* f0240670292086cb3b6fe523b1646dcfa4c71ddc
    * Refactoring OCEL import/export
* c1379120480539f5578a52ce6d76effb4819b3c6
* centralized enabling/disabling of the TQDM progress bar + disabling the progress bar in tests
* 08c2c16d17d2cbe26224662032a298f6b0a409a9
* avoiding the necessity of re-creating setup.py when new packages are added to pm4py
* a7dc86f7fd821b5dd229ff404b5afa3b5ad919b4
* disable IM fallthroughs in the simplified interface

### Deprecated

### Fixed
* 063a6d64bae61f1b54444e0b34ec0926b504aa34
* properly closing file objects in different pm4py importers/exporters (XES, PNML, PTML, ...)
* 35f13b65a0523f889748679fbe90cf2d041e1038
* fixing XES importing warnings in obtaining the resulting pd.DataFrame
* ef548ef18f514ad6ad0a32a104f380b322ab72e7
* fixing test/examples execution
* d1b39bde1b14f160c0fff42bdc6b172bb0ae760e
* fix Petri net serialization
* e51c5e1e084a7fd7d13cb8d1381f868435762cca
* fixing TBR diagnostics when the methods are called on pd.DataFrame

### Removed

### Other
* 49a472d002890b35e3f59ef93fd75f2e35455715
* storing stable pm4py Python requirements for the old Python 3.8

---


## pm4py 2.7.7 (2023.09.22)

Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ if __name__ == "__main__":
pm4py can be installed on Python 3.9.x / 3.10.x / 3.11.x / 3.12.x by invoking:
*pip install -U pm4py*

pm4py also runs on older Python environments with different requirement sets, including:
- Python 3.8 (3.8.10): third_party/old_python_deps/requirements_py38.txt

## Requirements
pm4py depends on some other Python packages, with different levels of importance:
* *Essential requirements*: numpy, pandas, deprecation, networkx
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# The short X.Y version
version = '2.7'
# The full version, including alpha/beta/rc tags
release = '2.7.7'
release = '2.7.8'

# -- General configuration ---------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion examples/activities_to_alphabet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def execute_script():
dataframe = pm4py.read_xes("../tests/input_data/running-example.xes")
dataframe = pm4py.read_xes("../tests/input_data/running-example.xes", return_legacy_log_object=False)
renamed_dataframe = activities_to_alphabet.apply(dataframe, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"})
print(renamed_dataframe)

Expand Down
2 changes: 1 addition & 1 deletion examples/cost_based_dfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


def execute_script():
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "roadtraffic100traces.xes"))
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "roadtraffic100traces.xes"), return_legacy_log_object=False)
cost_based_dfg = df_statistics.get_dfg_graph(log, measure="cost", cost_attribute="amount")
gviz = dfg_visualizer.apply(cost_based_dfg, variant=dfg_visualizer.Variants.COST, parameters={"format": "svg"})
dfg_visualizer.view(gviz)
Expand Down
2 changes: 1 addition & 1 deletion examples/inductive_miner_dfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def execute_script():
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"))
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"), return_legacy_log_object=False)
typed_dfg_1 = pm4py.discover_dfg_typed(log)
# in alternative ...
dfg, sa, ea = pm4py.discover_dfg(log)
Expand Down
2 changes: 1 addition & 1 deletion examples/inductive_miner_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def execute_script():
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"))
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"), return_legacy_log_object=False)
variants = pm4py.get_variants(log)
uvcl = UVCL()
for var, occ in variants.items():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def apply_log(log, list_nets, parameters=None):
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
icache = exec_utils.get_param_value(Parameters.ICACHE, parameters, dict())
mcache = exec_utils.get_param_value(Parameters.MCACHE, parameters, dict())

Expand Down
4 changes: 2 additions & 2 deletions pm4py/algo/conformance/alignments/petri_net/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from typing import Optional, Dict, Any, Union, Tuple
from pm4py.objects.log.obj import EventLog, EventStream, Trace
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.util import typing
from pm4py.util import typing, constants
import pandas as pd


Expand Down Expand Up @@ -328,7 +328,7 @@ def __get_variants_structure(log, parameters):


def __get_progress_bar(num_variants, parameters):
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
progress = None
if importlib.util.find_spec("tqdm") and show_progress_bar and num_variants > 1:
from tqdm.auto import tqdm
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def apply_variant(variant, tree, parameters=None):


def _construct_progress_bar(progress_length, parameters):
if exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True) and importlib.util.find_spec("tqdm"):
if exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR) and importlib.util.find_spec("tqdm"):
if progress_length > 1:
from tqdm.auto import tqdm
return tqdm(total=progress_length, desc="aligning log, completed variants :: ")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from enum import Enum
from pm4py.util import exec_utils
from pm4py.util import constants
from pm4py.objects.conversion.log import converter as log_converter


class Parameters(Enum):
Expand Down Expand Up @@ -76,6 +77,8 @@ def diagnose_from_notexisting_activities(log, notexisting_activities_in_model, p
if parameters is None:
parameters = {}

log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
diagnostics = {}

Expand Down Expand Up @@ -129,6 +132,8 @@ def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
if parameters is None:
parameters = {}

log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
diagnostics = {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pm4py.objects.log.obj import EventLog, Trace, Event
from pm4py.objects.log.util import basic_filter
from pm4py.util import exec_utils
from pm4py.objects.conversion.log import converter as log_converter


class Parameters(Enum):
Expand Down Expand Up @@ -128,6 +129,8 @@ def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
if parameters is None:
parameters = {}

log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

diagnostics = {}
string_attributes = exec_utils.get_param_value(Parameters.STRING_ATTRIBUTES, parameters, [])
numeric_attributes = exec_utils.get_param_value(Parameters.NUMERIC_ATTRIBUTES, parameters, [])
Expand Down Expand Up @@ -209,6 +212,8 @@ def diagnose_from_notexisting_activities(log, notexisting_activities_in_model, p
if parameters is None:
parameters = {}

log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

diagnostics = {}
string_attributes = exec_utils.get_param_value(Parameters.STRING_ATTRIBUTES, parameters, [])
numeric_attributes = exec_utils.get_param_value(Parameters.NUMERIC_ATTRIBUTES, parameters, [])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1102,7 +1102,7 @@ def apply(log: EventLog, net: PetriNet, initial_marking: Marking, final_marking:
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_util.DEFAULT_NAME_KEY)
consider_activities_not_in_model_in_fitness = exec_utils.get_param_value(Parameters.CONSIDER_ACTIVITIES_NOT_IN_MODEL_IN_FITNESS, parameters, False)

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

if type(log) is not pd.DataFrame:
Expand Down
2 changes: 1 addition & 1 deletion pm4py/algo/discovery/ilp/variants/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def apply(log0: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional
constants.DEFAULT_ARTIFICIAL_START_ACTIVITY)
artificial_end_activity = exec_utils.get_param_value(Parameters.PARAM_ARTIFICIAL_END_ACTIVITY, parameters,
constants.DEFAULT_ARTIFICIAL_END_ACTIVITY)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)

log0 = log_converter.apply(log0, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
log0 = filtering_utils.keep_one_trace_per_variant(log0, parameters=parameters)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def align_fake_log_stop_marking(fake_log, net, marking, final_marking, parameter
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
multiprocessing = exec_utils.get_param_value(Parameters.MULTIPROCESSING, parameters, constants.ENABLE_MULTIPROCESSING_DEFAULT)

progress = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def apply(log: EventLog, net: PetriNet, marking: Marking, final_marking: Marking
executor.Variants.TOKEN_REPLAY)
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, log_lib.util.xes.DEFAULT_NAME_KEY)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)

# default value for precision, when no activated transitions (not even by looking at the initial marking) are found
precision = 1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def apply(log: EventLog, petri_net: PetriNet, initial_marking: Marking, final_ma
token_replay_variant = exec_utils.get_param_value(Parameters.TOKEN_REPLAY_VARIANT, parameters,
executor.Variants.TOKEN_REPLAY)
cleaning_token_flood = exec_utils.get_param_value(Parameters.CLEANING_TOKEN_FLOOD, parameters, False)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

parameters_tr = {token_replay.Parameters.ACTIVITY_KEY: activity_key,
Expand Down
14 changes: 9 additions & 5 deletions pm4py/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def discover_petri_net_alpha_plus(log: Union[EventLog, pd.DataFrame], activity_k
return alpha_miner.apply(log, variant=alpha_miner.Variants.ALPHA_VERSION_PLUS, parameters=get_properties(log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key))


def discover_petri_net_inductive(log: Union[EventLog, pd.DataFrame, DFG], multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, noise_threshold: float = 0.0, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> Tuple[
def discover_petri_net_inductive(log: Union[EventLog, pd.DataFrame, DFG], multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, noise_threshold: float = 0.0, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name", disable_fallthroughs: bool = False) -> Tuple[
PetriNet, Marking, Marking]:
"""
Discovers a Petri net using the inductive miner algorithm.
Expand All @@ -290,6 +290,7 @@ def discover_petri_net_inductive(log: Union[EventLog, pd.DataFrame, DFG], multi_
:param activity_key: attribute to be used for the activity
:param timestamp_key: attribute to be used for the timestamp
:param case_id_key: attribute to be used as case identifier
:param disable_fallthroughs: disable the Inductive Miner fall-throughs
:rtype: ``Tuple[PetriNet, Marking, Marking]``
.. code-block:: python3
Expand All @@ -308,7 +309,7 @@ def discover_petri_net_inductive(log: Union[EventLog, pd.DataFrame, DFG], multi_
log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)

pt = discover_process_tree_inductive(
log, noise_threshold, multi_processing=multi_processing, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
log, noise_threshold, multi_processing=multi_processing, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key, disable_fallthroughs=disable_fallthroughs)
from pm4py.convert import convert_to_petri_net
return convert_to_petri_net(pt)

Expand Down Expand Up @@ -357,7 +358,7 @@ def discover_petri_net_heuristics(log: Union[EventLog, pd.DataFrame], dependency
return heuristics_miner.apply(log, parameters=parameters)


def discover_process_tree_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_threshold: float = 0.0, multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> ProcessTree:
def discover_process_tree_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_threshold: float = 0.0, multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name", disable_fallthroughs: bool = False) -> ProcessTree:
"""
Discovers a process tree using the inductive miner algorithm
Expand All @@ -371,6 +372,7 @@ def discover_process_tree_inductive(log: Union[EventLog, pd.DataFrame, DFG], noi
:param multi_processing: boolean that enables/disables multiprocessing in inductive miner
:param timestamp_key: attribute to be used for the timestamp
:param case_id_key: attribute to be used as case identifier
:param disable_fallthroughs: disable the Inductive Miner fall-throughs
:rtype: ``ProcessTree``
.. code-block:: python3
Expand All @@ -393,6 +395,7 @@ def discover_process_tree_inductive(log: Union[EventLog, pd.DataFrame, DFG], noi
log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
parameters["noise_threshold"] = noise_threshold
parameters["multiprocessing"] = multi_processing
parameters["disable_fallthroughs"] = disable_fallthroughs

variant = inductive_miner.Variants.IMf if noise_threshold > 0 else inductive_miner.Variants.IM

Expand Down Expand Up @@ -539,7 +542,7 @@ def discover_eventually_follows_graph(log: Union[EventLog, pd.DataFrame], activi
return get.apply(log, parameters=properties)


def discover_bpmn_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_threshold: float = 0.0, multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> BPMN:
def discover_bpmn_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_threshold: float = 0.0, multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name", disable_fallthroughs: bool = False) -> BPMN:
"""
Discovers a BPMN using the Inductive Miner algorithm
Expand All @@ -553,6 +556,7 @@ def discover_bpmn_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_thres
:param activity_key: attribute to be used for the activity
:param timestamp_key: attribute to be used for the timestamp
:param case_id_key: attribute to be used as case identifier
:param disable_fallthroughs: disable the Inductive Miner fall-throughs
:rtype: ``BPMN``
.. code-block:: python3
Expand All @@ -571,7 +575,7 @@ def discover_bpmn_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_thres
log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)

pt = discover_process_tree_inductive(
log, noise_threshold, multi_processing=multi_processing, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
log, noise_threshold, multi_processing=multi_processing, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key, disable_fallthroughs=disable_fallthroughs)
from pm4py.convert import convert_to_bpmn
return convert_to_bpmn(pt)

Expand Down
2 changes: 1 addition & 1 deletion pm4py/meta.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__name__ = 'pm4py'
VERSION = '2.7.7'
VERSION = '2.7.8'
__version__ = VERSION
__doc__ = 'Process mining for Python'
__author__ = 'Fraunhofer Institute for Applied Information Technology FIT'
Expand Down
5 changes: 4 additions & 1 deletion pm4py/objects/bpmn/importer/variants/lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,10 @@ def apply(path, parameters=None):
from lxml import etree, objectify

parser = etree.XMLParser(remove_comments=True, encoding=encoding)
xml_tree = objectify.parse(path, parser=parser)

F = open(path, "rb")
xml_tree = objectify.parse(F, parser=parser)
F.close()

return import_xml_tree_from_root(xml_tree.getroot())

Expand Down
2 changes: 1 addition & 1 deletion pm4py/objects/log/exporter/xes/variants/etree_xes_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def __export_traces(log, root, parameters=None):
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)

progress = None
if importlib.util.find_spec("tqdm") and show_progress_bar:
Expand Down
2 changes: 1 addition & 1 deletion pm4py/objects/log/exporter/xes/variants/line_by_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def export_log_line_by_line(log, fp_obj, encoding, parameters=None):
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)

progress = None
if importlib.util.find_spec("tqdm") and show_progress_bar:
Expand Down
Loading

0 comments on commit 034fe7f

Please sign in to comment.