Skip to content

Commit

Permalink
Merge branch 'integration' into 147-refactoring-log_to_fea_descr
Browse files Browse the repository at this point in the history
  • Loading branch information
fit-alessandro-berti committed Oct 6, 2023
2 parents 84af656 + f4e53a8 commit 034fe7f
Show file tree
Hide file tree
Showing 48 changed files with 236 additions and 285 deletions.
38 changes: 38 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,43 @@
# Changelog of pm4py

## pm4py 2.7.8 (2023.10.06)

### Added

### Changed
* 634b6a5ac1b40963baa76a42a10c3c22176aaf84
* f6993293d665e2f7b69c27ce0f09d2df4e889b0b
* f0240670292086cb3b6fe523b1646dcfa4c71ddc
    * Refactoring OCEL import/export
* c1379120480539f5578a52ce6d76effb4819b3c6
* centralized enabling/disabling of the TQDM progress bar + disabling the progress bar in tests
* 08c2c16d17d2cbe26224662032a298f6b0a409a9
* avoiding the necessity of re-creating setup.py when new packages are added to pm4py
* a7dc86f7fd821b5dd229ff404b5afa3b5ad919b4
* disable IM fallthroughs in the simplified interface

### Deprecated

### Fixed
* 063a6d64bae61f1b54444e0b34ec0926b504aa34
* properly closing file objects in different pm4py importers/exporters (XES, PNML, PTML, ...)
* 35f13b65a0523f889748679fbe90cf2d041e1038
* fixing XES importing warnings in obtaining the resulting pd.DataFrame
* ef548ef18f514ad6ad0a32a104f380b322ab72e7
* fixing test/examples execution
* d1b39bde1b14f160c0fff42bdc6b172bb0ae760e
* fix Petri net serialization
* e51c5e1e084a7fd7d13cb8d1381f868435762cca
* fixing TBR diagnostics when the methods are called on pd.DataFrame

### Removed

### Other
* 49a472d002890b35e3f59ef93fd75f2e35455715
* storing stable pm4py Python requirements for the old Python 3.8

---


## pm4py 2.7.7 (2023.09.22)

Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ if __name__ == "__main__":
pm4py can be installed on Python 3.9.x / 3.10.x / 3.11.x / 3.12.x by invoking:
*pip install -U pm4py*

pm4py also runs on older Python environments with different requirement sets, including:
- Python 3.8 (3.8.10): third_party/old_python_deps/requirements_py38.txt

## Requirements
pm4py depends on some other Python packages, with different levels of importance:
* *Essential requirements*: numpy, pandas, deprecation, networkx
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# The short X.Y version
version = '2.7'
# The full version, including alpha/beta/rc tags
release = '2.7.7'
release = '2.7.8'

# -- General configuration ---------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion examples/activities_to_alphabet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def execute_script():
dataframe = pm4py.read_xes("../tests/input_data/running-example.xes")
dataframe = pm4py.read_xes("../tests/input_data/running-example.xes", return_legacy_log_object=False)
renamed_dataframe = activities_to_alphabet.apply(dataframe, parameters={constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name"})
print(renamed_dataframe)

Expand Down
2 changes: 1 addition & 1 deletion examples/cost_based_dfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


def execute_script():
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "roadtraffic100traces.xes"))
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "roadtraffic100traces.xes"), return_legacy_log_object=False)
cost_based_dfg = df_statistics.get_dfg_graph(log, measure="cost", cost_attribute="amount")
gviz = dfg_visualizer.apply(cost_based_dfg, variant=dfg_visualizer.Variants.COST, parameters={"format": "svg"})
dfg_visualizer.view(gviz)
Expand Down
2 changes: 1 addition & 1 deletion examples/inductive_miner_dfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def execute_script():
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"))
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"), return_legacy_log_object=False)
typed_dfg_1 = pm4py.discover_dfg_typed(log)
# in alternative ...
dfg, sa, ea = pm4py.discover_dfg(log)
Expand Down
2 changes: 1 addition & 1 deletion examples/inductive_miner_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def execute_script():
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"))
log = pm4py.read_xes(os.path.join("..", "tests", "input_data", "running-example.xes"), return_legacy_log_object=False)
variants = pm4py.get_variants(log)
uvcl = UVCL()
for var, occ in variants.items():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def apply_log(log, list_nets, parameters=None):
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
icache = exec_utils.get_param_value(Parameters.ICACHE, parameters, dict())
mcache = exec_utils.get_param_value(Parameters.MCACHE, parameters, dict())

Expand Down
4 changes: 2 additions & 2 deletions pm4py/algo/conformance/alignments/petri_net/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from typing import Optional, Dict, Any, Union, Tuple
from pm4py.objects.log.obj import EventLog, EventStream, Trace
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.util import typing
from pm4py.util import typing, constants
import pandas as pd


Expand Down Expand Up @@ -328,7 +328,7 @@ def __get_variants_structure(log, parameters):


def __get_progress_bar(num_variants, parameters):
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
progress = None
if importlib.util.find_spec("tqdm") and show_progress_bar and num_variants > 1:
from tqdm.auto import tqdm
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def apply_variant(variant, tree, parameters=None):


def _construct_progress_bar(progress_length, parameters):
if exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True) and importlib.util.find_spec("tqdm"):
if exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR) and importlib.util.find_spec("tqdm"):
if progress_length > 1:
from tqdm.auto import tqdm
return tqdm(total=progress_length, desc="aligning log, completed variants :: ")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from enum import Enum
from pm4py.util import exec_utils
from pm4py.util import constants
from pm4py.objects.conversion.log import converter as log_converter


class Parameters(Enum):
Expand Down Expand Up @@ -76,6 +77,8 @@ def diagnose_from_notexisting_activities(log, notexisting_activities_in_model, p
if parameters is None:
parameters = {}

log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
diagnostics = {}

Expand Down Expand Up @@ -129,6 +132,8 @@ def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
if parameters is None:
parameters = {}

log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes.DEFAULT_TIMESTAMP_KEY)
diagnostics = {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pm4py.objects.log.obj import EventLog, Trace, Event
from pm4py.objects.log.util import basic_filter
from pm4py.util import exec_utils
from pm4py.objects.conversion.log import converter as log_converter


class Parameters(Enum):
Expand Down Expand Up @@ -128,6 +129,8 @@ def diagnose_from_trans_fitness(log, trans_fitness, parameters=None):
if parameters is None:
parameters = {}

log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

diagnostics = {}
string_attributes = exec_utils.get_param_value(Parameters.STRING_ATTRIBUTES, parameters, [])
numeric_attributes = exec_utils.get_param_value(Parameters.NUMERIC_ATTRIBUTES, parameters, [])
Expand Down Expand Up @@ -209,6 +212,8 @@ def diagnose_from_notexisting_activities(log, notexisting_activities_in_model, p
if parameters is None:
parameters = {}

log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

diagnostics = {}
string_attributes = exec_utils.get_param_value(Parameters.STRING_ATTRIBUTES, parameters, [])
numeric_attributes = exec_utils.get_param_value(Parameters.NUMERIC_ATTRIBUTES, parameters, [])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1102,7 +1102,7 @@ def apply(log: EventLog, net: PetriNet, initial_marking: Marking, final_marking:
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_util.DEFAULT_NAME_KEY)
consider_activities_not_in_model_in_fitness = exec_utils.get_param_value(Parameters.CONSIDER_ACTIVITIES_NOT_IN_MODEL_IN_FITNESS, parameters, False)

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

if type(log) is not pd.DataFrame:
Expand Down
2 changes: 1 addition & 1 deletion pm4py/algo/discovery/ilp/variants/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def apply(log0: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional
constants.DEFAULT_ARTIFICIAL_START_ACTIVITY)
artificial_end_activity = exec_utils.get_param_value(Parameters.PARAM_ARTIFICIAL_END_ACTIVITY, parameters,
constants.DEFAULT_ARTIFICIAL_END_ACTIVITY)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)

log0 = log_converter.apply(log0, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
log0 = filtering_utils.keep_one_trace_per_variant(log0, parameters=parameters)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def align_fake_log_stop_marking(fake_log, net, marking, final_marking, parameter
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
multiprocessing = exec_utils.get_param_value(Parameters.MULTIPROCESSING, parameters, constants.ENABLE_MULTIPROCESSING_DEFAULT)

progress = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def apply(log: EventLog, net: PetriNet, marking: Marking, final_marking: Marking
executor.Variants.TOKEN_REPLAY)
activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, log_lib.util.xes.DEFAULT_NAME_KEY)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)

# default value for precision, when no activated transitions (not even by looking at the initial marking) are found
precision = 1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def apply(log: EventLog, petri_net: PetriNet, initial_marking: Marking, final_ma
token_replay_variant = exec_utils.get_param_value(Parameters.TOKEN_REPLAY_VARIANT, parameters,
executor.Variants.TOKEN_REPLAY)
cleaning_token_flood = exec_utils.get_param_value(Parameters.CLEANING_TOKEN_FLOOD, parameters, False)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)

parameters_tr = {token_replay.Parameters.ACTIVITY_KEY: activity_key,
Expand Down
14 changes: 9 additions & 5 deletions pm4py/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def discover_petri_net_alpha_plus(log: Union[EventLog, pd.DataFrame], activity_k
return alpha_miner.apply(log, variant=alpha_miner.Variants.ALPHA_VERSION_PLUS, parameters=get_properties(log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key))


def discover_petri_net_inductive(log: Union[EventLog, pd.DataFrame, DFG], multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, noise_threshold: float = 0.0, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> Tuple[
def discover_petri_net_inductive(log: Union[EventLog, pd.DataFrame, DFG], multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, noise_threshold: float = 0.0, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name", disable_fallthroughs: bool = False) -> Tuple[
PetriNet, Marking, Marking]:
"""
Discovers a Petri net using the inductive miner algorithm.
Expand All @@ -290,6 +290,7 @@ def discover_petri_net_inductive(log: Union[EventLog, pd.DataFrame, DFG], multi_
:param activity_key: attribute to be used for the activity
:param timestamp_key: attribute to be used for the timestamp
:param case_id_key: attribute to be used as case identifier
:param disable_fallthroughs: disable the Inductive Miner fall-throughs
:rtype: ``Tuple[PetriNet, Marking, Marking]``
.. code-block:: python3
Expand All @@ -308,7 +309,7 @@ def discover_petri_net_inductive(log: Union[EventLog, pd.DataFrame, DFG], multi_
log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)

pt = discover_process_tree_inductive(
log, noise_threshold, multi_processing=multi_processing, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
log, noise_threshold, multi_processing=multi_processing, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key, disable_fallthroughs=disable_fallthroughs)
from pm4py.convert import convert_to_petri_net
return convert_to_petri_net(pt)

Expand Down Expand Up @@ -357,7 +358,7 @@ def discover_petri_net_heuristics(log: Union[EventLog, pd.DataFrame], dependency
return heuristics_miner.apply(log, parameters=parameters)


def discover_process_tree_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_threshold: float = 0.0, multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> ProcessTree:
def discover_process_tree_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_threshold: float = 0.0, multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name", disable_fallthroughs: bool = False) -> ProcessTree:
"""
Discovers a process tree using the inductive miner algorithm
Expand All @@ -371,6 +372,7 @@ def discover_process_tree_inductive(log: Union[EventLog, pd.DataFrame, DFG], noi
:param multi_processing: boolean that enables/disables multiprocessing in inductive miner
:param timestamp_key: attribute to be used for the timestamp
:param case_id_key: attribute to be used as case identifier
:param disable_fallthroughs: disable the Inductive Miner fall-throughs
:rtype: ``ProcessTree``
.. code-block:: python3
Expand All @@ -393,6 +395,7 @@ def discover_process_tree_inductive(log: Union[EventLog, pd.DataFrame, DFG], noi
log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
parameters["noise_threshold"] = noise_threshold
parameters["multiprocessing"] = multi_processing
parameters["disable_fallthroughs"] = disable_fallthroughs

variant = inductive_miner.Variants.IMf if noise_threshold > 0 else inductive_miner.Variants.IM

Expand Down Expand Up @@ -539,7 +542,7 @@ def discover_eventually_follows_graph(log: Union[EventLog, pd.DataFrame], activi
return get.apply(log, parameters=properties)


def discover_bpmn_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_threshold: float = 0.0, multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name") -> BPMN:
def discover_bpmn_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_threshold: float = 0.0, multi_processing: bool = constants.ENABLE_MULTIPROCESSING_DEFAULT, activity_key: str = "concept:name", timestamp_key: str = "time:timestamp", case_id_key: str = "case:concept:name", disable_fallthroughs: bool = False) -> BPMN:
"""
Discovers a BPMN using the Inductive Miner algorithm
Expand All @@ -553,6 +556,7 @@ def discover_bpmn_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_thres
:param activity_key: attribute to be used for the activity
:param timestamp_key: attribute to be used for the timestamp
:param case_id_key: attribute to be used as case identifier
:param disable_fallthroughs: disable the Inductive Miner fall-throughs
:rtype: ``BPMN``
.. code-block:: python3
Expand All @@ -571,7 +575,7 @@ def discover_bpmn_inductive(log: Union[EventLog, pd.DataFrame, DFG], noise_thres
log, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)

pt = discover_process_tree_inductive(
log, noise_threshold, multi_processing=multi_processing, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key)
log, noise_threshold, multi_processing=multi_processing, activity_key=activity_key, timestamp_key=timestamp_key, case_id_key=case_id_key, disable_fallthroughs=disable_fallthroughs)
from pm4py.convert import convert_to_bpmn
return convert_to_bpmn(pt)

Expand Down
2 changes: 1 addition & 1 deletion pm4py/meta.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
__name__ = 'pm4py'
VERSION = '2.7.7'
VERSION = '2.7.8'
__version__ = VERSION
__doc__ = 'Process mining for Python'
__author__ = 'Fraunhofer Institute for Applied Information Technology FIT'
Expand Down
5 changes: 4 additions & 1 deletion pm4py/objects/bpmn/importer/variants/lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,10 @@ def apply(path, parameters=None):
from lxml import etree, objectify

parser = etree.XMLParser(remove_comments=True, encoding=encoding)
xml_tree = objectify.parse(path, parser=parser)

F = open(path, "rb")
xml_tree = objectify.parse(F, parser=parser)
F.close()

return import_xml_tree_from_root(xml_tree.getroot())

Expand Down
2 changes: 1 addition & 1 deletion pm4py/objects/log/exporter/xes/variants/etree_xes_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def __export_traces(log, root, parameters=None):
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)

progress = None
if importlib.util.find_spec("tqdm") and show_progress_bar:
Expand Down
2 changes: 1 addition & 1 deletion pm4py/objects/log/exporter/xes/variants/line_by_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def export_log_line_by_line(log, fp_obj, encoding, parameters=None):
if parameters is None:
parameters = {}

show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, True)
show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)

progress = None
if importlib.util.find_spec("tqdm") and show_progress_bar:
Expand Down
Loading

0 comments on commit 034fe7f

Please sign in to comment.