Skip to content

Commit

Permalink
fix(pm4py): refactoring discover_dfg_clean method
Browse files Browse the repository at this point in the history
  • Loading branch information
fit-alessandro-berti committed Feb 7, 2024
1 parent c4d24d5 commit d36d0d2
Showing 1 changed file with 20 additions and 20 deletions.
40 changes: 20 additions & 20 deletions pm4py/algo/discovery/dfg/variants/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from enum import Enum
from typing import Optional, Dict, Any

import numpy as np
import pandas as pd

from pm4py.objects.dfg.obj import DFG
Expand All @@ -15,9 +16,10 @@ class Parameters(Enum):
TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY


CONST_AUX_ACT = 'aux_act_'
CONST_AUX_CASE = 'aux_case_'
CONST_COUNT = 'count_'
CONST_AUX_ACT_START = 'aux_act_start'
CONST_PROCESS_START = '#!$#PROCESS_START#!$#'
CONST_AUX_ACT_END = 'aux_act_end'
CONST_PROCESS_END = '#!$#PROCESS_END#!$#'


def apply(log: pd.DataFrame, parameters: Optional[Dict[str, Any]] = None) -> DFG:
Expand All @@ -28,30 +30,28 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[str, Any]] = None) -> DFG
Parameters.CASE_ID_KEY, parameters, constants.CASE_ATTRIBUTE_GLUE)
time_key = exec_utils.get_param_value(
Parameters.TIMESTAMP_KEY, parameters, xes_util.DEFAULT_TIMESTAMP_KEY)
aux_act = CONST_AUX_ACT + str(time.time())
aux_case = CONST_AUX_CASE + str(time.time())
count = CONST_COUNT + str(time.time())

df = log.sort_values([cid_key, time_key]).loc[:, [cid_key, act_key]].reset_index()
df[aux_act] = df[act_key].shift(-1)
df[aux_case] = df[cid_key].shift(-1)
dfg = DFG()

excl_starter = df.at[0, act_key]
borders = df[(df[cid_key] != df[aux_case])]
starters = borders.groupby([aux_act]).size().reset_index()
starters.columns = [aux_act, count]
starters.loc[starters[aux_act] == excl_starter, count] += 1
aux_act_start = CONST_AUX_ACT_START + str(time.time())
aux_act_end = CONST_AUX_ACT_END + str(time.time())
df[aux_act_start] = df.groupby(cid_key)[act_key].shift(1).replace(np.nan, CONST_PROCESS_START)
df[aux_act_end] = df.groupby(cid_key)[act_key].shift(-1).replace(np.nan, CONST_PROCESS_END)

starters = df[(df[aux_act_start] == CONST_PROCESS_START)]
borders = df[(df[aux_act_end] == CONST_PROCESS_END)]
connections = df[((df[aux_act_start] != CONST_PROCESS_START) & (df[aux_act_end] != CONST_PROCESS_END))]

dfg = DFG()

df = df[(df[cid_key] == df[aux_case])]
for (a, b, f) in list(df.groupby([act_key, aux_act]).size(
).reset_index().itertuples(index=False, name=None)):
for (a, b, f) in list(
connections.groupby([act_key, aux_act_end]).size().reset_index().itertuples(index=False, name=None)):
dfg.graph[(a, b)] += f

for (a, f) in list(starters.itertuples(index=False, name=None)):
for (a, f) in list(starters.groupby([act_key]).size().reset_index().itertuples(index=False, name=None)):
dfg.start_activities[a] += f

for (a, f) in list(borders.groupby(
[act_key]).size().reset_index().itertuples(index=False, name=None)):
for (a, f) in list(borders.groupby([act_key]).size().reset_index().itertuples(index=False, name=None)):
dfg.end_activities[a] += f

return dfg

0 comments on commit d36d0d2

Please sign in to comment.