Skip to content

Commit

Permalink
Merge branch '147-refactoring-log_to_fea_descr' into 'integration'
Browse files Browse the repository at this point in the history
[Priority 2] Refactoring log_to_fea_descr (for LLM abstraction; split into two logical methods)

Closes #147

See merge request process-mining/pm4py/pm4py-core!1108
  • Loading branch information
fit-alessandro-berti committed Oct 9, 2023
2 parents b9f74be + 034fe7f commit b3d88dc
Showing 1 changed file with 53 additions and 10 deletions.
63 changes: 53 additions & 10 deletions pm4py/algo/querying/llm/abstractions/log_to_fea_descr.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,24 @@ def __transform_to_string(stru: str) -> str:
return stru


def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[Dict[Any, Any]] = None) -> str:
def textual_abstraction_from_fea_df(fea_df: pd.DataFrame, parameters: Optional[Dict[Any, Any]] = None) -> str:
"""
Returns the textual abstraction of ML features extracted from a traditional event log object.
Returns the textual abstraction of ML features already encoded in a feature table
Minimum viable example:
import pm4py
from pm4py.algo.querying.llm.abstractions import log_to_fea_descr
log = pm4py.read_xes("tests/input_data/receipt.xes", return_legacy_log_object=True)
fea_df = pm4py.extract_features_dataframe(log)
text_abstr = log_to_fea_descr.textual_abstraction_from_fea_df(fea_df)
print(text_abstr)
Parameters
---------------
log
Event log / Pandas dataframe
fea_df
Feature table (numeric features; stored as Pandas dataframe)
parameters
Parameters that should be provided to the feature extraction, plus:
- Parameters.INCLUDE_HEADER => includes a descriptive header in the returned text
Expand All @@ -93,12 +103,6 @@ def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[
include_header = exec_utils.get_param_value(Parameters.INCLUDE_HEADER, parameters, True)
max_len = exec_utils.get_param_value(Parameters.MAX_LEN, parameters, constants.OPENAI_MAX_LEN)

log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

data, feature_names = log_to_features.apply(log, parameters=parameters)

fea_df = pd.DataFrame(data, columns=feature_names)

cols = []

for c in fea_df.columns:
Expand Down Expand Up @@ -134,3 +138,42 @@ def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[
i = i + 1

return ret


def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[Dict[Any, Any]] = None) -> str:
    """
    Builds the textual abstraction of the ML features computed from an event log.

    The provided log is converted to an event log object, its features are
    extracted and stored in a Pandas dataframe, and the description of that
    feature table is delegated to textual_abstraction_from_fea_df.

    Minimum viable example:

        import pm4py
        from pm4py.algo.querying.llm.abstractions import log_to_fea_descr

        log = pm4py.read_xes("tests/input_data/receipt.xes", return_legacy_log_object=True)
        text_abstr = log_to_fea_descr.apply(log)
        print(text_abstr)

    Parameters
    ---------------
    log
        Event log / Pandas dataframe
    parameters
        Parameters forwarded to the log conversion, to the feature extraction
        and to the textual abstraction, plus:
        - Parameters.INCLUDE_HEADER => includes a descriptive header in the returned text
        - Parameters.MAX_LEN => maximum length of the provided text (if necessary, only the most meaningful features are kept)

    Returns
    ---------------
    stru
        Textual abstraction
    """
    # Use a fresh dictionary when the caller did not supply any parameter.
    parameters = {} if parameters is None else parameters

    # Normalize the input to an event log object before extracting the features.
    event_log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    # The extraction yields the feature values along with the matching column names.
    feature_rows, feature_columns = log_to_features.apply(event_log, parameters=parameters)
    feature_table = pd.DataFrame(feature_rows, columns=feature_columns)

    return textual_abstraction_from_fea_df(feature_table, parameters=parameters)

0 comments on commit b3d88dc

Please sign in to comment.