-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathcalculateSEDBetweenEventLogs.py
More file actions
49 lines (44 loc) · 2.04 KB
/
calculateSEDBetweenEventLogs.py
File metadata and controls
49 lines (44 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import argparse
import pandas as pd
from levenshtein import levenshtein
# Names of the event-log columns holding the case identifier and the
# activity label.
caseIDColName = "Case ID"
activityColName = "Activity"


def get_cases_dict(event_log):
    """Map each case ID of an event log to its trace encoded as a string.

    Rows are read in their existing order. Every run of consecutive rows that
    share the same case ID forms one trace; the trace is the concatenation of
    ``"@" + activity`` for each row, e.g. ``"@A@B@C"``.

    Args:
        event_log: A pandas DataFrame with the columns named by
            ``caseIDColName`` and ``activityColName``. Activity values must
            be strings; anything else raises ``TypeError`` when it is
            concatenated with ``"@"``.

    Returns:
        A dict from case ID to its encoded trace. A log without rows yields
        an empty dict.

    Note:
        Rows of one case are expected to be contiguous. If a case ID shows up
        again after rows of another case, the later run replaces the trace
        stored for the earlier run.
    """
    case_to_sequence = {}
    current_case = None
    parts = None  # None until the first row is seen; then the open trace.

    # Iterating the two columns directly avoids building a Series per row.
    for case_id, activity in zip(event_log[caseIDColName],
                                 event_log[activityColName]):
        if parts is None or case_id != current_case:
            # A new case starts: flush the trace collected so far (if any).
            if parts is not None:
                case_to_sequence[current_case] = "".join(parts)
            current_case = case_id
            parts = []
        parts.append("@" + activity)

    # Flush the last open trace; skipped entirely for a log without rows so
    # that no placeholder entry ends up in the result.
    if parts is not None:
        case_to_sequence[current_case] = "".join(parts)
    return case_to_sequence
def get_distance(variant1, variant2, distanceMatrix):
    """Return the distance between two variants, memoised in a nested dict.

    The distance is whatever ``levenshtein(variant1, variant2)`` returns. A
    freshly computed value is stored under both ``[variant1][variant2]`` and
    ``[variant2][variant1]`` so that a later lookup in either order hits the
    cache.

    Args:
        variant1: First variant; used as a dict key and passed to
            ``levenshtein``.
        variant2: Second variant; used as a dict key and passed to
            ``levenshtein``.
        distanceMatrix: Dict of dicts acting as the cache. It is updated in
            place on a cache miss and left untouched on a cache hit.

    Returns:
        The cached or newly computed distance.
    """
    row = distanceMatrix.get(variant1)
    distance = None if row is None else row.get(variant2)
    if distance is None:
        # Compute before touching the cache so a failure in levenshtein()
        # does not leave half-initialised rows behind.
        distance = levenshtein(variant1, variant2)
        distanceMatrix.setdefault(variant1, {})[variant2] = distance
        distanceMatrix.setdefault(variant2, {})[variant1] = distance
    return distance
def get_sed_between_logs(event_log_original, path_algo_log, distanceMatrix=None):
    """Sum the per-case string edit distance between two event logs.

    The second log is read from ``path_algo_log`` as a ``;``-separated CSV.
    Both logs are turned into case -> trace dicts with ``get_cases_dict``.
    For every case of the original log:

    * if the case also exists in the second log, the distance between the two
      traces (via ``get_distance``) is added;
    * otherwise the number of ``"@"`` characters in the original trace is
      added (one per event in the encoding produced by ``get_cases_dict``).

    Cases that only occur in the second log do not contribute.

    Args:
        event_log_original: Original event log, in the form accepted by
            ``get_cases_dict``.
        path_algo_log: Path (or other source accepted by ``pandas.read_csv``)
            of the second log.
        distanceMatrix: Optional dict used by ``get_distance`` as a cache.
            Pass the same dict to several calls to share the cache between
            them; when omitted, a fresh dict is used for this call only.

    Returns:
        The accumulated distance. The value is also printed to stdout.
    """
    # A dict() default would be created once and silently shared (and grown)
    # across every call that omits the argument.
    if distanceMatrix is None:
        distanceMatrix = {}

    event_log2 = pd.read_csv(path_algo_log, delimiter=";")
    event_log1_dict = get_cases_dict(event_log_original)
    event_log2_dict = get_cases_dict(event_log2)

    string_edit_distance = 0
    for case, original_sequence in event_log1_dict.items():
        if original_sequence is None:
            # Defensive: an entry without a trace has no events, so it
            # contributes nothing instead of raising on .count().
            continue
        other_sequence = event_log2_dict.get(case)
        if other_sequence is not None:
            string_edit_distance += get_distance(
                original_sequence, other_sequence, distanceMatrix
            )
        else:
            # Case missing from the second log: count every event as an edit.
            string_edit_distance += original_sequence.count("@")

    print(string_edit_distance)
    return string_edit_distance