-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathg20_Pattern_Mining.py
More file actions
77 lines (64 loc) · 2.93 KB
/
g20_Pattern_Mining.py
File metadata and controls
77 lines (64 loc) · 2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt
import ds_functions as ds
import mlxtend.frequent_patterns as pm
def get_patterns(data, min_sup: float = 0.001) -> pd.DataFrame:
    """Mine frequent patterns from ``data`` with mlxtend's FP-Growth.

    Args:
        data: Input forwarded unchanged to ``pm.fpgrowth``
            (presumably a one-hot encoded DataFrame -- confirm with caller).
        min_sup: Minimum support passed as ``min_support``.

    Returns:
        The DataFrame produced by ``pm.fpgrowth``; item sets are reported
        with column names because ``use_colnames=True`` is requested.
    """
    found: pd.DataFrame = pm.fpgrowth(
        data,
        min_support=min_sup,
        use_colnames=True,
        verbose=True,
    )
    # Report how many patterns were mined before handing them back.
    total = len(found)
    print(total, 'patterns')
    return found
def plot_patterns(patterns, var_min_sup):
    """Plot how many patterns remain at each minimum-support threshold.

    Args:
        patterns: DataFrame with a ``'support'`` column.
        var_min_sup: Iterable of support thresholds; used both for the
            filtering and as the x values handed to ``ds.plot_line``.
    """
    # One count per threshold: rows whose support is at least that threshold.
    counts = [
        len(patterns[patterns['support'] >= threshold])
        for threshold in var_min_sup
    ]

    plt.figure(figsize=(6, 4))
    ds.plot_line(
        var_min_sup,
        counts,
        title='Nr Patterns x Support',
        xlabel='support',
        ylabel='Nr Patterns',
    )
    plt.show()
def get_rules(patterns, min_conf):
    """Generate association rules from ``patterns`` filtered by confidence.

    Args:
        patterns: Frequent patterns forwarded to ``pm.association_rules``.
        min_conf: Base confidence value; see the note below.

    Returns:
        Whatever ``pm.association_rules`` returns for the given patterns.

    NOTE(review): the threshold actually applied is ``min_conf * 5``, not
    ``min_conf`` itself -- confirm this scaling is intended.
    """
    threshold = min_conf * 5
    found = pm.association_rules(
        patterns,
        metric='confidence',
        min_threshold=threshold,
        support_only=False,
    )
    print(f'\tfound {len(found)} rules')
    return found
def plot_top_rules(rules: pd.DataFrame, metric: str, per_metric: str) -> None:
    """Render the given rules as text on an axis-less matplotlib figure.

    Every row of ``rules`` is written out (the "TOP 10" in the title is
    only a label; no truncation happens here), one rule per line, as
    ``antecedents ==> consequents(s: .., c: .., lift: ..)``.

    Args:
        rules: DataFrame with ``antecedents``, ``consequents``, ``support``,
            ``confidence`` and ``lift`` columns. It is not modified.
        metric: Name shown after the dash in the figure title.
        per_metric: Name shown after "per Min" in the figure title.
    """
    _, ax = plt.subplots(figsize=(6, 3))
    ax.grid(False)
    ax.set_axis_off()
    ax.set_title(f'TOP 10 per Min {per_metric} - {metric}', fontweight="bold")

    # Convert the item sets to tuples while formatting instead of writing
    # them back into ``rules``: this leaves the caller's frame untouched and
    # avoids DataFrame.applymap, which is deprecated in recent pandas.
    lines = [
        f"{tuple(antecedents)} ==> {tuple(consequents)}"
        f"(s: {support:.2f}, c: {confidence:.2f}, lift: {lift:.2f})\n"
        for antecedents, consequents, support, confidence, lift in zip(
            rules['antecedents'],
            rules['consequents'],
            rules['support'],
            rules['confidence'],
            rules['lift'],
        )
    ]
    ax.text(0, 0, ''.join(lines))
    plt.show()
def analyse_per_metric(rules: pd.DataFrame, metric: str, metric_values: list) -> list:
    """Summarise rule quality for increasing thresholds on ``metric``.

    For each threshold in ``metric_values`` the rules with
    ``rules[metric] >= threshold`` are kept, and the mean confidence and
    mean lift are recorded for three groups: all kept rules (``'avg'``),
    the best quarter (``'top25%'``) and the best ten (``'top10'``).
    The two series are charted side by side, then the top-10 rules by
    confidence and by lift for the LAST threshold are plotted as text.

    Args:
        rules: DataFrame with ``confidence``, ``lift`` and ``metric`` columns
            (plus whatever ``plot_top_rules`` needs).
        metric: Column name used for thresholding and as the x-axis label.
        metric_values: Thresholds to evaluate, in plotting order.

    Returns:
        The number of rules kept at each threshold, in the same order as
        ``metric_values``.
    """
    print(f'Analyse per {metric}...')
    conf = {'avg': [], 'top25%': [], 'top10': []}
    lift = {'avg': [], 'top25%': [], 'top10': []}
    # Start from empty frames rather than lists so the final plot_top_rules
    # calls still receive a DataFrame when metric_values is empty.
    top_conf = rules.iloc[:0].copy()
    top_lift = rules.iloc[:0].copy()
    nr_rules = []
    for m in metric_values:
        rs = rules[rules[metric] >= m]
        nr_rules.append(len(rs))
        conf['avg'].append(rs['confidence'].mean(axis=0))
        lift['avg'].append(rs['lift'].mean(axis=0))

        # With fewer than four rules this is 0, so the 'top25%' means are NaN.
        quarter = int(0.25 * len(rs))
        top_conf = rs.nlargest(quarter, 'confidence')
        conf['top25%'].append(top_conf['confidence'].mean(axis=0))
        top_lift = rs.nlargest(quarter, 'lift')
        lift['top25%'].append(top_lift['lift'].mean(axis=0))

        # Computed last on purpose: these frames are what gets plotted
        # after the loop.
        top_conf = rs.nlargest(10, 'confidence')
        conf['top10'].append(top_conf['confidence'].mean(axis=0))
        top_lift = rs.nlargest(10, 'lift')
        lift['top10'].append(top_lift['lift'].mean(axis=0))

    _, axs = plt.subplots(1, 2, figsize=(10, 5), squeeze=False)
    ds.multiple_line_chart(metric_values, conf, ax=axs[0, 0], title=f'Avg Confidence x {metric}',
                           xlabel=metric, ylabel='Avg confidence')
    ds.multiple_line_chart(metric_values, lift, ax=axs[0, 1], title=f'Avg Lift x {metric}',
                           xlabel=metric, ylabel='Avg lift')
    plt.show()

    plot_top_rules(top_conf, 'confidence', metric)
    plot_top_rules(top_lift, 'lift', metric)

    return nr_rules