-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFp growth.py
More file actions
120 lines (103 loc) · 3.19 KB
/
Fp growth.py
File metadata and controls
120 lines (103 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import numpy as np
import time
from numba import jit
import numba as nb
from multiprocessing import Pool
from itertools import combinations
start_time = time.time()
file_path = 'date/test2.dat'
data =[]
with open(file_path, 'r') as file:
for line in file:
row = [int(x) for x in line.strip().split()]
data.append(row)
#debug
# for i in data:
# print(i)
# #count item count in itemCnt
itemCnt = np.zeros(27,dtype=int)
for i in data:
for j in i:
itemCnt[j]+=1
requireSize = 2
class TreeNode:
def __init__(self, value=None):
self.value = value
self.count = 1
self.children = {}
self.parent = None
class Trie:
def __init__(self):
self.root = TreeNode() # Root is a null node
def insert(self, array):
node = self.root
for element in array:
if itemCnt[element] < requireSize:
continue
if element not in node.children:
new_node = TreeNode(element)
new_node.parent = node # Set parent for traversal
node.children[element] = new_node
node = node.children[element]
node.count += 1
def get_all_path(self):
# nodeCnt={}
paths = []
self.dfs(self.root, [], paths)
return paths
def dfs(self, node, path, paths):
if not node.children: # If no children, it's a leaf
paths.append(path.copy())
for child in node.children.values():
path.append(child)
# nodeCnt[child.value]=child.cnt
self.dfs(child, path, paths)
path.pop() # Remove the last element for backtracking
def print_trie(node, level=0):
# Base case: if the node is None, return
if node is None:
return
# Print the current node
indent = " " * (level * 4)
node_info = f"{indent}Node: {node.value}, Count: {node.count}" if node.value is not None else "Root"
print(node_info)
# Recursively print each child
for child in node.children.values():
print_trie(child, level + 1)
def generate_combinations(arr):
result_set = []
for r in range(1, len(arr) + 1):
for combo in combinations(arr, r):
result_set.append(combo)
return result_set
FPTree=Trie()
for row in data:
row = sorted(row,key=lambda x: itemCnt[x], reverse=True)
# print(row)
FPTree.insert(row)
# print_trie(FPTree.root)
Allpathnode = FPTree.get_all_path()
print(Allpathnode)
frequent_item_set={}
for nodes in Allpathnode:
nodeCnt={}
path = []
# frequent_item_set={}
# path.clear()
# nodeCnt.clear()
for node in nodes:
path.append(node.value)
nodeCnt[node.value]=node.count
Allcomb = generate_combinations(path)
for comb in Allcomb:
comb_set = frozenset(comb)
minval = 10000
for element in comb_set:
minval = min(minval,nodeCnt[element])
if comb_set not in frequent_item_set:
frequent_item_set[comb_set] = minval
else:
frequent_item_set[comb_set] +=minval
# # Allpath.append(FPTree.traverse_to_root(leaf))
for key, value in frequent_item_set.items():
print(f'键:{key},值:{value}')