Skip to content

Commit 38447ab

Browse files
committed
refactoring APIs, hide url_meta
1 parent 7841b13 commit 38447ab

6 files changed

Lines changed: 46 additions & 38 deletions

File tree

README.rst

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -249,8 +249,8 @@ Usage
249249
250250
# dump pattern data
251251
formatter = PatternFormatter()
252-
for url_meta, cluster in pattern_maker.make():
253-
for pattern in formatter.format(url_meta, cluster):
252+
for cluster in pattern_maker.make():
253+
for pattern in formatter.format(cluster):
254254
print(pattern)
255255
256256

src/os_urlpattern/cmdline.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -120,11 +120,11 @@ def _load(self, pattern_maker, args):
120120
def _process(self, pattern_maker, args):
121121
formatter = FORMATTERS[args.formatter]()
122122
s = time.time()
123-
for url_meta, pattern_tree in pattern_maker.make(combine=args.formatter == 'ETE'):
123+
for pattern_tree in pattern_maker.make(combine=args.formatter == 'ETE'):
124124
e = time.time()
125125
self._logger.debug('[CLUSTER] %d %.2fs',
126126
pattern_tree.root.count, e - s)
127-
for record in formatter.format(url_meta, pattern_tree):
127+
for record in formatter.format(pattern_tree):
128128
print(record)
129129
s = time.time()
130130

src/os_urlpattern/formatter.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,20 +8,21 @@
88

99
class Formatter(object):
1010

11-
def format(self, url_meta, tree, **kwargs):
11+
def format(self, tree, **kwargs):
1212
pass
1313

1414

1515
class PatternFormatter(Formatter):
16-
def format(self, url_meta, clusterd_tree, **kwargs):
16+
def format(self, clusterd_tree, **kwargs):
17+
url_meta = clusterd_tree.url_meta
1718
for node_path in clusterd_tree.dump_paths():
1819
yield pack(url_meta, [p.pattern for p in node_path[1:]])
1920
break
2021

2122

2223
class ClusterFormatter(PatternFormatter):
23-
def format(self, url_meta, clusterd_tree, **kwargs):
24-
for r in super(ClusterFormatter, self).format(url_meta, clusterd_tree, **kwargs):
24+
def format(self, clusterd_tree, **kwargs):
25+
for r in super(ClusterFormatter, self).format(clusterd_tree, **kwargs):
2526
yield r
2627

2728
for node_path in clusterd_tree.dump_paths():
@@ -30,15 +31,17 @@ def format(self, url_meta, clusterd_tree, **kwargs):
3031

3132

3233
class JsonFormatter(Formatter):
33-
def format(self, url_meta, clusterd_tree, **kwargs):
34+
def format(self, clusterd_tree, **kwargs):
35+
url_meta = clusterd_tree.url_meta
3436
for node_path in clusterd_tree.dump_paths():
3537
p = pack(url_meta, [p.pattern for p in node_path[1:]])
3638
yield json.dumps({'ptn': p, 'cnt': clusterd_tree.count})
3739
break
3840

3941

4042
class ETEFormatter(Formatter):
41-
def format(self, url_meta, pattern_tree, **kwargs):
43+
def format(self, pattern_tree, **kwargs):
44+
url_meta = pattern_tree.url_meta
4245

4346
def f(pattern_node):
4447
sep = Symbols.EMPTY

src/os_urlpattern/pattern_cluster.py

Lines changed: 23 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -110,9 +110,9 @@ def set_pattern(self, pattern):
110110

111111

112112
class ViewPieceBagBucket(PieceBagBucket):
113-
def __init__(self):
113+
def __init__(self, url_meta):
114114
super(ViewPieceBagBucket, self).__init__()
115-
self._tree = PiecePatternTree()
115+
self._tree = PiecePatternTree(url_meta)
116116

117117
def add(self, view_piece_bag, build_tree=True):
118118
piece_bag = view_piece_bag.piece_bag
@@ -122,21 +122,20 @@ def add(self, view_piece_bag, build_tree=True):
122122
if not build_tree:
123123
return
124124
view = view_piece_bag.view
125+
125126
self._tree.add_from_parsed_pieces(
126127
view.parsed_pieces,
127128
count=piece_bag.count,
128129
uniq=False)
129130

130131
def cluster(self, config, **kwargs):
131-
p_num = len(self.pick().view.parsed_pieces)
132-
url_meta = URLMeta(p_num, [], False)
133-
for single_tree in cluster(config, url_meta, self._tree, **kwargs):
134-
yield self._transfer(single_tree)
132+
for clustered_tree in cluster(config, self._tree, **kwargs):
133+
yield self._transfer(clustered_tree)
135134

136-
def _transfer(self, single_tree):
135+
def _transfer(self, clusterted_tree):
137136
pattern = None
138-
bucket = ViewPieceBagBucket()
139-
for path in single_tree.dump_paths():
137+
bucket = ViewPieceBagBucket(self._tree.url_meta)
138+
for path in clusterted_tree.dump_paths():
140139
piece = u''.join([p.piece for p in path[1:]])
141140
view_piece_bag = self[piece]
142141
bucket.add(view_piece_bag, False)
@@ -252,7 +251,7 @@ def _add_to_forward_cluster(self, piece_bag):
252251

253252
if vl == 3 and self._processor.meta_info.is_last_path():
254253
ldsf_view = LastDotSplitFuzzyView(parsed_piece)
255-
if view.view == ldsf_view.view:
254+
if view == ldsf_view:
256255
view = ldsf_view
257256
p_cls = LastDotSplitFuzzyPatternCluster
258257
elif vl > 3:
@@ -265,7 +264,7 @@ def _add_to_forward_cluster(self, piece_bag):
265264
elif vl - mvl >= self._min_cluster_num:
266265
if mvl == 3 and self._processor.meta_info.is_last_path():
267266
ldsf_view = LastDotSplitFuzzyView(parsed_piece)
268-
if mixed_view.view == ldsf_view.view:
267+
if mixed_view == ldsf_view:
269268
view = ldsf_view
270269
p_cls = LastDotSplitFuzzyPatternCluster
271270
else:
@@ -376,9 +375,9 @@ def _set_pattern(self, bucket, pattern):
376375

377376
def add(self, view_piece_bag):
378377
view = view_piece_bag.view
379-
view = view.view
380378
if view not in self._buckets:
381-
self._buckets[view] = ViewPieceBagBucket()
379+
url_meta = URLMeta(len(view.parsed_pieces), [], False)
380+
self._buckets[view] = ViewPieceBagBucket(url_meta)
382381
self._buckets[view].add(view_piece_bag)
383382

384383

@@ -394,7 +393,7 @@ def _add_to_forward_cluster(self, view_piece_bag):
394393

395394
p_cls = MixedPatternCluster
396395

397-
if view.view == mixed_view.view:
396+
if view == mixed_view:
398397
if self._processor.meta_info.is_last_path():
399398
ldsf_view = LastDotSplitFuzzyView(parsed_piece)
400399
if len(ldsf_view.parsed_pieces) == 1:
@@ -416,7 +415,7 @@ def _add_to_forward_cluster(self, view_piece_bag):
416415
return
417416
elif mvl == 3 and self._processor.meta_info.is_last_path():
418417
ldsf_view = LastDotSplitFuzzyView(parsed_piece)
419-
if mixed_view.view == ldsf_view.view:
418+
if mixed_view == ldsf_view:
420419
view = ldsf_view
421420
p_cls = LastDotSplitFuzzyPatternCluster
422421

@@ -635,12 +634,13 @@ def _create_next_level_processors(self):
635634
processor.add(node, add_children=True)
636635

637636

638-
def split_by_pattern(url_meta, piece_pattern_tree):
637+
def split_by_pattern(piece_pattern_tree):
638+
url_meta = piece_pattern_tree.url_meta
639639
trees = {}
640640
for path in piece_pattern_tree.dump_paths():
641641
pid = digest(url_meta, [p.pattern for p in path[1:]])
642642
if pid not in trees:
643-
trees[pid] = PiecePatternTree()
643+
trees[pid] = PiecePatternTree(url_meta)
644644
tree = trees[pid]
645645
tree.add_from_piece_pattern_node_path(path[1:])
646646

@@ -662,18 +662,19 @@ def _can_be_splited(processor):
662662
return False
663663

664664

665-
def process(config, url_meta, piece_pattern_tree, **kwargs):
665+
def process(config, piece_pattern_tree, **kwargs):
666+
url_meta = piece_pattern_tree.url_meta
666667
meta_info = MetaInfo(url_meta, 0)
667668
processor = ClusterProcessor(config, meta_info, None, **kwargs)
668669
processor.add(piece_pattern_tree.root)
669670
processor.process()
670671
return _can_be_splited(processor)
671672

672673

673-
def cluster(config, url_meta, piece_pattern_tree, **kwargs):
674-
if not process(config, url_meta, piece_pattern_tree, **kwargs):
674+
def cluster(config, piece_pattern_tree, **kwargs):
675+
if not process(config, piece_pattern_tree, **kwargs):
675676
yield piece_pattern_tree
676677
return
677-
for sub_piece_pattern_tree in split_by_pattern(url_meta, piece_pattern_tree):
678-
for tree in cluster(config, url_meta, sub_piece_pattern_tree, **kwargs):
678+
for sub_piece_pattern_tree in split_by_pattern(piece_pattern_tree):
679+
for tree in cluster(config, sub_piece_pattern_tree, **kwargs):
679680
yield tree

src/os_urlpattern/pattern_maker.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ class Maker(object):
3232
def __init__(self, config, url_meta):
3333
self._config = config
3434
self._url_meta = url_meta
35-
self._piece_pattern_tree = PiecePatternTree()
35+
self._piece_pattern_tree = PiecePatternTree(url_meta)
3636

3737
def load(self, parsed_pieces, count=1, uniq=True, data=None):
3838
return self._piece_pattern_tree.add_from_parsed_pieces(
@@ -44,16 +44,15 @@ def _path_dump_and_load(self, src, dest, index=0):
4444
dest.load_path(path[index:])
4545

4646
def cluster(self):
47-
for clusterd_tree in cluster(self._config, self._url_meta,
48-
self._piece_pattern_tree):
47+
for clusterd_tree in cluster(self._config, self._piece_pattern_tree):
4948
yield clusterd_tree
5049

5150
def make(self, combine):
5251
if combine:
5352
pattern_tree = PatternTree(self._url_meta)
5453
for clustered_tree in self.cluster():
5554
self._path_dump_and_load(clustered_tree, pattern_tree, 1)
56-
yield self._url_meta, pattern_tree
55+
yield pattern_tree
5756
else:
5857
for clustered_tree in self.cluster():
59-
yield self._url_meta, clustered_tree
58+
yield clustered_tree

src/os_urlpattern/piece_pattern_tree.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -119,8 +119,13 @@ def extra_data(self):
119119

120120

121121
class PiecePatternTree(object):
122-
def __init__(self):
122+
def __init__(self, url_meta):
123123
self._root = PiecePatternNode(EMPTY_PARSED_PIECE)
124+
self._url_meta = url_meta
125+
126+
@property
127+
def url_meta(self):
128+
return self._url_meta
124129

125130
@property
126131
def root(self):

0 commit comments

Comments
 (0)