@@ -110,9 +110,9 @@ def set_pattern(self, pattern):
110110
111111
112112class ViewPieceBagBucket (PieceBagBucket ):
def __init__(self, url_meta):
    """Set up the bucket together with its piece-pattern tree.

    Args:
        url_meta: Passed unchanged to ``PiecePatternTree``; the tree is
            expected to expose it again as ``url_meta`` (``_transfer``
            reads ``self._tree.url_meta``).
    """
    super(ViewPieceBagBucket, self).__init__()
    # The tree owns url_meta, so callers no longer have to pass it
    # around separately when clustering.
    tree = PiecePatternTree(url_meta)
    self._tree = tree
116116
117117 def add (self , view_piece_bag , build_tree = True ):
118118 piece_bag = view_piece_bag .piece_bag
@@ -122,21 +122,20 @@ def add(self, view_piece_bag, build_tree=True):
122122 if not build_tree :
123123 return
124124 view = view_piece_bag .view
125+
125126 self ._tree .add_from_parsed_pieces (
126127 view .parsed_pieces ,
127128 count = piece_bag .count ,
128129 uniq = False )
129130
def cluster(self, config, **kwargs):
    """Cluster this bucket's tree and yield one converted result per tree.

    Args:
        config: Forwarded to the module-level ``cluster`` function.
        **kwargs: Forwarded unchanged to the module-level ``cluster``.

    Yields:
        Whatever ``self._transfer`` returns for each tree produced by the
        module-level ``cluster`` call.
    """
    # Inside a method body the bare name ``cluster`` resolves to the
    # module-level function, not to this method.
    clustered_trees = cluster(config, self._tree, **kwargs)
    for clustered_tree in clustered_trees:
        yield self._transfer(clustered_tree)
135134
136- def _transfer (self , single_tree ):
135+ def _transfer (self , clusterted_tree ):
137136 pattern = None
138- bucket = ViewPieceBagBucket ()
139- for path in single_tree .dump_paths ():
137+ bucket = ViewPieceBagBucket (self . _tree . url_meta )
138+ for path in clusterted_tree .dump_paths ():
140139 piece = u'' .join ([p .piece for p in path [1 :]])
141140 view_piece_bag = self [piece ]
142141 bucket .add (view_piece_bag , False )
@@ -252,7 +251,7 @@ def _add_to_forward_cluster(self, piece_bag):
252251
253252 if vl == 3 and self ._processor .meta_info .is_last_path ():
254253 ldsf_view = LastDotSplitFuzzyView (parsed_piece )
255- if view . view == ldsf_view . view :
254+ if view == ldsf_view :
256255 view = ldsf_view
257256 p_cls = LastDotSplitFuzzyPatternCluster
258257 elif vl > 3 :
@@ -265,7 +264,7 @@ def _add_to_forward_cluster(self, piece_bag):
265264 elif vl - mvl >= self ._min_cluster_num :
266265 if mvl == 3 and self ._processor .meta_info .is_last_path ():
267266 ldsf_view = LastDotSplitFuzzyView (parsed_piece )
268- if mixed_view . view == ldsf_view . view :
267+ if mixed_view == ldsf_view :
269268 view = ldsf_view
270269 p_cls = LastDotSplitFuzzyPatternCluster
271270 else :
@@ -376,9 +375,9 @@ def _set_pattern(self, bucket, pattern):
376375
def add(self, view_piece_bag):
    """Route ``view_piece_bag`` into the bucket keyed by its view.

    A bucket is created on first sight of a view; its ``URLMeta`` is built
    from the number of parsed pieces in that view.

    Args:
        view_piece_bag: Object exposing a ``view`` attribute; the view
            must be usable as a key of ``self._buckets`` and expose
            ``parsed_pieces``.
    """
    view = view_piece_bag.view
    buckets = self._buckets
    if view not in buckets:
        piece_count = len(view.parsed_pieces)
        # NOTE(review): the meaning of the ``[]`` and ``False`` arguments
        # is defined by URLMeta, which is not visible here.
        buckets[view] = ViewPieceBagBucket(URLMeta(piece_count, [], False))
    buckets[view].add(view_piece_bag)
383382
384383
@@ -394,7 +393,7 @@ def _add_to_forward_cluster(self, view_piece_bag):
394393
395394 p_cls = MixedPatternCluster
396395
397- if view . view == mixed_view . view :
396+ if view == mixed_view :
398397 if self ._processor .meta_info .is_last_path ():
399398 ldsf_view = LastDotSplitFuzzyView (parsed_piece )
400399 if len (ldsf_view .parsed_pieces ) == 1 :
@@ -416,7 +415,7 @@ def _add_to_forward_cluster(self, view_piece_bag):
416415 return
417416 elif mvl == 3 and self ._processor .meta_info .is_last_path ():
418417 ldsf_view = LastDotSplitFuzzyView (parsed_piece )
419- if mixed_view . view == ldsf_view . view :
418+ if mixed_view == ldsf_view :
420419 view = ldsf_view
421420 p_cls = LastDotSplitFuzzyPatternCluster
422421
@@ -635,12 +634,13 @@ def _create_next_level_processors(self):
635634 processor .add (node , add_children = True )
636635
637636
638- def split_by_pattern (url_meta , piece_pattern_tree ):
637+ def split_by_pattern (piece_pattern_tree ):
638+ url_meta = piece_pattern_tree .url_meta
639639 trees = {}
640640 for path in piece_pattern_tree .dump_paths ():
641641 pid = digest (url_meta , [p .pattern for p in path [1 :]])
642642 if pid not in trees :
643- trees [pid ] = PiecePatternTree ()
643+ trees [pid ] = PiecePatternTree (url_meta )
644644 tree = trees [pid ]
645645 tree .add_from_piece_pattern_node_path (path [1 :])
646646
@@ -662,18 +662,19 @@ def _can_be_splited(processor):
662662 return False
663663
664664
def process(config, piece_pattern_tree, **kwargs):
    """Run a ``ClusterProcessor`` over the tree, starting at its root.

    Args:
        config: Forwarded to ``ClusterProcessor``.
        piece_pattern_tree: Tree exposing ``url_meta`` and ``root``.
        **kwargs: Forwarded unchanged to ``ClusterProcessor``.

    Returns:
        The result of ``_can_be_splited`` for the processor after it ran.
    """
    # NOTE(review): the ``0`` and ``None`` arguments are defined by
    # MetaInfo / ClusterProcessor, which are not visible here.
    meta_info = MetaInfo(piece_pattern_tree.url_meta, 0)
    processor = ClusterProcessor(config, meta_info, None, **kwargs)
    processor.add(piece_pattern_tree.root)
    processor.process()
    return _can_be_splited(processor)
671672
672673
def cluster(config, piece_pattern_tree, **kwargs):
    """Recursively split a piece-pattern tree until ``process`` says stop.

    Args:
        config: Forwarded to ``process`` and to the recursive calls.
        piece_pattern_tree: The tree to cluster.
        **kwargs: Forwarded unchanged to ``process`` and the recursion.

    Yields:
        ``piece_pattern_tree`` itself when ``process`` returns a falsy
        value; otherwise every tree yielded by clustering each sub-tree
        from ``split_by_pattern``.
    """
    if process(config, piece_pattern_tree, **kwargs):
        for sub_tree in split_by_pattern(piece_pattern_tree):
            # Explicit loop instead of ``yield from`` keeps the function
            # usable on the Python 2 interpreters this file still targets
            # (see the ``u''`` literal elsewhere in the file).
            for tree in cluster(config, sub_tree, **kwargs):
                yield tree
    else:
        yield piece_pattern_tree
0 commit comments