11from collections import Counter , OrderedDict , namedtuple
22
3- from .compat import iteritems , itervalues
4- from .parse_utils import URLMeta , digest , number_rule , wildcard_rule
5- from . parsed_piece_view import ( BaseView , LastDotSplitFuzzyView ,
6- MixedView )
3+ from .compat import itervalues
4+ from .parse_utils import ( EMPTY_PARSED_PIECE , URLMeta , number_rule ,
5+ wildcard_rule )
6+ from . parsed_piece_view import BaseView , LastDotSplitFuzzyView , MixedView
77from .pattern import Pattern
8- from .piece_pattern_tree import PiecePatternNode , PiecePatternTree
9- from .utils import Bag
8+ from .piece_pattern_node import (PiecePatternNode , build_from_parsed_pieces ,
9+ build_from_piece_pattern_nodes )
10+ from .utils import Bag , dump_tree
1011
1112
1213class TBag (Bag ):
@@ -112,7 +113,8 @@ def set_pattern(self, pattern):
112113class ViewPieceBagBucket (PieceBagBucket ):
def __init__(self, url_meta):
    """Create an empty bucket that owns its own piece-pattern tree root.

    Method of ``ViewPieceBagBucket``.

    Args:
        url_meta: Stored as-is on the instance; later handed to the
            module-level ``cluster`` and to buckets created by
            ``_transfer``.
    """
    super(ViewPieceBagBucket, self).__init__()
    # The root is a placeholder node built from the shared empty parsed
    # piece; parsed pieces are attached beneath it by ``add``.
    self._root = PiecePatternNode(EMPTY_PARSED_PIECE)
    self._url_meta = url_meta
116118
117119 def add (self , view_piece_bag , build_tree = True ):
118120 piece_bag = view_piece_bag .piece_bag
@@ -123,24 +125,23 @@ def add(self, view_piece_bag, build_tree=True):
123125 return
124126 view = view_piece_bag .view
125127
126- self ._tree .add_from_parsed_pieces (
127- view .parsed_pieces ,
128- count = piece_bag .count ,
129- uniq = False )
128+ build_from_parsed_pieces (
129+ self ._root , view .parsed_pieces , count = piece_bag .count , uniq = False )
130130
def cluster(self, config, **kwargs):
    """Cluster this bucket's tree and yield one result per clustered root.

    Method of ``ViewPieceBagBucket``. This is a generator: nothing runs
    until it is iterated.

    Args:
        config: Passed through to the module-level ``cluster``.
        **kwargs: Passed through to the module-level ``cluster``.

    Yields:
        Whatever ``self._transfer`` returns for each clustered root, i.e. a
        ``(bucket, pattern)`` tuple.
    """
    clustered_roots = cluster(config, self._url_meta, self._root, **kwargs)
    for clustered_root in clustered_roots:
        yield self._transfer(clustered_root)
134134
def _transfer(self, root):
    """Build a new bucket holding the view piece bags found under ``root``.

    Method of ``ViewPieceBagBucket``.

    Args:
        root: Root node of a clustered tree; it is walked with
            ``dump_tree``.

    Returns:
        A ``(bucket, pattern)`` tuple. ``bucket`` is a fresh
        ``ViewPieceBagBucket`` sharing this bucket's ``url_meta``.
        ``pattern`` is a ``Pattern`` built from the first dumped node
        sequence, or ``None`` when ``dump_tree`` yields nothing.
    """
    new_bucket = ViewPieceBagBucket(self._url_meta)
    pattern = None
    for path in dump_tree(root):
        # The first element is skipped -- presumably the placeholder root
        # node, which carries no piece of its own (confirm in dump_tree).
        piece_nodes = path[1:]
        piece_key = u''.join([n.piece for n in piece_nodes])
        # Second argument False: add without rebuilding the tree.
        new_bucket.add(self[piece_key], False)
        if pattern is not None:
            continue
        # Only the first sequence determines the pattern.
        pattern = Pattern(
            u''.join([str(n.pattern) for n in piece_nodes]))
    return new_bucket, pattern
145146
146147
@@ -599,7 +600,7 @@ def _process(self):
def add(self, node, add_children=False):
    """Add a node, or each of its children, to the piece-pattern cluster.

    Args:
        node: The node to add.
        add_children: When true, every item of ``node.children`` is added
            instead of ``node`` itself.
    """
    target = self.get_cluster(PiecePatternCluster)
    # ``node.children`` is only touched when children are requested.
    members = node.children if add_children else (node,)
    for member in members:
        target.add(member)
@@ -634,17 +635,16 @@ def _create_next_level_processors(self):
634635 processor .add (node , add_children = True )
635636
636637
def split_by_pattern(root):
    """Split the tree under ``root`` into one sub-tree per pattern sequence.

    Every node sequence produced by ``dump_tree(root)`` is grouped by the
    string forms of its nodes' patterns. Each group gets a fresh root node
    and the sequence (minus its first element) is attached under it.

    Args:
        root: Root node of the tree to split.

    Returns:
        An iterator over the roots of the resulting sub-trees (the values
        of the grouping dict, via ``itervalues``).
    """
    tree_roots = {}
    for nodes in dump_tree(root):
        # Key on the joined pattern string itself rather than on its
        # hash(): two different strings can share a hash value, which
        # would silently merge unrelated pattern groups into one sub-tree.
        pattern_key = u"/".join([str(p.pattern) for p in nodes])
        sub_root = tree_roots.get(pattern_key)
        if sub_root is None:
            sub_root = PiecePatternNode(EMPTY_PARSED_PIECE)
            tree_roots[pattern_key] = sub_root
        # The first element is skipped -- presumably the old root, which
        # the new placeholder root replaces (confirm in dump_tree).
        build_from_piece_pattern_nodes(sub_root, nodes[1:])

    return itervalues(tree_roots)
648648
649649
650650def _can_be_splited (processor ):
@@ -662,19 +662,18 @@ def _can_be_splited(processor):
662662 return False
663663
664664
def process(config, url_meta, root, **kwargs):
    """Run a top-level cluster processor over ``root``.

    Args:
        config: Passed to ``ClusterProcessor``.
        url_meta: Wrapped in ``MetaInfo(url_meta, 0)`` for the processor.
        root: Node added to the processor before processing.
        **kwargs: Passed through to ``ClusterProcessor``.

    Returns:
        The result of ``_can_be_splited`` for the processor after it has
        run; ``cluster`` treats a falsy result as "do not split further".
    """
    # Third argument None: this processor is created without a parent
    # (presumably -- confirm against ClusterProcessor's signature).
    top_processor = ClusterProcessor(
        config, MetaInfo(url_meta, 0), None, **kwargs)
    top_processor.add(root)
    top_processor.process()
    return _can_be_splited(top_processor)
672671
673672
def cluster(config, url_meta, root, **kwargs):
    """Recursively split the tree under ``root`` and yield the final roots.

    ``process`` decides whether the tree can be split. If it cannot,
    ``root`` itself is yielded. Otherwise the tree is split with
    ``split_by_pattern`` and each sub-tree is clustered recursively.

    Args:
        config: Passed through to ``process``.
        url_meta: Passed through to ``process``.
        root: Root node of the tree to cluster.
        **kwargs: Passed through to ``process``.

    Yields:
        Root nodes of trees that ``process`` reports as not splittable.
    """
    if process(config, url_meta, root, **kwargs):
        for sub_root in split_by_pattern(root):
            for final_root in cluster(config, url_meta, sub_root, **kwargs):
                yield final_root
    else:
        yield root
0 commit comments