1+ """Command line tools.
2+
3+ pattern-make:
4+ Load URLs, cluster them and generate URL patterns.
5+
6+ pattern-matcher:
7+ Load patterns, match URLs against them and get the matched results.
8+
9+ """
110from __future__ import print_function
211
312import argparse
4- import logging
13+ import logging . config
514import sys
615import time
716from collections import Counter
8- from logging .config import dictConfig
917
1018from .compat import binary_stdin , binary_stdout
1119from .config import get_default_config
1523from .formatter import FORMATTERS
1624from .pattern_maker import PatternMaker
1725from .pattern_matcher import PatternMatcher
18- from .utils import LogSpeedAdapter , pretty_counter , MemoryUsageFormatter
26+ from .utils import LogSpeedAdapter , MemoryUsageFormatter , pretty_counter
27+
28+ _DEFAULT_LOGGING = {
29+ 'version' : 1 ,
30+ 'disable_existing_loggers' : True ,
31+ 'incremental' : True ,
32+ }
1933
2034
2135def _config_logging (log_level ):
22- dictConfig (_DEFAULT_LOGGING )
36+ logging . config . dictConfig (_DEFAULT_LOGGING )
2337 if log_level == 'NOTSET' :
2438 handler = logging .NullHandler ()
2539 else :
@@ -89,6 +103,7 @@ def add_argument(self, parser):
89103 type = lambda s : s .upper ())
90104
91105 def _load (self , pattern_maker , args ):
106+ load_url = args .formatter == 'CLUSTER'
92107 stats = Counter ()
93108 speed_logger = LogSpeedAdapter (self ._logger , 5000 )
94109 for url in args .file [0 ]:
@@ -100,7 +115,8 @@ def _load(self, pattern_maker, args):
100115 speed_logger .debug ('[LOADING]' )
101116 try :
102117 url = url .decode (DEFAULT_ENCODING )
103- _ , is_new = pattern_maker .load (url )
118+ _ , is_new = pattern_maker .load (
119+ url , meta = url if load_url else None )
104120 if is_new :
105121 stats ['UNIQ' ] += 1
106122 stats ['VALID' ] += 1
@@ -119,20 +135,18 @@ def _load(self, pattern_maker, args):
119135 self ._logger .debug ('[LOADED] %s' , pretty_counter (stats ))
120136
121137 def _process (self , pattern_maker , args ):
138+ combine = args .formatter == 'ETE'
122139 formatter = FORMATTERS [args .formatter ]()
123140 s = time .time ()
124- combine = args .formatter == 'ETE'
125141 for maker in pattern_maker .makers :
126- for url_meta , root in maker .make (combine ):
142+ for root in maker .make (combine ):
127143 e = time .time ()
128144 self ._logger .debug ('[CLUSTER] %d %.2fs' , root .count , e - s )
129- for record in formatter .format (url_meta , root ):
145+ for record in formatter .format (maker . url_meta , root ):
130146 print (record )
131147 s = time .time ()
132148
133149 def _confirm_config (self , args ):
134- if args .formatter != 'CLUSTER' :
135- self ._config .set ('make' , 'drop_url' , 'true' )
136150 self ._config .freeze ()
137151
138152 def run (self , args ):
@@ -165,7 +179,7 @@ def add_argument(self, parser):
165179 def _load (self , pattern_matcher , args ):
166180 stats = Counter ()
167181 io_input = args .pattern_file [0 ]
168- self ._logger .debug ('[LOAD] Start %s' , io_input .name )
182+ self ._logger .debug ('[LOAD] Pattern file: %s' , io_input .name )
169183 speed_logger = LogSpeedAdapter (self ._logger , 1000 )
170184 for line in io_input :
171185 speed_logger .debug ('[LOADING]' )
@@ -224,13 +238,6 @@ def run(self, args):
224238 self ._match (pattern_matcher , args )
225239
226240
227- _DEFAULT_LOGGING = {
228- 'version' : 1 ,
229- 'disable_existing_loggers' : True ,
230- 'incremental' : True ,
231- }
232-
233-
234241def _execute (command , argv = None ):
235242 argv = argv or sys .argv
236243 parser = argparse .ArgumentParser ()
0 commit comments