@@ -194,6 +194,17 @@ public static class SelectorMapper
194194 private JexlScript expr = null ;
195195 private ErrorTracker errorTracker ;
196196
197+ // Cached counter references for performance
198+ private Counter urlFiltersRejectedCounter ;
199+ private Counter scheduleRejectedCounter ;
200+ private Counter waitForUpdateCounter ;
201+ private Counter exprRejectedCounter ;
202+ private Counter statusRejectedCounter ;
203+ private Counter scoreTooLowCounter ;
204+ private Counter intervalRejectedCounter ;
205+ private Counter hostsAffectedPerHostOverflowCounter ;
206+ private Counter urlsSkippedPerHostOverflowCounter ;
207+
197208 @ Override
198209 public void setup (
199210 Mapper <Text , CrawlDatum , FloatWritable , SelectorEntry >.Context context )
@@ -219,6 +230,32 @@ public void setup(
219230 expr = JexlUtil .parseExpression (conf .get (GENERATOR_EXPR , null ));
220231 // Initialize error tracker with cached counters
221232 errorTracker = new ErrorTracker (NutchMetrics .GROUP_GENERATOR , context );
233+ // Initialize cached counter references
234+ initCounters (context );
235+ }
236+
237+ /**
238+ * Initialize cached counter references to avoid repeated lookups in hot paths.
239+ */
240+ private void initCounters (Context context ) {
241+ urlFiltersRejectedCounter = context .getCounter (
242+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_URL_FILTERS_REJECTED_TOTAL );
243+ scheduleRejectedCounter = context .getCounter (
244+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_SCHEDULE_REJECTED_TOTAL );
245+ waitForUpdateCounter = context .getCounter (
246+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_WAIT_FOR_UPDATE_TOTAL );
247+ exprRejectedCounter = context .getCounter (
248+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_EXPR_REJECTED_TOTAL );
249+ statusRejectedCounter = context .getCounter (
250+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_STATUS_REJECTED_TOTAL );
251+ scoreTooLowCounter = context .getCounter (
252+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_SCORE_TOO_LOW_TOTAL );
253+ intervalRejectedCounter = context .getCounter (
254+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_INTERVAL_REJECTED_TOTAL );
255+ hostsAffectedPerHostOverflowCounter = context .getCounter (
256+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_HOSTS_AFFECTED_PER_HOST_OVERFLOW_TOTAL );
257+ urlsSkippedPerHostOverflowCounter = context .getCounter (
258+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_URLS_SKIPPED_PER_HOST_OVERFLOW_TOTAL );
222259 }
223260
224261 @ Override
@@ -230,8 +267,7 @@ public void map(Text key, CrawlDatum value, Context context)
230267 // URLFilters
231268 try {
232269 if (filters .filter (url .toString ()) == null ) {
233- context .getCounter (NutchMetrics .GROUP_GENERATOR ,
234- NutchMetrics .GENERATOR_URL_FILTERS_REJECTED_TOTAL ).increment (1 );
270+ urlFiltersRejectedCounter .increment (1 );
235271 return ;
236272 }
237273 } catch (URLFilterException e ) {
@@ -245,8 +281,7 @@ public void map(Text key, CrawlDatum value, Context context)
245281 if (!schedule .shouldFetch (url , crawlDatum , curTime )) {
246282 LOG .debug ("-shouldFetch rejected '{}', fetchTime={}, curTime={}" , url ,
247283 crawlDatum .getFetchTime (), curTime );
248- context .getCounter (NutchMetrics .GROUP_GENERATOR ,
249- NutchMetrics .GENERATOR_SCHEDULE_REJECTED_TOTAL ).increment (1 );
284+ scheduleRejectedCounter .increment (1 );
250285 return ;
251286 }
252287
@@ -255,8 +290,7 @@ public void map(Text key, CrawlDatum value, Context context)
255290 if (oldGenTime != null ) { // awaiting fetch & update
256291 if (oldGenTime .get () + genDelay > curTime ) { // still wait for
257292 // update
258- context .getCounter (NutchMetrics .GROUP_GENERATOR ,
259- NutchMetrics .GENERATOR_WAIT_FOR_UPDATE_TOTAL ).increment (1 );
293+ waitForUpdateCounter .increment (1 );
260294 return ;
261295 }
262296 }
@@ -271,31 +305,27 @@ public void map(Text key, CrawlDatum value, Context context)
271305 // check expr
272306 if (expr != null ) {
273307 if (!crawlDatum .execute (expr , key .toString ())) {
274- context .getCounter (NutchMetrics .GROUP_GENERATOR ,
275- NutchMetrics .GENERATOR_EXPR_REJECTED_TOTAL ).increment (1 );
308+ exprRejectedCounter .increment (1 );
276309 return ;
277310 }
278311 }
279312
280313 if (restrictStatus != -1 && restrictStatus != crawlDatum .getStatus ()) {
281- context .getCounter (NutchMetrics .GROUP_GENERATOR ,
282- NutchMetrics .GENERATOR_STATUS_REJECTED_TOTAL ).increment (1 );
314+ statusRejectedCounter .increment (1 );
283315 return ;
284316 }
285317
286318 // consider only entries with a score superior to the threshold
287319 if (!Float .isNaN (scoreThreshold ) && sort < scoreThreshold ) {
288- context .getCounter (NutchMetrics .GROUP_GENERATOR ,
289- NutchMetrics .GENERATOR_SCORE_TOO_LOW_TOTAL ).increment (1 );
320+ scoreTooLowCounter .increment (1 );
290321 return ;
291322 }
292323
293324 // consider only entries with a retry (or fetch) interval lower than
294325 // threshold
295326 if (intervalThreshold != -1
296327 && crawlDatum .getFetchInterval () > intervalThreshold ) {
297- context .getCounter (NutchMetrics .GROUP_GENERATOR ,
298- NutchMetrics .GENERATOR_INTERVAL_REJECTED_TOTAL ).increment (1 );
328+ intervalRejectedCounter .increment (1 );
299329 return ;
300330 }
301331
@@ -332,6 +362,10 @@ public static class SelectorReducer extends
332362 private Map <String , HostDatum > hostDatumCache = new HashMap <>();
333363 private ErrorTracker errorTracker ;
334364
365+ // Cached counter references for performance
366+ private Counter hostsAffectedPerHostOverflowCounter ;
367+ private Counter urlsSkippedPerHostOverflowCounter ;
368+
335369 public void readHostDb () throws IOException {
336370 if (conf .get (GENERATOR_HOSTDB ) == null ) {
337371 return ;
@@ -426,10 +460,22 @@ public void setup(Context context) throws IOException {
426460 }
427461 // Initialize error tracker with cached counters
428462 errorTracker = new ErrorTracker (NutchMetrics .GROUP_GENERATOR , context );
463+ // Initialize cached counter references
464+ initReducerCounters (context );
429465
430466 readHostDb ();
431467 }
432468
469+ /**
470+ * Initialize cached counter references to avoid repeated lookups in hot paths.
471+ */
472+ private void initReducerCounters (Context context ) {
473+ hostsAffectedPerHostOverflowCounter = context .getCounter (
474+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_HOSTS_AFFECTED_PER_HOST_OVERFLOW_TOTAL );
475+ urlsSkippedPerHostOverflowCounter = context .getCounter (
476+ NutchMetrics .GROUP_GENERATOR , NutchMetrics .GENERATOR_URLS_SKIPPED_PER_HOST_OVERFLOW_TOTAL );
477+ }
478+
433479 @ Override
434480 public void cleanup (Context context )
435481 throws IOException , InterruptedException {
@@ -555,15 +601,13 @@ public void reduce(FloatWritable key, Iterable<SelectorEntry> values,
555601 hostCount [1 ] = 1 ;
556602 } else {
557603 if (hostCount [1 ] == (maxCount +1 )) {
558- context .getCounter (NutchMetrics .GROUP_GENERATOR ,
559- NutchMetrics .GENERATOR_HOSTS_AFFECTED_PER_HOST_OVERFLOW_TOTAL ).increment (1 );
604+ hostsAffectedPerHostOverflowCounter .increment (1 );
560605 LOG .info (
561606 "Host or domain {} has more than {} URLs for all {} segments. Additional URLs won't be included in the fetchlist." ,
562607 hostordomain , maxCount , maxNumSegments );
563608 }
564609 // skip this entry
565- context .getCounter (NutchMetrics .GROUP_GENERATOR ,
566- NutchMetrics .GENERATOR_URLS_SKIPPED_PER_HOST_OVERFLOW_TOTAL ).increment (1 );
610+ urlsSkippedPerHostOverflowCounter .increment (1 );
567611 continue ;
568612 }
569613 }
0 commit comments