From 15ce37d0944eb08e00576fc2cdc06b2e9f713f9c Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 7 Jan 2026 23:05:54 +0100 Subject: [PATCH] NUTCH-3144 URLUtil unit tests fail after upgrade to crawler-commons 1.6 - adapt unit tests to changes introduced in https://github.com/crawler-commons/crawler-commons/pull/478 - test for example given in Javadoc of getDomainSuffix --- src/java/org/apache/nutch/util/URLUtil.java | 2 +- src/test/org/apache/nutch/util/TestURLUtil.java | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/nutch/util/URLUtil.java b/src/java/org/apache/nutch/util/URLUtil.java index afd6f13857..44c6309d2a 100644 --- a/src/java/org/apache/nutch/util/URLUtil.java +++ b/src/java/org/apache/nutch/util/URLUtil.java @@ -262,7 +262,7 @@ public static String getDomainSuffix(URL url) { EffectiveTldFinder.EffectiveTLD suffix = EffectiveTldFinder.getEffectiveTLD(host, true); if (suffix != null) { - return suffix.getDomain(); + return suffix.getSuffix(); } return null; diff --git a/src/test/org/apache/nutch/util/TestURLUtil.java b/src/test/org/apache/nutch/util/TestURLUtil.java index 32dda0929d..092edb9c18 100644 --- a/src/test/org/apache/nutch/util/TestURLUtil.java +++ b/src/test/org/apache/nutch/util/TestURLUtil.java @@ -147,6 +147,8 @@ public void testGetDomainSuffix() throws Exception { assertEquals("2000.hu", URLUtil.getDomainSuffix(url)); // test non-ASCII + url = new URL("https://www.taiuru.māori.nz/"); + assertEquals("xn--mori-qsa.nz", URLUtil.getDomainSuffix(url)); url = new URL("http://www.example.flå.no"); assertEquals("xn--fl-zia.no", URLUtil.getDomainSuffix(url)); url = new URL("http://www.example.栃木.jp");