Skip to content

Commit 5cfc5e9

Browse files
committed
fix: add crawl parameter in the queries, remove useless concatenation
1 parent ce9547c commit 5cfc5e9

1 file changed

Lines changed: 4 additions & 3 deletions

File tree

  • src/main/java/org/commoncrawl/whirlwind

src/main/java/org/commoncrawl/whirlwind/Duck.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -263,15 +263,16 @@ public static void run(Algorithm algo, String crawl, String localPrefix)
263 263

264 264
// Count total records
265 265
out.printf("Total records for crawl: %s%n", crawl);
266-
try (ResultSet rs = executeWithRetry(stmt, "SELECT COUNT(*) as cnt FROM ccindex")) {
266+
try (ResultSet rs = executeWithRetry(stmt,
267+
"SELECT COUNT(*) as cnt FROM ccindex " + "WHERE subset = 'warc' AND crawl = '"+crawl+"'")) {
267 268
if (rs.next()) {
268 269
out.println(rs.getLong("cnt"));
269 270
}
270 271
}
271 272

272 273
// Query for our specific row
273-
String selectQuery = "" + "SELECT * FROM ccindex WHERE subset = 'warc' " + "AND crawl = 'CC-MAIN-2024-22' "
274-
+ "AND url_host_tld = 'org' " + "AND url_host_registered_domain = 'wikipedia.org' "
274+
String selectQuery = "SELECT * FROM ccindex WHERE subset = 'warc' AND crawl = '" + crawl + "' "
275+
+ "AND url_host_tld = 'org' AND url_host_registered_domain = 'wikipedia.org' "
275 276
+ "AND url = 'https://an.wikipedia.org/wiki/Escopete'";
276 277

277 278
out.println("Our one row:");

0 commit comments

Comments
 (0)