Skip to content

Commit fbf8147

Browse files
committed
fix(makefile): avoid reimplementing the wheel
1 parent 87c6ca9 commit fbf8147

1 file changed

Lines changed: 8 additions & 16 deletions

File tree

Makefile

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -52,20 +52,12 @@ CC-MAIN-2024-22.warc.paths.gz:
5252
# @echo "warning! this might take 1-10 minutes"
5353
# python duck.py cloudfront
5454
#
55-
ensure_jwarc:
56-
@echo "Ensuring JWarc JAR is present"
57-
@if [ ! -f jwarc.jar ] ; then \
58-
echo "jwarc.jar not found, downloading..." ; \
59-
curl -fL -o jwarc.jar https://github.com/iipc/jwarc/releases/download/v0.33.0/jwarc-0.33.0.jar ; \
60-
else \
61-
echo "jwarc.jar found." ; \
62-
fi
6355

64-
get_jwarc:
56+
jwarc.jar:
6557
@echo "downloading JWarc JAR"
6658
curl -fL -o jwarc.jar https://github.com/iipc/jwarc/releases/download/v0.33.0/jwarc-0.33.0.jar
6759

68-
wreck_the_warc: build ensure_jwarc
60+
wreck_the_warc: build jwarc.jar
6961
@echo
7062
@echo we will break and then fix this warc
7163
cp data/whirlwind.warc.gz data/testing.warc.gz
@@ -76,24 +68,24 @@ wreck_the_warc: build ensure_jwarc
7668
gzip data/testing.warc
7769
@echo
7870
@echo showing the records in the compressed warc - note the offsets of request and response are
79-
java -jar jwarc-0.33.0.jar ls data/testing.warc.gz
71+
java -jar jwarc.jar ls data/testing.warc.gz
8072
@echo
8173
@echo access the request record - failing
82-
java -jar jwarc-0.33.0.jar extract data/testing.warc.gz 3734 || /usr/bin/true
74+
java -jar jwarc.jar extract data/testing.warc.gz 3734 || /usr/bin/true
8375
@echo
8476
@echo access the response record - failing
85-
java -jar jwarc-0.33.0.jar extract data/testing.warc.gz 3734 || /usr/bin/true
77+
java -jar jwarc.jar extract data/testing.warc.gz 3734 || /usr/bin/true
8678
@echo
8779
@echo "now let's do it the right way"
8880
gzip -d data/testing.warc.gz
8981
mvn -q exec:java -Dexec.mainClass=org.commoncrawl.whirlwind.RecompressWARC -Dexec.args="data/testing.warc data/testing.warc.gz"
9082
@echo
9183
@echo showing the records in the compressed warc - note the skewed offsets of request and response
92-
java -jar jwarc-0.33.0.jar ls data/testing.warc.gz
84+
java -jar jwarc.jar ls data/testing.warc.gz
9385
@echo
9486
@echo access the request record - works
95-
java -jar jwarc-0.33.0.jar extract data/testing.warc.gz 518 | head
87+
java -jar jwarc.jar extract data/testing.warc.gz 518 | head
9688
@echo
9789
@echo access the response record - works
98-
java -jar jwarc-0.33.0.jar extract data/testing.warc.gz 1027 | head -n 20
90+
java -jar jwarc.jar extract data/testing.warc.gz 1027 | head -n 20
9991
@echo

0 commit comments

Comments
 (0)