Skip to content

Commit 393e1eb

Browse files
committed
Add extra context to the validation error logging
Also: * ensure it is only printed once * add a couple of extra test cases
1 parent 5dc1bf3 commit 393e1eb

8 files changed

Lines changed: 116 additions & 64 deletions

File tree

src/csv2rdf/csvw.clj

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
(defn annotate-tables [tabular-source metadata-source]
2424
(processing/get-metadata tabular-source metadata-source))
2525

26-
(defn- validate-rows
26+
(defn- rows-are-valid?
2727
"Validates the CSVW schema for the given tabular file, metadata and options.
2828
2929
`tabular-source` and `metadata-source` can be any of the following
@@ -36,20 +36,22 @@
3636
3737
If metadata-source is non-nil then processing will start from the
3838
asscociated metadata document, otherwise it will start from
39-
tabular-source."
39+
tabular-source.
40+
41+
Returns true if validation errors were detected"
4042
[tabular-source metadata-source]
4143
(let [{:keys [tables] :as metadata} (processing/get-metadata tabular-source metadata-source)
4244
table-group-dialect (:dialect metadata)
4345
output-tables (remove properties/suppress-output? tables)
44-
;;ctx (table-group-context mode metadata) ;; TODO this might be useful later when iterating over tables
45-
]
46-
47-
(util/liberal-mapcat (fn [{:keys [url dialect] :as table}]
48-
;;(validated-rows ctx table table-group-dialect)
49-
(let [dialect (or dialect table-group-dialect)]
50-
(csv/annotated-rows url table dialect)))
46+
results (util/liberal-mapcat (fn [{:keys [url dialect] :as table}]
47+
(let [dialect (or dialect table-group-dialect)]
48+
(csv/annotated-rows url table dialect)))
5149

52-
output-tables)))
50+
output-tables)]
51+
(->> results
52+
(mapcat :cells)
53+
(mapcat :errors)
54+
empty?)))
5355

5456
(defn only-validate-schema
5557
"Only validate the data against the schemas in the metadata file, and
@@ -58,14 +60,7 @@
5860
Returns a map with the key `:data-validation-errors?` set to a
5961
boolean indicating whether any schema errors occurred."
6062
[{:keys [tabular-source metadata-source]}]
61-
(let [errors? (atom false)]
62-
(doseq [{:keys [cells] row-number :source-number :as _row} (validate-rows tabular-source metadata-source)
63-
{:keys [errors column-number column] :as _cell} cells
64-
:when (seq errors)]
65-
(reset! errors? true)
66-
(doseq [error errors]
67-
(println (format "Row #%d col #%d (column '%s') has error: " row-number column-number (:name column)) error)))
68-
{:data-validation-errors? @errors?}))
63+
{:data-validation-errors? (rows-are-valid? tabular-source metadata-source)})
6964

7065
(defn csv->rdf
7166
"Runs the CSVW process for the given tabular or metadata data sources

src/csv2rdf/logging.clj

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,11 @@
2929
(warn [_this _msg])
3030
(error [_this _msg]))
3131

32-
(defn memory-logger []
33-
(->MemoryLogger (atom []) (atom [])))
32+
(defn memory-logger
33+
([]
34+
(memory-logger (atom []) (atom [])))
35+
([warnings errors]
36+
(->MemoryLogger warnings errors)))
3437

3538
(def ^:dynamic *logger* (->ForwardingLogger))
3639

src/csv2rdf/tabular/csv.clj

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
(ns csv2rdf.tabular.csv
2-
(:require [clojure.string :as string]
2+
(:require [clojure.string :as str]
3+
[clojure.string :as string]
34
[csv2rdf.metadata.dialect :as dialect]
45
[csv2rdf.tabular.csv.reader :as reader]
56
[csv2rdf.metadata.table :as table]
@@ -29,7 +30,7 @@
2930
(column/from-titles idx titles default-lang))
3031
titles)]
3132
{:comments (mapv :comment comment-rows)
32-
:columns columns}))
33+
:columns columns}))
3334

3435
(defn ^{:tabular-spec "8.10.3"} validate-data-rows
3536
"Validates the data rows in the tabular file and extracts any embedded comments. The row number of any rows
@@ -102,9 +103,13 @@
102103
cell))
103104
parsed-cells)))
104105

105-
(defn parse-row-cells [{:keys [cells] :as row} table {:keys [skipColumns] :as options}]
106+
(defn parse-row-cells
107+
[{:keys [cells source-row-number] :as row}
108+
{:keys [url] :as table}
109+
{:keys [skipColumns]}]
106110
(let [columns (table/columns table)
107-
cell-values (concat (drop skipColumns cells) (repeat "")) ;;extend cells to cover any virtual columns
111+
;;extend cells to cover any virtual columns
112+
cell-values (concat (drop skipColumns cells) (repeat ""))
108113
cell-column-pairs (map vector cell-values columns)
109114
parsed-cells (map-indexed (fn [col-idx [cell column]]
110115
(let [result (cell/parse-cell cell column)
@@ -115,8 +120,16 @@
115120
:column column)))
116121
cell-column-pairs)]
117122
;;log cell errors
118-
(doseq [err (mapcat cell/errors parsed-cells)]
119-
(logging/log-warning err))
123+
(doseq [{:keys [errors column-number column]} parsed-cells
124+
:when (seq errors)]
125+
(doseq [error errors]
126+
(logging/log-warning
127+
(format "Row #%s col #%s (column '%s') in file: %s has error: %s"
128+
source-row-number
129+
column-number
130+
(:name column)
131+
(last (str/split (.toString url) #"/"))
132+
error))))
120133

121134
(assoc row :parsed-cells parsed-cells)))
122135

@@ -130,14 +143,14 @@
130143
(assoc column-value-bindings :_row number :_sourceRow source-row-number)))
131144

132145
(defn get-cell-template-bindings [{:keys [column-number source-column-number column] :as cell}]
133-
{:_name (util/percent-decode (properties/column-name column))
134-
:_column column-number
146+
{:_name (util/percent-decode (properties/column-name column))
147+
:_column column-number
135148
:_sourceColumn source-column-number})
136149

137150
(defn get-cell-urls [bindings table {:keys [column] :as cell}]
138-
(let [property-urls {:aboutUrl (some-> (properties/about-url column) (template-property/resolve-uri-template-property bindings table))
151+
(let [property-urls {:aboutUrl (some-> (properties/about-url column) (template-property/resolve-uri-template-property bindings table))
139152
:propertyUrl (some-> (properties/property-url column) (template-property/resolve-uri-template-property bindings table))
140-
:valueUrl (some-> (properties/value-url column) (template-property/resolve-value-uri-template-property cell column bindings table))}]
153+
:valueUrl (some-> (properties/value-url column) (template-property/resolve-value-uri-template-property cell column bindings table))}]
141154
(util/filter-values some? property-urls)))
142155

143156
(defn annotate-row [{:keys [number source-row-number parsed-cells] :as data-row} table title-column-indexes]
@@ -149,10 +162,10 @@
149162
property-urls (get-cell-urls bindings table cell)]
150163
(merge cell property-urls)))
151164
parsed-cells)]
152-
{:number number
165+
{:number number
153166
:source-number source-row-number
154167
:cells (vec cells)
155-
:titles (get-row-titles title-column-indexes parsed-cells)}))
168+
:titles (get-row-titles title-column-indexes parsed-cells)}))
156169

157170
(defn skip-to-data-rows [rows {:keys [skipRows num-header-rows] :as options}]
158171
(let [row-offset (+ skipRows num-header-rows)]

test/csv2rdf/main_test.clj

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,58 @@
11
(ns csv2rdf.main-test
2-
(:require [csv2rdf.main :as sut]
2+
(:require [csv2rdf.logging :as logging]
3+
[csv2rdf.main :as sut]
34
[clojure.test :as t]))
45

56
;; See issue 47
67
;; Resolving template property URIs with values containing spaces should work
78

8-
(defmacro capture
9-
"Capture return value of body and stdout, and return a hashmap
10-
of :return-value and :stdout."
11-
[body]
12-
`(let [s# (new java.io.StringWriter)]
13-
(binding [*out* s#]
14-
(let [ret# ~body]
15-
{:return-value ret#
16-
:stdout (str s#)}))))
9+
10+
(defn test-validate-data [tabular-file
11+
metadata-file
12+
failures]
13+
(let [is-valid? (empty? failures)
14+
warnings (atom [])
15+
errors (atom [])]
16+
(logging/with-logger
17+
(logging/memory-logger warnings errors)
18+
(t/is (= (sut/inner-main ["-t" tabular-file
19+
"-u" metadata-file
20+
"--validate-data"])
21+
{:data-validation-errors? is-valid?}))
22+
(t/is (= @warnings failures))
23+
(t/is (= @errors [])))))
1724

1825
(t/deftest inner-main-test-validate-data
19-
(t/testing "--validate-data")
20-
(let [{:keys [return-value stdout]}
21-
(capture (sut/inner-main ["-t" "./test/examples/validation/success.csv"
22-
"-u" "./test/examples/validation/named-numbers.json"
23-
"--validate-data"]))]
24-
(t/is (= {:data-validation-errors? false} return-value))
25-
(t/is (= "" stdout)))
26+
(t/testing "--validate-data"
2627

27-
(let [{:keys [return-value stdout]}
28-
(capture (sut/inner-main ["-t" "./test/examples/validation/fail-1.csv"
29-
"-u" "./test/examples/validation/named-numbers.json"
30-
"--validate-data"]))]
31-
(t/is (= {:data-validation-errors? true} return-value))
32-
(t/is (= "Row #3 col #2 (column 'number') has error: Cannot parse 'two' as type 'int': For input string: \"two\"\n"
33-
stdout)))
28+
(test-validate-data
29+
"./test/examples/validation/success.csv"
30+
"./test/examples/validation/named-numbers.json"
31+
[])
32+
(test-validate-data
33+
"./test/examples/validation/fail-1.csv"
34+
"./test/examples/validation/named-numbers.json"
35+
["Row #3 col #2 (column 'number') in file: fail-1.csv has error: Cannot parse 'two' as type 'int': For input string: \"two\""])
36+
(test-validate-data
37+
"./test/examples/validation/fail-2.csv"
38+
"./test/examples/validation/named-numbers.json"
39+
["Row #3 col #2 (column 'number') in file: fail-2.csv has error: Cannot parse 'three' as type 'int': For input string: \"three\""])
40+
(test-validate-data
41+
"./test/examples/validation/fail-3.csv"
42+
"./test/examples/validation/named-numbers.json"
43+
["Row #3 col #2 (column 'number') in file: fail-3.csv has error: Cannot parse 'three' as type 'int': For input string: \"three\""
44+
"Row #4 col #2 (column 'number') in file: fail-3.csv has error: Cannot parse 'four' as type 'int': For input string: \"four\""
45+
"Row #5 col #2 (column 'number') in file: fail-3.csv has error: Cannot parse 'five' as type 'int': For input string: \"five\""])
3446

35-
(let [{:keys [return-value stdout]}
36-
(capture (sut/inner-main ["-t" "./test/examples/validation/fail-2.csv"
37-
"-u" "./test/examples/validation/named-numbers.json"
38-
"--validate-data"]))]
39-
(t/is (= {:data-validation-errors? true} return-value))
40-
(t/is (= "Row #3 col #2 (column 'number') has error: Cannot parse 'three' as type 'int': For input string: \"three\"\n"
41-
stdout))))
47+
(test-validate-data
48+
"./test/examples/validation/fail-4.csv"
49+
"./test/examples/validation/named-numbers.json"
50+
["Row #3 col #2 (column 'number') in file: fail-4.csv has error: Column value required"])
51+
(test-validate-data
52+
"./test/examples/validation/success.csv"
53+
"./test/examples/validation/named-numbers-incorrect-schema.json"
54+
["Row #2 col #1 (column 'name') in file: success.csv has error: Cannot parse 'one' as type 'int': For input string: \"one\""
55+
"Row #3 col #1 (column 'name') in file: success.csv has error: Cannot parse 'two' as type 'int': For input string: \"two\""
56+
"Row #4 col #1 (column 'name') in file: success.csv has error: Cannot parse 'three' as type 'int': For input string: \"three\""
57+
"Row #5 col #1 (column 'name') in file: success.csv has error: Cannot parse 'four' as type 'int': For input string: \"four\""
58+
"Row #6 col #1 (column 'name') in file: success.csv has error: Cannot parse 'five' as type 'int': For input string: \"five\""])))
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
name,number
2+
one,1
3+
3,three
4+
four,four
5+
five,five
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
name,number
2+
one,1
3+
2,
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"@context": "http://www.w3.org/ns/csvw",
3+
"tableSchema": {
4+
"columns": [
5+
{
6+
"name": "name",
7+
"datatype": "int",
8+
"required": true
9+
},
10+
{
11+
"name": "number",
12+
"required": true,
13+
"datatype": "int"
14+
}
15+
]
16+
}
17+
}

test/examples/validation/named-numbers.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
{
22
"@context": "http://www.w3.org/ns/csvw",
3-
"url": "fail-2.csv",
43
"tableSchema": {
54
"columns": [
65
{

0 commit comments

Comments
 (0)