diff --git a/metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java b/metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java
index 45b2ac4b0..5baaeaca1 100644
--- a/metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java
+++ b/metafacture-csv/src/main/java/org/metafacture/csv/CsvDecoder.java
@@ -17,6 +17,7 @@
 package org.metafacture.csv;
 
 import org.metafacture.framework.FluxCommand;
+import org.metafacture.framework.MetafactureException;
 import org.metafacture.framework.StreamReceiver;
 import org.metafacture.framework.annotations.Description;
 import org.metafacture.framework.annotations.In;
@@ -31,7 +32,6 @@
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.List;
 
 /**
  * Decodes lines of CSV files. First line may be interpreted as header.
@@ -91,50 +91,51 @@ private void initializeCsvParser() {
     @Override
     public void process(final String string) {
         assert !isClosed();
-        final String[] parts = parseCsv(string);
-        if (hasHeader) {
-            if (header.length == 0) {
-                header = parts;
-            }
-            else if (parts.length == header.length) {
-                getReceiver().startRecord(String.valueOf(++count));
-                for (int i = 0; i < parts.length; ++i) {
-                    getReceiver().literal(header[i], parts[i]);
+        try (
+            StringReader sr = new StringReader(string);
+            CSVReader reader = new CSVReaderBuilder(sr).withCSVParser(parser).build()
+        ) {
+            String[] parts;
+            while ((parts = parseCsv(reader)) != null) {
+                if (hasHeader) {
+                    if (header.length == 0) {
+                        header = parts;
+                    }
+                    else if (parts.length == header.length) {
+                        getReceiver().startRecord(String.valueOf(++count));
+                        for (int i = 0; i < parts.length; ++i) {
+                            getReceiver().literal(header[i], parts[i]);
+                        }
+                        getReceiver().endRecord();
+                    }
+                    else {
+                        throw new IllegalArgumentException(
+                                String.format(
+                                        "wrong number of columns (expected %s, was %s) in input line: %s",
+                                        header.length, parts.length, string));
+                    }
+                }
+                else {
+                    getReceiver().startRecord(String.valueOf(++count));
+                    for (int i = 0; i < parts.length; ++i) {
+                        getReceiver().literal(String.valueOf(i), parts[i]);
+                    }
+                    getReceiver().endRecord();
                 }
-                getReceiver().endRecord();
-            }
-            else {
-                throw new IllegalArgumentException(
-                        String.format(
-                                "wrong number of columns (expected %s, was %s) in input line: %s",
-                                header.length, parts.length, string));
             }
         }
-        else {
-            getReceiver().startRecord(String.valueOf(++count));
-            for (int i = 0; i < parts.length; ++i) {
-                getReceiver().literal(String.valueOf(i), parts[i]);
-            }
-            getReceiver().endRecord();
+        catch (final IOException e) {
+            throw new MetafactureException(e);
         }
     }
 
-    private String[] parseCsv(final String csv) {
-        String[] parts = new String[0];
+    private String[] parseCsv(final CSVReader reader) {
         try {
-            final CSVReader reader = new CSVReaderBuilder(new StringReader(csv))
-                    .withCSVParser(parser)
-                    .build();
-            final List<String[]> lines = reader.readAll();
-            if (lines.size() > 0) {
-                parts = lines.get(0);
-            }
-            reader.close();
+            return reader.readNext();
         }
         catch (final IOException | CsvException e) {
-            e.printStackTrace();
+            throw new MetafactureException(e);
         }
-        return parts;
     }
 
     /**
diff --git a/metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java b/metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java
index e6402fd0c..241de8fe2 100644
--- a/metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java
+++ b/metafacture-csv/src/test/java/org/metafacture/csv/CsvDecoderTest.java
@@ -16,9 +16,14 @@
 
 package org.metafacture.csv;
 
+import org.metafacture.framework.MetafactureException;
 import org.metafacture.framework.StreamReceiver;
 
+import com.opencsv.exceptions.CsvMalformedLineException;
+import org.hamcrest.CoreMatchers;
+import org.hamcrest.MatcherAssert;
 import org.junit.After;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.InOrder;
@@ -78,6 +83,49 @@ public void testQuoted() {
         ordered.verify(receiver).endRecord();
     }
 
+    @Test
+    public void testInvalidSyntax() {
+        assertException(MetafactureException.class, CsvMalformedLineException.class,
+                "Unterminated quoted field at end of CSV line", "a,\"b1,b2,b3,c"); // missing closing "
+    }
+
+    @Test
+    public void testInvalidColumns() {
+        assertException(IllegalArgumentException.class, null,
+                "wrong number of columns (expected 3, was 2)", "a,b"); // missing third column
+    }
+
+    @Test
+    public void testMultilineSingleRow() {
+        decoder.process("a,\"b1,b2,\nb3\",c");
+        final InOrder ordered = Mockito.inOrder(receiver);
+        ordered.verify(receiver).startRecord("1");
+        ordered.verify(receiver).literal("h1", "a");
+        ordered.verify(receiver).literal("h2", "b1,b2,\nb3");
+        ordered.verify(receiver).literal("h3", "c");
+        ordered.verify(receiver).endRecord();
+        ordered.verifyNoMoreInteractions();
+        Mockito.verifyNoMoreInteractions(receiver);
+    }
+
+    @Test
+    public void testMultilineMultipleRows() {
+        decoder.process("a,\"b1,b2,\nb3\",c\na2,b4,c2");
+        final InOrder ordered = Mockito.inOrder(receiver);
+        ordered.verify(receiver).startRecord("1");
+        ordered.verify(receiver).literal("h1", "a");
+        ordered.verify(receiver).literal("h2", "b1,b2,\nb3");
+        ordered.verify(receiver).literal("h3", "c");
+        ordered.verify(receiver).endRecord();
+        ordered.verify(receiver).startRecord("2");
+        ordered.verify(receiver).literal("h1", "a2");
+        ordered.verify(receiver).literal("h2", "b4");
+        ordered.verify(receiver).literal("h3", "c2");
+        ordered.verify(receiver).endRecord();
+        ordered.verifyNoMoreInteractions();
+        Mockito.verifyNoMoreInteractions(receiver);
+    }
+
     @Test
     public void testTabSeparated() {
 
@@ -110,4 +158,24 @@ public void issue496_escaping() {
         ordered.verify(receiver).endRecord();
     }
 
+    private void assertException(final Class exceptionClass, final Class expectedClass, final String expectedMessage, final String input) {
+        final Throwable thrownException = Assert.assertThrows(exceptionClass, () -> decoder.process(input));
+        final Throwable actualException;
+
+        final String expectedSuffix;
+
+        if (expectedClass != null) {
+            actualException = thrownException.getCause();
+            Assert.assertEquals(expectedClass, actualException.getClass());
+
+            expectedSuffix = ". ";
+        }
+        else {
+            actualException = thrownException;
+            expectedSuffix = " in ";
+        }
+
+        MatcherAssert.assertThat(actualException.getMessage(), CoreMatchers.startsWith(expectedMessage + expectedSuffix));
+    }
+
 }