2424public abstract class CsvSpecs {
2525 public interface Builder {
2626 /**
27- * Copy all of the parameters from {@code specs} into {@code this} builder.
27+ * Copy all the parameters from {@code specs} into {@code this} builder.
2828 */
2929 Builder from (CsvSpecs specs );
3030
@@ -117,6 +117,34 @@ public interface Builder {
117117 */
118118 Builder headerValidator (Predicate <String > headerValidator );
119119
120+ /**
121+ * True if the input is organized into fixed-width columns rather than separated by a delimiter character. Defaults to {@code false}.
122+ */
123+ Builder hasFixedWidthColumns (boolean hasFixedWidthColumns );
124+
125+ /**
126+ * When {@link #hasFixedWidthColumns} is set, the library either determines the column widths from the header
127+ * row (provided {@link #hasHeaderRow} is set), or the column widths can be specified explicitly by the caller.
128+ * If the caller wants to specify them explicitly, they can use this method. It is an error to set this
129+ * parameter if {@link #hasFixedWidthColumns} is false. Note that because the library is tolerant of the last
130+ * cell being shorter or wider than expected, the value specified here for the width of the last column is
131+ * simply a placeholder; its value is ignored. Every width supplied (the placeholder included) must still be at least 1; smaller values are reported as invalid when the specs are checked.
132+ */
133+ Builder fixedColumnWidths (Iterable <Integer > fixedColumnWidths );
134+
135+ /**
136+ * This setting controls what units fixed width columns are measured in. When true, fixed width columns are
137+ * measured in Unicode code points. When false, fixed width columns are measured in UTF-16 units (aka Java
138+ * chars). The difference arises when encountering characters outside the Unicode Basic Multilingual Plane. For
139+ * example, the Unicode code point 💔 (U+1F494) is one Unicode code point, but takes two Java chars to
140+ * represent. Along these lines, the string 💔💔💔 would fit in a column of width 3 when useUtf32CountingConvention is
141+ * true, but would require a column width of at least 6 when useUtf32CountingConvention is false. The default setting of
142+ * true is arguably more natural for users (the number of characters they see matches the visual width of the
143+ * column). But some programs may want the value of false because they are counting Java chars. It is an error
144+ * to set this parameter if {@link #hasFixedWidthColumns} is false.
145+ */
146+ Builder useUtf32CountingConvention (boolean useUtf32CountingConvention );
147+
120148 /**
121149 * Number of data rows to skip before processing data. This is useful when you want to parse data in chunks.
122150 * Typically used together with {@link Builder#numRows}. Defaults to 0.
@@ -160,7 +188,7 @@ public interface Builder {
160188
161189 /**
162190 * The field delimiter character (the character that separates one column from the next). Must be 7-bit ASCII.
163- * Defaults to {code ','}.
191+ * Defaults to {@code ','}. It is an error to set this parameter if {@link #hasFixedWidthColumns} is true.
164192 */
165193 Builder delimiter (char delimiter );
166194
@@ -179,6 +207,8 @@ public interface Builder {
179207 * <li>hello, there
180208 * <li>456
181209 * </ul>
210+ *
211+ * It is an error to set this parameter if {@link #hasFixedWidthColumns} is true.
182212 */
183213 Builder quote (char quote );
184214
@@ -188,7 +218,8 @@ public interface Builder {
188218 Builder ignoreSurroundingSpaces (boolean ignoreSurroundingSpaces );
189219
190220 /**
191- * Whether to trim leading and trailing blanks from inside quoted values. Defaults to {@code false}.
221+ * Whether to trim leading and trailing blanks from inside quoted values. Defaults to {@code false}. It is an
222+ * error to set this parameter if {@link #hasFixedWidthColumns} is true.
192223 */
193224 Builder trim (boolean trim );
194225
@@ -224,6 +255,38 @@ void check() {
224255 if (!hasHeaderRow () && skipHeaderRows () > 0 ) {
225256 problems .add ("skipHeaderRows != 0 but hasHeaderRow is not set" );
226257 }
258+
259+ for (final Integer colWidth : fixedColumnWidths ()) {
260+ if (colWidth < 1 ) {
261+ problems .add (String .format ("Fixed column width %d is invalid" , colWidth ));
262+ }
263+ }
264+
265+ // Certain items must not be set in fixed-width column mode. Other items must not be set in delimited column
266+ // mode.
267+ if (hasFixedWidthColumns ()) {
268+ final String format = "Incompatible parameters: can't set %s when hasFixedWidthColumns is true" ;
269+ if (quote () != defaultQuote ) {
270+ problems .add (String .format (format , "quote" ));
271+ }
272+
273+ if (delimiter () != defaultDelimiter ) {
274+ problems .add (String .format (format , "delimiter" ));
275+ }
276+
277+ if (trim () != defaultTrim ) {
278+ problems .add (String .format (format , "trim" ));
279+ }
280+ } else {
281+ final String format = "Incompatible parameters: can't set %s when hasFixedWidthColumns is false" ;
282+ if (fixedColumnWidths ().size () != 0 ) {
283+ problems .add (String .format (format , "fixedColumnWidths" ));
284+ }
285+
286+ if (useUtf32CountingConvention () != defaultUtf32CountingConvention ) {
287+ problems .add (String .format (format , "useUtf32CountingConvention" ));
288+ }
289+ }
227290 if (problems .isEmpty ()) {
228291 return ;
229292 }
@@ -340,6 +403,32 @@ public Predicate<String> headerValidator() {
340403 return c -> true ;
341404 }
342405
406+ /**
407+ * See {@link Builder#hasFixedWidthColumns}.
408+ */
409+ @ Default
410+ public boolean hasFixedWidthColumns () {
411+ return false ;
412+ }
413+
414+ /**
415+ * See {@link Builder#fixedColumnWidths}.
416+ */
417+ @ Default
418+ public List <Integer > fixedColumnWidths () {
419+ return Collections .emptyList ();
420+ }
421+
422+ private static final boolean defaultUtf32CountingConvention = true ;
423+
424+ /**
425+ * See {@link Builder#useUtf32CountingConvention}.
426+ */
427+ @ Default
428+ public boolean useUtf32CountingConvention () {
429+ return defaultUtf32CountingConvention ;
430+ }
431+
343432 /**
344433 * See {@link Builder#skipRows}.
345434 */
@@ -396,20 +485,25 @@ public long skipHeaderRows() {
396485 return 0 ;
397486 }
398487
488+ private final char defaultDelimiter = ',' ;
489+
399490 /**
400491 * See {@link Builder#delimiter}.
401492 */
402493 @ Default
403494 public char delimiter () {
404- return ',' ;
495+ return defaultDelimiter ;
405496 }
406497
498+
499+ private static final char defaultQuote = '"' ;
500+
407501 /**
408502 * See {@link Builder#quote}.
409503 */
410504 @ Default
411505 public char quote () {
412- return '"' ;
506+ return defaultQuote ;
413507 }
414508
415509 /**
@@ -420,12 +514,14 @@ public boolean ignoreSurroundingSpaces() {
420514 return true ;
421515 }
422516
517+ private static boolean defaultTrim = false ;
518+
423519 /**
424520 * See {@link Builder#trim}.
425521 */
426522 @ Default
427523 public boolean trim () {
428- return false ;
524+ return defaultTrim ;
429525 }
430526
431527 /**
0 commit comments