@@ -40,6 +40,97 @@ is_suffix_char(char ch)
4040 return (ch >= ' a' && ch <= ' z' ) || (ch >= ' A' && ch <= ' Z' ) || ch == ' %' ;
4141}
4242
43+ std::optional<char >
44+ separated_string::detect_separator (const string_fragment& str)
45+ {
46+ struct sep_state {
47+ char ss_char;
48+ size_t ss_count{0 };
49+ };
50+
51+ size_t comma = 0 ;
52+ size_t tab = 0 ;
53+ size_t semi = 0 ;
54+ size_t vbar = 0 ;
55+ size_t space = 0 ;
56+
57+ auto in_quote = false ;
58+ auto has_leading_spaces = false ;
59+
60+ auto cur = str.cursor ();
61+ while (cur.lookahead () == ' ' ) {
62+ (void ) cur.next ();
63+ has_leading_spaces = true ;
64+ }
65+ while (true ) {
66+ auto ch = cur.next ();
67+ if (!ch) {
68+ break ;
69+ }
70+
71+ auto behind = cur.lookbehind ();
72+ auto ahead = cur.lookahead ();
73+ if (in_quote) {
74+ if (ch == ' "' ) {
75+ in_quote = false ;
76+ }
77+ } else if (ch == ' "' ) {
78+ in_quote = true ;
79+ } else if (ch == ' \t ' ) {
80+ if (behind && behind != ' \t ' ) {
81+ tab += 1 ;
82+ }
83+ } else if (ch == ' ,' ) {
84+ if (behind && ahead && behind != ' ' && ahead != ' ' ) {
85+ comma += 1 ;
86+ }
87+ } else if (ch == ' ;' ) {
88+ if (behind && ahead && behind != ' ' && ahead != ' ' ) {
89+ semi += 1 ;
90+ }
91+ } else if (ch == ' |' ) {
92+ if (behind && ahead && behind != ' ' && ahead != ' ' ) {
93+ vbar += 1 ;
94+ }
95+ } else if (ch == ' ' ) {
96+ if (behind && ahead && behind != ' ' && ahead == ' ' ) {
97+ space += 1 ;
98+ }
99+ }
100+ }
101+
102+ if (has_leading_spaces) {
103+ if (space > 0 ) {
104+ return ' ' ;
105+ }
106+ return std::nullopt ;
107+ }
108+
109+ if (in_quote) {
110+ return std::nullopt ;
111+ }
112+
113+ std::array<sep_state, 5 > states = {{
114+ {' ,' , comma},
115+ {' \t ' , tab},
116+ {' ;' , semi},
117+ {' |' , vbar},
118+ {' ' , space},
119+ }};
120+
121+ std::sort (states.begin (),
122+ states.end (),
123+ [](const sep_state& a, const sep_state& b) {
124+ return a.ss_count > b.ss_count ;
125+ });
126+
127+ if (states[0 ].ss_count == 0 || states[0 ].ss_count == states[1 ].ss_count ) {
128+ return std::nullopt ;
129+ }
130+
131+ return states[0 ].ss_char ;
132+ }
133+
43134std::string
44135separated_string::unescape_quoted (string_fragment sf)
45136{
@@ -108,7 +199,22 @@ separated_string::iterator::update()
108199 const char * p = this ->i_pos ;
109200 while (p < data_end) {
110201 if (!in_quotes && *p == sep_ch) {
111- break ;
202+ if (sep_ch == ' ' && p + 1 < data_end) {
203+ if ((!this ->i_parent .ss_expected_count
204+ || this ->i_index + 1
205+ < this ->i_parent .ss_expected_count .value ())
206+ && p + 1 < data_end && *(p + 1 ) == ' ' )
207+ {
208+ while (p + 1 < data_end && *(p + 1 ) == ' ' ) {
209+ p += 1 ;
210+ }
211+ break ;
212+ }
213+ state = TRAIL_WS ;
214+ p += 1 ;
215+ } else {
216+ break ;
217+ }
112218 }
113219 const char c = *p;
114220
@@ -206,7 +312,8 @@ separated_string::iterator::update()
206312 // end of input, convention says one more empty cell should be
207313 // emitted. Defer it to the next update() call via
208314 // i_pending_ghost so the user still sees the current cell first.
209- if (p < data_end && p + 1 == data_end) {
315+ if (p < data_end && p + 1 == data_end && this ->i_parent .ss_separator != ' ' )
316+ {
210317 this ->i_pending_ghost = true ;
211318 }
212319 this ->i_next_pos = (p < data_end) ? p + 1 : data_end;
0 commit comments