Skip to content

Commit ca0ca5d

Browse files
committed
🍒 pick 47c7218: 🐛 Validate RawData and wait to continue literals [backports #660]
This parses a RawData string into an array of `text`, `literal`, and `literal8` parts. This fixes embedded literals so they correctly wait for the server's continuation request before sending. Non-synchronizing literals are also parsed correctly.

This adds `Net::IMAP::RawText` which sends verbatim (like `RawData` did previously), and handles `text` validations:
* `text` can't contain CR, LF, or NULL
* `text` must be ASCII compatible or valid UTF-8

The existing `Literal` and `Literal8` classes handle literal validation:
* `literal` can't contain NULL byte, but `literal8` can

Additionally, `RawData` validates that:
* embedded literal bytesize must be <= remaining string bytesize
* final `text` cannot end with `{number}` (in case a `CRLF` comes after)

This does _not_ make RawData arguments safe from every type of injection attack. However, without losing any significant flexibility, this _does_ prevent unescaped `CRLF` from creating a _command_ injection.
1 parent 3116c7d commit ca0ca5d

3 files changed

Lines changed: 312 additions & 2 deletions

File tree

lib/net/imap/command_data.rb

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,85 @@ def validate
148148
end
149149
end
150150

151+
# Represents IMAP +text+ data, which may contain any 7-bit ASCII character,
152+
# except for +NULL+, +CR+, or +LF+. +text+ is extended to allow any
153+
# multibyte +UTF-8+ character when either +UTF8=ACCEPT+ or +IMAP4rev2+ have
154+
# been enabled, or when the server supports only +IMAP4rev2+ and not earlier
155+
# IMAP revisions, or when the server advertises +UTF8=ONLY+.
156+
#
157+
# NOTE: The current implementation does not validate whether the connection
158+
# currently supports UTF-8. Future versions may change.
159+
#
160+
# The string's bytes must be valid ASCII or valid UTF-8. The string's
161+
# reported encoding is ignored, but the string is _not_ transcoded.
162+
class RawText < CommandData # :nodoc:
  # Coerces +data+ to a String, relabels it as ASCII or UTF-8 without
  # transcoding any bytes, stores the frozen result, and then validates it.
  #
  # Raises DataFormatError (from #validate) when the text cannot be sent
  # verbatim.
  def initialize(data:)
    str = String(data.to_str)
    normalized =
      if [Encoding::ASCII, Encoding::UTF_8].include?(str.encoding)
        -str
      else
        # Only the encoding label changes; the bytes are left untouched.
        label = str.ascii_only? ? "ASCII" : "UTF-8"
        -(str.dup.force_encoding(label))
      end
    super(data: normalized)
    validate
  end

  # Raises DataFormatError when the text contains a NULL byte, is not validly
  # encoded, or contains CR or LF.  Returns +nil+ otherwise.
  #
  # The encoding check runs before the CR/LF regexp match, so the regexp is
  # only ever applied to validly encoded strings.
  def validate
    raise DataFormatError, "NULL byte must be binary literal encoded" if data.include?("\0")
    raise DataFormatError, "invalid UTF-8 must be literal encoded" unless data.valid_encoding?
    raise DataFormatError, "CR and LF bytes must be literal encoded" if data.match?(/[\r\n]/)
  end

  # Returns whether the text consists solely of ASCII characters.
  def ascii_only?
    data.ascii_only?
  end

  # Writes the text verbatim using the connection's +put_string+ (invoked via
  # +__send__+).  +tag+ is accepted for interface parity and is not used.
  def send_data(imap, tag)
    imap.__send__(:put_string, data)
  end
end
190+
151191
class RawData < CommandData # :nodoc:
  # Splits the raw string +data+ into an array of RawText, Literal, and
  # Literal8 parts (see #split_parts), stores that array as +data+, and then
  # validates it.
  #
  # Raises DataFormatError when any part is invalid or when the string ends
  # with a literal prefix.
  def initialize(data:)
    data = split_parts(data)
    super
    validate
  end

  # Sends each part in order, delegating to that part's own +send_data+.
  def send_data(imap, tag)
    data.each { |part| part.send_data(imap, tag) }
  end

  # Rejects raw data whose final text part ends with a literal prefix such
  # as <tt>{123}</tt>, <tt>{123+}</tt>, or <tt>~{123}</tt>.  If a CRLF were
  # sent right after such text, the receiver could read it as the start of a
  # literal.
  #
  # Any run of digits is rejected, including <tt>{0}</tt> and zero-padded
  # counts such as <tt>{007}</tt>: the IMAP grammar defines +number+ as
  # <tt>1*DIGIT</tt>, so those are literal prefixes too.
  def validate
    return unless data.last in RawText(data: text)
    if text.rindex(/~?\{\d+\+?\}\z/n)
      raise DataFormatError, "RawData cannot end with literal continuation"
    end
  end

  private

  # Scans +data+ for literal prefixes (<tt>{n}</tt>, <tt>{n+}</tt>,
  # <tt>~{n}</tt>, <tt>~{n+}</tt>, each followed by CRLF) and returns an
  # array that alternates RawText parts with Literal/Literal8 parts.  Empty
  # text segments are omitted.
  def split_parts(data)
    data = data.b # dups and ensures BINARY encoding
    parts = []
    while (prefix = data.match(/(~)?\{(0|[1-9]\d*)(\+)?\}\r\n/n))
      text, data = prefix.pre_match, prefix.post_match
      binary     = !prefix[1].nil? # leading "~" marks a binary literal
      non_sync   = !prefix[3].nil? # trailing "+" marks non-synchronizing
      bytesize   = Integer(prefix[2], 10)
      parts << RawText[text] unless text.empty?
      parts << extract_literal(data, binary:, bytesize:, non_sync:)
      # Drop the literal's bytes; scanning resumes with whatever follows.
      data[0, bytesize] = ""
    end
    parts << RawText[data] unless data.empty?
    parts
  end

  # Builds a Literal (or Literal8 when +binary+ is true) from the first
  # +bytesize+ bytes of +data+.
  #
  # Raises DataFormatError when +data+ holds fewer than +bytesize+ bytes.
  def extract_literal(data, binary:, bytesize:, non_sync:)
    if data.bytesize < bytesize
      raise DataFormatError, "Too few bytes in string for literal, " \
        "expected: %s, remaining: %s" % [bytesize, data.bytesize]
    end
    literal = data.byteslice(0, bytesize)
    (binary ? Literal8 : Literal).new(data: literal, non_sync:)
  end
end
156232

test/net/imap/test_command_data.rb

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ class CommandDataTest < Net::IMAP::TestCase
1010
Flag = Net::IMAP::Flag
1111
Literal = Net::IMAP::Literal
1212
Literal8 = Net::IMAP::Literal8
13+
RawText = Net::IMAP::RawText
14+
RawData = Net::IMAP::RawData
1315

1416
Output = Net::IMAP::Data.define(:name, :args, :kwargs)
1517
TAG = Module.new.freeze
@@ -151,4 +153,207 @@ def send_data(*data, tag: TAG)
151153
], imap.output
152154
end
153155

156+
# Tests for RawText: text that is sent verbatim is emitted through
# +put_string+, and text containing NULL, CR, LF, or invalid UTF-8 is
# rejected with DataFormatError.
class RawTextTest < CommandDataTest
  test "basic ASCII string" do
    imap.send_data RawText.new('foo "bar" (baz)')
    assert_equal [Output.put_string('foo "bar" (baz)')], imap.output
  end

  test "allows IMAP atom-special symbols" do
    imap.send_data RawText.new('foo "bar" (baz)')
    imap.send_data RawText.new("(){}[]%*\"\\")
    imap.send_data RawText.new("(((((((((((((((( unbalanced ]]]]]]]]]]]]]")
    assert_equal [
      Output.put_string('foo "bar" (baz)'),
      Output.put_string("(){}[]%*\"\\"),
      Output.put_string("(((((((((((((((( unbalanced ]]]]]]]]]]]]]"),
    ], imap.output
  end

  test "ASCII compatible string with another encodings" do
    imap.send_data RawText.new("foo bar".encode("cp1252"))
    assert_equal [
      Output.put_string("foo bar"),
    ], imap.output
  end

  test "allows ASCII control chars" do
    text = RawText.new("beep\b beep\b escape!\e delete this:\x1f")
    imap.send_data text
    assert_equal [
      Output.put_string("beep\b beep\b escape!\e delete this:\x1f"),
    ], imap.output
  end

  # Each entry is [input text, pattern the error message must match].
  data(
    "NULL" => ["with \0 NULL", /NULL\b.+\bbyte/i],
    "CR"   => ["with \r CR",   /CR\b.+\bbyte/i],
    "LF"   => ["with \n LF",   /LF\b.+\bbyte/i],
  )
  test "invalid ASCII byte" do |(text, error_message)|
    try_multiple_encodings(error_message, text)
  end

  # See Table 3-7, Well-Formed UTF-8 Byte Sequences, in The Unicode Standard:
  # https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G27506
  data(
    "incomplete 2 byte sequence" => "\xc3".b,
    "invalid 2 byte sequence"    => "\xc3\x7f".b,
    # 0xE0 followed by a well-formed second byte (0xA0..0xBF), but with the
    # third byte missing.  This makes the case truly "incomplete" and
    # distinct from the "invalid 3 byte sequence" case below.
    "incomplete 3 byte sequence" => "\xe0\xa0".b,
    "invalid 3 byte sequence"    => "\xe0\x80\x80".b,
    "incomplete 4 byte sequence" => "\xf1\x80\x80".b,
    "invalid 4 byte sequence"    => "\xf0\x80\x80\x80".b,
    "first byte too high"        => "\xff\xaa\xaa\xaa".b,
    "UTF-16 surrogate pair"      => "\xFE\xFF\xD8\x3D\xDC\xA3\xFE\x0F".b,
    "windows-1252"               => "åêïõü".encode("windows-1252"),
  )
  test "invalid UTF-8" do |text|
    try_multiple_encodings(/invalid UTF-8/i, text)
  end

  # Yields the same bytes four times, labelled as BINARY, ASCII, UTF-8, and
  # cp1252.  The bytes are never transcoded; only the encoding label differs.
  def with_multiple_encodings(data)
    yield data.b # BINARY
    yield data.dup.force_encoding("ASCII")
    yield data.dup.force_encoding("UTF-8")
    yield data.dup.force_encoding("cp1252")
  end

  # Asserts that building a RawText from +data+ raises DataFormatError with a
  # message matching +error_message+, whichever encoding label the bytes
  # carry.
  def try_multiple_encodings(error_message, data)
    with_multiple_encodings(data) do |encoded|
      assert_raise_with_message(DataFormatError, error_message) do
        RawText[encoded]
      end
    end
  end
end
229+
230+
# Tests for RawData: the raw string is split into RawText / Literal /
# Literal8 parts, each part is sent through the matching output call, and
# malformed input raises DataFormatError.
class RawDataTest < CommandDataTest
  test "simple raw text" do
    raw = RawData.new('foo "bar" baz')
    assert_equal [RawText['foo "bar" baz']], raw.data
    imap.send_data raw
    assert_equal [Output.put_string('foo "bar" baz')], imap.output
  end

  test "a single literal" do
    raw = RawData.new("{7}\r\nfoo bar")
    assert_equal [Literal["foo bar", false]], raw.data
    imap.send_data raw, tag: "t1"
    assert_equal [
      Output.send_literal("foo bar", "t1", non_sync: false),
    ], imap.output
  end

  test "literals embedded between text" do
    raw = RawData.new("foo bar {3}\r\nbaz {4+}\r\nquux etc")
    assert_equal [
      RawText["foo bar "],
      Literal["baz", false],
      RawText[" "],
      Literal["quux", true], # non-synchronizing
      RawText[" etc"],
    ], raw.data
    imap.send_data raw, tag: "t2"
    assert_equal [
      Output.put_string("foo bar "),
      Output.send_literal("baz", "t2", non_sync: false),
      Output.put_string(" "),
      Output.send_literal("quux", "t2", non_sync: true),
      Output.put_string(" etc"),
    ], imap.output
  end

  test "empty literals" do
    raw = RawData.new("{0}\r\n{0+}\r\n~{0}\r\n~{0+}\r\n")
    assert_equal [
      Literal["", false],
      Literal["", true],
      Literal8["", false],
      Literal8["", true],
    ], raw.data
    imap.send_data raw, tag: "t2.2"
    assert_equal [
      Output.send_literal("", "t2.2", non_sync: false),
      Output.send_literal("", "t2.2", non_sync: true),
      Output.send_binary_literal("", "t2.2", non_sync: false),
      Output.send_binary_literal("", "t2.2", non_sync: true),
    ], imap.output
  end

  test "raw text embedded between literals" do
    raw = RawData.new("{3}\r\nfoo bar")
    assert_equal [
      Literal["foo", false],
      RawText[" bar"]
    ], raw.data
    imap.send_data raw, tag: "t3"
    assert_equal [
      Output.send_literal("foo", "t3", non_sync: false),
      Output.put_string(" bar"),
    ], imap.output
  end

  test "raw text followed by literal" do
    raw = RawData.new("foo {3}\r\nbar")
    assert_equal [
      RawText["foo "],
      Literal["bar", false],
    ], raw.data
    imap.send_data raw, tag: "t4"
    assert_equal [
      Output.put_string("foo "),
      Output.send_literal("bar", "t4", non_sync: false),
    ], imap.output
    imap.clear
  end

  # The 7-byte binary literal swallows "\0bar\r\nb", so the NULL and CRLF
  # inside it are literal payload and the following text starts at "az ".
  test "binary literal with regular literal" do
    raw = RawData.new("foo ~{7}\r\n\0bar\r\nbaz {4}\r\nquux")
    assert_equal [
      RawText["foo "],
      Literal8["\0bar\r\nb", false],
      RawText["az "],
      Literal["quux", false],
    ], raw.data
    imap.send_data raw, tag: "t5"
    assert_equal [
      Output.put_string("foo "),
      Output.send_binary_literal("\0bar\r\nb", "t5", non_sync: false),
      Output.put_string("az "),
      Output.send_literal("quux", "t5", non_sync: false),
    ], imap.output
  end

  data(
    "CR"   => "with \r byte",
    "LF"   => "with \n byte",
    "NULL" => "with \0 byte",
    "CRLF" => "with \r\n bytes",
  )
  test "invalid bytes in raw text" do |data|
    assert_raise_with_message(DataFormatError, /must be.* literal encoded/i) do
      RawData.new(data:)
    end
  end

  # No data set is declared for this test and its inputs are inline, so the
  # block takes no parameter.
  test "invalid literal" do
    assert_raise_with_message(DataFormatError, /too few bytes/i) do
      RawData.new(data: "invalid literal {123}\r\ntoo small")
    end

    assert_raise_with_message(DataFormatError, /NULL byte.*in.*literal/i) do
      RawData.new(data: "invalid literal {10}\r\ncontains \0 null")
    end
  end

  # A trailing literal prefix is rejected, but the same characters followed
  # by more text are accepted as ordinary raw text.
  test "invalid literal ending ('{123}')" do
    assert_raise(DataFormatError) do RawData.new(data: "literal {123}") end
    assert_raise(DataFormatError) do RawData.new(data: "literal+ {123+}") end
    assert_raise(DataFormatError) do RawData.new(data: "~literal ~{123}") end
    assert_raise(DataFormatError) do RawData.new(data: "~literal+ ~{123+}") end
    raw = RawData.new(data: " {123} ")
    assert_equal [RawText[" {123} "]], raw.data
  end
end
358+
154359
end

test/net/imap/test_imap.rb

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,35 @@ def test_send_symbol_as_flag
678678
end
679679
end
680680

681+
# Sends RawData arguments through +send_command+ against the fake server:
# valid raw data arrives in the recorded command's args, and raw data that
# fails validation raises DataFormatError with no command recorded.
def test_raw_data
  raw_data = Net::IMAP::RawData
  rejected_inputs = [
    "with \0 NULL",
    "with \r CR",
    "with \n LF",
    "with \r\n CRLF",
    "{1234}\r\nliteral is too small",
    "{1}\r\n\0 literal contains NULL",
  ]

  with_fake_server do |server, imap|
    server.on("TEST", &:done_ok)

    imap.__send__(:send_command, "TEST", raw_data.new("foo bar"))
    plain = server.commands.pop
    assert_equal "foo bar", plain.args

    imap.__send__(:send_command, "TEST",
                  raw_data.new("{3}\r\nfoo"),
                  raw_data.new("~{4}\r\n\0bar"))
    with_literals = server.commands.pop
    assert_equal "{3}\r\nfoo ~{4}\r\n\0bar", with_literals.args

    # RawData must pass basic validation before sending command
    rejected_inputs.each do |invalid|
      assert_raise(Net::IMAP::DataFormatError) do
        imap.__send__(:send_command, "TEST", raw_data[data: invalid])
      end
      assert_empty server.commands
    end
  end
end
709+
681710
test("send PartialRange args") do
682711
with_fake_server do |server, imap|
683712
server.on "TEST", &:done_ok

0 commit comments

Comments
 (0)