Skip to content

Commit 47c7218

Browse files
committed
🐛 Validate RawData and wait to continue literals
This parses a RawData string into an array of `text`, `literal`, and `literal8` parts. This fixes embedded literals so that they correctly wait for the server's continuation request before sending. Non-synchronizing literals are also parsed correctly.

This adds `Net::IMAP::RawText`, which sends its data verbatim (like `RawData` did previously) and handles `text` validations:
* `text` can't contain CR, LF, or NULL
* `text` must be ASCII compatible or valid UTF-8

The existing `Literal` and `Literal8` classes handle literal validation:
* `literal` can't contain a NULL byte, but `literal8` can

Additionally, `RawData` validates that:
* an embedded literal's bytesize must be <= the remaining string's bytesize
* the final `text` cannot end with `{number}` (in case a `CRLF` comes after)

This does _not_ make RawData arguments safe from every type of injection attack. However, without losing any significant flexibility, this _does_ prevent an unescaped `CRLF` from creating a _command_ injection.
1 parent 0ec4fd3 commit 47c7218

3 files changed

Lines changed: 312 additions & 2 deletions

File tree

lib/net/imap/command_data.rb

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,85 @@ def validate
154154
end
155155
end
156156

157+
# Represents IMAP +text+ data, which may contain any 7-bit ASCII character,
158+
# except for +NULL+, +CR+, or +LF+. +text+ is extended to allow any
159+
# multibyte +UTF-8+ character when either +UTF8=ACCEPT+ or +IMAP4rev2+ have
160+
# been enabled, or when the server supports only +IMAP4rev2+ and not earlier
161+
# IMAP revisions, or when the server advertises +UTF8=ONLY+.
162+
#
163+
# NOTE: The current implementation does not validate whether the connection
164+
# currently supports UTF-8. Future versions may change.
165+
#
166+
# The string's bytes must be valid ASCII or valid UTF-8. The string's
167+
# reported encoding is ignored, but the string is _not_ transcoded.
168+
class RawText < CommandData # :nodoc:
169+
def initialize(data:)
170+
data = String(data.to_str)
171+
data = if data.encoding in Encoding::ASCII | Encoding::UTF_8
172+
-data
173+
elsif data.ascii_only?
174+
-(data.dup.force_encoding("ASCII"))
175+
else
176+
-(data.dup.force_encoding("UTF-8"))
177+
end
178+
super
179+
validate
180+
end
181+
182+
def validate
183+
if data.include?("\0")
184+
raise DataFormatError, "NULL byte must be binary literal encoded"
185+
elsif !data.valid_encoding?
186+
raise DataFormatError, "invalid UTF-8 must be literal encoded"
187+
elsif /[\r\n]/.match?(data)
188+
raise DataFormatError, "CR and LF bytes must be literal encoded"
189+
end
190+
end
191+
192+
def ascii_only? = data.ascii_only?
193+
194+
def send_data(imap, tag) = imap.__send__(:put_string, data)
195+
end
196+
157197
class RawData < CommandData # :nodoc:
158-
def send_data(imap, tag)
159-
imap.__send__(:put_string, data)
198+
def initialize(data:)
199+
data = split_parts(data)
200+
super
201+
validate
202+
end
203+
204+
def send_data(imap, tag) = data.each do _1.send_data(imap, tag) end
205+
206+
def validate
207+
return unless data.last in RawText(data: text)
208+
if text.rindex(/~?\{[1-9]\d*\+?\}\z/n)
209+
raise DataFormatError, "RawData cannot end with literal continuation"
210+
end
211+
end
212+
213+
private
214+
215+
def split_parts(data)
216+
data = data.b # dups and ensures BINARY encoding
217+
parts = []
218+
while data.match(/(~)?\{(0|[1-9]\d*)(\+)?\}\r\n/n)
219+
text, binary, bytesize, non_sync, data = $`, !!$1, $2, !!$3, $'
220+
bytesize = NumValidator.coerce_number64 bytesize
221+
parts << RawText[text] unless text.empty?
222+
parts << extract_literal(data, binary:, bytesize:, non_sync:)
223+
data.bytesplice(0, bytesize, "")
224+
end
225+
parts << RawText[data] unless data.empty?
226+
parts
227+
end
228+
229+
def extract_literal(data, binary:, bytesize:, non_sync:)
230+
if data.bytesize < bytesize
231+
raise DataFormatError, "Too few bytes in string for literal, " \
232+
"expected: %s, remaining: %s" % [bytesize, data.bytesize]
233+
end
234+
literal = data.byteslice(0, bytesize)
235+
(binary ? Literal8 : Literal).new(data: literal, non_sync:)
160236
end
161237
end
162238

test/net/imap/test_command_data.rb

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ class CommandDataTest < Net::IMAP::TestCase
1010
Flag = Net::IMAP::Flag
1111
Literal = Net::IMAP::Literal
1212
Literal8 = Net::IMAP::Literal8
13+
RawText = Net::IMAP::RawText
14+
RawData = Net::IMAP::RawData
1315

1416
Output = Data.define(:name, :args, :kwargs)
1517
TAG = Module.new.freeze
@@ -162,4 +164,207 @@ class StringFormatterTest < Net::IMAP::TestCase
162164
end
163165
end
164166

167+
class RawTextTest < CommandDataTest
168+
test "basic ASCII string" do
169+
imap.send_data RawText.new('foo "bar" (baz)')
170+
assert_equal [Output.put_string('foo "bar" (baz)')], imap.output
171+
end
172+
173+
test "allows IMAP atom-special symbols" do
174+
imap.send_data RawText.new('foo "bar" (baz)')
175+
imap.send_data RawText.new("(){}[]%*\"\\")
176+
imap.send_data RawText.new("(((((((((((((((( unbalanced ]]]]]]]]]]]]]")
177+
assert_equal [
178+
Output.put_string('foo "bar" (baz)'),
179+
Output.put_string("(){}[]%*\"\\"),
180+
Output.put_string("(((((((((((((((( unbalanced ]]]]]]]]]]]]]"),
181+
], imap.output
182+
end
183+
184+
test "ASCII compatible string with another encodings" do
185+
imap.send_data RawText.new("foo bar".encode("cp1252"))
186+
assert_equal [
187+
Output.put_string("foo bar"),
188+
], imap.output
189+
end
190+
191+
test "allows ASCII control chars" do
192+
text = RawText.new("beep\b beep\b escape!\e delete this:\x1f")
193+
imap.send_data text
194+
assert_equal [
195+
Output.put_string("beep\b beep\b escape!\e delete this:\x1f"),
196+
], imap.output
197+
end
198+
199+
data(
200+
"NULL" => ["with \0 NULL", /NULL\b.+\bbyte/i],
201+
"CR" => ["with \r CR", /CR\b.+\bbyte/i],
202+
"LF" => ["with \n LF", /LF\b.+\bbyte/i],
203+
)
204+
test "invalid ASCII byte" do |(text, error_message)|
205+
try_multiple_encodings(error_message, text)
206+
end
207+
208+
# See Table 3-7, Well-Formed UTF-8 Byte Sequences, in The Unicode Standard:
209+
# https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-3/#G27506
210+
data(
211+
"incomplete 2 byte sequence" => "\xc3".b,
212+
"invalid 2 byte sequence" => "\xc3\x7f".b,
213+
"incomplete 3 byte sequence" => "\xe0\x80\x80".b,
214+
"invalid 3 byte sequence" => "\xe0\x80\x80".b,
215+
"incomplete 4 byte sequence" => "\xf1\x80\x80".b,
216+
"invalid 4 byte sequence" => "\xf0\x80\x80\x80".b,
217+
"first byte too high" => "\xff\xaa\xaa\xaa".b,
218+
"UTF-16 surrogate pair" => "\xFE\xFF\xD8\x3D\xDC\xA3\xFE\x0F".b,
219+
"windows-1252" => "åêïõü".encode("windows-1252"),
220+
)
221+
test "invalid UTF-8" do |text|
222+
try_multiple_encodings(/invalid UTF-8/i, text)
223+
end
224+
225+
def with_multiple_encodings(data)
226+
yield data.b # BINARY
227+
yield data.dup.force_encoding("ASCII")
228+
yield data.dup.force_encoding("UTF-8")
229+
yield data.dup.force_encoding("cp1252")
230+
end
231+
232+
def try_multiple_encodings(error_message, data)
233+
with_multiple_encodings(data) do |encoded|
234+
assert_raise_with_message(DataFormatError, error_message) do
235+
RawText[encoded]
236+
end
237+
end
238+
end
239+
end
240+
241+
class RawDataTest < CommandDataTest
242+
test "simple raw text" do
243+
raw = RawData.new('foo "bar" baz')
244+
assert_equal [RawText['foo "bar" baz']], raw.data
245+
imap.send_data raw
246+
assert_equal [Output.put_string('foo "bar" baz')], imap.output
247+
end
248+
249+
test "a single literal" do
250+
raw = RawData.new("{7}\r\nfoo bar")
251+
assert_equal [Literal["foo bar", false]], raw.data
252+
imap.send_data raw, tag: "t1"
253+
assert_equal [
254+
Output.send_literal("foo bar", "t1", non_sync: false),
255+
], imap.output
256+
end
257+
258+
test "literals embedded between text" do
259+
raw = RawData.new("foo bar {3}\r\nbaz {4+}\r\nquux etc")
260+
assert_equal [
261+
RawText["foo bar "],
262+
Literal["baz", false],
263+
RawText[" "],
264+
Literal["quux", true], # non-synchronizing
265+
RawText[" etc"],
266+
], raw.data
267+
imap.send_data raw, tag: "t2"
268+
assert_equal [
269+
Output.put_string("foo bar "),
270+
Output.send_literal("baz", "t2", non_sync: false),
271+
Output.put_string(" "),
272+
Output.send_literal("quux", "t2", non_sync: true),
273+
Output.put_string(" etc"),
274+
], imap.output
275+
end
276+
277+
test "empty literals" do
278+
raw = RawData.new("{0}\r\n{0+}\r\n~{0}\r\n~{0+}\r\n")
279+
assert_equal [
280+
Literal["", false],
281+
Literal["", true],
282+
Literal8["", false],
283+
Literal8["", true],
284+
], raw.data
285+
imap.send_data raw, tag: "t2.2"
286+
assert_equal [
287+
Output.send_literal("", "t2.2", non_sync: false),
288+
Output.send_literal("", "t2.2", non_sync: true),
289+
Output.send_binary_literal("", "t2.2", non_sync: false),
290+
Output.send_binary_literal("", "t2.2", non_sync: true),
291+
], imap.output
292+
end
293+
294+
test "raw text embedded between literals" do
295+
raw = RawData.new("{3}\r\nfoo bar")
296+
assert_equal [
297+
Literal["foo", false],
298+
RawText[" bar"]
299+
], raw.data
300+
imap.send_data raw, tag: "t3"
301+
assert_equal [
302+
Output.send_literal("foo", "t3", non_sync: false),
303+
Output.put_string(" bar"),
304+
], imap.output
305+
end
306+
307+
test "raw text followed by literal" do
308+
raw = RawData.new("foo {3}\r\nbar")
309+
assert_equal [
310+
RawText["foo "],
311+
Literal["bar", false],
312+
], raw.data
313+
imap.send_data raw, tag: "t4"
314+
assert_equal [
315+
Output.put_string("foo "),
316+
Output.send_literal("bar", "t4", non_sync: false),
317+
], imap.output
318+
imap.clear
319+
end
320+
321+
test "binary literal with regular literal" do
322+
raw = RawData.new("foo ~{7}\r\n\0bar\r\nbaz {4}\r\nquux")
323+
assert_equal [
324+
RawText["foo "],
325+
Literal8["\0bar\r\nb", false],
326+
RawText["az "],
327+
Literal["quux", false],
328+
], raw.data
329+
imap.send_data raw, tag: "t5"
330+
assert_equal [
331+
Output.put_string("foo "),
332+
Output.send_binary_literal("\0bar\r\nb", "t5", non_sync: false),
333+
Output.put_string("az "),
334+
Output.send_literal("quux", "t5", non_sync: false),
335+
], imap.output
336+
end
337+
338+
data(
339+
"CR" => "with \r byte",
340+
"LF" => "with \n byte",
341+
"NULL" => "with \0 byte",
342+
"CRLF" => "with \r\n bytes",
343+
)
344+
test "invalid bytes in raw text" do |data|
345+
assert_raise_with_message(DataFormatError, /must be.* literal encoded/i) do
346+
RawData.new(data:)
347+
end
348+
end
349+
350+
test "invalid literal" do |data|
351+
assert_raise_with_message(DataFormatError, /too few bytes/i) do
352+
RawData.new(data: "invalid literal {123}\r\ntoo small")
353+
end
354+
355+
assert_raise_with_message(DataFormatError, /NULL byte.*in.*literal/i) do
356+
RawData.new(data: "invalid literal {10}\r\ncontains \0 null")
357+
end
358+
end
359+
360+
test "invalid literal ending ('{123}')" do
361+
assert_raise(DataFormatError) do RawData.new(data: "literal {123}") end
362+
assert_raise(DataFormatError) do RawData.new(data: "literal+ {123+}") end
363+
assert_raise(DataFormatError) do RawData.new(data: "~literal ~{123}") end
364+
assert_raise(DataFormatError) do RawData.new(data: "~literal+ ~{123+}") end
365+
raw = RawData.new(data: " {123} ")
366+
assert_equal [RawText[" {123} "]], raw.data
367+
end
368+
end
369+
165370
end

test/net/imap/test_imap.rb

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,35 @@ def test_send_symbol_as_flag
650650
end
651651
end
652652

653+
def test_raw_data
654+
with_fake_server do |server, imap|
655+
server.on "TEST", &:done_ok
656+
657+
imap.__send__(:send_command, "TEST", Net::IMAP::RawData.new("foo bar"))
658+
assert_equal "foo bar", server.commands.pop.args
659+
660+
imap.__send__(:send_command, "TEST",
661+
Net::IMAP::RawData.new("{3}\r\nfoo"),
662+
Net::IMAP::RawData.new("~{4}\r\n\0bar"))
663+
assert_equal "{3}\r\nfoo ~{4}\r\n\0bar", server.commands.pop.args
664+
665+
# RawData must pass basic validation before sending command
666+
[
667+
"with \0 NULL",
668+
"with \r CR",
669+
"with \n LF",
670+
"with \r\n CRLF",
671+
"{1234}\r\nliteral is too small",
672+
"{1}\r\n\0 literal contains NULL",
673+
].each do |data|
674+
assert_raise(Net::IMAP::DataFormatError) do
675+
imap.__send__(:send_command, "TEST", Net::IMAP::RawData[data:])
676+
end
677+
assert_empty server.commands
678+
end
679+
end
680+
end
681+
653682
test("send PartialRange args") do
654683
with_fake_server do |server, imap|
655684
server.on "TEST", &:done_ok

0 commit comments

Comments
 (0)