Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 71 additions & 42 deletions lib/rubygems/yaml_serializer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,28 @@ def initialize(items: [], tag: nil, anchor: nil)
AliasRef = Struct.new(:name, keyword_init: true)

class Parser
MAPPING_KEY_RE = /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/
# A plain (unquoted) mapping key followed by ":". "#" inside a key is
# literal unless preceded by a space (which would start a comment).
MAPPING_KEY_RE = /^((?:[^#:\s]|:[^ ])(?:[^#:]|:[^ ]|(?<=[^ ])#)*):(?:[ ]+(.*))?$/
# A quoted mapping key followed by ":". A whole-line quoted scalar
# cannot match because its closing quote is at the end of the line.
QUOTED_KEY_RE = /^("(?:[^"\\]|\\.)*"|'(?:[^']|'')*'):(?:[ ]+(.*))?$/
MAX_NESTING_DEPTH = 1_000

# Lookup table from a two-character backslash escape, as it appears inside
# a double-quoted scalar, to the single character it stands for. #coerce
# consults this table for every escape it matches; hex escapes of the form
# "\xNN" are not listed here and are decoded numerically by #coerce instead.
STRING_UNESCAPES = {
"\\\\" => "\\",
"\\\"" => "\"",
"\\0" => "\0",
"\\a" => "\a",
"\\b" => "\b",
"\\t" => "\t",
"\\n" => "\n",
"\\v" => "\v",
"\\f" => "\f",
"\\r" => "\r",
"\\e" => "\e",
}.freeze

def initialize(source)
@lines = source.split("\n")
@anchors = {}
Expand Down Expand Up @@ -93,24 +112,13 @@ def parse_node(base_indent)

if stripped.start_with?("- ") || stripped == "-"
parse_sequence(indent, anchor)
elsif stripped.start_with?("\"") && stripped.end_with?("\"")
# We don't need to care about the following case here:
# 1. "value with comment" # ...
# 2. "key": "value"
#
# 1. must not happen because YAMLSerializer doesn't emit any
# comment. YAMLSerializer parses only YAML that is generated
# by YAMLSerializer.
#
# 2. must not happen because #parse_node isn't used non
# top-level mapping. Non top-level mapping always uses
# #parse_mapping. Top-level mapping never use the '"key":
# "value"' form because all top-level keys
# ("!ruby/object:Gem::Specification"'s keys) are known and
# #emit_specification doesn't quote anything.
parse_plain_scalar(indent, anchor)
elsif stripped.start_with?("'") && stripped.end_with?("'")
# See also the above note for double quotation.
elsif QUOTED_KEY_RE.match?(stripped)
# A mapping whose key is quoted, e.g. '"have: colon": value'.
parse_mapping(indent, anchor)
elsif (stripped.start_with?("\"") && stripped.end_with?("\"")) ||
(stripped.start_with?("'") && stripped.end_with?("'"))
# A whole-line quoted scalar, e.g. '"system: foo: bar"'. It may
# contain ": ", so it must be checked before MAPPING_KEY_RE.
parse_plain_scalar(indent, anchor)
elsif stripped =~ MAPPING_KEY_RE && !stripped.start_with?("!ruby/object:")
parse_mapping(indent, anchor)
Expand Down Expand Up @@ -154,7 +162,8 @@ def parse_sequence_item(content, indent)
elsif content.start_with?("-")
@lines.unshift("#{" " * (indent + 2)}#{content}")
parse_node(indent)
elsif content =~ MAPPING_KEY_RE && !content.start_with?("!ruby/object:")
elsif QUOTED_KEY_RE.match?(content) ||
(content =~ MAPPING_KEY_RE && !content.start_with?("!ruby/object:"))
@lines.unshift("#{" " * (indent + 2)}#{content}")
parse_node(indent)
elsif content.start_with?("|")
Expand All @@ -169,13 +178,18 @@ def parse_mapping(indent, anchor)
while @lines.any?
line = @lines[0]
stripped = line.lstrip
break unless line.size - stripped.size == indent &&
stripped =~ MAPPING_KEY_RE && !stripped.start_with?("!ruby/object:")
key = $1.strip
@lines.shift
val = strip_comment($2.to_s.strip)
break unless line.size - stripped.size == indent

key = decode_binary_tag(key) if key.start_with?("!binary ")
if (match = QUOTED_KEY_RE.match(stripped))
key = coerce(match[1])
elsif (match = MAPPING_KEY_RE.match(stripped)) && !stripped.start_with?("!ruby/object:")
key = match[1].strip
key = decode_binary_tag(key) if key.start_with?("!binary ")
else
break
end
@lines.shift
val = strip_comment(match[2].to_s.strip)

val_anchor, val = consume_value_anchor(val)
value = parse_mapping_value(val, indent)
Expand Down Expand Up @@ -289,14 +303,8 @@ def coerce(val, depth = 0)
val = val.sub(/^! /, "") if val.start_with?("! ")

if val =~ /^"(.*)"$/
$1.gsub(/\\["nrt\\]/) do |m|
case m
when '\\"' then '"'
when "\\n" then "\n"
when "\\r" then "\r"
when "\\t" then "\t"
when "\\\\" then "\\"
end
$1.gsub(/\\(?:["\\0abtnvfre]|x\h{2})/) do |m|
STRING_UNESCAPES[m] || m[2..].to_i(16).chr(Encoding::UTF_8)
end
elsif val =~ /^'(.*)'$/
$1.gsub(/''/, "'")
Expand Down Expand Up @@ -412,7 +420,7 @@ def skip_blank_and_comments

def strip_comment(val)
return val unless val.include?("#")
return val if val.lstrip.start_with?("#")
return "" if val.lstrip.start_with?("#")

in_single = false
in_double = false
Expand All @@ -436,7 +444,9 @@ def strip_comment(val)
case ch
when "'" then in_single = true
when '"' then in_double = true
when "#" then return val[0...i].rstrip
when "#"
# A "#" starts a comment only when preceded by whitespace.
return val[0...i].rstrip if [" ", "\t"].include?(val[i - 1])
end
end
end
Expand Down Expand Up @@ -684,6 +694,20 @@ def normalize_array_field(value)
end

class Emitter
# Lookup table from a character to the backslash escape that #quote_string
# writes for it inside a double-quoted scalar. Backslash and double quote
# are always escaped; the remaining entries are control characters that
# have a short named escape. Control characters without an entry here are
# written by #quote_string as a "\xNN" hex escape.
STRING_ESCAPES = {
"\\" => "\\\\",
"\"" => "\\\"",
"\0" => "\\0",
"\a" => "\\a",
"\b" => "\\b",
"\t" => "\\t",
"\n" => "\\n",
"\v" => "\\v",
"\f" => "\\f",
"\r" => "\\r",
"\e" => "\\e",
}.freeze

# Serializes +obj+ as a document: the "---" document-start marker followed
# directly by whatever #emit_node produces for +obj+ at indentation 0.
def emit(obj)
  document_body = emit_node(obj, 0)
  "---#{document_body}"
end
Expand All @@ -706,7 +730,7 @@ def emit_node(obj, indent, quote: false)
when Numeric, Symbol, TrueClass, FalseClass
" #{obj.inspect}\n"
else
" #{obj.to_s.inspect}\n"
" #{quote_string(obj.to_s)}\n"
end
end

Expand Down Expand Up @@ -767,6 +791,7 @@ def emit_hash(hash, indent)
hash.each do |k, v|
is_symbol = k.is_a?(Symbol) || (k.is_a?(String) && k.start_with?(":"))
key_str = k.is_a?(Symbol) ? k.inspect : k.to_s
key_str = quote_string(key_str) if !is_symbol && needs_quoting?(key_str)
parts << "#{pad(indent)}#{key_str}:#{emit_node(v, indent + 2, quote: is_symbol)}"
end
parts.join
Expand All @@ -793,7 +818,7 @@ def emit_string(str, indent, quote: false)
if str.include?("\n")
emit_block_scalar(str, indent)
elsif needs_quoting?(str, quote)
" #{str.to_s.inspect}\n"
" #{quote_string(str)}\n"
else
" #{str}\n"
end
Expand All @@ -809,14 +834,18 @@ def emit_block_scalar(str, indent)
res
end

def needs_quoting?(str, quote)
quote || str.empty? ||
str =~ /^[!*&:@%$]/ || str =~ /^-?\d+(\.\d+)?$/ || str =~ /^[<>=-]/ ||
str == "true" || str == "false" || str == "nil" ||
# Tells whether +str+ has to be written as a quoted scalar.
#
# str   - the String about to be emitted.
# quote - when truthy, quoting is forced regardless of the content.
#
# Returns true or false. Quoting is required when the string
#   * is empty, or has leading/trailing whitespace,
#   * contains a control character,
#   * starts with one of the characters ! * & : @ % $ " ' | ` < > = -
#   * looks like an integer or a decimal number,
#   * is one of the words true, false, nil, null or "~",
#   * contains any of : # [ ] { } ,
def needs_quoting?(str, quote = false)
  return true if quote || str.empty?

  # Padding would be lost and control characters cannot be written plainly.
  return true if str != str.strip || str.match?(/[[:cntrl:]]/)

  # A leading indicator character would change how the scalar is read.
  return true if str.match?(/\A[!*&:@%$"'|`<>=-]/)

  # Plain numbers and keyword-like words would be read back as non-strings.
  return true if str.match?(/\A-?\d+(\.\d+)?\z/)
  return true if %w[true false nil null ~].include?(str)

  # Structural characters anywhere in the string.
  str.match?(/[:#\[\]{},]/)
end

# Wraps +str+ in double quotes, escaping backslashes, double quotes and
# control characters. Characters listed in STRING_ESCAPES use their named
# escape; any other matched character is written as a "\xNN" hex escape.
def quote_string(str)
  escaped = str.gsub(/[\\"]|[[:cntrl:]]/) do |char|
    STRING_ESCAPES.fetch(char) { format("\\x%02X", char.ord) }
  end
  %("#{escaped}")
end

# Builds the leading whitespace for a line: one space repeated +indent+
# times.
def pad(indent)
  space = " "
  space * indent
end
Expand Down
130 changes: 118 additions & 12 deletions test/rubygems/test_gem_safe_yaml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -709,31 +709,137 @@ def test_roundtrip_specification_with_windows_paths
end

# Dumps a specification with yaml_dump, reloads it with
# Gem::SafeYAML.safe_load, and checks that the metadata hash comes back
# unchanged. The metadata mixes ordinary URI entries with keys and values
# that contain double quotes, single quotes, colons and spaces.
def test_roundtrip_specification_with_metadata
  metadata = {
    "changelog_uri" => "https://example.com/CHANGELOG.md",
    "source_code_uri" => "https://github.com/example/metadata-test",
    "bug_tracker_uri" => "https://github.com/example/metadata-test/issues",
    "allowed_push_host" => "https://rubygems.org",
    "\"double_quoted\"" => "\"quoted_value\"",
    "'single_quoted'" => "'quoted_value'",
    "have:colon" => "value:colon",
    "have space" => "value space",
  }
  spec = Gem::Specification.new do |s|
    s.name = "metadata-test"
    s.version = "1.0.0"
    s.authors = ["Test"]
    s.summary = "A gem with metadata"
    s.files = ["lib/foo.rb"]
    s.require_paths = ["lib"]
    s.metadata = metadata
  end

  yaml = yaml_dump(spec)
  loaded = Gem::SafeYAML.safe_load(yaml)

  assert_kind_of Gem::Specification, loaded
  assert_kind_of Hash, loaded.metadata
  # Comparing the whole hash covers both the entry count and every value,
  # so no per-key or size assertions are needed.
  assert_equal metadata, loaded.metadata
end

# Round-trips a specification whose first metadata key needs quoting, and
# checks that the spec name and the whole metadata hash come back intact.
def test_roundtrip_specification_with_quoted_first_metadata_key
  expected = {
    "\"double_quoted\"" => "\"quoted_value\"",
    "'single_quoted'" => "'quoted_value'",
    "have:colon" => "value:colon",
  }
  original = Gem::Specification.new do |gem|
    gem.name = "metadata-test"
    gem.version = "1.0.0"
    gem.authors = ["Test"]
    gem.summary = "A gem with metadata"
    gem.metadata = expected
  end

  dumped = yaml_dump(original)
  roundtripped = Gem::SafeYAML.safe_load(dumped)

  assert_kind_of Gem::Specification, roundtripped
  assert_equal "metadata-test", roundtripped.name
  assert_equal expected, roundtripped.metadata
end

# Round-trips metadata containing ": " and "#" inside keys and values, a
# value with surrounding spaces, and the literal string "null".
def test_roundtrip_specification_with_special_metadata_keys
  expected = {
    "have: colon-space" => "value: colon-space",
    "have#hash" => "value#hash",
    "padded" => " padded value ",
    "looks_null" => "null",
  }
  original = Gem::Specification.new do |gem|
    gem.name = "metadata-test"
    gem.version = "1.0.0"
    gem.authors = ["Test"]
    gem.summary = "A gem with metadata"
    gem.metadata = expected
  end

  dumped = yaml_dump(original)
  roundtripped = Gem::SafeYAML.safe_load(dumped)

  assert_kind_of Gem::Specification, roundtripped
  assert_equal expected, roundtripped.metadata
end

# Round-trips metadata values that contain control characters (bell,
# escape, SOH, DEL and tab) and checks that they come back unchanged.
def test_roundtrip_specification_with_control_character_metadata
  expected = {
    "bell" => "bell\a",
    "escape" => "esc\e[0m",
    "control" => "soh\x01del\x7F",
    "tab" => "tab\tinside",
  }
  original = Gem::Specification.new do |gem|
    gem.name = "metadata-test"
    gem.version = "1.0.0"
    gem.authors = ["Test"]
    gem.summary = "A gem with metadata"
    gem.metadata = expected
  end

  dumped = yaml_dump(original)
  roundtripped = Gem::SafeYAML.safe_load(dumped)

  assert_kind_of Gem::Specification, roundtripped
  assert_equal expected, roundtripped.metadata
end

# Loads a double-quoted scalar that uses the \a, \e, \0 and \xNN escapes
# and checks that each one is decoded to the corresponding character.
def test_load_escaped_control_characters
  source = <<~YAML
    ---
    key: "bell\\aesc\\enull\\0soh\\x01del\\x7F"
  YAML
  decoded = { "key" => "bell\aesc\enull\0soh\x01del\x7F" }

  assert_equal(decoded, yaml_load(source))
end

# Checks comment handling in mapping values: a value that is only a comment
# loads as nil, a trailing comment is dropped, and a "#" inside a quoted
# value is kept.
def test_load_comment_only_mapping_value
  source = <<~YAML
    ---
    commented: # comment
    plain: value # trailing comment
    quoted: "kept # inside quotes"
  YAML

  parsed = yaml_load(source)

  assert_equal(
    {
      "commented" => nil,
      "plain" => "value",
      "quoted" => "kept # inside quotes",
    },
    parsed
  )
end

# Loads mappings whose keys are single- or double-quoted, plus a plain key
# containing "#", and checks the decoded keys and values.
def test_load_psych_style_quoted_mapping_keys
  source = <<~YAML
    ---
    '"double_quoted"': '"quoted_value"'
    "'single_quoted'": "'quoted_value'"
    'have: colon-space': v
    key#hash: value#hash
  YAML

  parsed = yaml_load(source)

  assert_equal(
    {
      "\"double_quoted\"" => "\"quoted_value\"",
      "'single_quoted'" => "'quoted_value'",
      "have: colon-space" => "v",
      "key#hash" => "value#hash",
    },
    parsed
  )
end

def test_roundtrip_version
Expand Down
Loading