Skip to content

Commit d9db413

Browse files
committed
[PortsR] Adds preliminary ruby parser
1 parent b108b63 commit d9db413

File tree

1 file changed

+126
-0
lines changed

1 file changed

+126
-0
lines changed

ports-r/parser.rb

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
require 'json'
2+
3+
# Splits source text into a flat array of token strings.
#
# Recognised tokens are the punctuation marks ( ) ' ` , and ,@,
# double-quoted string literals (returned with their quotes and escape
# sequences untouched), and runs of any other non-delimiter characters.
# Whitespace and `;` comments (which run to the end of the line) are dropped.
#
# The input is scanned one line at a time, so a string literal cannot span
# several lines.
#
# @param expression [String] source text, possibly containing newlines
# @return [Array<String>] the tokens in source order
# @raise [RuntimeError] if a line contains a string literal with no closing quote
def tokenize(expression)
  # Group 1 is the next token (possibly empty); group 2 is the rest of the line.
  pattern = /\s*(,@|[('`,)]|"(?:\\.|[^\\"])*"|;.*|[^\s('"`;,)]*)(.*)/
  tokens = []

  expression.split("\n").each do |line|
    rest = line
    while (match = rest.match(pattern)) && !match[0].empty?
      token = match[1]
      remainder = match[2]

      # An empty token with text still left over means no alternative could
      # consume the next character. The only delimiter not handled by another
      # alternative is a `"` that never closes; without this check the loop
      # would retry the same remainder forever.
      raise "Unterminated string literal: #{remainder}" if token.empty? && !remainder.empty?

      tokens << token unless token.empty? || token.start_with?(';')
      rest = remainder
    end
  end

  tokens
end
19+
20+
# Parses one datum from the front of +tokens+, consuming the tokens it uses.
#
# Lists become Ruby arrays. The reader prefixes ' ` , and ,@ become
# two-element arrays headed by :quote, :quasiquote, :unquote and
# :"unquote-splicing" respectively. Any other token is handed to parse_atom.
#
# @param tokens [Array<String>] remaining tokens; mutated in place
# @return [Object] the parsed datum, or [] when +tokens+ is already empty
# @raise [RuntimeError] on a stray ')' or when a list is never closed
def parse_tokens(tokens)
  return [] if tokens.empty?

  token = tokens.shift
  case token
  when '('
    list = []
    until tokens.first == ')'
      # Running out of tokens here means the list was never closed. Recursing
      # on an empty token array returns [] without consuming anything, so
      # without this guard the loop would never terminate.
      raise "Unexpected end of input: missing ')'" if tokens.empty?

      list << parse_tokens(tokens)
    end
    tokens.shift # drop the closing ')'
    list
  when ')'
    raise "Unexpected ')'"
  when "'"
    [:quote, parse_tokens(tokens)]
  when '`'
    [:quasiquote, parse_tokens(tokens)]
  when ','
    [:unquote, parse_tokens(tokens)]
  when ',@'
    [:"unquote-splicing", parse_tokens(tokens)]
  else
    parse_atom(token)
  end
end
45+
46+
# Converts a single non-list token into a Ruby value.
#
# * `#t` / `#true` and `#f` / `#false` (any letter case) become true / false.
# * A token starting with `"` becomes a String: the surrounding quotes are
#   removed and the escapes \n, \r, \t, \" and \\ are decoded. Any other
#   backslash sequence is left as written.
# * A decimal integer becomes an Integer, any other decimal number a Float.
# * Everything else becomes a Symbol.
#
# @param token [String] a token as produced by tokenize
# @return [true, false, String, Integer, Float, Symbol]
def parse_atom(token)
  lowered = token.downcase
  return true if lowered == '#t' || lowered == '#true'
  return false if lowered == '#f' || lowered == '#false'

  if token[0] == '"'
    escapes = { '\\n' => "\n", '\\r' => "\r", '\\t' => "\t", '\\"' => '"', '\\\\' => '\\' }
    # Decode in a single left-to-right pass so that an escaped backslash
    # followed by "n" is not mistaken for a newline escape.
    return token[1..-2].gsub(/\\./m) { |escape| escapes.fetch(escape, escape) }
  end

  # Numbers are read as decimal. Kernel#Float accepts "0x..." hex notation,
  # so hex-looking tokens are kept away from it and end up as symbols.
  unless token.match?(/\A[+-]?0x/i)
    begin
      # An explicit base of 10 stops a leading zero from selecting octal
      # ("010" would otherwise be 8 and "08" would not be an integer at all).
      return Integer(token, 10)
    rescue ArgumentError
      # Not an integer; try a float next.
    end

    begin
      return Float(token)
    rescue ArgumentError
      # Not a number; fall through to a symbol.
    end
  end

  token.to_sym
end
79+
80+
# Reads the first datum from +input_string+ without any macro expansion:
# the text is tokenized and the resulting tokens are handed to parse_tokens.
#
# @param input_string [String] source text
# @return [Object] whatever parse_tokens returns for the token stream
def parse_without_expand(input_string)
  parse_tokens(tokenize(input_string))
end
84+
85+
# Checks whether a parsed value has the shape described by +target+.
#
# +target+ is either a type name ("Boolean", "String", "Character", "Symbol"
# or "Number") or an array of nested targets. An array target matches only an
# array of the same length whose elements match position by position.
#
# @param structure [Object] the parsed value to check
# @param target [String, Array] the expected shape
# @return [Boolean] true when +structure+ fits +target+; false otherwise,
#   including when +target+ is not a recognised type name
def matches(structure, target)
  case target
  when Array
    structure.is_a?(Array) &&
      structure.length == target.length &&
      # all? stops at the first mismatch instead of visiting every element.
      structure.zip(target).all? { |item, expected| matches(item, expected) }
  when 'Boolean'
    structure == true || structure == false
  when 'String'
    structure.is_a?(String)
  when 'Character'
    structure.is_a?(String) && structure.length == 1
  when 'Symbol'
    structure.is_a?(Symbol)
  when 'Number'
    structure.is_a?(Numeric)
  else
    false
  end
end
107+
108+
# Self-test: runs only when this file is executed directly.
#
# Loads the test table from ports/syntax-tests.json (resolved against the
# current working directory). A String entry is printed as a heading; any
# other entry is indexed as [source text, expected shape]
# (NOTE(review): schema inferred from how entry[0] and entry[1] are used here).
if __FILE__ == $PROGRAM_NAME
  test_table = JSON.parse(File.read('ports/syntax-tests.json'))

  test_table.each do |entry|
    if entry.is_a?(String)
      puts "\n"
      puts entry
      next
    end

    begin
      parse_result = parse_without_expand(entry[0])
    rescue StandardError => e
      # One entry that fails to parse should count as a failure, not abort
      # the rest of the run.
      puts "❌: #{entry} raised #{e.class}: #{e.message}"
      next
    end

    if matches(parse_result, entry[1])
      puts "✅: #{entry}"
    else
      puts "❌: #{entry} got #{parse_result.inspect} instead"
    end
  end
  puts "End of test run"
end

0 commit comments

Comments
 (0)