Skip to content

Commit b11cec8

Browse files
author
lostflydev
committed
Implement URI for pure Wasm (scala-wasm#151)
Address review comments: (1) RegExpImpl: add a Flags object (Global and CaseInsensitive as Int constants) and change compile(str, flags: String) to compile(str, flags: Int); (2) use java.util.function.Function for replaceAll; (3) URI: restore the _fld field using the RegExpImpl.impl.Repr pattern and remove the parseURI helper, using RegExpImpl.impl.exec/matches/exists/getOrElse directly in the constructor; (4) URI: keep uriRe and ipv6Re as single compiled vals (no *ReStr split), inline testIPv6, and remove the empty javalib/net comment from Build.scala; (5) URI: all quote patterns use Int flags; quoteReplace uses the RegExpImpl.impl execFrom/matchStart/matchEnd loop; (6) use += string concatenation instead of a loop for the path join.
1 parent dacf3e8 commit b11cec8

3 files changed

Lines changed: 110 additions & 127 deletions

File tree

javalib/src/main/scala/java/net/URI.scala

Lines changed: 62 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import scala.annotation.tailrec
1616

1717
import java.nio._
1818
import java.nio.charset.{CodingErrorAction, StandardCharsets}
19+
import java.util.{Arrays => juArrays}
20+
import java.util.ScalaOps._
1921
import java.util.regex.RegExpImpl
2022

2123
final class URI(origStr: String) extends Serializable with Comparable[URI] {
@@ -29,14 +31,14 @@ final class URI(origStr: String) extends Serializable with Comparable[URI] {
2931
* This is a helper field for the primary constructor. It is a var,
3032
* since we'll set it to null after initializing all fields.
3133
*/
32-
private[this] var _parsed: Array[String] = URI.parseURI(origStr)
33-
if (_parsed == null)
34+
private[this] var _fld = RegExpImpl.impl.exec(URI.uriRe, origStr)
35+
if (!RegExpImpl.impl.matches(_fld))
3436
throw new URISyntaxException(origStr, "Malformed URI")
3537

36-
private val _isAbsolute = _parsed(AbsScheme) != null
37-
private val _isOpaque = _parsed(AbsOpaquePart) != null
38+
private val _isAbsolute = RegExpImpl.impl.exists(_fld, AbsScheme)
39+
private val _isOpaque = RegExpImpl.impl.exists(_fld, AbsOpaquePart)
3840

39-
@inline private def fld(idx: Int): String = _parsed(idx)
41+
@inline private def fld(idx: Int): String = RegExpImpl.impl.getOrElse(_fld, idx, null)
4042

4143
@inline private def fld(absIdx: Int, relIdx: Int): String =
4244
if (_isAbsolute) fld(absIdx) else fld(relIdx)
@@ -90,7 +92,7 @@ final class URI(origStr: String) extends Serializable with Comparable[URI] {
9092
private val _fragment = fld(Fragment)
9193

9294
// End of default ctor. Unset helper field
93-
_parsed = null
95+
_fld = null.asInstanceOf[RegExpImpl.impl.Repr]
9496

9597
def this(scheme: String, ssp: String, fragment: String) =
9698
this(URI.uriStr(scheme, ssp, fragment))
@@ -216,7 +218,6 @@ final class URI(origStr: String) extends Serializable with Comparable[URI] {
216218
else {
217219
val origPath = _path
218220

219-
// Use String#split which works on both JS and pure Wasm targets
220221
val segments = origPath.split("/", -1)
221222

222223
// Step 1: Remove all "." segments
@@ -279,20 +280,9 @@ final class URI(origStr: String) extends Serializable with Comparable[URI] {
279280
// segment (according to JavaDoc). If the path is absolute, the first
280281
// segment is "" so the `contains(':')` returns false.
281282
val prependDot = outIdx != 0 && segments(0).contains(":")
282-
283-
// Build the new path from segments[0..outIdx)
284-
val newPath = {
285-
val sb = new java.lang.StringBuilder()
286-
if (prependDot)
287-
sb.append(".")
288-
var i = 0
289-
while (i < outIdx) {
290-
if (i != 0 || prependDot) sb.append("/")
291-
sb.append(segments(i))
292-
i += 1
293-
}
294-
sb.toString
295-
}
283+
val normalized =
284+
juArrays.asList(segments).subList(0, outIdx).scalaOps.mkString("", "/", "")
285+
val newPath = if (prependDot) "./" + normalized else normalized
296286

297287
// Only create new instance if anything changed
298288
if (newPath == origPath)
@@ -437,26 +427,18 @@ object URI {
437427
// (25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])| # ::255.255.255.255 ::ffff:255.255.255.255 ::ffff:0:255.255.255.255 (IPv4-mapped IPv6 addresses and IPv4-translated addresses)
438428
// ([0-9a-fA-F]{1,4}:){1,4}:
439429
// ((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}
440-
// (25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]) # 2001:db8:3:4::192.0.2.33 64:ff9b::192.0.2.33 (IPv4-Embedded IPv6 Address)
430+
// (25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]) // 2001:db8:3:4::192.0.2.33 64:ff9b::192.0.2.33 (IPv4-Embedded IPv6 Address)
441431
}
442432

443-
private val ipv6ReStr = "^" + ipv6address + "$"
444-
445-
private[this] lazy val ipv6RePat =
446-
RegExpImpl.impl.compile(ipv6ReStr, "i")
447-
448-
/** Test whether a host string is an IPv6 address. */
449-
private def testIPv6(host: String): Boolean = {
450-
import RegExpImpl.impl
451-
impl.matches(impl.exec(ipv6RePat, host))
452-
}
433+
private val ipv6Re =
434+
RegExpImpl.impl.compile("^" + ipv6address + "$", RegExpImpl.Flags.CaseInsensitive)
453435

454436
// URI syntax parser. Based on RFC2396, RFC2732 and adaptations according to
455437
// JavaDoc.
456438
// - http://www.ietf.org/rfc/rfc2396.txt (see Appendix A for complete syntax)
457439
// - http://www.ietf.org/rfc/rfc2732.txt
458440

459-
private val uriReStr = {
441+
private val uriRe = {
460442
// We don't use any interpolators here to allow for constant folding
461443

462444
///////////////////
@@ -596,30 +578,9 @@ object URI {
596578
"((?:" + net_path + "|(" + abs_path + ")|(" + rel_path + "))(?:\\?" + query + ")?)"
597579

598580
// URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
599-
"^(?:" + absoluteURI + "|" + relativeURI + ")(?:#" + fragment + ")?$"
600-
}
581+
val uriRef = "^(?:" + absoluteURI + "|" + relativeURI + ")(?:#" + fragment + ")?$"
601582

602-
private[this] lazy val uriRePat =
603-
RegExpImpl.impl.compile(uriReStr, "i")
604-
605-
/** Parse a URI string, returning an Array[String] of matched groups
606-
* (null for non-matching groups), or null if the string is not a valid URI.
607-
*/
608-
private def parseURI(str: String): Array[String] = {
609-
import RegExpImpl.impl
610-
val result = impl.exec(uriRePat, str)
611-
if (!impl.matches(result)) {
612-
null
613-
} else {
614-
val len = Fields.Fragment + 1
615-
val arr = new Array[String](len)
616-
var i = 0
617-
while (i < len) {
618-
arr(i) = impl.getOrElse(result, i, null)
619-
i += 1
620-
}
621-
arr
622-
}
583+
RegExpImpl.impl.compile(uriRef, RegExpImpl.Flags.CaseInsensitive)
623584
}
624585

625586
private object Fields {
@@ -676,7 +637,7 @@ object URI {
676637
resStr += quoteUserInfo(userInfo) + "@"
677638

678639
if (host != null) {
679-
if (testIPv6(host))
640+
if (RegExpImpl.impl.matches(RegExpImpl.impl.exec(URI.ipv6Re, host)))
680641
resStr += "[" + host + "]"
681642
else
682643
resStr += host
@@ -799,60 +760,45 @@ object URI {
799760
res
800761
}
801762

802-
/** Replace all matches of a compiled regex pattern with percent-encoded form. */
803-
private def quoteReplace(str: String,
804-
pattern: RegExpImpl.impl.PatRepr): String = {
805-
import RegExpImpl.impl
806-
val sb = new java.lang.StringBuilder()
807-
var lastEnd = 0
808-
var result = impl.execFrom(pattern, str, 0)
809-
while (impl.matchStart(result) != -1) {
810-
val start = impl.matchStart(result)
811-
val end = impl.matchEnd(pattern, result)
812-
sb.append(str, lastEnd, start)
813-
sb.append(quoteStrFn(str.substring(start, end)))
814-
lastEnd = end
815-
result = impl.execFrom(pattern, str, end)
816-
}
817-
sb.append(str, lastEnd, str.length)
818-
sb.toString
763+
private object QuoteStrMapper extends java.util.function.Function[String, String] {
764+
def apply(t: String): String = quoteStrFn(t)
819765
}
820766

821767
/** matches any character not in unreserved, punct, escaped or other */
822-
private val userInfoQuoteReStr = {
823-
// !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]
824-
// Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%]
825-
"[\u0000- \"#/<>?@\\[-\\^`{-}" +
826-
"\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" +
827-
"%(?![0-9a-f]{2})"
768+
private[this] lazy val userInfoQuotePat = {
769+
RegExpImpl.impl.compile(
770+
// !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]
771+
// Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%]
772+
"[\u0000- \"#/<>?@\\[-\\^`{-}" +
773+
"\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" +
774+
"%(?![0-9a-f]{2})",
775+
RegExpImpl.Flags.Global | RegExpImpl.Flags.CaseInsensitive
776+
)
828777
}
829778

830-
private[this] lazy val userInfoQuotePat =
831-
RegExpImpl.impl.compile(userInfoQuoteReStr, "ig")
832-
833779
/** Quote any character not in unreserved, punct, escaped or other */
834780
private def quoteUserInfo(str: String): String =
835-
quoteReplace(str, userInfoQuotePat)
781+
RegExpImpl.impl.replaceAll(userInfoQuotePat, str, QuoteStrMapper)
836782

837783
/** matches any character not in unreserved, punct, escaped, other or equal
838784
* to '/' or '@'
839785
*/
840-
private val pathQuoteReStr = {
841-
// !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]
842-
// Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%@/]
843-
"[\u0000- \"#<>?\\[-\\^`{-}" +
844-
"\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" +
845-
"%(?![0-9a-f]{2})"
786+
private[this] lazy val pathQuotePat = {
787+
RegExpImpl.impl.compile(
788+
// !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]
789+
// Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%@/]
790+
"[\u0000- \"#<>?\\[-\\^`{-}" +
791+
"\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" +
792+
"%(?![0-9a-f]{2})",
793+
RegExpImpl.Flags.Global | RegExpImpl.Flags.CaseInsensitive
794+
)
846795
}
847796

848-
private[this] lazy val pathQuotePat =
849-
RegExpImpl.impl.compile(pathQuoteReStr, "ig")
850-
851797
/** Quote any character not in unreserved, punct, escaped, other or equal
852798
* to '/' or '@'
853799
*/
854800
private def quotePath(str: String): String =
855-
quoteReplace(str, pathQuotePat)
801+
RegExpImpl.impl.replaceAll(pathQuotePat, str, QuoteStrMapper)
856802

857803
/** matches any character not in unreserved, punct, escaped, other or equal
858804
* to '@', '[' or ']'
@@ -861,51 +807,49 @@ object URI {
861807
* in IPv6 addresses, but technically speaking they are in reserved
862808
* due to RFC2732).
863809
*/
864-
private val authorityQuoteReStr = {
865-
// !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]
866-
// Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%@\[\]]
867-
"[\u0000- \"#/<>?\\^`{-}" +
868-
"\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" +
869-
"%(?![0-9a-f]{2})"
810+
private[this] lazy val authorityQuotePat = {
811+
RegExpImpl.impl.compile(
812+
// !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]
813+
// Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=%@\[\]]
814+
"[\u0000- \"#/<>?\\^`{-}" +
815+
"\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" +
816+
"%(?![0-9a-f]{2})",
817+
RegExpImpl.Flags.Global | RegExpImpl.Flags.CaseInsensitive
818+
)
870819
}
871820

872-
private[this] lazy val authorityQuotePat =
873-
RegExpImpl.impl.compile(authorityQuoteReStr, "ig")
874-
875821
/** Quote any character not in unreserved, punct, escaped, other or equal
876822
* to '@', '[' or ']'
877823
*/
878824
private def quoteAuthority(str: String): String =
879-
quoteReplace(str, authorityQuotePat)
825+
RegExpImpl.impl.replaceAll(authorityQuotePat, str, QuoteStrMapper)
880826

881827
/** matches any character not in unreserved, reserved, escaped or other */
882-
private val illegalQuoteReStr = {
883-
// !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]
884-
// Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=?/\\[\\]%]
885-
"[\u0000- \"#<>@\\^`{-}" +
886-
"\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" +
887-
"%(?![0-9a-f]{2})"
828+
private[this] lazy val illegalQuotePat = {
829+
RegExpImpl.impl.compile(
830+
// !other = [\u0000-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]
831+
// Char class is: [:!other:^a-z0-9-_.!~*'(),;:$&+=?/\\[\\]%]
832+
"[\u0000- \"#<>@\\^`{-}" +
833+
"\u007f-\u00a0\u1680\u2000-\u200a\u202f\u205f\u3000\u2028\u2029]|" +
834+
"%(?![0-9a-f]{2})",
835+
RegExpImpl.Flags.Global | RegExpImpl.Flags.CaseInsensitive
836+
)
888837
}
889838

890-
private[this] lazy val illegalQuotePat =
891-
RegExpImpl.impl.compile(illegalQuoteReStr, "ig")
892-
893839
/** Quote any character not in unreserved, reserved, escaped or other */
894840
private def quoteIllegal(str: String): String =
895-
quoteReplace(str, illegalQuotePat)
841+
RegExpImpl.impl.replaceAll(illegalQuotePat, str, QuoteStrMapper)
896842

897843
/** matches characters not in ASCII
898844
*
899845
* Note: It is important that the match is maximal, since we might encounter
900846
* surrogates that need to be encoded in one shot.
901847
*/
902-
private val nonASCIIQuoteReStr = "[^\u0000-\u007F]+"
903-
904848
private[this] lazy val nonASCIIQuotePat =
905-
RegExpImpl.impl.compile(nonASCIIQuoteReStr, "g")
849+
RegExpImpl.impl.compile("[^\u0000-\u007F]+", RegExpImpl.Flags.Global)
906850

907851
private def quoteNonASCII(str: String): String =
908-
quoteReplace(str, nonASCIIQuotePat)
852+
RegExpImpl.impl.replaceAll(nonASCIIQuotePat, str, QuoteStrMapper)
909853

910854
/** Case-insensitive comparison that accepts `null` values.
911855
*

javalib/src/main/scala/java/util/regex/RegExpImpl.scala

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ private[java] sealed abstract class RegExpImpl {
2727

2828
def compile(patternStr: String): PatRepr
2929
def compile(patternStr: String, global: Boolean): PatRepr
30-
def compile(patternStr: String, flags: String): PatRepr
30+
def compile(patternStr: String, flags: Int): PatRepr
3131
def exec(pattern: PatRepr, string: String): Repr
3232
def matches(r: Repr): Boolean
3333
def exists(r: Repr, index: Int): Boolean
@@ -36,9 +36,20 @@ private[java] sealed abstract class RegExpImpl {
3636
def execFrom(pattern: PatRepr, string: String, startPos: Int): Repr
3737
def matchStart(r: Repr): Int
3838
def matchEnd(pattern: PatRepr, r: Repr): Int
39+
40+
def replaceAll(
41+
pattern: PatRepr,
42+
string: String,
43+
replacer: java.util.function.Function[String, String]
44+
): String
3945
}
4046

4147
private[java] object RegExpImpl {
48+
object Flags {
49+
final val Global = 0x01
50+
final val CaseInsensitive = 0x02
51+
}
52+
4253
val impl = linkTimeIf[RegExpImpl](targetPureWasm) {
4354
JavaRegExpImpl
4455
} {
@@ -59,8 +70,13 @@ private[java] object RegExpImpl {
5970
else new js.RegExp(patternStr)
6071
}
6172

62-
def compile(patternStr: String, flags: String): PatRepr =
63-
new js.RegExp(patternStr, flags)
73+
def compile(patternStr: String, flags: Int): PatRepr = {
74+
val jsFlags = {
75+
(if ((flags & Flags.Global) != 0) "g" else "") +
76+
(if ((flags & Flags.CaseInsensitive) != 0) "i" else "")
77+
}
78+
new js.RegExp(patternStr, jsFlags)
79+
}
6480

6581
def exec(pattern: PatRepr, string: String): Repr = pattern.exec(string)
6682
def matches(r: Repr): Boolean = r != null
@@ -84,6 +100,20 @@ private[java] object RegExpImpl {
84100
if (r == null) -1
85101
else pattern.lastIndex
86102
}
103+
104+
def replaceAll(
105+
pattern: PatRepr,
106+
string: String,
107+
replacer: java.util.function.Function[String, String]
108+
): String = {
109+
import js.JSStringOps._
110+
if (!pattern.global)
111+
throw new IllegalArgumentException("replaceAll requires a global pattern")
112+
val jsFunc: js.Function1[String, String] = { (matched: String) =>
113+
replacer.apply(matched)
114+
}
115+
string.jsReplace(pattern, jsFunc)
116+
}
87117
}
88118

89119
private object JavaRegExpImpl extends RegExpImpl {
@@ -93,10 +123,11 @@ private[java] object RegExpImpl {
93123
def compile(patternStr: String): PatRepr = Pattern.compile(patternStr)
94124
def compile(patternStr: String, global: Boolean): PatRepr = Pattern.compile(patternStr)
95125

96-
def compile(patternStr: String, flags: String): PatRepr = {
97-
var javaFlags = 0
98-
if (flags.contains("i")) javaFlags |= Pattern.CASE_INSENSITIVE
99-
Pattern.compile(patternStr, javaFlags)
126+
def compile(patternStr: String, flags: Int): PatRepr = {
127+
var patFlags = 0
128+
if ((flags & Flags.CaseInsensitive) != 0)
129+
patFlags |= Pattern.CASE_INSENSITIVE
130+
Pattern.compile(patternStr, patFlags)
100131
}
101132

102133
def exec(pattern: PatRepr, string: String): Repr = pattern.matcher(string)
@@ -123,5 +154,15 @@ private[java] object RegExpImpl {
123154
if (r == null) -1
124155
else r.end()
125156
}
157+
158+
def replaceAll(
159+
pattern: PatRepr,
160+
string: String,
161+
replacer: java.util.function.Function[String, String]
162+
): String = {
163+
pattern.matcher(string).replaceAll(new java.util.function.Function[MatchResult, String] {
164+
def apply(result: MatchResult): String = replacer.apply(result.group())
165+
})
166+
}
126167
}
127168
}

0 commit comments

Comments
 (0)