parse method
Creates a new Uri
object by parsing a URI string.
If start
and end
are provided, they must specify a valid substring
of uri
, and only the substring from start
to end
is parsed as a URI.
The uri
must not be null
.
If the uri
string is not valid as a URI or URI reference,
a FormatException is thrown.
Implementation
static Uri parse(String uri, [int start = 0, int end]) {
// This parsing will not validate percent-encoding, IPv6, etc.
// When done splitting into parts, it will call, e.g., [_makeFragment]
// to do the final parsing.
//
// Important parts of the RFC 3986 used here:
// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
//
// hier-part = "//" authority path-abempty
// / path-absolute
// / path-rootless
// / path-empty
//
// URI-reference = URI / relative-ref
//
// absolute-URI = scheme ":" hier-part [ "?" query ]
//
// relative-ref = relative-part [ "?" query ] [ "#" fragment ]
//
// relative-part = "//" authority path-abempty
// / path-absolute
// / path-noscheme
// / path-empty
//
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
//
// authority = [ userinfo "@" ] host [ ":" port ]
// userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
// host = IP-literal / IPv4address / reg-name
// port = *DIGIT
// reg-name = *( unreserved / pct-encoded / sub-delims )
//
// path = path-abempty ; begins with "/" or is empty
// / path-absolute ; begins with "/" but not "//"
// / path-noscheme ; begins with a non-colon segment
// / path-rootless ; begins with a segment
// / path-empty ; zero characters
//
// path-abempty = *( "/" segment )
// path-absolute = "/" [ segment-nz *( "/" segment ) ]
// path-noscheme = segment-nz-nc *( "/" segment )
// path-rootless = segment-nz *( "/" segment )
// path-empty = 0<pchar>
//
// segment = *pchar
// segment-nz = 1*pchar
// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
// ; non-zero-length segment without any colon ":"
//
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
//
// query = *( pchar / "/" / "?" )
//
// fragment = *( pchar / "/" / "?" )
end ??= uri.length;
// Special case data:URIs. Ignore case when testing.
if (end >= start + 5) {
int dataDelta = _startsWithData(uri, start);
if (dataDelta == 0) {
// The case is right.
if (start > 0 || end < uri.length) uri = uri.substring(start, end);
return UriData._parse(uri, 5, null).uri;
} else if (dataDelta == 0x20) {
return UriData._parse(uri.substring(start + 5, end), 0, null).uri;
}
// Otherwise the URI doesn't start with "data:" or any case variant of it.
}
// The following index-normalization belongs with the scanning, but is
// easier to do here because we already have extracted variables from the
// indices list.
var indices = List<int>(8);
// Set default values for each position.
// The value will either be correct in some cases where it isn't set
// by the scanner, or it is clearly recognizable as an unset value.
indices
..[0] = 0
..[_schemeEndIndex] = start - 1
..[_hostStartIndex] = start - 1
..[_notSimpleIndex] = start - 1
..[_portStartIndex] = start
..[_pathStartIndex] = start
..[_queryStartIndex] = end
..[_fragmentStartIndex] = end;
var state = _scan(uri, start, end, _uriStart, indices);
// Some states that should be non-simple, but the URI ended early.
// Paths that end at a ".." must be normalized to end in "../".
if (state >= _nonSimpleEndStates) {
indices[_notSimpleIndex] = end;
}
int schemeEnd = indices[_schemeEndIndex];
if (schemeEnd >= start) {
// Rescan the scheme part now that we know it's not a path.
state = _scan(uri, start, schemeEnd, _schemeStart, indices);
if (state == _schemeStart) {
// Empty scheme.
indices[_notSimpleIndex] = schemeEnd;
}
}
// The returned positions are limited by the scanners ability to write only
// one position per character, and only the current position.
// Scanning from left to right, we only know whether something is a scheme
// or a path when we see a `:` or `/`, and likewise we only know if the first
// `/` is part of the path or is leading an authority component when we see
// the next character.
int hostStart = indices[_hostStartIndex] + 1;
int portStart = indices[_portStartIndex];
int pathStart = indices[_pathStartIndex];
int queryStart = indices[_queryStartIndex];
int fragmentStart = indices[_fragmentStartIndex];
// We may discover scheme while handling special cases.
String scheme;
// Derive some positions that weren't set to normalize the indices.
if (fragmentStart < queryStart) queryStart = fragmentStart;
// If pathStart isn't set (it's before scheme end or host start), then
// the path is empty, or there is no authority part and the path
// starts with a non-simple character.
if (pathStart < hostStart) {
// There is an authority, but no path. The path would start with `/`
// if it was there.
pathStart = queryStart;
} else if (pathStart <= schemeEnd) {
// There is a scheme, but no authority.
pathStart = schemeEnd + 1;
}
// If there is an authority with no port, set the port position
// to be at the end of the authority (equal to pathStart).
// This also handles a ":" in a user-info component incorrectly setting
// the port start position.
if (portStart < hostStart) portStart = pathStart;
assert(hostStart == start || schemeEnd <= hostStart);
assert(hostStart <= portStart);
assert(schemeEnd <= pathStart);
assert(portStart <= pathStart);
assert(pathStart <= queryStart);
assert(queryStart <= fragmentStart);
bool isSimple = indices[_notSimpleIndex] < start;
if (isSimple) {
// Check/do normalizations that weren't detected by the scanner.
// This includes removal of empty port or userInfo,
// or scheme specific port and path normalizations.
if (hostStart > schemeEnd + 3) {
// Always be non-simple if URI contains user-info.
// The scanner doesn't set the not-simple position in this case because
// it's setting the host-start position instead.
isSimple = false;
} else if (portStart > start && portStart + 1 == pathStart) {
// If the port is empty, it should be omitted.
// Pathological case, don't bother correcting it.
isSimple = false;
} else if (queryStart < end &&
(queryStart == pathStart + 2 &&
uri.startsWith("..", pathStart)) ||
(queryStart > pathStart + 2 &&
uri.startsWith("/..", queryStart - 3))) {
// The path ends in a ".." segment. This should be normalized to "../".
// We didn't detect this while scanning because a query or fragment was
// detected at the same time (which is why we only need to check this
// if there is something after the path).
isSimple = false;
} else {
// There are a few scheme-based normalizations that
// the scanner couldn't check.
// That means that the input is very close to simple, so just do
// the normalizations.
if (schemeEnd == start + 4) {
// Do scheme based normalizations for file, http.
if (uri.startsWith("file", start)) {
scheme = "file";
if (hostStart <= start) {
// File URIs should have an authority.
// Paths after an authority should be absolute.
String schemeAuth = "file://";
int delta = 2;
if (!uri.startsWith("/", pathStart)) {
schemeAuth = "file:///";
delta = 3;
}
uri = schemeAuth + uri.substring(pathStart, end);
schemeEnd -= start;
hostStart = 7;
portStart = 7;
pathStart = 7;
queryStart += delta - start;
fragmentStart += delta - start;
start = 0;
end = uri.length;
} else if (pathStart == queryStart) {
// Uri has authority and empty path. Add "/" as path.
if (start == 0 && end == uri.length) {
uri = uri.replaceRange(pathStart, queryStart, "/");
queryStart += 1;
fragmentStart += 1;
end += 1;
} else {
uri = "${uri.substring(start, pathStart)}/"
"${uri.substring(queryStart, end)}";
schemeEnd -= start;
hostStart -= start;
portStart -= start;
pathStart -= start;
queryStart += 1 - start;
fragmentStart += 1 - start;
start = 0;
end = uri.length;
}
}
} else if (uri.startsWith("http", start)) {
scheme = "http";
// HTTP URIs should not have an explicit port of 80.
if (portStart > start &&
portStart + 3 == pathStart &&
uri.startsWith("80", portStart + 1)) {
if (start == 0 && end == uri.length) {
uri = uri.replaceRange(portStart, pathStart, "");
pathStart -= 3;
queryStart -= 3;
fragmentStart -= 3;
end -= 3;
} else {
uri = uri.substring(start, portStart) +
uri.substring(pathStart, end);
schemeEnd -= start;
hostStart -= start;
portStart -= start;
pathStart -= 3 + start;
queryStart -= 3 + start;
fragmentStart -= 3 + start;
start = 0;
end = uri.length;
}
}
}
} else if (schemeEnd == start + 5 && uri.startsWith("https", start)) {
scheme = "https";
// HTTPS URIs should not have an explicit port of 443.
if (portStart > start &&
portStart + 4 == pathStart &&
uri.startsWith("443", portStart + 1)) {
if (start == 0 && end == uri.length) {
uri = uri.replaceRange(portStart, pathStart, "");
pathStart -= 4;
queryStart -= 4;
fragmentStart -= 4;
end -= 3;
} else {
uri = uri.substring(start, portStart) +
uri.substring(pathStart, end);
schemeEnd -= start;
hostStart -= start;
portStart -= start;
pathStart -= 4 + start;
queryStart -= 4 + start;
fragmentStart -= 4 + start;
start = 0;
end = uri.length;
}
}
}
}
}
if (isSimple) {
if (start > 0 || end < uri.length) {
uri = uri.substring(start, end);
schemeEnd -= start;
hostStart -= start;
portStart -= start;
pathStart -= start;
queryStart -= start;
fragmentStart -= start;
}
return _SimpleUri(uri, schemeEnd, hostStart, portStart, pathStart,
queryStart, fragmentStart, scheme);
}
return _Uri.notSimple(uri, start, end, schemeEnd, hostStart, portStart,
pathStart, queryStart, fragmentStart, scheme);
}