forked from symfony/dom-crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
UriResolver.php
136 lines (112 loc) · 3.5 KB
/
UriResolver.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\Component\DomCrawler;
/**
* The UriResolver class takes a URI (relative, absolute, fragment, etc.)
* and turns it into an absolute URI against another given base URI.
*
* @author Fabien Potencier <fabien@symfony.com>
* @author Grégoire Pineau <lyrixx@lyrixx.info>
*/
class UriResolver
{
    /**
     * Resolves a URI according to a base URI.
     *
     * For example if $uri=/foo/bar and $baseUri=https://symfony.com it will
     * return https://symfony.com/foo/bar
     *
     * If the $uri is not absolute you must pass an absolute $baseUri
     *
     * Resolution order: a URI with a scheme is returned as-is; an empty URI
     * yields the base URI; "#..." replaces the base fragment; "?..." replaces
     * the base query and fragment; "//..." inherits only the base scheme;
     * "/..." replaces the base path; anything else is resolved relative to
     * the directory of the base path.
     *
     * @param string      $uri     The URI to resolve (surrounding whitespace is trimmed)
     * @param string|null $baseUri The URI to resolve against; may be null only when $uri is absolute
     *
     * @throws \InvalidArgumentException When $uri has no scheme and $baseUri is null
     */
    public static function resolve(string $uri, ?string $baseUri): string
    {
        $uri = trim($uri);

        // absolute URL?
        if (null !== parse_url($uri, \PHP_URL_SCHEME)) {
            return $uri;
        }

        if (null === $baseUri) {
            throw new \InvalidArgumentException('The URI is relative, so you must define its base URI passing an absolute URL.');
        }

        // empty URI: compare against '' explicitly, because the string "0" is
        // falsy in PHP but is a perfectly valid relative reference
        if ('' === $uri) {
            return $baseUri;
        }

        // an anchor
        if ('#' === $uri[0]) {
            return self::cleanupAnchor($baseUri).$uri;
        }

        $baseUriCleaned = self::cleanupUri($baseUri);

        // a query string
        if ('?' === $uri[0]) {
            return $baseUriCleaned.$uri;
        }

        // absolute URL with relative schema: keep only what precedes the first "//" of the base
        if (str_starts_with($uri, '//')) {
            return preg_replace('#^([^/]*)//.*$#', '$1', $baseUriCleaned).$uri;
        }

        // strip the path so that only "scheme://authority" remains
        $baseUriCleaned = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUriCleaned);

        // absolute path
        if ('/' === $uri[0]) {
            return $baseUriCleaned.$uri;
        }

        // relative path: resolve against the directory part of the base path
        $basePath = parse_url(substr($baseUri, \strlen($baseUriCleaned)), \PHP_URL_PATH);
        if (!\is_string($basePath)) {
            // parse_url() yields null when there is no path and false on malformed input
            $basePath = '';
        }

        // strrpos() returns false when the base path has no slash; handle it
        // explicitly rather than relying on the implicit false-to-0 coercion
        $lastSlash = strrpos($basePath, '/');
        $baseDirectory = false === $lastSlash ? '' : substr($basePath, 0, $lastSlash);

        $path = self::canonicalizePath($baseDirectory.'/'.$uri);

        // canonicalization may have consumed the leading slash (e.g. "/../foo")
        return $baseUriCleaned.('' === $path || '/' !== $path[0] ? '/' : '').$path;
    }

    /**
     * Returns the canonicalized URI path (see RFC 3986, section 5.2.4).
     *
     * "." segments are dropped and ".." segments remove the preceding segment.
     * A trailing slash is kept when the path ends with a complete "." or ".."
     * segment; other segments that merely end with a dot (e.g. "foo.") are
     * ordinary segments and are left untouched.
     */
    private static function canonicalizePath(string $path): string
    {
        if ('' === $path || '/' === $path) {
            return $path;
        }

        $segments = explode('/', $path);

        // a final "." or ".." designates a directory: append an empty segment
        // so the result ends with a slash
        $lastSegment = end($segments);
        if ('.' === $lastSegment || '..' === $lastSegment) {
            $segments[] = '';
        }

        $output = [];

        foreach ($segments as $segment) {
            if ('..' === $segment) {
                array_pop($output);
            } elseif ('.' !== $segment) {
                $output[] = $segment;
            }
        }

        return implode('/', $output);
    }

    /**
     * Removes the query string and the anchor from the given uri.
     */
    private static function cleanupUri(string $uri): string
    {
        return self::cleanupQuery(self::cleanupAnchor($uri));
    }

    /**
     * Removes the query string from the uri.
     *
     * Everything from the first "?" onwards is dropped; a uri without "?" is
     * returned unchanged.
     */
    private static function cleanupQuery(string $uri): string
    {
        $pos = strpos($uri, '?');

        return false === $pos ? $uri : substr($uri, 0, $pos);
    }

    /**
     * Removes the anchor from the uri.
     *
     * Everything from the first "#" onwards is dropped; a uri without "#" is
     * returned unchanged.
     */
    private static function cleanupAnchor(string $uri): string
    {
        $pos = strpos($uri, '#');

        return false === $pos ? $uri : substr($uri, 0, $pos);
    }
}