1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
|
<?php
namespace Automattic\WooCommerce\Internal\Utilities;
/**
 * Provides an easy method of assessing URLs, including filepaths (which will be silently
 * converted to a file:// URL if provided).
 *
 * The URL is parsed and its path normalised once, at construction time. The remaining
 * methods only read the processed state.
 */
class URL {
	/**
	 * Components of the URL being assessed.
	 *
	 * The keys match those potentially returned by the parse_url() function, except
	 * that they are always defined and 'drive' (Windows drive letter) has been added.
	 *
	 * @var array<string, string|int|null>
	 */
	private $components = array(
		'drive'    => null,
		'fragment' => null,
		'host'     => null,
		'pass'     => null,
		'path'     => null,
		'port'     => null,
		'query'    => null,
		'scheme'   => null,
		'user'     => null,
	);

	/**
	 * If the URL (or filepath) is absolute.
	 *
	 * @var bool
	 */
	private $is_absolute;

	/**
	 * If the URL (or filepath) represents a directory other than the root directory.
	 *
	 * This is useful at different points in the process, when deciding whether to re-apply
	 * a trailing slash at the end of processing or when we need to calculate how many
	 * directory traversals are needed to form a (grand-)parent URL.
	 *
	 * @var bool
	 */
	private $is_non_root_directory;

	/**
	 * The components of the URL's path.
	 *
	 * For instance, in the case of "file:///srv/www/wp.site" (noting that a file URL has
	 * no host component) this would contain:
	 *
	 *     [ "srv", "www", "wp.site" ]
	 *
	 * In the case of a non-file URL such as "https://example.com/foo/bar/baz" (noting the
	 * host is not part of the path) it would contain:
	 *
	 *     [ "foo", "bar", "baz" ]
	 *
	 * @var string[]
	 */
	private $path_parts = array();

	/**
	 * The URL.
	 *
	 * @var string
	 */
	private $url;

	/**
	 * Creates and processes the provided URL (or filepath).
	 *
	 * @param string $url The URL (or filepath).
	 *
	 * @throws URLException If the URL (or filepath) is seriously malformed.
	 */
	public function __construct( string $url ) {
		$this->url = $url;
		$this->preprocess();
		$this->process_path();
	}

	/**
	 * Makes all slashes forward slashes, converts filepaths to file:// URLs, and
	 * other processing to help with comprehension of filepaths.
	 *
	 * On return, $this->components holds the parsed components (merged over the
	 * all-null defaults) and, for file URLs, the host component is always null.
	 *
	 * @throws URLException If the URL is seriously malformed.
	 */
	private function preprocess() {
		// For consistency, all slashes should be forward slashes.
		$this->url = str_replace( '\\', '/', $this->url );

		// Windows: capture the drive letter if provided.
		if ( preg_match( '#^(file://)?([a-z]):/(?!/).*#i', $this->url, $matches ) ) {
			$this->components['drive'] = $matches[2];
		}

		/*
		 * If there is no scheme, assume and prepend "file://". An exception is made for cases where the URL simply
		 * starts with exactly two forward slashes, which indicates 'any scheme' (most commonly, that is used when
		 * there is freedom to switch between 'http' and 'https').
		 */
		if ( ! preg_match( '#^[a-z]+://#i', $this->url ) && ! preg_match( '#^//(?!/)#', $this->url ) ) {
			$this->url = 'file://' . $this->url;
		}

		$parsed_components = wp_parse_url( $this->url );

		// If we received a really badly formed URL, let's go no further.
		if ( false === $parsed_components ) {
			throw new URLException(
				sprintf(
					/* translators: %s is the URL. */
					__( '%s is not a valid URL.', 'woocommerce' ),
					$this->url
				)
			);
		}

		$this->components = array_merge( $this->components, $parsed_components );

		// File URLs cannot have a host. However, the initial path segment *or* the Windows drive letter
		// (if present) may incorrectly be interpreted as the host name.
		if ( 'file' === $this->components['scheme'] && ! empty( $this->components['host'] ) ) {
			// If we do not have a drive letter, then simply merge the host and the path together.
			if ( null === $this->components['drive'] ) {
				$this->components['path'] = $this->components['host'] . ( $this->components['path'] ?? '' );
			}

			// Restore the host to null in this situation.
			$this->components['host'] = null;
		}
	}

	/**
	 * Simplifies the path if possible, by resolving directory traversals to the extent possible
	 * without touching the filesystem.
	 *
	 * Sets $this->is_absolute, $this->is_non_root_directory and $this->path_parts, then rebuilds
	 * $this->components['path'] from the cleaned segments.
	 */
	private function process_path() {
		// The path component is null when the URL has no path at all (for example "https://example.com").
		// Normalize it to an empty string so that null is never handed to the string functions below
		// (doing so is deprecated as of PHP 8.1).
		$path = (string) ( $this->components['path'] ?? '' );

		$segments                    = explode( '/', $path );
		$this->is_absolute           = substr( $path, 0, 1 ) === '/' || ! empty( $this->components['host'] );
		$this->is_non_root_directory = substr( $path, -1, 1 ) === '/' && strlen( $path ) > 1;
		$resolve_traversals          = 'file' !== $this->components['scheme'] || $this->is_absolute;
		$retain_traversals           = false;

		// Clean the path.
		foreach ( $segments as $part ) {
			// Drop empty segments and 'current directory' segments.
			if ( strlen( $part ) === 0 || '.' === $part ) {
				continue;
			}

			// Directory traversals created with percent-encoding syntax should also be detected.
			$is_traversal = str_ireplace( '%2e', '.', $part ) === '..';

			// Resolve directory traversals (if allowed: see further comment relating to this).
			if ( $resolve_traversals && $is_traversal ) {
				if ( count( $this->path_parts ) > 0 && ! $retain_traversals ) {
					// Step back up one level by discarding the most recently retained segment.
					array_pop( $this->path_parts );
					continue;
				} elseif ( $this->is_absolute ) {
					// There is nothing above the root of an absolute path: discard the traversal.
					continue;
				}
			}

			/*
			 * Consider allowing directory traversals to be resolved (ie, the process that converts 'foo/bar/../baz' to
			 * 'foo/baz').
			 *
			 * 1. For this decision point, we are only concerned with relative filepaths (in all other cases,
			 *    $resolve_traversals will already be true).
			 * 2. This is a 'one time' and unidirectional operation. We only wish to flip from false to true, and we
			 *    never wish to do this more than once.
			 * 3. We only flip the switch after we have examined all leading '..' traversal segments.
			 *
			 * NOTE(review): this check and the one that follows compare against the literal '..' rather than using
			 * $is_traversal, so a percent-encoded leading traversal ('%2e%2e') is not treated as a leading traversal
			 * here. Confirm whether that is intended before changing it.
			 */
			if ( false === $resolve_traversals && '..' !== $part && 'file' === $this->components['scheme'] && ! $this->is_absolute ) {
				$resolve_traversals = true;
			}

			/*
			 * Set a flag indicating that traversals should be retained. This is done to ensure we don't prematurely
			 * discard traversals at the start of the path.
			 */
			$retain_traversals = $resolve_traversals && '..' === $part;

			// Retain this part of the path.
			$this->path_parts[] = $part;
		}

		// Protect against empty relative paths.
		if ( count( $this->path_parts ) === 0 && ! $this->is_absolute ) {
			$this->path_parts            = array( '.' );
			$this->is_non_root_directory = true;
		}

		// Reform the path from the processed segments, prepending a leading slash if it is absolute and
		// re-applying the trailing slash if it is a non-root directory. (The Windows drive letter, if we
		// have one, is added back by get_path().)
		$this->components['path'] = ( $this->is_absolute ? '/' : '' ) . implode( '/', $this->path_parts ) . ( $this->is_non_root_directory ? '/' : '' );
	}

	/**
	 * Returns the processed URL as a string.
	 *
	 * @return string
	 */
	public function __toString(): string {
		return $this->get_url();
	}

	/**
	 * Returns all possible parent URLs for the current URL.
	 *
	 * The list starts with the immediate parent (level 1) and continues, one level at a time,
	 * up to a level equal to the number of path segments.
	 *
	 * @return string[]
	 */
	public function get_all_parent_urls(): array {
		$max_parent = count( $this->path_parts );
		$parents    = array();

		/*
		 * If we are looking at a relative path that begins with at least one traversal (example: "../../foo")
		 * then we should only return one parent URL (otherwise, we'd potentially have to return an infinite
		 * number of parent URLs since we can't know how far the tree extends).
		 */
		if ( $max_parent > 0 && ! $this->is_absolute && '..' === $this->path_parts[0] ) {
			$max_parent = 1;
		}

		for ( $level = 1; $level <= $max_parent; $level++ ) {
			$parents[] = $this->get_parent_url( $level );
		}

		return $parents;
	}

	/**
	 * Outputs the parent URL.
	 *
	 * For example, if $this->get_url() returns "https://example.com/foo/bar/baz" then
	 * this method will return "https://example.com/foo/bar/".
	 *
	 * When a grand-parent is needed, the optional $level parameter can be used. By default
	 * this is set to 1 (parent). 2 will yield the grand-parent, 3 will yield the great
	 * grand-parent, etc. Values lower than 1 are treated as 1.
	 *
	 * False is returned when:
	 *
	 * - the URL is not a file URL and the level exceeds the number of path segments, or
	 * - the URL is an absolute file URL describing the root directory.
	 *
	 * @param int $level Used to indicate the level of parent.
	 *
	 * @return string|false
	 */
	public function get_parent_url( int $level = 1 ) {
		if ( $level < 1 ) {
			$level = 1;
		}

		$parts_count               = count( $this->path_parts );
		$parent_path_parts_to_keep = $parts_count - $level;

		/*
		 * With the exception of file URLs, we do not allow obtaining (grand-)parent directories that require
		 * us to describe them using directory traversals. For example, given "http://hostname/foo/bar/baz.png" we do
		 * not permit determining anything more than 2 levels up (we cannot go beyond "http://hostname/").
		 */
		if ( 'file' !== $this->components['scheme'] && $parent_path_parts_to_keep < 0 ) {
			return false;
		}

		// In the specific case of an absolute filepath describing the root directory, there can be no parent.
		if ( 'file' === $this->components['scheme'] && $this->is_absolute && empty( $this->path_parts ) ) {
			return false;
		}

		// Handle cases where the path starts with one or more 'dot segments'. Since the path has already been
		// processed, we can be confident that any such segments are at the start of the path.
		if ( $parts_count > 0 && ( '.' === $this->path_parts[0] || '..' === $this->path_parts[0] ) ) {
			// Determine the index of the last dot segment (ex: given the path '/../../foo' it would be 1).
			$single_dots   = array_keys( $this->path_parts, '.', true );
			$double_dots   = array_keys( $this->path_parts, '..', true );
			$max_dot_index = max( array_merge( $single_dots, $double_dots ) );

			// Prepend the required number of traversals and discard unnecessary trailing segments.
			$last_traversal = $max_dot_index + ( $this->is_non_root_directory ? 1 : 0 );
			$parent_path    = str_repeat( '../', $level ) . join( '/', array_slice( $this->path_parts, 0, $last_traversal ) );
		} elseif ( $parent_path_parts_to_keep < 0 ) {
			// Only file URLs can reach this point with a negative count (other schemes returned false above):
			// here we use traversals to describe the requested parent.
			$parent_path = untrailingslashit( str_repeat( '../', $parent_path_parts_to_keep * -1 ) );
		} else {
			// Otherwise, in a very simple case, we just remove existing parts.
			$parent_path = implode( '/', array_slice( $this->path_parts, 0, $parent_path_parts_to_keep ) );
		}

		// An empty relative parent path means the current working directory.
		if ( $this->is_relative() && '' === $parent_path ) {
			$parent_path = '.';
		}

		// Append a trailing slash, since a parent is always a directory.
		$parent_path .= '/';

		// For absolute paths, apply a leading slash (does not apply if we have a root path).
		if ( $this->is_absolute && 0 !== strpos( $parent_path, '/' ) ) {
			$parent_path = '/' . $parent_path;
		}

		// Form the parent URL (ditching the query and fragment, if set).
		$parent_url = $this->get_url(
			array(
				'path'     => $parent_path,
				'query'    => null,
				'fragment' => null,
			)
		);

		// We process the parent URL through a fresh instance of this class, for consistency.
		return ( new self( $parent_url ) )->get_url();
	}

	/**
	 * Outputs the processed URL.
	 *
	 * Borrows from https://www.php.net/manual/en/function.parse-url.php#106731
	 *
	 * @param array $component_overrides If provided, these will override values set in $this->components.
	 *
	 * @return string
	 */
	public function get_url( array $component_overrides = array() ): string {
		$components = array_merge( $this->components, $component_overrides );

		// A missing scheme is rendered as a scheme-relative ("//") URL.
		$scheme = null !== $components['scheme'] ? $components['scheme'] . '://' : '//';
		$host   = null !== $components['host'] ? $components['host'] : '';
		$port   = null !== $components['port'] ? ':' . $components['port'] : '';
		$path   = $this->get_path( $components['path'] );

		// Special handling for hostless URLs (typically, filepaths) referencing the current working directory.
		if ( '' === $host && ( '' === $path || '.' === $path ) ) {
			$path = './';
		}

		$user = null !== $components['user'] ? $components['user'] : '';
		$pass = null !== $components['pass'] ? ':' . $components['pass'] : '';

		// Compare against the empty string rather than using empty(), which would treat
		// the (valid) user name "0" as missing and drop it from the output.
		$user_pass = ( '' !== $user || '' !== $pass ) ? $user . $pass . '@' : '';

		$query    = null !== $components['query'] ? '?' . $components['query'] : '';
		$fragment = null !== $components['fragment'] ? '#' . $components['fragment'] : '';

		return $scheme . $user_pass . $host . $port . $path . $query . $fragment;
	}

	/**
	 * Outputs the path. Especially useful if it was a regular filepath that was passed in originally.
	 *
	 * If a Windows drive letter was captured, it is prepended (followed by a colon) to the path.
	 *
	 * @param string|null $path_override If provided this will be used as the URL path. Does not impact drive letter.
	 *
	 * @return string
	 */
	public function get_path( ?string $path_override = null ): string {
		return ( $this->components['drive'] ? $this->components['drive'] . ':' : '' ) . ( $path_override ?? $this->components['path'] );
	}

	/**
	 * Indicates if the URL or filepath was absolute.
	 *
	 * @return bool True if absolute, else false.
	 */
	public function is_absolute(): bool {
		return $this->is_absolute;
	}

	/**
	 * Indicates if the URL or filepath was relative.
	 *
	 * @return bool True if relative, else false.
	 */
	public function is_relative(): bool {
		return ! $this->is_absolute;
	}
}
|