webadmin.php

/var/www/html_uk/wp-content/plugins/wordpress-importer/php-toolkit/DataLiberation/URL/functions.php

<?php

namespace WordPress\DataLiberation\URL;

use Rowbot\URL\URL;
use WordPress\DataLiberation\BlockMarkup\BlockMarkupUrlProcessor;


/**
 * Migrate URLs in post content. See WPRewriteUrlsTests for
 * specific examples. TODO: A better description.
 *
 * Example:
 *
 * ```php
 * php > wp_rewrite_urls([
 *   'block_markup' => '<!-- wp:image {"src": "http://legacy-blog.com/image.jpg"} -->',
 *   'url-mapping' => [
 *     'http://legacy-blog.com' => 'https://modern-webstore.org'
 *   ]
 * ])
 * <!-- wp:image {"src":"https:\/\/modern-webstore.org\/image.jpg"} -->
 * ```
 *
 * @TODO Use a proper JSON parser and encoder to:
 * * Support UTF-16 characters
 * * Gracefully handle recoverable encoding issues
 * * Avoid changing the whitespace in the same manner as
 *   we do in WP_HTML_Tag_Processor. e.g. if we start with:
 *
 * ```html
 * <!-- wp:block {"url":"https://w.org"}` -->
 *                     ^ no space here
 * ```
 *
 * then it would be nice to re-encode that block markup also without the space character. This is similar
 * to how the tag processor avoids changing parts of the tag it doesn't need to change.
 */
function wp_rewrite_urls( $options ) {
    if ( empty( $options['base_url'] ) ) {
        // Use first from-url as base_url if not specified.
        $from_urls           = array_keys( $options['url-mapping'] );
        $options['base_url'] = $from_urls[0];
    }

    $url_mapping = array();
    foreach ( $options['url-mapping'] as $from_url_string => $to_url_string ) {
        $url_mapping[] = array(
            'from_url' => WPURL::parse( $from_url_string ),
            'to_url'   => WPURL::parse( $to_url_string ),
        );
    }

    $p = new BlockMarkupUrlProcessor( $options['block_markup'], $options['base_url'] );
    while ( $p->next_url() ) {
        $parsed_url = $p->get_parsed_url();
        foreach ( $url_mapping as $mapping ) {
            if ( is_child_url_of( $parsed_url, $mapping['from_url'] ) ) {
                $p->replace_base_url( $mapping['to_url'] );
                break;
            }
        }
    }

    return $p->get_updated_html();
}

/**
 * Check if a given URL matches the current site URL.
 *
 * @param  URL    $child  The URL to check.
 * @param  string $parent_url  The current site URL to compare against.
 *
 * @return bool Whether the URL matches the current site URL.
 */
function is_child_url_of( $child, $parent_url ) {
    $parent_url                       = is_string( $parent_url ) ? WPURL::parse( $parent_url ) : $parent_url;
    $child                            = is_string( $child ) ? WPURL::parse( $child ) : $child;
    $child_pathname_no_trailing_slash = rtrim( urldecode( $child->pathname ), '/' );

    if ( false === $child || false === $parent_url ) {
        return false;
    }

    if ( $parent_url->hostname !== $child->hostname ) {
        return false;
    }

    if ( $parent_url->protocol !== $child->protocol ) {
        return false;
    }

    $parent_pathname = urldecode( $parent_url->pathname );

    return (
        // Direct match.
        $parent_pathname === $child_pathname_no_trailing_slash ||
        $parent_pathname === $child_pathname_no_trailing_slash . '/' ||
        // Path prefix.
        0 === strncmp( $child_pathname_no_trailing_slash . '/', $parent_pathname, strlen( $parent_pathname ) )
    );
}

/**
 * Decodes the first n **encoded bytes** a URL-encoded string.
 *
 * For example, `urldecode_n( '%22is 6 %3C 6?%22 – asked Achilles', 1 )` returns
 * '"is 6 %3C 6?%22 – asked Achilles' because only the first encoded byte is decoded.
 *
 * @param  string $input  The string to decode.
 * @param  int    $decode_n  The number of bytes to decode in $input
 *
 * @return string The decoded string.
 */
function urldecode_n( $input, $decode_n ) {
    // Fast paths: nothing to do.
    if ( $decode_n <= 0 || false === strpos( $input, '%' ) ) {
            return $input;
    }

    $result = '';
    $at     = 0;
    while ( true ) {
        if ( $at + 3 > strlen( $input ) ) {
            break;
        }

        $last_at = $at;
        $at     += strcspn( $input, '%', $at );
        // Consume bytes except for the percent sign.
        $result .= substr( $input, $last_at, $at - $last_at );

        // If we've already decoded the requested number of bytes, stop.
        if ( strlen( $result ) >= $decode_n ) {
            break;
        }

        ++$at;
        if ( $at > strlen( $input ) ) {
            break;
        }

        $decodable_length = strspn(
            $input,
            '0123456789ABCDEFabcdef',
            $at,
            2
        );

        if ( 2 === $decodable_length ) {
            // Decodes the urlencoded hex sequence from URL.
            // Note: This decodes bytes, not characters. It will recover the original byte sequence,
            // not necessarily any valid UTF-8 characters.
            $result .= chr( hexdec( $input[ $at ] . $input[ $at + 1 ] ) );
            $at     += 2;
        } else {
            // Consume the next byte and move on.
            $result .= '%';
        }
    }
    $result .= substr( $input, $at );

    return $result;
}