diff options
Diffstat (limited to 'vendor/symfony/polyfill-intl-idn/Idn.php')
-rw-r--r-- | vendor/symfony/polyfill-intl-idn/Idn.php | 1848 |
1 files changed, 923 insertions, 925 deletions
diff --git a/vendor/symfony/polyfill-intl-idn/Idn.php b/vendor/symfony/polyfill-intl-idn/Idn.php index fee3026..3cbb65a 100644 --- a/vendor/symfony/polyfill-intl-idn/Idn.php +++ b/vendor/symfony/polyfill-intl-idn/Idn.php @@ -1,925 +1,923 @@ -<?php - -/* - * This file is part of the Symfony package. - * - * (c) Fabien Potencier <fabien@symfony.com> and Trevor Rowbotham <trevor.rowbotham@pm.me> - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code. - */ - -namespace Symfony\Polyfill\Intl\Idn; - -use Exception; -use Normalizer; -use Symfony\Polyfill\Intl\Idn\Resources\unidata\DisallowedRanges; -use Symfony\Polyfill\Intl\Idn\Resources\unidata\Regex; - -/** - * @see https://www.unicode.org/reports/tr46/ - * - * @internal - */ -final class Idn -{ - public const ERROR_EMPTY_LABEL = 1; - public const ERROR_LABEL_TOO_LONG = 2; - public const ERROR_DOMAIN_NAME_TOO_LONG = 4; - public const ERROR_LEADING_HYPHEN = 8; - public const ERROR_TRAILING_HYPHEN = 0x10; - public const ERROR_HYPHEN_3_4 = 0x20; - public const ERROR_LEADING_COMBINING_MARK = 0x40; - public const ERROR_DISALLOWED = 0x80; - public const ERROR_PUNYCODE = 0x100; - public const ERROR_LABEL_HAS_DOT = 0x200; - public const ERROR_INVALID_ACE_LABEL = 0x400; - public const ERROR_BIDI = 0x800; - public const ERROR_CONTEXTJ = 0x1000; - public const ERROR_CONTEXTO_PUNCTUATION = 0x2000; - public const ERROR_CONTEXTO_DIGITS = 0x4000; - - public const INTL_IDNA_VARIANT_2003 = 0; - public const INTL_IDNA_VARIANT_UTS46 = 1; - - public const IDNA_DEFAULT = 0; - public const IDNA_ALLOW_UNASSIGNED = 1; - public const IDNA_USE_STD3_RULES = 2; - public const IDNA_CHECK_BIDI = 4; - public const IDNA_CHECK_CONTEXTJ = 8; - public const IDNA_NONTRANSITIONAL_TO_ASCII = 16; - public const IDNA_NONTRANSITIONAL_TO_UNICODE = 32; - - public const MAX_DOMAIN_SIZE = 253; - public const MAX_LABEL_SIZE = 63; - - public const BASE = 36; - public const TMIN = 1; - public const TMAX = 26; - public const SKEW = 38; - public const DAMP = 700; - public const INITIAL_BIAS = 72; - public const INITIAL_N = 128; - public const DELIMITER = '-'; - public const MAX_INT = 2147483647; - - /** - * Contains the numeric value of a basic code point (for use in representing integers) in the - * range 0 to BASE-1, or -1 if b is does not represent a value. - * - * @var array<int, int> - */ - private static $basicToDigit = [ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, - - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, - - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - ]; - - /** - * @var array<int, int> - */ - private static $virama; - - /** - * @var array<int, string> - */ - private static $mapped; - - /** - * @var array<int, bool> - */ - private static $ignored; - - /** - * @var array<int, string> - */ - private static $deviation; - - /** - * @var array<int, bool> - */ - private static $disallowed; - - /** - * @var array<int, string> - */ - private static $disallowed_STD3_mapped; - - /** - * @var array<int, bool> - */ - private static $disallowed_STD3_valid; - - /** - * @var bool - */ - private static $mappingTableLoaded = false; - - /** - * @see https://www.unicode.org/reports/tr46/#ToASCII - * - * @param string $domainName - * @param int $options - * @param int $variant - * @param array $idna_info - * - * @return string|false - */ - public static function idn_to_ascii($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = []) - { - if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) { - @trigger_error('idn_to_ascii(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED); - } - - $options = [ - 'CheckHyphens' => true, - 'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI), - 'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ), - 'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES), - 'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_ASCII), - 'VerifyDnsLength' => true, - ]; - $info = new Info(); - $labels = self::process((string) $domainName, $options, $info); - - foreach ($labels as $i => $label) { - // Only convert labels to punycode that contain non-ASCII code points - if (1 === preg_match('/[^\x00-\x7F]/', $label)) { - try { - $label = 'xn--'.self::punycodeEncode($label); - } catch (Exception $e) { - $info->errors |= self::ERROR_PUNYCODE; - } - - $labels[$i] = $label; - } - } - - if ($options['VerifyDnsLength']) { - self::validateDomainAndLabelLength($labels, $info); - } - - $idna_info = [ - 'result' => implode('.', $labels), - 'isTransitionalDifferent' => $info->transitionalDifferent, - 'errors' => $info->errors, - ]; - - return 0 === $info->errors ? $idna_info['result'] : false; - } - - /** - * @see https://www.unicode.org/reports/tr46/#ToUnicode - * - * @param string $domainName - * @param int $options - * @param int $variant - * @param array $idna_info - * - * @return string|false - */ - public static function idn_to_utf8($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = []) - { - if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) { - @trigger_error('idn_to_utf8(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED); - } - - $info = new Info(); - $labels = self::process((string) $domainName, [ - 'CheckHyphens' => true, - 'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI), - 'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ), - 'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES), - 'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_UNICODE), - ], $info); - $idna_info = [ - 'result' => implode('.', $labels), - 'isTransitionalDifferent' => $info->transitionalDifferent, - 'errors' => $info->errors, - ]; - - return 0 === $info->errors ? $idna_info['result'] : false; - } - - /** - * @param string $label - * - * @return bool - */ - private static function isValidContextJ(array $codePoints, $label) - { - if (!isset(self::$virama)) { - self::$virama = require __DIR__.\DIRECTORY_SEPARATOR.'Resources'.\DIRECTORY_SEPARATOR.'unidata'.\DIRECTORY_SEPARATOR.'virama.php'; - } - - $offset = 0; - - foreach ($codePoints as $i => $codePoint) { - if (0x200C !== $codePoint && 0x200D !== $codePoint) { - continue; - } - - if (!isset($codePoints[$i - 1])) { - return false; - } - - // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; - if (isset(self::$virama[$codePoints[$i - 1]])) { - continue; - } - - // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then - // True; - // Generated RegExp = ([Joining_Type:{L,D}][Joining_Type:T]*\u200C[Joining_Type:T]*)[Joining_Type:{R,D}] - if (0x200C === $codePoint && 1 === preg_match(Regex::ZWNJ, $label, $matches, \PREG_OFFSET_CAPTURE, $offset)) { - $offset += \strlen($matches[1][0]); - - continue; - } - - return false; - } - - return true; - } - - /** - * @see https://www.unicode.org/reports/tr46/#ProcessingStepMap - * - * @param string $input - * @param array<string, bool> $options - * - * @return string - */ - private static function mapCodePoints($input, array $options, Info $info) - { - $str = ''; - $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules']; - $transitional = $options['Transitional_Processing']; - - foreach (self::utf8Decode($input) as $codePoint) { - $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules); - - switch ($data['status']) { - case 'disallowed': - $info->errors |= self::ERROR_DISALLOWED; - - // no break. - - case 'valid': - $str .= mb_chr($codePoint, 'utf-8'); - - break; - - case 'ignored': - // Do nothing. - break; - - case 'mapped': - $str .= $data['mapping']; - - break; - - case 'deviation': - $info->transitionalDifferent = true; - $str .= ($transitional ? $data['mapping'] : mb_chr($codePoint, 'utf-8')); - - break; - } - } - - return $str; - } - - /** - * @see https://www.unicode.org/reports/tr46/#Processing - * - * @param string $domain - * @param array<string, bool> $options - * - * @return array<int, string> - */ - private static function process($domain, array $options, Info $info) - { - // If VerifyDnsLength is not set, we are doing ToUnicode otherwise we are doing ToASCII and - // we need to respect the VerifyDnsLength option. - $checkForEmptyLabels = !isset($options['VerifyDnsLength']) || $options['VerifyDnsLength']; - - if ($checkForEmptyLabels && '' === $domain) { - $info->errors |= self::ERROR_EMPTY_LABEL; - - return [$domain]; - } - - // Step 1. Map each code point in the domain name string - $domain = self::mapCodePoints($domain, $options, $info); - - // Step 2. Normalize the domain name string to Unicode Normalization Form C. - if (!Normalizer::isNormalized($domain, Normalizer::FORM_C)) { - $domain = Normalizer::normalize($domain, Normalizer::FORM_C); - } - - // Step 3. Break the string into labels at U+002E (.) FULL STOP. - $labels = explode('.', $domain); - $lastLabelIndex = \count($labels) - 1; - - // Step 4. Convert and validate each label in the domain name string. - foreach ($labels as $i => $label) { - $validationOptions = $options; - - if ('xn--' === substr($label, 0, 4)) { - try { - $label = self::punycodeDecode(substr($label, 4)); - } catch (Exception $e) { - $info->errors |= self::ERROR_PUNYCODE; - - continue; - } - - $validationOptions['Transitional_Processing'] = false; - $labels[$i] = $label; - } - - self::validateLabel($label, $info, $validationOptions, $i > 0 && $i === $lastLabelIndex); - } - - if ($info->bidiDomain && !$info->validBidiDomain) { - $info->errors |= self::ERROR_BIDI; - } - - // Any input domain name string that does not record an error has been successfully - // processed according to this specification. Conversely, if an input domain_name string - // causes an error, then the processing of the input domain_name string fails. Determining - // what to do with error input is up to the caller, and not in the scope of this document. - return $labels; - } - - /** - * @see https://tools.ietf.org/html/rfc5893#section-2 - * - * @param string $label - */ - private static function validateBidiLabel($label, Info $info) - { - if (1 === preg_match(Regex::RTL_LABEL, $label)) { - $info->bidiDomain = true; - - // Step 1. The first character must be a character with Bidi property L, R, or AL. - // If it has the R or AL property, it is an RTL label - if (1 !== preg_match(Regex::BIDI_STEP_1_RTL, $label)) { - $info->validBidiDomain = false; - - return; - } - - // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES, - // CS, ET, ON, BN, or NSM are allowed. - if (1 === preg_match(Regex::BIDI_STEP_2, $label)) { - $info->validBidiDomain = false; - - return; - } - - // Step 3. In an RTL label, the end of the label must be a character with Bidi property - // R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM. - if (1 !== preg_match(Regex::BIDI_STEP_3, $label)) { - $info->validBidiDomain = false; - - return; - } - - // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa. - if (1 === preg_match(Regex::BIDI_STEP_4_AN, $label) && 1 === preg_match(Regex::BIDI_STEP_4_EN, $label)) { - $info->validBidiDomain = false; - - return; - } - - return; - } - - // We are a LTR label - // Step 1. The first character must be a character with Bidi property L, R, or AL. - // If it has the L property, it is an LTR label. - if (1 !== preg_match(Regex::BIDI_STEP_1_LTR, $label)) { - $info->validBidiDomain = false; - - return; - } - - // Step 5. In an LTR label, only characters with the Bidi properties L, EN, - // ES, CS, ET, ON, BN, or NSM are allowed. - if (1 === preg_match(Regex::BIDI_STEP_5, $label)) { - $info->validBidiDomain = false; - - return; - } - - // Step 6.In an LTR label, the end of the label must be a character with Bidi property L or - // EN, followed by zero or more characters with Bidi property NSM. - if (1 !== preg_match(Regex::BIDI_STEP_6, $label)) { - $info->validBidiDomain = false; - - return; - } - } - - /** - * @param array<int, string> $labels - */ - private static function validateDomainAndLabelLength(array $labels, Info $info) - { - $maxDomainSize = self::MAX_DOMAIN_SIZE; - $length = \count($labels); - - // Number of "." delimiters. - $domainLength = $length - 1; - - // If the last label is empty and it is not the first label, then it is the root label. - // Increase the max size by 1, making it 254, to account for the root label's "." - // delimiter. This also means we don't need to check the last label's length for being too - // long. - if ($length > 1 && '' === $labels[$length - 1]) { - ++$maxDomainSize; - --$length; - } - - for ($i = 0; $i < $length; ++$i) { - $bytes = \strlen($labels[$i]); - $domainLength += $bytes; - - if ($bytes > self::MAX_LABEL_SIZE) { - $info->errors |= self::ERROR_LABEL_TOO_LONG; - } - } - - if ($domainLength > $maxDomainSize) { - $info->errors |= self::ERROR_DOMAIN_NAME_TOO_LONG; - } - } - - /** - * @see https://www.unicode.org/reports/tr46/#Validity_Criteria - * - * @param string $label - * @param array<string, bool> $options - * @param bool $canBeEmpty - */ - private static function validateLabel($label, Info $info, array $options, $canBeEmpty) - { - if ('' === $label) { - if (!$canBeEmpty && (!isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'])) { - $info->errors |= self::ERROR_EMPTY_LABEL; - } - - return; - } - - // Step 1. The label must be in Unicode Normalization Form C. - if (!Normalizer::isNormalized($label, Normalizer::FORM_C)) { - $info->errors |= self::ERROR_INVALID_ACE_LABEL; - } - - $codePoints = self::utf8Decode($label); - - if ($options['CheckHyphens']) { - // Step 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character - // in both the thrid and fourth positions. - if (isset($codePoints[2], $codePoints[3]) && 0x002D === $codePoints[2] && 0x002D === $codePoints[3]) { - $info->errors |= self::ERROR_HYPHEN_3_4; - } - - // Step 3. If CheckHyphens, the label must neither begin nor end with a U+002D - // HYPHEN-MINUS character. - if ('-' === substr($label, 0, 1)) { - $info->errors |= self::ERROR_LEADING_HYPHEN; - } - - if ('-' === substr($label, -1, 1)) { - $info->errors |= self::ERROR_TRAILING_HYPHEN; - } - } - - // Step 4. The label must not contain a U+002E (.) FULL STOP. - if (false !== strpos($label, '.')) { - $info->errors |= self::ERROR_LABEL_HAS_DOT; - } - - // Step 5. The label must not begin with a combining mark, that is: General_Category=Mark. - if (1 === preg_match(Regex::COMBINING_MARK, $label)) { - $info->errors |= self::ERROR_LEADING_COMBINING_MARK; - } - - // Step 6. Each code point in the label must only have certain status values according to - // Section 5, IDNA Mapping Table: - $transitional = $options['Transitional_Processing']; - $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules']; - - foreach ($codePoints as $codePoint) { - $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules); - $status = $data['status']; - - if ('valid' === $status || (!$transitional && 'deviation' === $status)) { - continue; - } - - $info->errors |= self::ERROR_DISALLOWED; - - break; - } - - // Step 7. If CheckJoiners, the label must satisify the ContextJ rules from Appendix A, in - // The Unicode Code Points and Internationalized Domain Names for Applications (IDNA) - // [IDNA2008]. - if ($options['CheckJoiners'] && !self::isValidContextJ($codePoints, $label)) { - $info->errors |= self::ERROR_CONTEXTJ; - } - - // Step 8. If CheckBidi, and if the domain name is a Bidi domain name, then the label must - // satisfy all six of the numbered conditions in [IDNA2008] RFC 5893, Section 2. - if ($options['CheckBidi'] && (!$info->bidiDomain || $info->validBidiDomain)) { - self::validateBidiLabel($label, $info); - } - } - - /** - * @see https://tools.ietf.org/html/rfc3492#section-6.2 - * - * @param string $input - * - * @return string - */ - private static function punycodeDecode($input) - { - $n = self::INITIAL_N; - $out = 0; - $i = 0; - $bias = self::INITIAL_BIAS; - $lastDelimIndex = strrpos($input, self::DELIMITER); - $b = false === $lastDelimIndex ? 0 : $lastDelimIndex; - $inputLength = \strlen($input); - $output = []; - $bytes = array_map('ord', str_split($input)); - - for ($j = 0; $j < $b; ++$j) { - if ($bytes[$j] > 0x7F) { - throw new Exception('Invalid input'); - } - - $output[$out++] = $input[$j]; - } - - if ($b > 0) { - ++$b; - } - - for ($in = $b; $in < $inputLength; ++$out) { - $oldi = $i; - $w = 1; - - for ($k = self::BASE; /* no condition */; $k += self::BASE) { - if ($in >= $inputLength) { - throw new Exception('Invalid input'); - } - - $digit = self::$basicToDigit[$bytes[$in++] & 0xFF]; - - if ($digit < 0) { - throw new Exception('Invalid input'); - } - - if ($digit > intdiv(self::MAX_INT - $i, $w)) { - throw new Exception('Integer overflow'); - } - - $i += $digit * $w; - - if ($k <= $bias) { - $t = self::TMIN; - } elseif ($k >= $bias + self::TMAX) { - $t = self::TMAX; - } else { - $t = $k - $bias; - } - - if ($digit < $t) { - break; - } - - $baseMinusT = self::BASE - $t; - - if ($w > intdiv(self::MAX_INT, $baseMinusT)) { - throw new Exception('Integer overflow'); - } - - $w *= $baseMinusT; - } - - $outPlusOne = $out + 1; - $bias = self::adaptBias($i - $oldi, $outPlusOne, 0 === $oldi); - - if (intdiv($i, $outPlusOne) > self::MAX_INT - $n) { - throw new Exception('Integer overflow'); - } - - $n += intdiv($i, $outPlusOne); - $i %= $outPlusOne; - array_splice($output, $i++, 0, [mb_chr($n, 'utf-8')]); - } - - return implode('', $output); - } - - /** - * @see https://tools.ietf.org/html/rfc3492#section-6.3 - * - * @param string $input - * - * @return string - */ - private static function punycodeEncode($input) - { - $n = self::INITIAL_N; - $delta = 0; - $out = 0; - $bias = self::INITIAL_BIAS; - $inputLength = 0; - $output = ''; - $iter = self::utf8Decode($input); - - foreach ($iter as $codePoint) { - ++$inputLength; - - if ($codePoint < 0x80) { - $output .= \chr($codePoint); - ++$out; - } - } - - $h = $out; - $b = $out; - - if ($b > 0) { - $output .= self::DELIMITER; - ++$out; - } - - while ($h < $inputLength) { - $m = self::MAX_INT; - - foreach ($iter as $codePoint) { - if ($codePoint >= $n && $codePoint < $m) { - $m = $codePoint; - } - } - - if ($m - $n > intdiv(self::MAX_INT - $delta, $h + 1)) { - throw new Exception('Integer overflow'); - } - - $delta += ($m - $n) * ($h + 1); - $n = $m; - - foreach ($iter as $codePoint) { - if ($codePoint < $n && 0 === ++$delta) { - throw new Exception('Integer overflow'); - } - - if ($codePoint === $n) { - $q = $delta; - - for ($k = self::BASE; /* no condition */; $k += self::BASE) { - if ($k <= $bias) { - $t = self::TMIN; - } elseif ($k >= $bias + self::TMAX) { - $t = self::TMAX; - } else { - $t = $k - $bias; - } - - if ($q < $t) { - break; - } - - $qMinusT = $q - $t; - $baseMinusT = self::BASE - $t; - $output .= self::encodeDigit($t + ($qMinusT) % ($baseMinusT), false); - ++$out; - $q = intdiv($qMinusT, $baseMinusT); - } - - $output .= self::encodeDigit($q, false); - ++$out; - $bias = self::adaptBias($delta, $h + 1, $h === $b); - $delta = 0; - ++$h; - } - } - - ++$delta; - ++$n; - } - - return $output; - } - - /** - * @see https://tools.ietf.org/html/rfc3492#section-6.1 - * - * @param int $delta - * @param int $numPoints - * @param bool $firstTime - * - * @return int - */ - private static function adaptBias($delta, $numPoints, $firstTime) - { - // xxx >> 1 is a faster way of doing intdiv(xxx, 2) - $delta = $firstTime ? intdiv($delta, self::DAMP) : $delta >> 1; - $delta += intdiv($delta, $numPoints); - $k = 0; - - while ($delta > ((self::BASE - self::TMIN) * self::TMAX) >> 1) { - $delta = intdiv($delta, self::BASE - self::TMIN); - $k += self::BASE; - } - - return $k + intdiv((self::BASE - self::TMIN + 1) * $delta, $delta + self::SKEW); - } - - /** - * @param int $d - * @param bool $flag - * - * @return string - */ - private static function encodeDigit($d, $flag) - { - return \chr($d + 22 + 75 * ($d < 26 ? 1 : 0) - (($flag ? 1 : 0) << 5)); - } - - /** - * Takes a UTF-8 encoded string and converts it into a series of integer code points. Any - * invalid byte sequences will be replaced by a U+FFFD replacement code point. - * - * @see https://encoding.spec.whatwg.org/#utf-8-decoder - * - * @param string $input - * - * @return array<int, int> - */ - private static function utf8Decode($input) - { - $bytesSeen = 0; - $bytesNeeded = 0; - $lowerBoundary = 0x80; - $upperBoundary = 0xBF; - $codePoint = 0; - $codePoints = []; - $length = \strlen($input); - - for ($i = 0; $i < $length; ++$i) { - $byte = \ord($input[$i]); - - if (0 === $bytesNeeded) { - if ($byte >= 0x00 && $byte <= 0x7F) { - $codePoints[] = $byte; - - continue; - } - - if ($byte >= 0xC2 && $byte <= 0xDF) { - $bytesNeeded = 1; - $codePoint = $byte & 0x1F; - } elseif ($byte >= 0xE0 && $byte <= 0xEF) { - if (0xE0 === $byte) { - $lowerBoundary = 0xA0; - } elseif (0xED === $byte) { - $upperBoundary = 0x9F; - } - - $bytesNeeded = 2; - $codePoint = $byte & 0xF; - } elseif ($byte >= 0xF0 && $byte <= 0xF4) { - if (0xF0 === $byte) { - $lowerBoundary = 0x90; - } elseif (0xF4 === $byte) { - $upperBoundary = 0x8F; - } - - $bytesNeeded = 3; - $codePoint = $byte & 0x7; - } else { - $codePoints[] = 0xFFFD; - } - - continue; - } - - if ($byte < $lowerBoundary || $byte > $upperBoundary) { - $codePoint = 0; - $bytesNeeded = 0; - $bytesSeen = 0; - $lowerBoundary = 0x80; - $upperBoundary = 0xBF; - --$i; - $codePoints[] = 0xFFFD; - - continue; - } - - $lowerBoundary = 0x80; - $upperBoundary = 0xBF; - $codePoint = ($codePoint << 6) | ($byte & 0x3F); - - if (++$bytesSeen !== $bytesNeeded) { - continue; - } - - $codePoints[] = $codePoint; - $codePoint = 0; - $bytesNeeded = 0; - $bytesSeen = 0; - } - - // String unexpectedly ended, so append a U+FFFD code point. - if (0 !== $bytesNeeded) { - $codePoints[] = 0xFFFD; - } - - return $codePoints; - } - - /** - * @param int $codePoint - * @param bool $useSTD3ASCIIRules - * - * @return array{status: string, mapping?: string} - */ - private static function lookupCodePointStatus($codePoint, $useSTD3ASCIIRules) - { - if (!self::$mappingTableLoaded) { - self::$mappingTableLoaded = true; - self::$mapped = require __DIR__.'/Resources/unidata/mapped.php'; - self::$ignored = require __DIR__.'/Resources/unidata/ignored.php'; - self::$deviation = require __DIR__.'/Resources/unidata/deviation.php'; - self::$disallowed = require __DIR__.'/Resources/unidata/disallowed.php'; - self::$disallowed_STD3_mapped = require __DIR__.'/Resources/unidata/disallowed_STD3_mapped.php'; - self::$disallowed_STD3_valid = require __DIR__.'/Resources/unidata/disallowed_STD3_valid.php'; - } - - if (isset(self::$mapped[$codePoint])) { - return ['status' => 'mapped', 'mapping' => self::$mapped[$codePoint]]; - } - - if (isset(self::$ignored[$codePoint])) { - return ['status' => 'ignored']; - } - - if (isset(self::$deviation[$codePoint])) { - return ['status' => 'deviation', 'mapping' => self::$deviation[$codePoint]]; - } - - if (isset(self::$disallowed[$codePoint]) || DisallowedRanges::inRange($codePoint)) { - return ['status' => 'disallowed']; - } - - $isDisallowedMapped = isset(self::$disallowed_STD3_mapped[$codePoint]); - - if ($isDisallowedMapped || isset(self::$disallowed_STD3_valid[$codePoint])) { - $status = 'disallowed'; - - if (!$useSTD3ASCIIRules) { - $status = $isDisallowedMapped ? 'mapped' : 'valid'; - } - - if ($isDisallowedMapped) { - return ['status' => $status, 'mapping' => self::$disallowed_STD3_mapped[$codePoint]]; - } - - return ['status' => $status]; - } - - return ['status' => 'valid']; - } -} +<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com> and Trevor Rowbotham <trevor.rowbotham@pm.me>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Polyfill\Intl\Idn;
+
+use Symfony\Polyfill\Intl\Idn\Resources\unidata\DisallowedRanges;
+use Symfony\Polyfill\Intl\Idn\Resources\unidata\Regex;
+
+/**
+ * @see https://www.unicode.org/reports/tr46/
+ *
+ * @internal
+ */
+final class Idn
+{
+ public const ERROR_EMPTY_LABEL = 1;
+ public const ERROR_LABEL_TOO_LONG = 2;
+ public const ERROR_DOMAIN_NAME_TOO_LONG = 4;
+ public const ERROR_LEADING_HYPHEN = 8;
+ public const ERROR_TRAILING_HYPHEN = 0x10;
+ public const ERROR_HYPHEN_3_4 = 0x20;
+ public const ERROR_LEADING_COMBINING_MARK = 0x40;
+ public const ERROR_DISALLOWED = 0x80;
+ public const ERROR_PUNYCODE = 0x100;
+ public const ERROR_LABEL_HAS_DOT = 0x200;
+ public const ERROR_INVALID_ACE_LABEL = 0x400;
+ public const ERROR_BIDI = 0x800;
+ public const ERROR_CONTEXTJ = 0x1000;
+ public const ERROR_CONTEXTO_PUNCTUATION = 0x2000;
+ public const ERROR_CONTEXTO_DIGITS = 0x4000;
+
+ public const INTL_IDNA_VARIANT_2003 = 0;
+ public const INTL_IDNA_VARIANT_UTS46 = 1;
+
+ public const IDNA_DEFAULT = 0;
+ public const IDNA_ALLOW_UNASSIGNED = 1;
+ public const IDNA_USE_STD3_RULES = 2;
+ public const IDNA_CHECK_BIDI = 4;
+ public const IDNA_CHECK_CONTEXTJ = 8;
+ public const IDNA_NONTRANSITIONAL_TO_ASCII = 16;
+ public const IDNA_NONTRANSITIONAL_TO_UNICODE = 32;
+
+ public const MAX_DOMAIN_SIZE = 253;
+ public const MAX_LABEL_SIZE = 63;
+
+ public const BASE = 36;
+ public const TMIN = 1;
+ public const TMAX = 26;
+ public const SKEW = 38;
+ public const DAMP = 700;
+ public const INITIAL_BIAS = 72;
+ public const INITIAL_N = 128;
+ public const DELIMITER = '-';
+ public const MAX_INT = 2147483647;
+
+ /**
+ * Contains the numeric value of a basic code point (for use in representing integers) in the
+ * range 0 to BASE-1, or -1 if b is does not represent a value.
+ *
+ * @var array<int, int>
+ */
+ private static $basicToDigit = [
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
+
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ ];
+
+ /**
+ * @var array<int, int>
+ */
+ private static $virama;
+
+ /**
+ * @var array<int, string>
+ */
+ private static $mapped;
+
+ /**
+ * @var array<int, bool>
+ */
+ private static $ignored;
+
+ /**
+ * @var array<int, string>
+ */
+ private static $deviation;
+
+ /**
+ * @var array<int, bool>
+ */
+ private static $disallowed;
+
+ /**
+ * @var array<int, string>
+ */
+ private static $disallowed_STD3_mapped;
+
+ /**
+ * @var array<int, bool>
+ */
+ private static $disallowed_STD3_valid;
+
+ /**
+ * @var bool
+ */
+ private static $mappingTableLoaded = false;
+
+ /**
+ * @see https://www.unicode.org/reports/tr46/#ToASCII
+ *
+ * @param string $domainName
+ * @param int $options
+ * @param int $variant
+ * @param array $idna_info
+ *
+ * @return string|false
+ */
+ public static function idn_to_ascii($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
+ {
+ if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
+ @trigger_error('idn_to_ascii(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
+ }
+
+ $options = [
+ 'CheckHyphens' => true,
+ 'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
+ 'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
+ 'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
+ 'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_ASCII),
+ 'VerifyDnsLength' => true,
+ ];
+ $info = new Info();
+ $labels = self::process((string) $domainName, $options, $info);
+
+ foreach ($labels as $i => $label) {
+ // Only convert labels to punycode that contain non-ASCII code points
+ if (1 === preg_match('/[^\x00-\x7F]/', $label)) {
+ try {
+ $label = 'xn--'.self::punycodeEncode($label);
+ } catch (\Exception $e) {
+ $info->errors |= self::ERROR_PUNYCODE;
+ }
+
+ $labels[$i] = $label;
+ }
+ }
+
+ if ($options['VerifyDnsLength']) {
+ self::validateDomainAndLabelLength($labels, $info);
+ }
+
+ $idna_info = [
+ 'result' => implode('.', $labels),
+ 'isTransitionalDifferent' => $info->transitionalDifferent,
+ 'errors' => $info->errors,
+ ];
+
+ return 0 === $info->errors ? $idna_info['result'] : false;
+ }
+
+ /**
+ * @see https://www.unicode.org/reports/tr46/#ToUnicode
+ *
+ * @param string $domainName
+ * @param int $options
+ * @param int $variant
+ * @param array $idna_info
+ *
+ * @return string|false
+ */
+ public static function idn_to_utf8($domainName, $options = self::IDNA_DEFAULT, $variant = self::INTL_IDNA_VARIANT_UTS46, &$idna_info = [])
+ {
+ if (\PHP_VERSION_ID >= 70200 && self::INTL_IDNA_VARIANT_2003 === $variant) {
+ @trigger_error('idn_to_utf8(): INTL_IDNA_VARIANT_2003 is deprecated', \E_USER_DEPRECATED);
+ }
+
+ $info = new Info();
+ $labels = self::process((string) $domainName, [
+ 'CheckHyphens' => true,
+ 'CheckBidi' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 !== ($options & self::IDNA_CHECK_BIDI),
+ 'CheckJoiners' => self::INTL_IDNA_VARIANT_UTS46 === $variant && 0 !== ($options & self::IDNA_CHECK_CONTEXTJ),
+ 'UseSTD3ASCIIRules' => 0 !== ($options & self::IDNA_USE_STD3_RULES),
+ 'Transitional_Processing' => self::INTL_IDNA_VARIANT_2003 === $variant || 0 === ($options & self::IDNA_NONTRANSITIONAL_TO_UNICODE),
+ ], $info);
+ $idna_info = [
+ 'result' => implode('.', $labels),
+ 'isTransitionalDifferent' => $info->transitionalDifferent,
+ 'errors' => $info->errors,
+ ];
+
+ return 0 === $info->errors ? $idna_info['result'] : false;
+ }
+
+ /**
+ * @param string $label
+ *
+ * @return bool
+ */
+ private static function isValidContextJ(array $codePoints, $label)
+ {
+ if (!isset(self::$virama)) {
+ self::$virama = require __DIR__.\DIRECTORY_SEPARATOR.'Resources'.\DIRECTORY_SEPARATOR.'unidata'.\DIRECTORY_SEPARATOR.'virama.php';
+ }
+
+ $offset = 0;
+
+ foreach ($codePoints as $i => $codePoint) {
+ if (0x200C !== $codePoint && 0x200D !== $codePoint) {
+ continue;
+ }
+
+ if (!isset($codePoints[$i - 1])) {
+ return false;
+ }
+
+ // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
+ if (isset(self::$virama[$codePoints[$i - 1]])) {
+ continue;
+ }
+
+ // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D})) Then
+ // True;
+ // Generated RegExp = ([Joining_Type:{L,D}][Joining_Type:T]*\u200C[Joining_Type:T]*)[Joining_Type:{R,D}]
+ if (0x200C === $codePoint && 1 === preg_match(Regex::ZWNJ, $label, $matches, \PREG_OFFSET_CAPTURE, $offset)) {
+ $offset += \strlen($matches[1][0]);
+
+ continue;
+ }
+
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+ * @see https://www.unicode.org/reports/tr46/#ProcessingStepMap
+ *
+ * @param string $input
+ * @param array<string, bool> $options
+ *
+ * @return string
+ */
+ private static function mapCodePoints($input, array $options, Info $info)
+ {
+ $str = '';
+ $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
+ $transitional = $options['Transitional_Processing'];
+
+ foreach (self::utf8Decode($input) as $codePoint) {
+ $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
+
+ switch ($data['status']) {
+ case 'disallowed':
+ $info->errors |= self::ERROR_DISALLOWED;
+
+ // no break.
+
+ case 'valid':
+ $str .= mb_chr($codePoint, 'utf-8');
+
+ break;
+
+ case 'ignored':
+ // Do nothing.
+ break;
+
+ case 'mapped':
+ $str .= $data['mapping'];
+
+ break;
+
+ case 'deviation':
+ $info->transitionalDifferent = true;
+ $str .= ($transitional ? $data['mapping'] : mb_chr($codePoint, 'utf-8'));
+
+ break;
+ }
+ }
+
+ return $str;
+ }
+
+ /**
+ * @see https://www.unicode.org/reports/tr46/#Processing
+ *
+ * @param string $domain
+ * @param array<string, bool> $options
+ *
+ * @return array<int, string>
+ */
+ private static function process($domain, array $options, Info $info)
+ {
+ // If VerifyDnsLength is not set, we are doing ToUnicode otherwise we are doing ToASCII and
+ // we need to respect the VerifyDnsLength option.
+ $checkForEmptyLabels = !isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'];
+
+ if ($checkForEmptyLabels && '' === $domain) {
+ $info->errors |= self::ERROR_EMPTY_LABEL;
+
+ return [$domain];
+ }
+
+ // Step 1. Map each code point in the domain name string
+ $domain = self::mapCodePoints($domain, $options, $info);
+
+ // Step 2. Normalize the domain name string to Unicode Normalization Form C.
+ if (!\Normalizer::isNormalized($domain, \Normalizer::FORM_C)) {
+ $domain = \Normalizer::normalize($domain, \Normalizer::FORM_C);
+ }
+
+ // Step 3. Break the string into labels at U+002E (.) FULL STOP.
+ $labels = explode('.', $domain);
+ $lastLabelIndex = \count($labels) - 1;
+
+ // Step 4. Convert and validate each label in the domain name string.
+ foreach ($labels as $i => $label) {
+ $validationOptions = $options;
+
+ if ('xn--' === substr($label, 0, 4)) {
+ try {
+ $label = self::punycodeDecode(substr($label, 4));
+ } catch (\Exception $e) {
+ $info->errors |= self::ERROR_PUNYCODE;
+
+ continue;
+ }
+
+ $validationOptions['Transitional_Processing'] = false;
+ $labels[$i] = $label;
+ }
+
+ self::validateLabel($label, $info, $validationOptions, $i > 0 && $i === $lastLabelIndex);
+ }
+
+ if ($info->bidiDomain && !$info->validBidiDomain) {
+ $info->errors |= self::ERROR_BIDI;
+ }
+
+ // Any input domain name string that does not record an error has been successfully
+ // processed according to this specification. Conversely, if an input domain_name string
+ // causes an error, then the processing of the input domain_name string fails. Determining
+ // what to do with error input is up to the caller, and not in the scope of this document.
+ return $labels;
+ }
+
+ /**
+ * @see https://tools.ietf.org/html/rfc5893#section-2
+ *
+ * @param string $label
+ */
+ private static function validateBidiLabel($label, Info $info)
+ {
+ if (1 === preg_match(Regex::RTL_LABEL, $label)) {
+ $info->bidiDomain = true;
+
+ // Step 1. The first character must be a character with Bidi property L, R, or AL.
+ // If it has the R or AL property, it is an RTL label
+ if (1 !== preg_match(Regex::BIDI_STEP_1_RTL, $label)) {
+ $info->validBidiDomain = false;
+
+ return;
+ }
+
+ // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES,
+ // CS, ET, ON, BN, or NSM are allowed.
+ if (1 === preg_match(Regex::BIDI_STEP_2, $label)) {
+ $info->validBidiDomain = false;
+
+ return;
+ }
+
+ // Step 3. In an RTL label, the end of the label must be a character with Bidi property
+ // R, AL, EN, or AN, followed by zero or more characters with Bidi property NSM.
+ if (1 !== preg_match(Regex::BIDI_STEP_3, $label)) {
+ $info->validBidiDomain = false;
+
+ return;
+ }
+
+ // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
+ if (1 === preg_match(Regex::BIDI_STEP_4_AN, $label) && 1 === preg_match(Regex::BIDI_STEP_4_EN, $label)) {
+ $info->validBidiDomain = false;
+
+ return;
+ }
+
+ return;
+ }
+
+ // We are a LTR label
+ // Step 1. The first character must be a character with Bidi property L, R, or AL.
+ // If it has the L property, it is an LTR label.
+ if (1 !== preg_match(Regex::BIDI_STEP_1_LTR, $label)) {
+ $info->validBidiDomain = false;
+
+ return;
+ }
+
+ // Step 5. In an LTR label, only characters with the Bidi properties L, EN,
+ // ES, CS, ET, ON, BN, or NSM are allowed.
+ if (1 === preg_match(Regex::BIDI_STEP_5, $label)) {
+ $info->validBidiDomain = false;
+
+ return;
+ }
+
+ // Step 6.In an LTR label, the end of the label must be a character with Bidi property L or
+ // EN, followed by zero or more characters with Bidi property NSM.
+ if (1 !== preg_match(Regex::BIDI_STEP_6, $label)) {
+ $info->validBidiDomain = false;
+
+ return;
+ }
+ }
+
+ /**
+ * @param array<int, string> $labels
+ */
+ private static function validateDomainAndLabelLength(array $labels, Info $info)
+ {
+ $maxDomainSize = self::MAX_DOMAIN_SIZE;
+ $length = \count($labels);
+
+ // Number of "." delimiters.
+ $domainLength = $length - 1;
+
+ // If the last label is empty and it is not the first label, then it is the root label.
+ // Increase the max size by 1, making it 254, to account for the root label's "."
+ // delimiter. This also means we don't need to check the last label's length for being too
+ // long.
+ if ($length > 1 && '' === $labels[$length - 1]) {
+ ++$maxDomainSize;
+ --$length;
+ }
+
+ for ($i = 0; $i < $length; ++$i) {
+ $bytes = \strlen($labels[$i]);
+ $domainLength += $bytes;
+
+ if ($bytes > self::MAX_LABEL_SIZE) {
+ $info->errors |= self::ERROR_LABEL_TOO_LONG;
+ }
+ }
+
+ if ($domainLength > $maxDomainSize) {
+ $info->errors |= self::ERROR_DOMAIN_NAME_TOO_LONG;
+ }
+ }
+
+ /**
+ * @see https://www.unicode.org/reports/tr46/#Validity_Criteria
+ *
+ * @param string $label
+ * @param array<string, bool> $options
+ * @param bool $canBeEmpty
+ */
+ private static function validateLabel($label, Info $info, array $options, $canBeEmpty)
+ {
+ if ('' === $label) {
+ if (!$canBeEmpty && (!isset($options['VerifyDnsLength']) || $options['VerifyDnsLength'])) {
+ $info->errors |= self::ERROR_EMPTY_LABEL;
+ }
+
+ return;
+ }
+
+ // Step 1. The label must be in Unicode Normalization Form C.
+ if (!\Normalizer::isNormalized($label, \Normalizer::FORM_C)) {
+ $info->errors |= self::ERROR_INVALID_ACE_LABEL;
+ }
+
+ $codePoints = self::utf8Decode($label);
+
+ if ($options['CheckHyphens']) {
+ // Step 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
+ // in both the thrid and fourth positions.
+ if (isset($codePoints[2], $codePoints[3]) && 0x002D === $codePoints[2] && 0x002D === $codePoints[3]) {
+ $info->errors |= self::ERROR_HYPHEN_3_4;
+ }
+
+ // Step 3. If CheckHyphens, the label must neither begin nor end with a U+002D
+ // HYPHEN-MINUS character.
+ if ('-' === substr($label, 0, 1)) {
+ $info->errors |= self::ERROR_LEADING_HYPHEN;
+ }
+
+ if ('-' === substr($label, -1, 1)) {
+ $info->errors |= self::ERROR_TRAILING_HYPHEN;
+ }
+ }
+
+ // Step 4. The label must not contain a U+002E (.) FULL STOP.
+ if (false !== strpos($label, '.')) {
+ $info->errors |= self::ERROR_LABEL_HAS_DOT;
+ }
+
+ // Step 5. The label must not begin with a combining mark, that is: General_Category=Mark.
+ if (1 === preg_match(Regex::COMBINING_MARK, $label)) {
+ $info->errors |= self::ERROR_LEADING_COMBINING_MARK;
+ }
+
+ // Step 6. Each code point in the label must only have certain status values according to
+ // Section 5, IDNA Mapping Table:
+ $transitional = $options['Transitional_Processing'];
+ $useSTD3ASCIIRules = $options['UseSTD3ASCIIRules'];
+
+ foreach ($codePoints as $codePoint) {
+ $data = self::lookupCodePointStatus($codePoint, $useSTD3ASCIIRules);
+ $status = $data['status'];
+
+ if ('valid' === $status || (!$transitional && 'deviation' === $status)) {
+ continue;
+ }
+
+ $info->errors |= self::ERROR_DISALLOWED;
+
+ break;
+ }
+
+ // Step 7. If CheckJoiners, the label must satisify the ContextJ rules from Appendix A, in
+ // The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
+ // [IDNA2008].
+ if ($options['CheckJoiners'] && !self::isValidContextJ($codePoints, $label)) {
+ $info->errors |= self::ERROR_CONTEXTJ;
+ }
+
+ // Step 8. If CheckBidi, and if the domain name is a Bidi domain name, then the label must
+ // satisfy all six of the numbered conditions in [IDNA2008] RFC 5893, Section 2.
+ if ($options['CheckBidi'] && (!$info->bidiDomain || $info->validBidiDomain)) {
+ self::validateBidiLabel($label, $info);
+ }
+ }
+
+ /**
+ * @see https://tools.ietf.org/html/rfc3492#section-6.2
+ *
+ * @param string $input
+ *
+ * @return string
+ */
+ private static function punycodeDecode($input)
+ {
+ $n = self::INITIAL_N;
+ $out = 0;
+ $i = 0;
+ $bias = self::INITIAL_BIAS;
+ $lastDelimIndex = strrpos($input, self::DELIMITER);
+ $b = false === $lastDelimIndex ? 0 : $lastDelimIndex;
+ $inputLength = \strlen($input);
+ $output = [];
+ $bytes = array_map('ord', str_split($input));
+
+ for ($j = 0; $j < $b; ++$j) {
+ if ($bytes[$j] > 0x7F) {
+ throw new \Exception('Invalid input');
+ }
+
+ $output[$out++] = $input[$j];
+ }
+
+ if ($b > 0) {
+ ++$b;
+ }
+
+ for ($in = $b; $in < $inputLength; ++$out) {
+ $oldi = $i;
+ $w = 1;
+
+ for ($k = self::BASE; /* no condition */; $k += self::BASE) {
+ if ($in >= $inputLength) {
+ throw new \Exception('Invalid input');
+ }
+
+ $digit = self::$basicToDigit[$bytes[$in++] & 0xFF];
+
+ if ($digit < 0) {
+ throw new \Exception('Invalid input');
+ }
+
+ if ($digit > intdiv(self::MAX_INT - $i, $w)) {
+ throw new \Exception('Integer overflow');
+ }
+
+ $i += $digit * $w;
+
+ if ($k <= $bias) {
+ $t = self::TMIN;
+ } elseif ($k >= $bias + self::TMAX) {
+ $t = self::TMAX;
+ } else {
+ $t = $k - $bias;
+ }
+
+ if ($digit < $t) {
+ break;
+ }
+
+ $baseMinusT = self::BASE - $t;
+
+ if ($w > intdiv(self::MAX_INT, $baseMinusT)) {
+ throw new \Exception('Integer overflow');
+ }
+
+ $w *= $baseMinusT;
+ }
+
+ $outPlusOne = $out + 1;
+ $bias = self::adaptBias($i - $oldi, $outPlusOne, 0 === $oldi);
+
+ if (intdiv($i, $outPlusOne) > self::MAX_INT - $n) {
+ throw new \Exception('Integer overflow');
+ }
+
+ $n += intdiv($i, $outPlusOne);
+ $i %= $outPlusOne;
+ array_splice($output, $i++, 0, [mb_chr($n, 'utf-8')]);
+ }
+
+ return implode('', $output);
+ }
+
+ /**
+ * @see https://tools.ietf.org/html/rfc3492#section-6.3
+ *
+ * @param string $input
+ *
+ * @return string
+ */
+ private static function punycodeEncode($input)
+ {
+ $n = self::INITIAL_N;
+ $delta = 0;
+ $out = 0;
+ $bias = self::INITIAL_BIAS;
+ $inputLength = 0;
+ $output = '';
+ $iter = self::utf8Decode($input);
+
+ foreach ($iter as $codePoint) {
+ ++$inputLength;
+
+ if ($codePoint < 0x80) {
+ $output .= \chr($codePoint);
+ ++$out;
+ }
+ }
+
+ $h = $out;
+ $b = $out;
+
+ if ($b > 0) {
+ $output .= self::DELIMITER;
+ ++$out;
+ }
+
+ while ($h < $inputLength) {
+ $m = self::MAX_INT;
+
+ foreach ($iter as $codePoint) {
+ if ($codePoint >= $n && $codePoint < $m) {
+ $m = $codePoint;
+ }
+ }
+
+ if ($m - $n > intdiv(self::MAX_INT - $delta, $h + 1)) {
+ throw new \Exception('Integer overflow');
+ }
+
+ $delta += ($m - $n) * ($h + 1);
+ $n = $m;
+
+ foreach ($iter as $codePoint) {
+ if ($codePoint < $n && 0 === ++$delta) {
+ throw new \Exception('Integer overflow');
+ }
+
+ if ($codePoint === $n) {
+ $q = $delta;
+
+ for ($k = self::BASE; /* no condition */; $k += self::BASE) {
+ if ($k <= $bias) {
+ $t = self::TMIN;
+ } elseif ($k >= $bias + self::TMAX) {
+ $t = self::TMAX;
+ } else {
+ $t = $k - $bias;
+ }
+
+ if ($q < $t) {
+ break;
+ }
+
+ $qMinusT = $q - $t;
+ $baseMinusT = self::BASE - $t;
+ $output .= self::encodeDigit($t + $qMinusT % $baseMinusT, false);
+ ++$out;
+ $q = intdiv($qMinusT, $baseMinusT);
+ }
+
+ $output .= self::encodeDigit($q, false);
+ ++$out;
+ $bias = self::adaptBias($delta, $h + 1, $h === $b);
+ $delta = 0;
+ ++$h;
+ }
+ }
+
+ ++$delta;
+ ++$n;
+ }
+
+ return $output;
+ }
+
+ /**
+ * @see https://tools.ietf.org/html/rfc3492#section-6.1
+ *
+ * @param int $delta
+ * @param int $numPoints
+ * @param bool $firstTime
+ *
+ * @return int
+ */
+ private static function adaptBias($delta, $numPoints, $firstTime)
+ {
+ // xxx >> 1 is a faster way of doing intdiv(xxx, 2)
+ $delta = $firstTime ? intdiv($delta, self::DAMP) : $delta >> 1;
+ $delta += intdiv($delta, $numPoints);
+ $k = 0;
+
+ while ($delta > ((self::BASE - self::TMIN) * self::TMAX) >> 1) {
+ $delta = intdiv($delta, self::BASE - self::TMIN);
+ $k += self::BASE;
+ }
+
+ return $k + intdiv((self::BASE - self::TMIN + 1) * $delta, $delta + self::SKEW);
+ }
+
+ /**
+ * @param int $d
+ * @param bool $flag
+ *
+ * @return string
+ */
+ private static function encodeDigit($d, $flag)
+ {
+ return \chr($d + 22 + 75 * ($d < 26 ? 1 : 0) - (($flag ? 1 : 0) << 5));
+ }
+
+ /**
+ * Takes a UTF-8 encoded string and converts it into a series of integer code points. Any
+ * invalid byte sequences will be replaced by a U+FFFD replacement code point.
+ *
+ * @see https://encoding.spec.whatwg.org/#utf-8-decoder
+ *
+ * @param string $input
+ *
+ * @return array<int, int>
+ */
+ private static function utf8Decode($input)
+ {
+ $bytesSeen = 0;
+ $bytesNeeded = 0;
+ $lowerBoundary = 0x80;
+ $upperBoundary = 0xBF;
+ $codePoint = 0;
+ $codePoints = [];
+ $length = \strlen($input);
+
+ for ($i = 0; $i < $length; ++$i) {
+ $byte = \ord($input[$i]);
+
+ if (0 === $bytesNeeded) {
+ if ($byte >= 0x00 && $byte <= 0x7F) {
+ $codePoints[] = $byte;
+
+ continue;
+ }
+
+ if ($byte >= 0xC2 && $byte <= 0xDF) {
+ $bytesNeeded = 1;
+ $codePoint = $byte & 0x1F;
+ } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
+ if (0xE0 === $byte) {
+ $lowerBoundary = 0xA0;
+ } elseif (0xED === $byte) {
+ $upperBoundary = 0x9F;
+ }
+
+ $bytesNeeded = 2;
+ $codePoint = $byte & 0xF;
+ } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
+ if (0xF0 === $byte) {
+ $lowerBoundary = 0x90;
+ } elseif (0xF4 === $byte) {
+ $upperBoundary = 0x8F;
+ }
+
+ $bytesNeeded = 3;
+ $codePoint = $byte & 0x7;
+ } else {
+ $codePoints[] = 0xFFFD;
+ }
+
+ continue;
+ }
+
+ if ($byte < $lowerBoundary || $byte > $upperBoundary) {
+ $codePoint = 0;
+ $bytesNeeded = 0;
+ $bytesSeen = 0;
+ $lowerBoundary = 0x80;
+ $upperBoundary = 0xBF;
+ --$i;
+ $codePoints[] = 0xFFFD;
+
+ continue;
+ }
+
+ $lowerBoundary = 0x80;
+ $upperBoundary = 0xBF;
+ $codePoint = ($codePoint << 6) | ($byte & 0x3F);
+
+ if (++$bytesSeen !== $bytesNeeded) {
+ continue;
+ }
+
+ $codePoints[] = $codePoint;
+ $codePoint = 0;
+ $bytesNeeded = 0;
+ $bytesSeen = 0;
+ }
+
+ // String unexpectedly ended, so append a U+FFFD code point.
+ if (0 !== $bytesNeeded) {
+ $codePoints[] = 0xFFFD;
+ }
+
+ return $codePoints;
+ }
+
+ /**
+ * @param int $codePoint
+ * @param bool $useSTD3ASCIIRules
+ *
+ * @return array{status: string, mapping?: string}
+ */
+ private static function lookupCodePointStatus($codePoint, $useSTD3ASCIIRules)
+ {
+ if (!self::$mappingTableLoaded) {
+ self::$mappingTableLoaded = true;
+ self::$mapped = require __DIR__.'/Resources/unidata/mapped.php';
+ self::$ignored = require __DIR__.'/Resources/unidata/ignored.php';
+ self::$deviation = require __DIR__.'/Resources/unidata/deviation.php';
+ self::$disallowed = require __DIR__.'/Resources/unidata/disallowed.php';
+ self::$disallowed_STD3_mapped = require __DIR__.'/Resources/unidata/disallowed_STD3_mapped.php';
+ self::$disallowed_STD3_valid = require __DIR__.'/Resources/unidata/disallowed_STD3_valid.php';
+ }
+
+ if (isset(self::$mapped[$codePoint])) {
+ return ['status' => 'mapped', 'mapping' => self::$mapped[$codePoint]];
+ }
+
+ if (isset(self::$ignored[$codePoint])) {
+ return ['status' => 'ignored'];
+ }
+
+ if (isset(self::$deviation[$codePoint])) {
+ return ['status' => 'deviation', 'mapping' => self::$deviation[$codePoint]];
+ }
+
+ if (isset(self::$disallowed[$codePoint]) || DisallowedRanges::inRange($codePoint)) {
+ return ['status' => 'disallowed'];
+ }
+
+ $isDisallowedMapped = isset(self::$disallowed_STD3_mapped[$codePoint]);
+
+ if ($isDisallowedMapped || isset(self::$disallowed_STD3_valid[$codePoint])) {
+ $status = 'disallowed';
+
+ if (!$useSTD3ASCIIRules) {
+ $status = $isDisallowedMapped ? 'mapped' : 'valid';
+ }
+
+ if ($isDisallowedMapped) {
+ return ['status' => $status, 'mapping' => self::$disallowed_STD3_mapped[$codePoint]];
+ }
+
+ return ['status' => $status];
+ }
+
+ return ['status' => 'valid'];
+ }
+}
|