= 0xD800 && $code <= 0xDFFF) || $code > 0x10FFFF) {
            throw new Nette\InvalidArgumentException('Code point must be in range 0x0 to 0xD7FF or 0xE000 to 0x10FFFF.');
        }
        return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', $code));
    }


    /**
     * Does the $haystack string start with the prefix $needle?
     * The comparison is byte-wise; an empty $needle compares zero bytes and therefore matches.
     * @param  string
     * @param  string
     * @return bool
     */
    public static function startsWith($haystack, $needle)
    {
        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }


    /**
     * Does the $haystack string end with the suffix $needle?
     * The comparison is byte-wise; an empty $needle always matches.
     * @param  string
     * @param  string
     * @return bool
     */
    public static function endsWith($haystack, $needle)
    {
        return strlen($needle) === 0 || substr($haystack, -strlen($needle)) === $needle;
    }


    /**
     * Does $haystack contain $needle?
     * The search is byte-wise; an empty $needle is considered to be contained in any string.
     * @param  string
     * @param  string
     * @return bool
     */
    public static function contains($haystack, $needle)
    {
        // Short-circuit the empty needle: strpos() raises an "Empty needle" warning and
        // returns false for it before PHP 8, which would disagree with startsWith()/endsWith().
        return strlen($needle) === 0 || strpos($haystack, $needle) !== false;
    }


    /**
     * Returns a part of UTF-8 string.
     * @param  string
     * @param  int  start in characters (code points)
     * @param  int  length in characters (code points); null means no length limit
     * @return string
     */
    public static function substring($s, $start, $length = null)
    {
        if (function_exists('mb_substr')) {
            return mb_substr($s, $start, $length, 'UTF-8'); // MB is much faster
        } elseif ($length === null) {
            // iconv_substr() needs an explicit length; the full string length is an upper bound
            $length = self::length($s);
        } elseif ($start < 0 && $length < 0) {
            $start += self::length($s); // unifies iconv_substr behavior with mb_substr
        }
        return iconv_substr($s, $start, $length, 'UTF-8');
    }


    /**
     * Removes control characters and normalizes line endings and spaces:
     * line endings become "\n", trailing tabs/spaces are stripped from every line
     * and leading/trailing blank lines are removed.
     * @param  string  UTF-8 encoding
     * @return string
     */
    public static function normalize($s)
    {
        $s = self::normalizeNewLines($s);

        // remove control characters; leave \t + \n
        $s = preg_replace('#[\x00-\x08\x0B-\x1F\x7F-\x9F]+#u', '', $s);

        // right trim
        $s = preg_replace('#[\t ]+$#m', '', $s);

        // leading and trailing blank lines
        $s = trim($s, "\n");

        return $s;
    }


    /**
     * Standardize line endings to unix-like.
* @param string UTF-8 encoding or 8-bit * @return string */ public static function normalizeNewLines($s) { return str_replace(["\r\n", "\r"], "\n", $s); } /** * Converts to ASCII. * @param string UTF-8 encoding * @return string ASCII */ public static function toAscii($s) { static $transliterator = null; if ($transliterator === null && class_exists('Transliterator', false)) { $transliterator = \Transliterator::create('Any-Latin; Latin-ASCII'); } $s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s); $s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06"); $s = str_replace( ["\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A", "\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xB0"], ["\x03", "\x03", "\x03", "\x02", "\x02", "\x02", "\x04"], $s ); if ($transliterator !== null) { $s = $transliterator->transliterate($s); } if (ICONV_IMPL === 'glibc') { $s = str_replace( ["\xC2\xBB", "\xC2\xAB", "\xE2\x80\xA6", "\xE2\x84\xA2", "\xC2\xA9", "\xC2\xAE"], ['>>', '<<', '...', 'TM', '(c)', '(R)'], $s ); $s = iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s); $s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e" . "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3" . "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8" . "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe" . "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7", 'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.'); $s = preg_replace('#[^\x00-\x7F]++#', '', $s); } else { $s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s); } $s = str_replace(['`', "'", '"', '^', '~', '?'], '', $s); return strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?'); } /** * Converts to web safe characters [a-z0-9-] text. 
* @param string UTF-8 encoding * @param string allowed characters * @param bool * @return string */ public static function webalize($s, $charlist = null, $lower = true) { $s = self::toAscii($s); if ($lower) { $s = strtolower($s); } $s = preg_replace('#[^a-z0-9' . ($charlist !== null ? preg_quote($charlist, '#') : '') . ']+#i', '-', $s); $s = trim($s, '-'); return $s; } /** * Truncates string to maximal length. * @param string UTF-8 encoding * @param int * @param string UTF-8 encoding * @return string */ public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6") { if (self::length($s) > $maxLen) { $maxLen = $maxLen - self::length($append); if ($maxLen < 1) { return $append; } elseif ($matches = self::match($s, '#^.{1,' . $maxLen . '}(?=[\s\x00-/:-@\[-`{-~])#us')) { return $matches[0] . $append; } else { return self::substring($s, 0, $maxLen) . $append; } } return $s; } /** * Indents the content from the left. * @param string UTF-8 encoding or 8-bit * @param int * @param string * @return string */ public static function indent($s, $level = 1, $chars = "\t") { if ($level > 0) { $s = self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . str_repeat($chars, $level)); } return $s; } /** * Convert to lower case. * @param string UTF-8 encoding * @return string */ public static function lower($s) { return mb_strtolower($s, 'UTF-8'); } /** * Convert first character to lower case. * @param string UTF-8 encoding * @return string */ public static function firstLower($s) { return self::lower(self::substring($s, 0, 1)) . self::substring($s, 1); } /** * Convert to upper case. * @param string UTF-8 encoding * @return string */ public static function upper($s) { return mb_strtoupper($s, 'UTF-8'); } /** * Convert first character to upper case. * @param string UTF-8 encoding * @return string */ public static function firstUpper($s) { return self::upper(self::substring($s, 0, 1)) . self::substring($s, 1); } /** * Capitalize string. 
* @param string UTF-8 encoding * @return string */ public static function capitalize($s) { return mb_convert_case($s, MB_CASE_TITLE, 'UTF-8'); } /** * Case-insensitive compares UTF-8 strings. * @param string * @param string * @param int * @return bool */ public static function compare($left, $right, $len = null) { if ($len < 0) { $left = self::substring($left, $len, -$len); $right = self::substring($right, $len, -$len); } elseif ($len !== null) { $left = self::substring($left, 0, $len); $right = self::substring($right, 0, $len); } return self::lower($left) === self::lower($right); } /** * Finds the length of common prefix of strings. * @param string|array * @return string */ public static function findPrefix(...$strings) { if (is_array($strings[0])) { $strings = $strings[0]; } $first = array_shift($strings); for ($i = 0; $i < strlen($first); $i++) { foreach ($strings as $s) { if (!isset($s[$i]) || $first[$i] !== $s[$i]) { while ($i && $first[$i - 1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") { $i--; } return substr($first, 0, $i); } } } return $first; } /** * Returns number of characters (not bytes) in UTF-8 string. * That is the number of Unicode code points which may differ from the number of graphemes. * @param string * @return int */ public static function length($s) { return function_exists('mb_strlen') ? mb_strlen($s, 'UTF-8') : strlen(utf8_decode($s)); } /** * Strips whitespace. * @param string UTF-8 encoding * @param string * @return string */ public static function trim($s, $charlist = self::TRIM_CHARACTERS) { $charlist = preg_quote($charlist, '#'); return self::replace($s, '#^[' . $charlist . ']+|[' . $charlist . ']+\z#u', ''); } /** * Pad a string to a certain length with another string. 
* @param string UTF-8 encoding * @param int * @param string * @return string */ public static function padLeft($s, $length, $pad = ' ') { $length = max(0, $length - self::length($s)); $padLen = self::length($pad); return str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen) . $s; } /** * Pad a string to a certain length with another string. * @param string UTF-8 encoding * @param int * @param string * @return string */ public static function padRight($s, $length, $pad = ' ') { $length = max(0, $length - self::length($s)); $padLen = self::length($pad); return $s . str_repeat($pad, (int) ($length / $padLen)) . self::substring($pad, 0, $length % $padLen); } /** * Reverse string. * @param string UTF-8 encoding * @return string */ public static function reverse($s) { return iconv('UTF-32LE', 'UTF-8', strrev(iconv('UTF-8', 'UTF-32BE', $s))); } /** * Returns part of $haystack before $nth occurence of $needle. * @param string * @param string * @param int negative value means searching from the end * @return string|false returns false if the needle was not found */ public static function before($haystack, $needle, $nth = 1) { $pos = self::pos($haystack, $needle, $nth); return $pos === false ? false : substr($haystack, 0, $pos); } /** * Returns part of $haystack after $nth occurence of $needle. * @param string * @param string * @param int negative value means searching from the end * @return string|false returns false if the needle was not found */ public static function after($haystack, $needle, $nth = 1) { $pos = self::pos($haystack, $needle, $nth); return $pos === false ? false : (string) substr($haystack, $pos + strlen($needle)); } /** * Returns position of $nth occurence of $needle in $haystack. 
* @param string * @param string * @param int negative value means searching from the end * @return int|false offset in characters or false if the needle was not found */ public static function indexOf($haystack, $needle, $nth = 1) { $pos = self::pos($haystack, $needle, $nth); return $pos === false ? false : self::length(substr($haystack, 0, $pos)); } /** * Returns position of $nth occurence of $needle in $haystack. * @return int|false offset in bytes or false if the needle was not found */ private static function pos($haystack, $needle, $nth = 1) { if (!$nth) { return false; } elseif ($nth > 0) { if (strlen($needle) === 0) { return 0; } $pos = 0; while (($pos = strpos($haystack, $needle, $pos)) !== false && --$nth) { $pos++; } } else { $len = strlen($haystack); if (strlen($needle) === 0) { return $len; } $pos = $len - 1; while (($pos = strrpos($haystack, $needle, $pos - $len)) !== false && ++$nth) { $pos--; } } return $pos; } /** * Splits string by a regular expression. * @param string * @param string * @param int * @return array */ public static function split($subject, $pattern, $flags = 0) { return self::pcre('preg_split', [$pattern, $subject, -1, $flags | PREG_SPLIT_DELIM_CAPTURE]); } /** * Performs a regular expression match. * @param string * @param string * @param int can be PREG_OFFSET_CAPTURE (returned in bytes) * @param int offset in bytes * @return mixed */ public static function match($subject, $pattern, $flags = 0, $offset = 0) { if ($offset > strlen($subject)) { return null; } return self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset]) ? $m : null; } /** * Performs a global regular expression match. 
* @param string * @param string * @param int can be PREG_OFFSET_CAPTURE (returned in bytes); PREG_SET_ORDER is default * @param int offset in bytes * @return array */ public static function matchAll($subject, $pattern, $flags = 0, $offset = 0) { if ($offset > strlen($subject)) { return []; } self::pcre('preg_match_all', [ $pattern, $subject, &$m, ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER), $offset, ]); return $m; } /** * Perform a regular expression search and replace. * @param string * @param string|array * @param string|callable * @param int * @return string */ public static function replace($subject, $pattern, $replacement = null, $limit = -1) { if (is_object($replacement) || is_array($replacement)) { if (!is_callable($replacement, false, $textual)) { throw new Nette\InvalidStateException("Callback '$textual' is not callable."); } return self::pcre('preg_replace_callback', [$pattern, $replacement, $subject, $limit]); } elseif ($replacement === null && is_array($pattern)) { $replacement = array_values($pattern); $pattern = array_keys($pattern); } return self::pcre('preg_replace', [$pattern, $replacement, $subject, $limit]); } /** @internal */ public static function pcre($func, $args) { static $messages = [ PREG_INTERNAL_ERROR => 'Internal error', PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted', PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted', PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data', PREG_BAD_UTF8_OFFSET_ERROR => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point', 6 => 'Failed due to limited JIT stack space', // PREG_JIT_STACKLIMIT_ERROR ]; $res = Callback::invokeSafe($func, $args, function ($message) use ($args) { // compile-time error, not detectable by preg_last_error throw new RegexpException($message . ' in pattern: ' . 
implode(' or ', (array) $args[0])); }); if (($code = preg_last_error()) // run-time error, but preg_last_error & return code are liars && ($res === null || !in_array($func, ['preg_filter', 'preg_replace_callback', 'preg_replace'], true)) ) { throw new RegexpException((isset($messages[$code]) ? $messages[$code] : 'Unknown error') . ' (pattern: ' . implode(' or ', (array) $args[0]) . ')', $code); } return $res; } }