= "\x80" && $c < "\xc0") { // Legal tail bytes are nice. $sequence .= $c; } else { if ($len == 0) { // Premature end of string! // Drop a replacement character into output to // represent the invalid UTF-8 sequence. $result .= $unknown; break 2; } else { // Illegal tail byte; abandon the sequence. $result .= $unknown; // Back up and reprocess this byte; it may itself // be a legal ASCII or UTF-8 sequence head. --$i; ++$len; continue 2; } } } while(--$remaining); $n = ord($head); if ($n <= 0xdf) { $ord = ($n-192)*64 + (ord($sequence{1})-128); } else if ($n <= 0xef) { $ord = ($n-224)*4096 + (ord($sequence{1})-128)*64 + (ord($sequence{2})-128); } else if ($n <= 0xf7) { $ord = ($n-240)*262144 + (ord($sequence{1})-128)*4096 + (ord($sequence{2})-128)*64 + (ord($sequence{3})-128); } else if ($n <= 0xfb) { $ord = ($n-248)*16777216 + (ord($sequence{1})-128)*262144 + (ord($sequence{2})-128)*4096 + (ord($sequence{3})-128)*64 + (ord($sequence{4})-128); } else if ($n <= 0xfd) { $ord = ($n-252)*1073741824 + (ord($sequence{1})-128)*16777216 + (ord($sequence{2})-128)*262144 + (ord($sequence{3})-128)*4096 + (ord($sequence{4})-128)*64 + (ord($sequence{5})-128); } $result .= _transliteration_replace($ord, $unknown); $head = ''; } elseif ($c < "\x80") { // ASCII byte. $result .= $c; $head = ''; } elseif ($c < "\xc0") { // Illegal tail bytes. if ($head == '') { $result .= $unknown; } } else { // Miscellaneous freaks. $result .= $unknown; $head = ''; } } } return $result; }

/**
 * Load transliteration database and replace a character.
 *
 * The code point is split into a bank number (all bits above the low eight)
 * and an offset within that bank (the low eight bits). The map for a bank is
 * read from data/xNN.php inside the transliteration module directory, where
 * NN is the bank number in lower-case hex, and is kept in a static variable
 * so that each file is included at most once.
 *
 * A data file is expected to return an array holding a 'default' map and,
 * optionally, maps keyed by locale. When the global $locale has a map in the
 * file, its entries take precedence over the 'default' entries.
 *
 * Note: the static cache is keyed by bank only, so the locale that is active
 * when a bank is first loaded determines the map used for all later calls.
 *
 * @param integer $ord
 *   A unicode ordinal character code.
 * @param string $unknown
 *   Replacement for unknown characters.
 * @return string
 *   Plain ASCII replacement character, or $unknown if the bank file does not
 *   exist or has no entry for the character.
 * @see transliteration_get()
 */
function _transliteration_replace($ord, $unknown = '?') {
  global $locale;
  static $banks = array();

  $bank = $ord >> 8;

  if (!isset($banks[$bank])) {
    // Start from an empty map so that a missing or malformed data file is
    // remembered too and not probed again on every call.
    $banks[$bank] = array();

    $file = drupal_get_path('module', 'transliteration') .'/data/'. sprintf('x%02x', $bank) .'.php';
    if (file_exists($file)) {
      $charmap = include($file);
      if (is_array($charmap)) {
        $default = (isset($charmap['default']) && is_array($charmap['default'])) ? $charmap['default'] : array();
        if ($locale && isset($charmap[$locale]) && is_array($charmap[$locale])) {
          // Merge locale specific replacements. The maps are keyed by integer
          // offsets, so array_merge() must not be used here: it renumbers
          // integer keys and would append the locale entries instead of
          // overriding the defaults. The union operator keeps the keys and
          // prefers the left-hand (locale) value.
          $banks[$bank] = $charmap[$locale] + $default;
        }
        else {
          $banks[$bank] = $default;
        }
      }
    }
  }

  // Offset of the character within its bank.
  $ord = $ord & 255;

  return isset($banks[$bank][$ord]) ? $banks[$bank][$ord] : $unknown;
}