= "\x80" && $c < "\xc0") { // Legal tail bytes are nice. $sequence .= $c; } else { if ($len == 0) { // Premature end of string! // Drop a replacement character into output to // represent the invalid UTF-8 sequence. $result .= $unknown; break 2; } else { // Illegal tail byte; abandon the sequence. $result .= $unknown; // Back up and reprocess this byte; it may itself // be a legal ASCII or UTF-8 sequence head. --$i; ++$len; continue 2; } } } while(--$remaining); $n = ord($head); if ($n <= 0xdf) { $ord = ($n-192)*64 + (ord($sequence{1})-128); } else if ($n <= 0xef) { $ord = ($n-224)*4096 + (ord($sequence{1})-128)*64 + (ord($sequence{2})-128); } else if ($n <= 0xf7) { $ord = ($n-240)*262144 + (ord($sequence{1})-128)*4096 + (ord($sequence{2})-128)*64 + (ord($sequence{3})-128); } else if ($n <= 0xfb) { $ord = ($n-248)*16777216 + (ord($sequence{1})-128)*262144 + (ord($sequence{2})-128)*4096 + (ord($sequence{3})-128)*64 + (ord($sequence{4})-128); } else if ($n <= 0xfd) { $ord = ($n-252)*1073741824 + (ord($sequence{1})-128)*16777216 + (ord($sequence{2})-128)*262144 + (ord($sequence{3})-128)*4096 + (ord($sequence{4})-128)*64 + (ord($sequence{5})-128); } $result .= _transliteration_replace($ord, $unknown, $langcode); $head = ''; } elseif ($c < "\x80") { // ASCII byte. $result .= $c; $head = ''; } elseif ($c < "\xc0") { // Illegal tail bytes. if ($head == '') { $result .= $unknown; } } else { // Miscellaneous freaks. $result .= $unknown; $head = ''; } } } return $result; } /** * Lookup and replace a character from the transliteration database. * * @param integer $ord * A unicode ordinal character code. * @param string $unknown * Replacement for unknown characters. * @param string $langcode * Optional ISO 639 language code used to import language specific * replacements. Defaults to the current display language. * @return string * Plain ASCII replacement character. 
* @see transliteration_get()
 */
function _transliteration_replace($ord, $unknown = '?', $langcode = NULL) {
  // Per-request caches, keyed by bank number:
  // - $template holds the raw contents of each data file.
  // - $map holds the final lookup table per bank and language.
  static $map = array(), $template = array();

  if (!isset($langcode)) {
    global $language;
    $langcode = $language->language;
  }

  // Everything above the low 8 bits selects the data file ("bank"); the low
  // 8 bits are the index inside that bank.
  $bank = $ord >> 8;

  // Load the bank from disk the first time it is needed.
  if (!isset($template[$bank])) {
    $file = drupal_get_path('module', 'transliteration') .'/data/'. sprintf('x%02x', $bank) .'.php';
    if (file_exists($file)) {
      $data = include($file);
    }
    else {
      $data = array();
    }

    // Normalize the loaded data so the code below can rely on its shape. A
    // file without a return statement makes include yield integer 1, and a
    // file may carry language specific tables without a default 'en' table.
    // Without this, the array union below would fail on "array + NULL".
    if (!is_array($data)) {
      $data = array();
    }
    if (!isset($data['en']) || !is_array($data['en'])) {
      $data['en'] = array();
    }
    $template[$bank] = $data;
  }

  // Build the lookup table for this bank and language on first use.
  if (!isset($map[$bank][$langcode])) {
    if ($langcode != 'en' && isset($template[$bank][$langcode]) && is_array($template[$bank][$langcode])) {
      // Array union keeps the left operand on key collisions, so language
      // specific replacements take precedence over the default table.
      $map[$bank][$langcode] = $template[$bank][$langcode] + $template[$bank]['en'];
    }
    else {
      $map[$bank][$langcode] = $template[$bank]['en'];
    }
  }

  // Index within the bank.
  $ord = $ord & 255;

  return isset($map[$bank][$langcode][$ord]) ? $map[$bank][$langcode][$ord] : $unknown;
}