- *
*
 * @param $string
 *   The string to be transformed.
 * @param $allowed_tags (optional)
 *   If supplied, a list of tags that will be transformed. If omitted, all
 *   supported tags are transformed.
 * @return
 *   The transformed string.
 */
function mimemail_html_to_text($string, $allowed_tags = NULL) {
  // Cache list of supported tags.
  static $supported_tags;
  if (empty($supported_tags)) {
    $supported_tags = array('a', 'em', 'i', 'strong', 'b', 'br', 'p', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr');
  }

  // Make sure only supported tags are kept.
  $allowed_tags = isset($allowed_tags) ? array_intersect($supported_tags, $allowed_tags) : $supported_tags;

  // Make sure tags, entities and attributes are well-formed and properly nested.
  $string = _mimemail_filter_htmlcorrector(filter_xss($string, $allowed_tags));

  // Apply inline styles: emphasis becomes /text/, strong becomes *text*.
  $string = preg_replace('!</?(em|i)>!i', '/', $string);
  $string = preg_replace('!</?(strong|b)>!i', '*', $string);

  // Replace inline <a> tags with the text of link and a footnote.
  // 'See <a href="http://drupal.org">the Drupal site</a>' becomes
  // 'See the Drupal site [1]' with the URL included as a footnote.
  _mimemail_html_to_mail_urls(NULL, TRUE);
  // Capture groups: 1 = whole anchor, 2 = href value, 3 = link label. The
  // callback relies on exactly this layout.
  $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i';
  $string = preg_replace_callback($pattern, '_mimemail_html_to_mail_urls', $string);
  $urls = _mimemail_html_to_mail_urls();
  $footnotes = '';
  if (count($urls)) {
    $footnotes .= "\n";
    for ($i = 0, $max = count($urls); $i < $max; $i++) {
      $footnotes .= '['. ($i + 1) .'] '. $urls[$i] ."\n";
    }
  }

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  $tag = FALSE; // Odd/even counter (tag or no tag)
  $casing = NULL; // Case conversion function
  $output = '';
  $indent = array(); // All current indentation string chunks
  $lists = array(); // Array of counters for opened lists
  foreach ($split as $value) {
    $chunk = NULL; // Holds a string ready to be formatted and output.

    // Process HTML tags (but don't output any literally).
    if ($tag) {
      list($tagname) = explode(' ', strtolower($value), 2);
      switch ($tagname) {
        // List counters
        case 'ul':
          array_unshift($lists, '*');
          break;
        case 'ol':
          array_unshift($lists, 1);
          break;
        case '/ul':
        case '/ol':
          array_shift($lists);
          $chunk = ''; // Ensure blank new-line.
          break;

        // Quotation/list markers, non-fancy headers
        case 'blockquote':
          // Format=flowed indentation cannot be mixed with lists.
          $indent[] = count($lists) ? ' "' : '>';
          break;
        case 'li':
          // A stray <li> outside any list is treated as a bullet item rather
          // than reading an undefined list counter.
          $indent[] = (isset($lists[0]) && is_numeric($lists[0])) ? ' '. $lists[0]++ .') ' : ' * ';
          break;
        case 'dd':
          $indent[] = ' ';
          break;
        case 'h3':
          $indent[] = '.... ';
          break;
        case 'h4':
          $indent[] = '.. ';
          break;
        case '/blockquote':
          if (count($lists)) {
            // Append closing quote for inline quotes (immediately).
            $output = rtrim($output, "> \n") ."\"\n";
            $chunk = ''; // Ensure blank new-line.
          }
          // Fall-through
        case '/li':
        case '/dd':
          array_pop($indent);
          break;
        case '/h3':
        case '/h4':
          array_pop($indent);
          // Fall-through
        case '/h5':
        case '/h6':
          $chunk = ''; // Ensure blank new-line.
          break;

        // Fancy headers
        case 'h1':
          $indent[] = '======== ';
          $casing = 'drupal_strtoupper';
          break;
        case 'h2':
          $indent[] = '-------- ';
          $casing = 'drupal_strtoupper';
          break;
        case '/h1':
        case '/h2':
          $casing = NULL;
          // Pad the line with dashes.
          $output = _mimemail_html_to_text_pad($output, ($tagname == '/h1') ? '=' : '-', ' ');
          array_pop($indent);
          $chunk = ''; // Ensure blank new-line.
          break;

        // Horizontal rulers
        case 'hr':
          // Insert immediately.
          $output .= mimemail_wrap_mail('', implode('', $indent)) ."\n";
          $output = _mimemail_html_to_text_pad($output, '-');
          break;

        // Paragraphs and definition lists
        case '/p':
        case '/dl':
          $chunk = ''; // Ensure blank new-line.
          break;
      }
    }
    // Process blocks of text.
    else {
      // Convert inline HTML text to plain text.
      $value = trim(preg_replace('/\s+/', ' ', decode_entities($value)));
      if (strlen($value)) {
        $chunk = $value;
      }
    }

    // See if there is something waiting to be output.
    if (isset($chunk)) {
      // Apply any necessary case conversion.
      if (isset($casing)) {
        $chunk = $casing($chunk);
      }
      // Format it and apply the current indentation.
      $output .= mimemail_wrap_mail($chunk, implode('', $indent)) ."\n";
      // Remove non-quotation markers from indentation.
      $indent = array_map('_mimemail_html_to_text_clean', $indent);
    }

    $tag = !$tag;
  }

  return $output . $footnotes;
}

/**
 * Helper function for array_walk in mimemail_wrap_mail().
 *
 * Wraps words on a single line.
 *
 * @param $line
 *   The line to wrap; modified in place.
 * @param $key
 *   The array key supplied by array_walk(); unused.
 * @param $values
 *   An array with the keys 'length' (number of columns to subtract from the
 *   available width) and 'soft' (whether to use soft line breaks, i.e. a
 *   space before the newline).
 */
function _mimemail_wrap_mail_line(&$line, $key, $values) {
  $break = $values['soft'] ? " \n" : "\n";
  // Use soft-breaks only for purely quoted or unindented text.
  $line = wordwrap($line, 77 - $values['length'], $break);
  // Break really long words at the maximum width allowed. The cut flag is
  // required here: without it wordwrap() never splits a word, so this second
  // pass would have no effect.
  $line = wordwrap($line, 996 - $values['length'], $break, TRUE);
}

/**
 * Helper function for mimemail_html_to_text().
 *
 * Keeps track of URLs and replaces them with placeholder tokens.
 *
 * @param $match
 *   (optional) A preg_replace_callback() match array in which index 2 is the
 *   URL and index 3 is the link label.
 * @param $reset
 *   (optional) If TRUE, the internal URL list is emptied.
 * @return
 *   When $match is given (and $reset is not set), the label followed by its
 *   footnote number in brackets. Otherwise the list of URLs collected so far.
 */
function _mimemail_html_to_mail_urls($match = NULL, $reset = FALSE) {
  global $base_url, $base_path;
  static $urls = array(), $regexp;

  if ($reset) {
    // Reset internal URL list.
    $urls = array();
  }
  else {
    if (empty($regexp)) {
      $regexp = '@^'. preg_quote($base_path, '@') .'@';
    }
    if ($match) {
      list(, , $url, $label) = $match;
      // Ensure all URLs are absolute.
      $urls[] = strpos($url, '://') ? $url : preg_replace($regexp, $base_url .'/', $url);
      return $label .' ['. count($urls) .']';
    }
  }
  return $urls;
}

/**
 * Helper function for mimemail_wrap_mail() and mimemail_html_to_text().
 *
 * Replace all non-quotation markers from a given piece of indentation with
 * spaces.
 *
 * @param $indent
 *   A piece of indentation.
 * @return
 *   The indentation with every character other than '>' replaced by a space.
 */
function _mimemail_html_to_text_clean($indent) {
  return preg_replace('/[^>]/', ' ', $indent);
}

/**
 * Helper function for mimemail_html_to_text().
 *
 * Pad the last line with the given character.
 *
 * @param $text
 *   The text whose last line is padded; its final character (the trailing
 *   line break) is removed before padding and a line break is re-appended.
 * @param $pad
 *   The character to pad with.
 * @param $prefix
 *   (optional) A string inserted between the text and the padding.
 * @return
 *   The padded text, terminated by a line break.
 */
function _mimemail_html_to_text_pad($text, $pad, $prefix = '') {
  // Remove last line break.
  $text = substr($text, 0, -1);
  // Calculate needed padding space and add it.
  if (($p = strrpos($text, "\n")) === FALSE) {
    $p = -1;
  }
  // Account for the prefix before clamping, so that the repeat count can
  // never become negative when the last line is already (nearly) full.
  $n = max(0, 79 - (strlen($text) - $p) - strlen($prefix));
  // Add prefix and padding, and restore linebreak.
  return $text . $prefix . str_repeat($pad, $n) ."\n";
}

/**
 * Scan input and make sure that all HTML tags are properly closed and nested.
 *
 * Copied from Drupal 6 filter.module.
 *
 * @param $text
 *   The HTML string to correct.
 * @return
 *   The corrected HTML string.
 */
function _mimemail_filter_htmlcorrector($text) {
  // Prepare tag lists. Only the keys are ever consulted (via isset()).
  static $no_nesting, $single_use;
  if (!isset($no_nesting)) {
    // Tags which cannot be nested but are typically left unclosed.
    $no_nesting = array_fill_keys(array('li', 'p'), TRUE);

    // Single use tags in HTML4
    $single_use = array_fill_keys(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'), TRUE);
  }

  // Properly entify angles.
  $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  $tag = FALSE; // Odd/even counter. Tag or no tag.
  $stack = array();
  $output = '';
  foreach ($split as $value) {
    // Process HTML tags.
    if ($tag) {
      list($tagname) = explode(' ', strtolower($value), 2);

      // Closing tag
      if (substr($tagname, 0, 1) === '/') {
        $tagname = substr($tagname, 1);
        // Discard XHTML closing tags for single use tags.
        if (!isset($single_use[$tagname])) {
          // See if we possibly have a matching opening tag on the stack.
          if (in_array($tagname, $stack)) {
            // Close other tags lingering first.
            do {
              $output .= '</'. $stack[0] .'>';
            } while (array_shift($stack) != $tagname);
          }
          // Otherwise, discard it.
        }
      }
      // Opening tag
      else {
        // See if we have an identical 'no nesting' tag already open and close it if found.
        if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
          $output .= '</'. array_shift($stack) .'>';
        }
        // Push non-single-use tags onto the stack
        if (!isset($single_use[$tagname])) {
          array_unshift($stack, $tagname);
        }
        // Add trailing slash to single-use tags as per X(HT)ML.
        else {
          $value = rtrim($value, ' /') .' /';
        }
        $output .= '<'. $value .'>';
      }
    }
    else {
      // Passthrough all text.
      $output .= $value;
    }
    $tag = !$tag;
  }
  // Close remaining tags.
  while (count($stack) > 0) {
    $output .= '</'. array_shift($stack) .'>';
  }
  return $output;
}