'fieldset', '#title' => t('Upload delimited files'), '#collapsible' => TRUE, '#collapsed' => TRUE, 'uploadhelp' => array( '#prefix' => '
', '#value' => t('Uploading a new file imports the contents of the file into a database table and adds it to the list above. Uploading a new copy of a previously-uploaded file replaces the file data - it should be reanalyzed before doing anything further. Note that any existing comments, ignored fields, and excluded rows are preserved when reloading the data.'), '#suffix' => '
', ), 'file' => array( '#type' => 'file', '#title' => t('Upload file'), '#size' => 48, '#description' => t('File containing the data to be imported.'), ), 'file_format' => array( '#type' => 'select', '#title' => t('File format'), '#options' => _tw_delimited_get_file_formats(), '#default_value' => '_tw_tsv_get_row', ), 'delimitedsubmit' => array( '#type' => 'submit', '#value' => t('Import new file'), ), ); return $fieldsets; } /** * Implementation of hook_tw_form_submit * * @param unknown_type $values * @return unknown */ function tw_import_delimited_tw_form_submit_delimited($values) { $file = file_save_upload('delimited'); if ($file) { $files = array(); $dest = _tw_delimited_file_name($file->filename); if ($dest) { file_move($file, $dest, FILE_EXISTS_REPLACE); $files['sourcesize'] = _tw_delimited_getsize($dest); $format = $values['file_format']; } else { drupal_set_message("Could not upload $file->filename"); return NULL; } $formats = _tw_delimited_get_file_formats(); if (!isset($formats[$format])) { form_set_error('file_format', t('You need to select a format from the list.')); return NULL; } $function = $format; $headers = $function($file->filepath, TRUE); $pieces = explode('.', $file->filename); $filename = $pieces[0]; // Lowercase table names make views and flags integration much easier $tablename = strtolower(preg_replace('/[^a-z0-9]/i', '_', $filename)); $files['sourcefile'] = $filename; // Truncate to 32 characters for views integration $tablename = substr($tablename, 0, 32); $files['importtable'] = $tablename; $schema = array( 'description' => t('uploaded data file'), ); $seen = array(); foreach ($headers as $i => $coltitle) { $colname = preg_replace('/[^a-z0-9]/i', '_', $coltitle); if (!isset($seen[$coltitle])) { $seen[$coltitle] = 1; } else { $seen[$coltitle]++; } if ($seen[$coltitle] > 1) { $colname = $coltitle . '_' . $seen[$coltitle]; } $schema['fields'][$colname] = array( 'type' => 'text', 'size' => 'medium', 'not null' => FALSE, 'default' => '', ); } if (db_table_exists($tablename)) { db_query("TRUNCATE TABLE {$tablename}"); $prevexists = TRUE; } else { db_create_table($ret, $tablename, $schema); $prevexists = FALSE; } // @TODO: MySQL-specific // Alternative - load row-by-row (more control, more portable, can pick up warnings for bad // rows and automatically skip null rows etc., but much slower) if ($format == '_tw_delimited_csv_get_row') { $importsql = "LOAD DATA LOCAL INFILE '"._tw_delimited_file_name($file->filename). "' INTO TABLE $tablename FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ". "ESCAPED BY '\"' IGNORE 1 LINES"; } else { $importsql = "LOAD DATA LOCAL INFILE '"._tw_delimited_file_name($file->filename). "' INTO TABLE $tablename FIELDS ESCAPED BY '' LINES TERMINATED BY '\n' IGNORE 1 LINES"; } drupal_set_message($importsql); $result = db_query($importsql); drupal_set_message("result: ".dpr($result, TRUE)); if ($result == 1) { $resultstring = print_r(mysql_info(), TRUE); if ($resultstring) { drupal_set_message("Results of import: $resultstring"); } $files['schema'] = $schema; return array($files); } else { drupal_set_message('Results: '.print_r($result,TRUE)); } } else { drupal_set_message('Failed to upload delimited file'); } } function _tw_delimited_getsize($path){ if (!is_dir($path)) { return filesize($path); } $size = 0; foreach (scandir($path) as $file) { if ($file=='.' or $file=='..') { continue; } $size += filesize($path.'/'.$file); } return $size; } function _tw_delimited_file_name($file) { $dir = str_replace('\\', '/', getcwd()).'/'.file_directory_path() . '/tw_delimited'; if (file_check_directory($dir, TRUE)) { return $dir . '/' . $file; } else { return FALSE; } } /************************************************************************ * Support for different file formats. ************************************************************************/ /** * Return an option list of supported formats. This will always * include 'auto'. */ function _tw_delimited_get_file_formats() { return array( '_tw_delimited_tsv_get_row' => t('Tab Separated Values (TSV) text file'), '_tw_delimited_csv_get_row' => t('Comma Separated Values (CSV) text file'), ); } /** * Get one row from the CSV file and return it as an array of * columns/fields. */ function _tw_delimited_csv_get_row($filepath, $reset = FALSE) { static $handle; $size = variable_get('tw_delimited_csv_size', 10000); $separator = variable_get('tw_delimited_csv_separator', ','); if ($filepath == '') { unset($handle); return FALSE; } if (!isset($handle) || $reset) { if (isset($handle)) { fclose($handle); } $handle = fopen($filepath, 'r'); } // fgetcsv() is rubbish so replace it with our custom csv parser return fgetcsv($handle, $size, $separator); } /** * Get one row from the TSV file and return it as an array of * columns/fields. */ function _tw_delimited_tsv_get_row($filepath, $reset = FALSE) { static $separator; if (!$separator) { $separator = variable_get('tw_delimited_tsv_separator', "\t"); } if ($filepath == '') { return FALSE; } $file = fopen($filepath, 'r'); if (!$file) { return FALSE; } $line = fgets($file); fclose($file); if ($line != '') { return explode($separator, $line); } return FALSE; } /** * fgetcsv() is buggy with special chars at the beginning of fields, * let's create our own fgetcsv(). * * main code taken from http://de.php.net/manual/en/function.fgetcsv.php#75332 * function changed to have a prototype like fgetcvs() and not take * a string parameter * * Taken from http://drupal.org/node/158413 */ function _tw_delimited_fgetcsv($handle, $length, $delimiter = ',', $qualifier = '"', $qualifierEscape = '\\') { $str = fgets($handle, $length); $fields = array(); while (strlen($str) > 0) { if ($str{0} == $delimiter) $str = substr($str, 1); if ($str{0} == $qualifier) { $value = ''; for ($i = 1; $i < strlen($str); $i++) { if (($str{$i} == $qualifier) && ($str{$i-1} != $qualifierEscape)) { $str = substr($str, (strlen($value) + 2)); $value = str_replace(($qualifierEscape.$qualifier), $qualifier, $value); break; } $value .= $str{$i}; } } else { $end = strpos($str, $delimiter); $value = ($end !== false) ? substr($str, 0, $end) : $str; $str = substr($str, strlen($value)); } $fields[] = $value; } return $fields; } /** * Split a line from a CSV file into fields. Should handle various field * delimiters, escape characters and enclosing quotes for fields * * @param string $line Line to split for the CSV * @param string $delimiter Field delimiter (defaults to ',') * @param string $escaper Field escaper (defaults to '\') * @param string $encloser Field encloser (defaults to '"') * * Taken from http://recurser.com/articles/2007/08/09/multibyte-safe-csv-parser/ */ function splitCsvLine($line, $delimiter=',', $encloser='"', $escaper='\\') { $fields = array(); if (is_string($delimiter) && is_string($encloser)) : // Make sure the arguments are regex-safe $reg_safe_delimiter = '\x' . dechex(ord($delimiter)); $reg_safe_encloser = '\x' . dechex(ord($encloser)); $reg_safe_escaper = '\x' . dechex(ord($escaper)); $line = trim($line); // Replace any 'quote-escaped' quotes within fields. This is only really necessary to // handle excel exports, which often escape double quotes with double quotes. if ($encloser == $escaper) : $line = mb_ereg_replace($reg_safe_escaper . $reg_safe_encloser, "__ESCAPED__ENCLOSER__", $line); endif; // Loop over the string and extract each field $field_num = 0; while(mb_strlen($line) > 0) : if(mb_substr($line, 0, 1) == $encloser) : // If this string starts with an encloser, look for the next (non-escaped) encloser preg_match('/^' . $reg_safe_encloser . '((?:[^' . $reg_safe_encloser . ']|(?<='.$reg_safe_escaper.')' . $reg_safe_encloser . ')*)' . $reg_safe_encloser . $reg_safe_delimiter . '?(.*)$/', $line, $matches); $value = mb_ereg_replace($reg_safe_escaper . $reg_safe_encloser, $encloser, $matches[1]); $line = trim($matches[2]); $fields[$field_num++] = $value; // Otherwise, look for the next (non-escaped) delimiter else : preg_match('/^((?:[^' . $reg_safe_delimiter . ']|(?<='.$reg_safe_escaper.')' . $reg_safe_delimiter . ')*)' . $reg_safe_delimiter . '?(.*)$/', $line, $matches); $value = mb_ereg_replace($reg_safe_escaper . $reg_safe_delimiter, $delimiter, $matches[1]); $line = trim($matches[2]); $fields[$field_num++] = $value; endif; endwhile; if ($encloser == $escaper) : for ($i=0; $i