Above are tools for managing the migration of external data into Drupal. A typical migration process would work as follows:
  1. Under Sources, upload the external data. The raw data is imported without filtering into database tables on your server.
  2. Analyze each imported table. Each column of data in the table is scanned, identifying uniqueness (implying a potential primary key), empty columns (automatically hidden by default), possible data types, and data ranges and sizes. Each column may be annotated to document its meaning, any special handling, etc., and may be checked Ignore to omit from views and the migration process.
  3. View the data for each imported table, to aid in the analysis and to mark Excluded any rows you don\'t want to import.
'); module_load_include('inc', 'system', 'system.admin'); return system_admin_menu_block_page().$content; } function tw_sources() { return drupal_get_form('_tw_sources_form'); } function _tw_sources_form($form_state) { $form['description'] = array( '#prefix' => '
', '#value' => t('Tables managed by the Table Wizard module are listed here, each with the name of the database table used to store the data, and statistics on the amount of data. Click the Analyze link to view and edit information on the fields available in the table. Click on the table name to view the data in the table and mark specific rows for exclusion.'), '#suffix' => '
', ); $header = array( array('data' => ''), array('data' => t('Table name'), 'field' => 'twt.tablename', 'sort' => 'asc'), /* array('data' => t('Rows'), 'field' => 'mf.importrows'), array('data' => t('Source'), 'field' => 'mf.sourcefile'), array('data' => t('Size'), 'field' => 'mf.sourcesize'),*/ ); $sql = "SELECT * FROM {tw_tables} twt ORDER BY tablename"; $tablesort = tablesort_sql($header); $result = pager_query($sql . $tablesort, 50); while ($row = db_fetch_object($result)) { $rows[] = array('data' => array( l(t('Analyze'), 'admin/content/tw/sources/analyze/'. $row->tablename, array('html' => TRUE)), l($row->tablename, 'admin/content/tw/sources/view/'. $row->tablename, array('html' => TRUE)), /* $row->importrows, $row->sourcefile, format_size($row->sourcesize),*/ ), 'class' => "tw-files-tr", ); } if (!isset($rows)) { $rows[] = array(array('data' => t('No tables are managed here.'), 'colspan' => count($header))); } $form['tablelist'] = array( '#value' => theme('table', $header, $rows, array('id' => 'tw-tables')) ); // Keep each type's fields apart $form['#tree'] = TRUE; // Note that fieldset names ('delimited', 'htmldirectory', ...) are the source type names $sourcefieldsets = _tw_invoke_all('form'); $form = array_merge($form, $sourcefieldsets); // Support file uploads $form['#attributes'] = array('enctype' => 'multipart/form-data'); return $form; } function _tw_sources_form_submit($form, &$form_state) { $type = $form_state['clicked_button']['#parents'][0]; $values = $form_state['values'][$type]; $tableset = _tw_invoke_all('form_submit', $type, $values); // TODO: Use Batch API (multiple large tables can take a while) foreach ($tableset as $tables) { _tw_sources_update($tables); _tw_perform_analysis($tables['tablename']); } // Must clear the views cache so default views based on new tables get built/rebuilt module_load_include('inc', 'views', 'includes/admin'); views_ui_tools_clear_cache(); } function _tw_sources_update($tables) { db_query("INSERT INTO {tw_tables} (tablename) VALUES('%s')", $tables['tablename']); $twtid = db_last_insert_id('tw_tables', 'twtid'); $i = 0; foreach ($tables['schema']['fields'] as $colname => $info) { // TODO: set posstypes if provided, override analysis // PKs are assumed to be available FKs by default db_query("INSERT INTO {tw_columns} (twtid, colname, weight, primarykey, availablefk) VALUES(%d, '%s', %d, %d, %d)", $twtid, $colname, $i++, $info['pk'], $info['pk']); } return $twtid; } function _tw_perform_analysis($table) { $twtid = db_result(db_query("SELECT twtid FROM {tw_tables} WHERE tablename='%s'", $table)); // Perform the analysis $inspect = schema_invoke('inspect'); $schema = $inspect[$table]; foreach ($schema['fields'] as $colname => $coldef) { $coltype = $coldef['type']; $sql = "SELECT MIN($colname) FROM $table"; $minvalue = db_result(db_query($sql)); $sql = "SELECT MAX($colname) FROM $table"; $maxvalue = db_result(db_query($sql)); if (tw_column_type($coltype) == 'text') { $sql = "SELECT MIN(CHAR_LENGTH($colname)) FROM $table"; $minlength = db_result(db_query($sql)); $sql = "SELECT MAX(CHAR_LENGTH($colname)) FROM $table"; $maxlength = db_result(db_query($sql)); if ($maxlength == 0) { $isempty = TRUE; } else { $isempty = FALSE; } $sql = "UPDATE {tw_columns} SET coltype='%s', isempty=%d, minlength=%d, maxlength=%d, minvalue=NULL, maxvalue=NULL, minstring='%s', maxstring='%s' WHERE twtid=%d AND colname='%s'"; db_query($sql, $coltype, $isempty, $minlength, $maxlength, $minvalue, $maxvalue, $twtid, $colname); } else { if ($minvalue || $maxvalue) { $isempty = FALSE; } else { $isempty = TRUE; } $sql = "UPDATE {tw_columns} SET coltype='%s', isempty=%d, minlength=NULL, maxlength=NULL, minvalue=%d, maxvalue=%d, minstring=NULL, maxstring=NULL WHERE twtid=%d AND colname='%s'"; db_query($sql, $coltype, $isempty, $minvalue, $maxvalue, $twtid, $colname); } } } /** * Menu callback function. */ function tw_analysis($form_state, $table, $flag=TW_COLS_NONEMPTY) { $twtid = db_result(db_query("SELECT twtid FROM {tw_tables} WHERE tablename='%s'", $table)); if (isset($_GET['reanalyze'])) { _tw_perform_analysis($table); } $form['twtid'] = array('#type' => 'value', '#value' => $twtid); $form['table'] = array('#type' => 'value', '#value' => $table); switch ($flag) { case TW_COLS_ALL: $flaglabel = t('All fields'); break; case TW_COLS_EMPTY: $flaglabel = t('Empty fields only'); break; case TW_COLS_NONEMPTY: $flaglabel = t('Non-empty fields only'); break; } $form['help'] = array( '#type' => 'fieldset', '#title' => t('Help'), '#collapsible' => TRUE, '#collapsed' => TRUE, ); $form['help']['help1'] = array( '#prefix' => '
', '#value' => t('Fields in the table are listed below, with some information on the range and type of values each field contains. Comments may be entered to document each field individually, and any fields deemed unnecessary can be marked Ignore to omit them from the Table View page and the migration process.'), '#suffix' => '
', ); $form['help']['help2'] = array( '#prefix' => '
', '#value' => t('The first time this page is visited for a file that has been uploaded (including the first time after a new copy has been imported), the entire contents are read and analyzed - this can take quite a while. The results of the analysis are saved so later visits to the analysis page are quicker. Sometimes, if changes have been made to the table behind the scenes, it\'s useful to force the full analysis by clicking Reanalyze.'), '#suffix' => '
', ); $form['help']['help3'] = array( '#prefix' => '
', '#value' => t('View contents goes to the Table View page for this table. You may find it helpful to open this link in a new window, to browse the data in context while figuring out its meaning for the comments here.'), '#suffix' => '
', ); $form['help']['columns'] = array( '#prefix' => '
', '#value' => t('These are the columns in the table below:'), '#suffix' => '
', ); $form['help']['fieldname'] = array( '#prefix' => '
', '#value' => t('Field name - The name of the field as imported into the database table. This is not necessarily the same as the name in the original file - any non-alphanumeric characters are replaced by underscores (_), and duplicate column names have sequence numbers appended.'), '#suffix' => '
', ); $form['help']['ignore'] = array( '#prefix' => '
', '#value' => t('Ignore - Checking this box for a field will omit that field from the table view.'), '#suffix' => '
', ); $form['help']['pk'] = array( '#prefix' => '
', '#value' => t('Potential PK/PK - Any field which could be used as a primary key for the table (that is, each value is unique and there are no NULL values) is flagged with a 1. The PK checkbox is used to define a field as the primary key for the table.'), '#suffix' => '
', ); $form['help']['fk'] = array( '#prefix' => '
', '#value' => t('Available FK - Check this box to make the field available for use in table relationships.'), '#suffix' => '
', ); $form['help']['strlength'] = array( '#prefix' => '
', '#value' => t('String length - The ranges of string lengths in this field, which helps determine whether the data will fit into the corresponding Drupal field when migrated.'), '#suffix' => '
', ); $form['help']['numrange'] = array( '#prefix' => '
', '#value' => t('Numeric value range - For potential numeric fields, the range of numeric values they contain (determined numerically - e.g., 10 is greater than 9).'), '#suffix' => '
', ); $form['help']['strrange'] = array( '#prefix' => '
', '#value' => t('String value range - For all fields, the range of string values they contained (determined by character order - e.g., 9 is greater than 10). It\'s important to note that at this time all fields except the PK are being stored as strings, so sorting on "numeric" fields in the Table View won\'t necessarily have the expected effect.'), '#suffix' => '
', ); $form['help']['comments'] = array( '#prefix' => '
', '#value' => t('Comments - Description of the field - what it means, and what we will do with it in the migration (ignore it, map it to a given user field or node field, etc.).'), '#suffix' => '
', ); $form['title'] = array('#value' => "Analysis of $table"); $form['header'] = array( '#type' => 'value', '#value' => array( array('data' => t('Field name')), array('data' => t('Ignore')), array('data' => t('PK')), array('data' => t('Available FK')), array('data' => t('Current type')), array('data' => t('String length')), array('data' => t('Range')), array('data' => t('Comments')), ), ); $colcount = 0; $emptycount = 0; $result = db_query("SELECT * FROM {tw_columns} WHERE twtid=%d ORDER BY weight", $twtid); $availablefks = array(); $availablefkenable = array(); $ignoredcols = array(); $ignoreenable = array(); $cols = array(); while ($row = db_fetch_object($result)) { $colname = $row->colname; $colcount++; if ($row->isempty) { $emptycount++; } if (($flag == TW_COLS_ALL) || (($flag == TW_COLS_EMPTY) && $row->isempty) || (($flag == TW_COLS_NONEMPTY) && !$row->isempty)) { $cols[] = $colname; $form['colname'][$colname] = array('#value' => $colname); $ignoredcols[$colname] = ''; if ($row->ignorecol) { $ignoreenable[] = $colname; } $form['primarykey'][$colname] = array('#value' => $row->primarykey); $availablefks[$colname] = ''; if ($row->availablefk) { $availablefkenable[] = $colname; } $form['currtype'][$colname] = array('#value' => $row->coltype); if (tw_column_type($row->coltype) == 'text') { $form['lengths'][$colname] = array('#value' => $row->maxlength ? $row->minlength.'-'.$row->maxlength : '' ); $form['values'][$colname] = array( '#attributes' => array('style' => 'white-space: normal'), '#prefix' => '
', '#value' => $row->maxlength ? substr(check_plain($row->minstring),0,40).'
'. substr(check_plain($row->maxstring),0,40) : '', '#suffix' => '
', ); } else { $form['lengths'][$colname] = array('#value' => ''); $form['values'][$colname] = array('#value' => $row->maxvalue ? $row->minvalue.'-'.$row->maxvalue : '' ); } $form['comments'][$colname] = array( '#type' => 'textarea', '#default_value' => $row->comments, '#cols' => 45, '#rows' => 3, '#resizable' => FALSE, ); } } $form['availablefk'] = array( '#type' => 'checkboxes', '#options' => $availablefks, '#default_value' => $availablefkenable, ); $form['ignorecol'] = array( '#type' => 'checkboxes', '#options' => $ignoredcols, '#default_value' => $ignoreenable, ); $form['cols'] = array( '#type' => 'value', '#value' => $cols, ); $form['colcounts'] = array('#value' => "$emptycount of $colcount fields have no data for any row."); $form['submit'] = array( '#type' => 'submit', '#value' => t('Submit changes'), ); return $form; } function theme_tw_analysis($form) { $title = drupal_render($form['title']); $table = $form['table']['#value']; drupal_set_title($title); $output = ''; $output .= ''; $output .= '
'.drupal_render($form['colcounts']).'
'; $output .= drupal_render($form['help']); if (isset($form['colname']) && is_array($form['colname'])) { foreach(element_children($form['colname']) as $colname) { $row = array(); $row[] = drupal_render($form['colname'][$colname]); $row[] = drupal_render($form['ignorecol'][$colname]); $row[] = drupal_render($form['primarykey'][$colname]); $row[] = drupal_render($form['availablefk'][$colname]); $row[] = drupal_render($form['currtype'][$colname]); $row[] = drupal_render($form['lengths'][$colname]); $row[] = drupal_render($form['values'][$colname]); $row[] = drupal_render($form['comments'][$colname]); $rows[] = $row; } } $header = $form['header']['#value']; if (!isset($rows)) { $rows[] = array(array('data' => t('No data in the table.'), 'colspan' => count($header))); } $output .= theme('table', $header, $rows); $output .= drupal_render($form); return $output; } /** * Implementation of hook_submit() */ function tw_analysis_submit($form, &$form_state) { $twtid = $form_state['values']['twtid']; foreach($form_state['values']['cols'] as $key => $colname) { $newignore = $form_state['values']['ignorecol'][$colname] ? 1 : 0; $newavailablefk = $form_state['values']['availablefk'][$colname] ? 1 : 0; $newcomment = $form_state['values'][$colname]; $newfk = $form_state['values'][$colname.'_fk']; db_query("UPDATE {tw_columns} SET ignorecol=%d, availablefk=%d, comments='%s' WHERE twtid=%d AND colname='%s'", $newignore, $newavailablefk, $newcomment, $twtid, $colname); } module_load_include('inc', 'views', 'includes/admin'); views_ui_tools_clear_cache(); drupal_set_message('Changes saved'); } function tw_relationships() { return(drupal_get_form('_tw_relationships_form')); } function _tw_relationships_form($form_state) { $form['description'] = array( '#prefix' => '
', '#value' => t('Define relationships between tables, enabling the creation of views joining those tables.'), '#suffix' => '
', ); $header = array( array('data' => t('Left column')), array('data' => t('Right column')), ); $sql = "SELECT CONCAT(twtleft.tablename, '.', twcleft.colname) leftcol, CONCAT(twtright.tablename, '.', twcright.colname) rightcol FROM {tw_relationships} twr INNER JOIN {tw_columns} twcleft ON twr.leftcol=twcleft.twcid INNER JOIN {tw_tables} twtleft ON twcleft.twtid=twtleft.twtid INNER JOIN {tw_columns} twcright ON twr.rightcol=twcright.twcid INNER JOIN {tw_tables} twtright ON twcright.twtid=twtright.twtid ORDER BY leftcol"; $result = db_query($sql); while ($row = db_fetch_object($result)) { $rows[] = array('data' => array( $row->leftcol, $row->rightcol, ), 'class' => "tw-relationships-tr", ); } if (!isset($rows)) { $rows[] = array(array('data' => t('No relationships have been defined'), 'colspan' => count($header))); } $form['rellist'] = array( '#value' => theme('table', $header, $rows, array('id' => 'tw-relationships')), ); $form['addrel'] = array( '#type' => 'fieldset', '#title' => t('Add a relationship'), '#collapsible' => TRUE, ); $sql = "SELECT twc.twcid, twc.colname, twt.tablename FROM {tw_columns} twc INNER JOIN {tw_tables} twt ON twc.twtid=twt.twtid WHERE twc.availablefk=1 ORDER BY tablename, colname"; $result = db_query($sql); while ($row = db_fetch_array($result)) { $options[$row['twcid']] = $row['tablename'].'.'.$row['colname']; } $form['addrel']['leftcol'] = array( '#type' => 'select', '#title' => t('Column to appear on the left side of the join'), '#options' => $options, ); $form['addrel']['rightcol'] = array( '#type' => 'select', '#title' => t('Column to appear on the right side of the join'), '#options' => $options, ); $form['addrel']['add'] = array( '#type' => 'submit', '#value' => t('Add'), ); return $form; } function _tw_relationships_form_submit($form, &$form_state) { $leftcol = $form_state['values']['leftcol']; $rightcol = $form_state['values']['rightcol']; // @TODO: Create an index for any field referenced in the criteria (or suggest it) db_query("INSERT INTO {tw_relationships} (leftcol, rightcol) VALUES(%d, %d)", $leftcol, $rightcol); // Must clear the views cache so the new relationships become available module_load_include('inc', 'views', 'includes/admin'); views_ui_tools_clear_cache(); }