Above are tools for managing the migration of external data into Drupal. A typical
migration process would work as follows:
- Under Sources, upload the external data. The raw data is imported
without filtering into database tables on your server.
- Analyze each imported table. Each column of data in the table is scanned,
identifying uniqueness (implying a potential primary key),
empty columns (automatically hidden by default), possible data types, and data ranges
and sizes. Each column may be annotated to document its meaning, any special handling, etc.,
and may be checked Ignore to omit from views and the migration process.
- View the data for each imported table, to aid in the analysis and to mark Excluded
any rows you don\'t want to import.
');
module_load_include('inc', 'system', 'system.admin');
return system_admin_menu_block_page().$content;
}
function tw_sources() {
return drupal_get_form('_tw_sources_form');
}
function _tw_sources_form($form_state) {
$form['description'] = array(
'#prefix' => '',
'#value' => t('Tables managed by the Table Wizard module are listed here, each with the name of the database
table used to store the data, and statistics on the amount of data. Click the Analyze
link to view and edit information on the fields available in the table. Click on the table
name to view the data in the table and mark specific rows for exclusion.'),
'#suffix' => '
',
);
$header = array(
array('data' => ''),
array('data' => t('Table name'), 'field' => 'twt.tablename', 'sort' => 'asc'),
/* array('data' => t('Rows'), 'field' => 'mf.importrows'),
array('data' => t('Source'), 'field' => 'mf.sourcefile'),
array('data' => t('Size'), 'field' => 'mf.sourcesize'),*/
);
$sql = "SELECT * FROM {tw_tables} twt ORDER BY tablename";
$tablesort = tablesort_sql($header);
$result = pager_query($sql . $tablesort, 50);
while ($row = db_fetch_object($result)) {
$rows[] = array('data' =>
array(
l(t('Analyze'), 'admin/content/tw/sources/analyze/'. $row->tablename, array('html' => TRUE)),
l($row->tablename, 'admin/content/tw/sources/view/'. $row->tablename, array('html' => TRUE)),
/* $row->importrows,
$row->sourcefile,
format_size($row->sourcesize),*/
),
'class' => "tw-files-tr",
);
}
if (!isset($rows)) {
$rows[] = array(array('data' => t('No tables are managed here.'), 'colspan' => count($header)));
}
$form['tablelist'] = array(
'#value' => theme('table', $header, $rows, array('id' => 'tw-tables'))
);
// Keep each type's fields apart
$form['#tree'] = TRUE;
// Note that fieldset names ('delimited', 'htmldirectory', ...) are the source type names
$sourcefieldsets = _tw_invoke_all('form');
$form = array_merge($form, $sourcefieldsets);
// Support file uploads
$form['#attributes'] = array('enctype' => 'multipart/form-data');
return $form;
}
function _tw_sources_form_submit($form, &$form_state) {
$type = $form_state['clicked_button']['#parents'][0];
$values = $form_state['values'][$type];
$tableset = _tw_invoke_all('form_submit', $type, $values);
// TODO: Use Batch API (multiple large tables can take a while)
foreach ($tableset as $tables) {
_tw_sources_update($tables);
_tw_perform_analysis($tables['tablename']);
}
// Must clear the views cache so default views based on new tables get built/rebuilt
module_load_include('inc', 'views', 'includes/admin');
views_ui_tools_clear_cache();
}
function _tw_sources_update($tables) {
db_query("INSERT INTO {tw_tables}
(tablename)
VALUES('%s')",
$tables['tablename']);
$twtid = db_last_insert_id('tw_tables', 'twtid');
$i = 0;
foreach ($tables['schema']['fields'] as $colname => $info) {
// TODO: set posstypes if provided, override analysis
// PKs are assumed to be available FKs by default
db_query("INSERT INTO {tw_columns}
(twtid, colname, weight, primarykey, availablefk)
VALUES(%d, '%s', %d, %d, %d)",
$twtid, $colname, $i++, $info['pk'], $info['pk']);
}
return $twtid;
}
function _tw_perform_analysis($table) {
$twtid = db_result(db_query("SELECT twtid FROM {tw_tables}
WHERE tablename='%s'",
$table));
// Perform the analysis
$inspect = schema_invoke('inspect');
$schema = $inspect[$table];
foreach ($schema['fields'] as $colname => $coldef) {
$coltype = $coldef['type'];
$sql = "SELECT MIN($colname) FROM $table";
$minvalue = db_result(db_query($sql));
$sql = "SELECT MAX($colname) FROM $table";
$maxvalue = db_result(db_query($sql));
if (tw_column_type($coltype) == 'text') {
$sql = "SELECT MIN(CHAR_LENGTH($colname)) FROM $table";
$minlength = db_result(db_query($sql));
$sql = "SELECT MAX(CHAR_LENGTH($colname)) FROM $table";
$maxlength = db_result(db_query($sql));
if ($maxlength == 0) {
$isempty = TRUE;
} else {
$isempty = FALSE;
}
$sql = "UPDATE {tw_columns}
SET coltype='%s', isempty=%d, minlength=%d, maxlength=%d, minvalue=NULL, maxvalue=NULL, minstring='%s', maxstring='%s'
WHERE twtid=%d AND colname='%s'";
db_query($sql, $coltype, $isempty, $minlength, $maxlength, $minvalue, $maxvalue, $twtid, $colname);
} else {
if ($minvalue || $maxvalue) {
$isempty = FALSE;
} else {
$isempty = TRUE;
}
$sql = "UPDATE {tw_columns}
SET coltype='%s', isempty=%d, minlength=NULL, maxlength=NULL, minvalue=%d, maxvalue=%d, minstring=NULL, maxstring=NULL
WHERE twtid=%d AND colname='%s'";
db_query($sql, $coltype, $isempty, $minvalue, $maxvalue, $twtid, $colname);
}
}
}
/**
* Menu callback function.
*/
function tw_analysis($form_state, $table, $flag=TW_COLS_NONEMPTY) {
$twtid = db_result(db_query("SELECT twtid FROM {tw_tables}
WHERE tablename='%s'",
$table));
if (isset($_GET['reanalyze'])) {
_tw_perform_analysis($table);
}
$form['twtid'] = array('#type' => 'value', '#value' => $twtid);
$form['table'] = array('#type' => 'value', '#value' => $table);
switch ($flag) {
case TW_COLS_ALL:
$flaglabel = t('All fields');
break;
case TW_COLS_EMPTY:
$flaglabel = t('Empty fields only');
break;
case TW_COLS_NONEMPTY:
$flaglabel = t('Non-empty fields only');
break;
}
$form['help'] = array(
'#type' => 'fieldset',
'#title' => t('Help'),
'#collapsible' => TRUE,
'#collapsed' => TRUE,
);
$form['help']['help1'] = array(
'#prefix' => '',
'#value' => t('Fields in the table are listed below, with some information
on the range and type of values each field contains. Comments may be entered to
document each field individually, and any fields deemed unnecessary can be marked
Ignore to omit them from the Table View page and the migration
process.'),
'#suffix' => '
',
);
$form['help']['help2'] = array(
'#prefix' => '',
'#value' => t('The first time this page is visited for a file that has been
uploaded (including the first time after a new copy has been imported), the entire
contents are read and analyzed - this can take quite a while. The results of the analysis
are saved so later visits to the analysis page are quicker. Sometimes, if changes have
been made to the table behind the scenes, it\'s useful to force the full analysis
by clicking Reanalyze.'),
'#suffix' => '
',
);
$form['help']['help3'] = array(
'#prefix' => '',
'#value' => t('View contents goes to the Table View page for this table.
You may find it helpful to open this link in a new window, to browse the data in context
while figuring out its meaning for the comments here.'),
'#suffix' => '
',
);
$form['help']['columns'] = array(
'#prefix' => '',
'#value' => t('These are the columns in the table below:'),
'#suffix' => '
',
);
$form['help']['fieldname'] = array(
'#prefix' => '',
'#value' => t('Field name - The name of the field as imported into the database table.
This is not necessarily the same as the name in the original file - any non-alphanumeric
characters are replaced by underscores (_), and duplicate column names have sequence
numbers appended.'),
'#suffix' => '
',
);
$form['help']['ignore'] = array(
'#prefix' => '',
'#value' => t('Ignore - Checking this box for a field will omit that field from the
table view.'),
'#suffix' => '
',
);
$form['help']['pk'] = array(
'#prefix' => '',
'#value' => t('Potential PK/PK - Any field which could be used as a primary key for
the table (that is, each value is unique and there are no NULL values) is flagged with a 1.
The PK checkbox is used to define a field as the primary key for the table.'),
'#suffix' => '
',
);
$form['help']['fk'] = array(
'#prefix' => '',
'#value' => t('Available FK - Check this box to make the field available for use
in table relationships.'),
'#suffix' => '
',
);
$form['help']['strlength'] = array(
'#prefix' => '',
'#value' => t('String length - The ranges of string lengths in this field, which helps
determine whether the data will fit into the corresponding Drupal field when migrated.'),
'#suffix' => '
',
);
$form['help']['numrange'] = array(
'#prefix' => '',
'#value' => t('Numeric value range - For potential numeric fields, the range of numeric
values they contain (determined numerically - e.g., 10 is greater than 9).'),
'#suffix' => '
',
);
$form['help']['strrange'] = array(
'#prefix' => '',
'#value' => t('String value range - For all fields, the range of string values they
contained (determined by character order - e.g., 9 is greater than 10). It\'s important to
note that at this time all fields except the PK are being stored as strings, so sorting on
"numeric" fields in the Table View won\'t necessarily have the expected effect.'),
'#suffix' => '
',
);
$form['help']['comments'] = array(
'#prefix' => '',
'#value' => t('Comments - Description of the field - what it means, and what we will do
with it in the migration (ignore it, map it to a given user field or node field, etc.).'),
'#suffix' => '
',
);
$form['title'] = array('#value' => "Analysis of $table");
$form['header'] = array(
'#type' => 'value',
'#value' => array(
array('data' => t('Field name')),
array('data' => t('Ignore')),
array('data' => t('PK')),
array('data' => t('Available FK')),
array('data' => t('Current type')),
array('data' => t('String length')),
array('data' => t('Range')),
array('data' => t('Comments')),
),
);
$colcount = 0;
$emptycount = 0;
$result = db_query("SELECT *
FROM {tw_columns}
WHERE twtid=%d
ORDER BY weight",
$twtid);
$availablefks = array();
$availablefkenable = array();
$ignoredcols = array();
$ignoreenable = array();
$cols = array();
while ($row = db_fetch_object($result)) {
$colname = $row->colname;
$colcount++;
if ($row->isempty) {
$emptycount++;
}
if (($flag == TW_COLS_ALL) ||
(($flag == TW_COLS_EMPTY) && $row->isempty) ||
(($flag == TW_COLS_NONEMPTY) && !$row->isempty)) {
$cols[] = $colname;
$form['colname'][$colname] = array('#value' => $colname);
$ignoredcols[$colname] = '';
if ($row->ignorecol) {
$ignoreenable[] = $colname;
}
$form['primarykey'][$colname] = array('#value' => $row->primarykey);
$availablefks[$colname] = '';
if ($row->availablefk) {
$availablefkenable[] = $colname;
}
$form['currtype'][$colname] = array('#value' => $row->coltype);
if (tw_column_type($row->coltype) == 'text') {
$form['lengths'][$colname] = array('#value' =>
$row->maxlength ? $row->minlength.'-'.$row->maxlength : ''
);
$form['values'][$colname] = array(
'#attributes' => array('style' => 'white-space: normal'),
'#prefix' => '',
'#value' =>
$row->maxlength ? substr(check_plain($row->minstring),0,40).'
'.
substr(check_plain($row->maxstring),0,40) : '',
'#suffix' => '',
);
} else {
$form['lengths'][$colname] = array('#value' => '');
$form['values'][$colname] = array('#value' =>
$row->maxvalue ? $row->minvalue.'-'.$row->maxvalue : ''
);
}
$form['comments'][$colname] = array(
'#type' => 'textarea',
'#default_value' => $row->comments,
'#cols' => 45,
'#rows' => 3,
'#resizable' => FALSE,
);
}
}
$form['availablefk'] = array(
'#type' => 'checkboxes',
'#options' => $availablefks,
'#default_value' => $availablefkenable,
);
$form['ignorecol'] = array(
'#type' => 'checkboxes',
'#options' => $ignoredcols,
'#default_value' => $ignoreenable,
);
$form['cols'] = array(
'#type' => 'value',
'#value' => $cols,
);
$form['colcounts'] = array('#value' => "$emptycount of $colcount fields have no data for any row.");
$form['submit'] = array(
'#type' => 'submit',
'#value' => t('Submit changes'),
);
return $form;
}
function theme_tw_analysis($form) {
$title = drupal_render($form['title']);
$table = $form['table']['#value'];
drupal_set_title($title);
$output = '';
$output .= 'Operations on this table: ';
$output .= l(t('Reanalyze'),
"admin/content/tw/sources/analyze/$table/",
array('html' => TRUE, 'query' => 'reanalyze=1')).' | ';
$output .= l(t('View table contents'),
"admin/content/tw/sources/view/$table/",
array('html' => TRUE));
$output .= '
';
$output .= '';
$output .= 'Filter: ';
$output .= l(t('Show all fields'),
"admin/content/tw/sources/analyze/$table/".TW_COLS_ALL,
array('html' => TRUE)).' | ';
$output .= l(t('Show only fields that are empty for all rows'),
"admin/content/tw/sources/analyze/$table/".TW_COLS_EMPTY,
array('html' => TRUE)).' | ';
$output .= l(t('Show only fields which have data for at least one row'),
"admin/content/tw/sources/analyze/$table/".TW_COLS_NONEMPTY,
array('html' => TRUE));
$output .= '
';
$output .= ''.drupal_render($form['colcounts']).'
';
$output .= drupal_render($form['help']);
if (isset($form['colname']) && is_array($form['colname'])) {
foreach(element_children($form['colname']) as $colname) {
$row = array();
$row[] = drupal_render($form['colname'][$colname]);
$row[] = drupal_render($form['ignorecol'][$colname]);
$row[] = drupal_render($form['primarykey'][$colname]);
$row[] = drupal_render($form['availablefk'][$colname]);
$row[] = drupal_render($form['currtype'][$colname]);
$row[] = drupal_render($form['lengths'][$colname]);
$row[] = drupal_render($form['values'][$colname]);
$row[] = drupal_render($form['comments'][$colname]);
$rows[] = $row;
}
}
$header = $form['header']['#value'];
if (!isset($rows)) {
$rows[] = array(array('data' => t('No data in the table.'), 'colspan' => count($header)));
}
$output .= theme('table', $header, $rows);
$output .= drupal_render($form);
return $output;
}
/**
* Implementation of hook_submit()
*/
function tw_analysis_submit($form, &$form_state) {
$twtid = $form_state['values']['twtid'];
foreach($form_state['values']['cols'] as $key => $colname) {
$newignore = $form_state['values']['ignorecol'][$colname] ? 1 : 0;
$newavailablefk = $form_state['values']['availablefk'][$colname] ? 1 : 0;
$newcomment = $form_state['values'][$colname];
$newfk = $form_state['values'][$colname.'_fk'];
db_query("UPDATE {tw_columns}
SET ignorecol=%d, availablefk=%d, comments='%s'
WHERE twtid=%d AND colname='%s'",
$newignore, $newavailablefk, $newcomment, $twtid, $colname);
}
module_load_include('inc', 'views', 'includes/admin');
views_ui_tools_clear_cache();
drupal_set_message('Changes saved');
}
function tw_relationships() {
return(drupal_get_form('_tw_relationships_form'));
}
function _tw_relationships_form($form_state) {
$form['description'] = array(
'#prefix' => '',
'#value' => t('Define relationships between tables, enabling the creation
of views joining those tables.'),
'#suffix' => '
',
);
$header = array(
array('data' => t('Left column')),
array('data' => t('Right column')),
);
$sql = "SELECT CONCAT(twtleft.tablename, '.', twcleft.colname) leftcol,
CONCAT(twtright.tablename, '.', twcright.colname) rightcol
FROM {tw_relationships} twr
INNER JOIN {tw_columns} twcleft ON twr.leftcol=twcleft.twcid
INNER JOIN {tw_tables} twtleft ON twcleft.twtid=twtleft.twtid
INNER JOIN {tw_columns} twcright ON twr.rightcol=twcright.twcid
INNER JOIN {tw_tables} twtright ON twcright.twtid=twtright.twtid
ORDER BY leftcol";
$result = db_query($sql);
while ($row = db_fetch_object($result)) {
$rows[] = array('data' =>
array(
$row->leftcol,
$row->rightcol,
),
'class' => "tw-relationships-tr",
);
}
if (!isset($rows)) {
$rows[] = array(array('data' => t('No relationships have been defined'), 'colspan' => count($header)));
}
$form['rellist'] = array(
'#value' => theme('table', $header, $rows, array('id' => 'tw-relationships')),
);
$form['addrel'] = array(
'#type' => 'fieldset',
'#title' => t('Add a relationship'),
'#collapsible' => TRUE,
);
$sql = "SELECT twc.twcid, twc.colname, twt.tablename
FROM {tw_columns} twc
INNER JOIN {tw_tables} twt ON twc.twtid=twt.twtid
WHERE twc.availablefk=1
ORDER BY tablename, colname";
$result = db_query($sql);
while ($row = db_fetch_array($result)) {
$options[$row['twcid']] = $row['tablename'].'.'.$row['colname'];
}
$form['addrel']['leftcol'] = array(
'#type' => 'select',
'#title' => t('Column to appear on the left side of the join'),
'#options' => $options,
);
$form['addrel']['rightcol'] = array(
'#type' => 'select',
'#title' => t('Column to appear on the right side of the join'),
'#options' => $options,
);
$form['addrel']['add'] = array(
'#type' => 'submit',
'#value' => t('Add'),
);
return $form;
}
function _tw_relationships_form_submit($form, &$form_state) {
$leftcol = $form_state['values']['leftcol'];
$rightcol = $form_state['values']['rightcol'];
// @TODO: Create an index for any field referenced in the criteria (or suggest it)
db_query("INSERT INTO {tw_relationships}
(leftcol, rightcol)
VALUES(%d, %d)",
$leftcol, $rightcol);
// Must clear the views cache so the new relationships become available
module_load_include('inc', 'views', 'includes/admin');
views_ui_tools_clear_cache();
}