'Calais Node Settings',
    'description' => 'Configurations for Calais Integration with content nodes',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('calais_admin_settings'),
    'access arguments' => array('administer calais'),
    'type' => MENU_LOCAL_TASK
  );
  $items['admin/settings/calais/bulk-process'] = array(
    'title' => 'Calais Bulk Processing',
    'description' => 'Submits all Nodes of a specific type to Calais for processing',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('calais_bulk_process'),
    'access arguments' => array('administer calais'),
    'weight' => 10,
    'type' => MENU_LOCAL_TASK
  );
  $items['node/%node/calais'] = array(
    'title' => 'Calais',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('calais_keywords_form', 1),
    'access callback' => 'calais_access',
    'access arguments' => array(1),
    'weight' => 2,
    'type' => MENU_LOCAL_TASK
  );
  $items['node/%/calais/rdf'] = array(
    'title' => 'Calais',
    'page callback' => 'calais_get_rdf',
    'page arguments' => array(1),
    'access callback' => 'user_access',
    'access arguments' => array('access calais rdf'),
    'type' => MENU_CALLBACK
  );

  return $items;
}

/**
 * Implementation of hook_preprocess_node().
 *
 * If RDF exists for a node, and the current user may access Calais RDF,
 * provide an autodiscovery link pointing at node/<nid>/calais/rdf.
 *
 * @param $variables
 *   The node template variables; only $variables['node'] is read.
 */
function calais_preprocess_node(&$variables) {
  $node = $variables['node'];

  // Test the permission first: it is cheap, and it avoids querying the RDF
  // store for every rendered node when the user could never see the link.
  if (!user_access('access calais rdf')) {
    return;
  }

  // NOTE(review): the original comment said the link is added "when the node
  // is displayed by itself as a page", but nothing here restricts it to full
  // page views -- confirm whether a page-only check was intended.
  if (rdf_count(NULL, NULL, NULL, calais_rdf_options($node->nid))) {
    rdf_add_autodiscovery_link(t('Calais RDF'), url("node/$node->nid/calais/rdf"), 'rdf+xml');
  }
}

/**
 * Provide the Calais RDF describing a node.
*
 * Prints the serialized RDF directly and sets the response headers; nothing
 * is returned on success.
 *
 * @param $nid
 *   The node id. It arrives as a raw path argument, so it is validated and
 *   the node's view access is checked before any RDF is emitted.
 */
function calais_get_rdf($nid) {
  // The route only checks the 'access calais rdf' permission, so make sure
  // the node exists and the current user may actually view it. Otherwise the
  // extracted metadata of unpublished or restricted nodes would be exposed.
  $node = is_numeric($nid) ? node_load($nid) : FALSE;
  if (!$node) {
    drupal_not_found();
    return;
  }
  if (!node_access('view', $node)) {
    drupal_access_denied();
    return;
  }

  $format = 'rdf+xml';
  $rdf_triples = rdf_query(NULL, NULL, NULL, calais_rdf_options($node->nid));
  $xml = rdf_serialize($rdf_triples, array('format' => $format));

  // TODO: We need to add proper namespaces until http://drupal.org/node/350485 is resolved
  $rdf = _calais_api_fix_rdf_namespaces($xml);

  $formats = rdf_get_formats();
  $output_format = $formats[$format];

  drupal_set_header('Content-Type: '. $output_format->mime_type .'; charset='. $output_format->encoding);
  // strlen() counts bytes, which is what Content-Length requires.
  drupal_set_header('Content-Length: '. strlen($rdf));
  print $rdf;
}

/**
 * Menu access callback for the per-node Calais tab.
 *
 * Access is granted when the user has the 'access calais' permission and
 * Calais processing is enabled for the node's type.
 *
 * @param $node
 *   The node the tab belongs to.
 *
 * @return
 *   TRUE when the tab may be shown, FALSE otherwise.
 */
function calais_access($node) {
  return user_access('access calais') && calais_processing_type($node) != CALAIS_PROCESS_NO;
}

/**
 * Implementation of hook_nodeapi().
 *
 * Process node updates and if applicable, update the Calais terms.
 */
function calais_nodeapi(&$node, $op, $a3 = NULL, $a4 = NULL) {
  switch ($op) {
    case "delete":
      calais_delete_node($node);
      break;

    case "insert":
    case "update":
      $process_type = calais_processing_type($node);
      $threshold = calais_get_node_threshold($node->type);
      calais_process_node($node, $process_type, $threshold, $op);
      break;
  }
}

/**
 * A node is getting removed so do some cleanup.
 *
 * Removes the node's stored RDF and its rows in {calais_term_node}.
 *
 * @param $node The node for deletion
 */
function calais_delete_node($node) {
  calais_remove_rdf($node);
  db_query("DELETE FROM {calais_term_node} WHERE nid=%d", $node->nid);
}

/**
 * Process the provided node according to the processing type.
 *
 * Renders the node, submits title/body/date to Calais, replaces the node's
 * Calais term associations, optionally assigns taxonomy terms, stores the
 * returned RDF and invokes the calais_preprocess / calais_postprocess hooks.
 *
 * @param $node The node to process
 * @param $process_type The processing type for this node.
 * @param $threshold
 *   Minimum relevance a term needs before it is assigned to the node as a
 *   taxonomy term (compared at three decimal places).
 * @param $op
 *   The nodeapi operation ('insert' or 'update'); CALAIS_PROCESS_AUTO_ONCE
 *   only assigns taxonomy terms on 'insert'.
 */
function calais_process_node(&$node, $process_type, $threshold, $op = 'insert') {
  if ($process_type == CALAIS_PROCESS_NO) {
    return;
  }

  // On feedapi runs, updates are missing vid so get it.
  if (!isset($node->vid)) {
    $node->vid = db_result(db_query("SELECT vid FROM {node} WHERE nid=%d", $node->nid));
  }

  $loaded_node = node_build_content($node);
  $body = strip_tags(drupal_render($loaded_node->content));
  $date = format_date($node->created, 'custom', 'r');

  $node_settings = calais_get_node_settings($node);
  $calais = new Calais($node_settings);
  $keywords = $calais->analyzeXML($node->title, $body, $date);

  // Only drop the existing associations when Calais returned something, so a
  // failed or empty analysis does not wipe previously stored terms.
  if (sizeof($keywords) > 0) {
    db_query("DELETE FROM {calais_term_node} WHERE nid=%d", $node->nid);
  }

  module_invoke_all('calais_preprocess', $node, $keywords);

  $vocabularies = calais_get_entity_vocabularies($node->type);

  // Both values are loop-invariant, so compute them once.
  $auto_assign = ($process_type == CALAIS_PROCESS_AUTO
    || ($process_type == CALAIS_PROCESS_AUTO_ONCE && $op == "insert"));
  $min_relevance = round($threshold * 1000);

  foreach ($keywords as $cat => $metadata) {
    // Entities that are not applied to this node type have no vocabulary in
    // the map. Skip them instead of creating terms with an empty vid.
    if (empty($vocabularies[$cat])) {
      continue;
    }
    $vid = $vocabularies[$cat];

    foreach ($metadata->terms as $term) {
      calais_associate_term($vid, $term, $node);
      calais_populate_term_guid($vid, $term);
      if ($auto_assign && round($term->relevance * 1000) >= $min_relevance) {
        calais_assign_to_node($vid, $term, $node);
      }
    }
  }

  calais_store_rdf($node, $calais->flatTriples);

  module_invoke_all('calais_postprocess', $node, $keywords);
}

/**
 * Associates a retrieved Calais term with a node.
 *
 * @param $vid The vocabulary id of the term
 * @param $term The CalaisTerm object
 * @param $node The node for association
 */
function calais_associate_term($vid, $term, $node) {
  $tid = calais_get_calais_term($vid, $term);
  if ($tid) {
    calais_assign_node_calaisterm($node->nid, $tid, $term->relevance);
  }
  else {
    // Pass the term through a placeholder so the log message stays
    // translatable and the externally supplied value is escaped on display.
    watchdog('Calais', 'Could not create Calais Term: @term', array('@term' => $term->value));
  }
}

/**
 * For legacy terms with no guid, set the guid if the term name matches.
 *
 * @param $vid The vocabulary id of the term
 * @param $term The CalaisTerm object
 */
function calais_populate_term_guid($vid, $term) {
  db_query("UPDATE {term_data} SET guid = '%s' WHERE vid = %d AND name = '%s' AND guid IS NULL", $term->guid, $vid, $term->value);
}

/**
 * Assign the taxonomy term to the node.
*
 * Looks up (or creates) the taxonomy term matching the Calais term and links
 * it to the node's current revision.
 *
 * @param $vid The vocabulary id of the term
 * @param $term The CalaisTerm object
 * @param $node The node for association
 */
function calais_assign_to_node($vid, $term, $node) {
  $tid = calais_get_taxonomy_term($vid, $term);
  if ($tid) {
    calais_assign_node_taxonomyterm($node->nid, $node->vid, $tid);
  }
  else {
    // Pass the term through a placeholder so the log message stays
    // translatable and the externally supplied value is escaped on display.
    watchdog('Calais', 'Could not assign Taxonomy Term: @term', array('@term' => $term->value));
  }
}

/**
 * Implementation of hook_calais_preprocess().
 *
 * Make sure that a vocabulary exists for all entities returned, if not, create it.
 *
 * @param $node
 *   The node being processed (not used here).
 * @param $keywords
 *   The Calais result, keyed by entity name.
 */
function calais_calais_preprocess(&$node, &$keywords) {
  $vocabularies = calais_get_entity_vocabularies();
  // Normalise a non-array result (e.g. when no vocabulary map has been stored
  // yet) so it can be safely indexed and extended below.
  if (!is_array($vocabularies)) {
    $vocabularies = array();
  }

  foreach ($keywords as $cat => $metadata) {
    // empty() covers both a missing entity and an empty vid without raising
    // an undefined-index notice for every new entity.
    if (!empty($vocabularies[$cat])) {
      continue;
    }

    // Create a vocabulary if we come across an entity that we dont know of.
    $vid = calais_create_entity_vocabulary($metadata->readable_type());
    $vocabularies[$cat] = $vid;
    // Persist immediately so the mapping survives even if a later entity fails.
    variable_set('calais_vocabulary_names', $vocabularies);
    drupal_set_message(t('Added a Vocabulary for the new Calais Entity %entity. You may need to update your Calais Node Settings to take advantage of this new entity.', array('%entity' => $cat)));
  }
}

/**
 * Checks if a specified calais term exists in a specified vocabulary by GUID. If it
 * does not exist it will create it and return the term id.
*
 * An existing row is refreshed with the values carried by $term.
 *
 * @param $vid
 *   The vocabulary id of the term
 * @param $term
 *   The CalaisTerm object; guid, value, resolvedName, lat and lon are read.
 *
 * @return $tid
 *   Term id from the {calais_term} table
 */
function calais_get_calais_term($vid, $term) {
  $tid = db_result(db_query("SELECT tid FROM {calais_term} where guid = '%s' and vid = %d", $term->guid, $vid));

  $calais_term = new stdClass();
  $calais_term->guid = $term->guid;
  $calais_term->name = $term->value;
  $calais_term->vid = $vid;
  $calais_term->resolved_name = $term->resolvedName;
  $calais_term->latitude = $term->lat;
  $calais_term->longitude = $term->lon;

  if ($tid === FALSE) {
    // No row yet: insert. drupal_write_record() is presumably what fills in
    // $calais_term->tid for the new row -- the return below relies on it.
    drupal_write_record('calais_term', $calais_term);
  }
  else {
    // Existing row: update it in place, keyed on tid.
    $calais_term->tid = $tid;
    drupal_write_record('calais_term', $calais_term, 'tid');
  }

  return $calais_term->tid;
}

/**
 * Checks if a taxonomy term exists in a specified vocabulary by GUID. If it is not found by GUID,
 * but found by name with no GUID, it will set the GUID for future reference. If it does not exist
 * at all it will create it and return the term id.
 *
 * @param $vid
 *    The vocabulary id for the term to find.
 * @param $term
 *    The CalaisTerm to find a matching taxonomy term
 * @return $tid
 *    Term id from the {term_data} table
 */
function calais_get_taxonomy_term($vid, $term) {
  // Look for the term by GUID, but make sure a null GUID passed in does not match an '' (empty) one.
  $tid = db_result(db_query("SELECT tid FROM {term_data} WHERE vid=%d AND guid='%s' AND guid != ''", $vid, $term->guid));

  // If not found by GUID, then look by name
  if ($tid === FALSE) {
    $tid = db_result(db_query("SELECT tid FROM {term_data} WHERE name='%s' AND vid=%d", $term->value, $vid));
    if ($tid) {
      // Was found by name, set GUID if there is one to set and the existing one is null/blank
      if (!empty($term->guid)) {
        db_query("UPDATE {term_data} SET guid='%s' WHERE tid=%d AND (guid IS NULL OR guid = '')", $term->guid, $tid);
      }
    }
    else {
      // Nothing was found or updated, so create one, we need a term.
      $tax_term = array('vid' => $vid, 'name' => $term->value, 'guid' => $term->guid);
      taxonomy_save_term($tax_term);
      // The new term id is read back from the array passed to taxonomy_save_term().
      $tid = $tax_term['tid'];
    }
  }

  return $tid;
}

/**
 * Implementation of hook_taxonomy();
 *
 * Post process the insert/update of taxonomy terms to make sure the proper guid gets inserted into the taxonomy tables.
 */
function calais_taxonomy($op, $type, $data) {
  if ($type == 'term' && ($op == 'insert' || $op == 'update') && isset($data['guid'])) {
    db_query("UPDATE {term_data} SET guid = '%s' WHERE tid = %d", $data['guid'], $data['tid']);
  }
}

/**
 * Record a Calais term against a node with its relevance.
 *
 * Any existing row for the same node/term pair is replaced.
 *
 * @param $nid The node id
 * @param $tid Term id from the {calais_term} table
 * @param $relevance The relevance score to store
 */
function calais_assign_node_calaisterm($nid, $tid, $relevance = 0.0) {
  db_query("DELETE FROM {calais_term_node} where nid=%d and tid=%d", $nid, $tid);
  db_query("INSERT INTO {calais_term_node} (nid, tid, relevance) VALUES(%d, %d, %f)", $nid, $tid, $relevance);
}

/**
 * Link a taxonomy term to a node revision.
 *
 * Any existing row for the same node/revision/term triple is replaced.
 *
 * @param $nid The node id
 * @param $vid The node revision id (not a vocabulary id)
 * @param $tid Term id from the {term_data} table
 */
function calais_assign_node_taxonomyterm($nid, $vid, $tid) {
  db_query("DELETE FROM {term_node} where nid=%d and vid=%d and tid=%d", $nid, $vid, $tid);
  db_query("INSERT INTO {term_node} (nid, vid, tid) VALUES(%d, %d, %d)", $nid, $vid, $tid);
}

/**
 * Determines which processing type is applicable to the node (or node type)
 * passed as an argument.
 *
 * @param mixed $var either a Node object or a valid node_type String
 *
 * @return
 *   The value stored in the "calais_node_<type>_process" variable, or
 *   CALAIS_PROCESS_NO when nothing has been configured for the type.
 */
function calais_processing_type($var) {
  $nodetype = is_object($var) ? $var->type : $var;
  $key = drupal_strtolower($nodetype);
  return variable_get("calais_node_{$key}_process", CALAIS_PROCESS_NO);
}

/**
 * Return the Calais parameter settings for the specific node type.
 *
 * @param $node
 *   The node whose type selects the settings.
 *
 * @return
 *   Array with 'allowSearch' and 'allowDistribution', each the string 'true'
 *   or 'false' (both default to 'true').
 */
function calais_get_node_settings($node) {
  $key = drupal_strtolower($node->type);
  $settings = array(
    'allowSearch' => variable_get("calais_api_allow_searching_{$key}", TRUE) ? 'true' : 'false',
    'allowDistribution' => variable_get("calais_api_allow_distribution_{$key}", TRUE) ? 'true' : 'false',
  );
  return $settings;
}

/**
 * Return the relevancy threshold settings for the specific node type.
 *
 * @param $node_type Node type.
 *
 * @return
 *   The configured threshold, or 0.0 when none is set.
 */
function calais_get_node_threshold($node_type) {
  $type = drupal_strtolower($node_type);
  return variable_get("calais_threshold_{$type}", 0.0);
}

/**
 * Get a list of the entities that Calais API defines:
 *    http://opencalais.mashery.com/page/calaissemanticmetadata
 *
 * If $type is not specified it will return all known Calais Entities.
 * However if $type is specified it will return only those Entities that the
 * specific node type is interested in, or the global list if no specific node type
 * settings have been configured.
 *
 * @param $type - A node type, if Entities need to be filtered.
 *
 * @return
 *   Associative array of [entity_name => vid]'s. Always an array (empty when
 *   nothing is configured). For a given $type, an applied entity that has no
 *   known vocabulary is kept with a NULL vid.
 */
function calais_get_entity_vocabularies($type = NULL) {
  $all_vocabs = variable_get('calais_vocabulary_names', array());
  // Guard against an unset or malformed variable so callers can always
  // iterate and index the result.
  if (!is_array($all_vocabs)) {
    $all_vocabs = array();
  }

  if (!$type) {
    return $all_vocabs;
  }

  $applied_entities = variable_get('calais_applied_entities_global', array());
  if (!variable_get("calais_use_global_{$type}", TRUE)) {
    $applied_entities = variable_get("calais_applied_entities_{$type}", array());
  }

  // Initialise the result up front: previously it was left undefined when no
  // entity was applied, and foreach() ran over FALSE when nothing was stored.
  $applied_vocabs = array();
  if (is_array($applied_entities)) {
    foreach ($applied_entities as $entity => $apply_entity) {
      if ($apply_entity) {
        $applied_vocabs[$entity] = isset($all_vocabs[$entity]) ? $all_vocabs[$entity] : NULL;
      }
    }
  }

  return $applied_vocabs;
}

/**
 * Returns the Calais vocabularies that are enabled for this node type.
 *
Compare to: calais_api_get_all_entities() which gives entityname/vid pairs.
 * Vocabulary names can be updated by users. Entity names stay as defined
 * by Calais.
 *
 * @param $type
 *   The node type. When given, only vocabularies that are also associated
 *   with that node type in {vocabulary_node_types} are returned.
 *
 * @return Array of vocabulary objects keyed on vid, each with a 'nodes' array.
 */
function calais_get_vocabularies($type = NULL) {
  $vocs = calais_get_entity_vocabularies($type);
  if (empty($vocs)) {
    return array();
  }

  // Keep only real, integer vocabulary ids. Entities without a vocabulary
  // would otherwise produce an "IN (1,,3)" list, which is invalid SQL.
  $vids = array_values(array_filter(array_map('intval', (array) $vocs)));
  if (empty($vids)) {
    return array();
  }

  // One typed %d placeholder per vid instead of splicing a joined string.
  $placeholders = db_placeholders($vids, 'int');

  if ($type) {
    $result = db_query("SELECT v.vid, v.*, n.type FROM {vocabulary} v LEFT JOIN {vocabulary_node_types} n ON v.vid = n.vid WHERE n.type = '%s' and v.vid in ($placeholders) ORDER BY v.weight, v.name", array_merge(array($type), $vids));
  }
  else {
    $result = db_query("SELECT v.* FROM {vocabulary} v WHERE v.vid in ($placeholders) ORDER BY v.weight, v.name", $vids);
  }

  $vocabularies = array();
  $node_types = array();
  while ($voc = db_fetch_object($result)) {
    // The type column is only selected when filtering by node type, so rows
    // from the unfiltered query fall through to the empty 'nodes' default.
    if (isset($voc->type)) {
      $node_types[$voc->vid][$voc->type] = $voc->type;
      unset($voc->type);
      $voc->nodes = $node_types[$voc->vid];
    }
    elseif (!isset($voc->nodes)) {
      $voc->nodes = array();
    }
    $vocabularies[$voc->vid] = $voc;
  }

  return $vocabularies;
}

/**
 * Returns a map of suggested terms for a given vocabulary id. This will return
 * suggested terms for all vocabularies if no vid is specified.
 *
 * @param $nid The node id to get the calais keywords
 * @param $type The node type
 * @param $vid Optional, a specific vocabulary id to return terms
 * @param $threshold
 *    Optional, A relevance threshold for suggestions. If not specified the default for the node type will be used.
*
 * @return Array { $vid => array('term1', 'term2', 'term3') }
 */
function calais_get_keywords($nid, $type, $vid = NULL, $threshold = NULL) {
  if (!isset($threshold)) {
    $threshold = calais_get_node_threshold(drupal_strtolower($type));
  }

  if ($vid) {
    $vids = array($vid);
  }
  else {
    // Drop empty vocabulary ids. The previous recursive implementation fell
    // back into this "all vocabularies" branch for an empty vid and never
    // terminated. A non-array map is treated as "no vocabularies".
    $vocabularies = calais_get_entity_vocabularies();
    $vids = is_array($vocabularies) ? array_filter($vocabularies) : array();
  }

  $terms = array();
  foreach ($vids as $current_vid) {
    // Every requested vocabulary gets an entry, even with no matching terms.
    $terms[$current_vid] = array();
    $res = db_query("SELECT name FROM {calais_term} t JOIN {calais_term_node} tn ON tn.tid=t.tid WHERE tn.nid=%d and t.vid=%d and tn.relevance >= %f ORDER BY name asc", $nid, $current_vid, $threshold);
    while ($obj = db_fetch_object($res)) {
      $terms[$current_vid][] = $obj->name;
    }
  }

  return $terms;
}

/**
 * Get the CalaisTerm for the term name and node id.
 * NOTE: Columns were renamed here so that this object has the same attributes as CalaisTerm for consistency
 *
 * @param $nid The node id to limit the calais term look up
 * @param $term_name The term name to look up
 *
 * @return
 *   The first matching row as an object with value, guid and relevance, as
 *   returned by db_fetch_object().
 */
function calais_get_node_term($nid, $term_name) {
  $res = db_query("SELECT t.name as value, t.guid, tn.relevance FROM {calais_term} t JOIN {calais_term_node} tn ON tn.tid=t.tid WHERE tn.nid=%d and t.name='%s'", $nid, $term_name);
  return db_fetch_object($res);
}

/**
 * Creates a new vocabulary for the supplied Calais entity name.
 *
 * The row is inserted directly into {vocabulary} as a tags vocabulary owned
 * by the 'calais' module.
 *
 * @param $entity The entity name, used as the vocabulary name
 *
 * @return The vid of the newly inserted vocabulary
 */
function calais_create_entity_vocabulary($entity) {
  $description = t("Calais Entity Vocabulary: @name", array('@name' => $entity));
  db_query("INSERT INTO {vocabulary} (name,description,module,tags) values('%s','%s','calais',1)", $entity, $description);
  return db_last_insert_id('vocabulary', 'vid');
}

/**
 * Get the standard RDF options array.
 *
 * @param $nid The node id
 *
 * @return
 *   Array with the per-node 'graph' name and the Calais 'repository'.
 */
function calais_rdf_options($nid) {
  return array('graph' => "node/$nid/calais/rdf", 'repository' => CALAIS_RDF_REPOSITORY);
}

/**
 * Store the triples for this node in the local RDF store.
Storing these allows for more
 * Semantic Web functionality to be developed later by not losing all of the rich metadata
 * returned by Calais. This could be used to batch process data that was missed originally
 * or by querying it later for use in RDFa rendering, etc.
 *
 * Does nothing when the 'calais_store_rdf' setting is off. Otherwise the
 * node's previously stored triples are removed before the new ones are
 * inserted.
 *
 * @param $node The node for association
 * @param $triples The flat set of triples
 */
function calais_store_rdf($node, $triples) {
  $storage_enabled = variable_get('calais_store_rdf', TRUE);
  if (!$storage_enabled) {
    return;
  }

  // Replace rather than append: clear this node's graph first.
  calais_remove_rdf($node);
  $options = calais_rdf_options($node->nid);
  rdf_insert_all($triples, $options);
}

/**
 * Remove the triples stored for this node from the local RDF store.
 *
 * Does nothing when the 'calais_store_rdf' setting is off.
 *
 * @param $node The node whose stored RDF should be deleted
 */
function calais_remove_rdf($node) {
  $storage_enabled = variable_get('calais_store_rdf', TRUE);
  if (!$storage_enabled) {
    return;
  }

  $options = calais_rdf_options($node->nid);
  rdf_delete(NULL, NULL, NULL, $options);
}

/**
 * Implementation of hook_views_api().
 *
 * @return
 *   The Views API version and the directory holding this module's Views files.
 */
function calais_views_api() {
  $views_dir = drupal_get_path('module', 'calais') .'/views';
  $info = array();
  $info['api'] = 2.0;
  $info['path'] = $views_dir;
  return $info;
}

/**
 * Development helper: dump a value with print_r() and optionally stop.
 *
 * NOTE(review): the strings wrapped around the dump are just a newline and an
 * empty string -- confirm whether some markup was meant to surround it.
 *
 * @param $array The value to dump
 * @param $exit  When TRUE (the default) the script exits after printing
 */
function ____debug($array, $exit = TRUE) {
  $dump = print_r($array, TRUE);
  echo "
". $dump ."";
  if ($exit) {
    exit(0);
  }
}