l(t('Calais API Settings'), 'admin/settings/calais-api') );
      // NOTE(review): the link above targets 'admin/settings/calais-api', while
      // calais_api_menu() below registers 'admin/settings/calais' and
      // 'admin/settings/calais/calais-api' -- confirm the intended path.
      $requirements['calais-api'] = array('title' => $t('Calais API Key is not set'),
        'description' => $t('Calais integration module is enabled, but Calais API Key is not set. Module will not function properly. Please obtain the key and and set it at !calaissetturi as soon as possible', $settings_uri),
        'severity' => REQUIREMENT_ERROR);
    }

    // Report whether the ARC2 class is available; it is an error when the
    // class cannot be found.
    $arc2 = class_exists('ARC2');
    $requirements['calais-rdf'] = array(
      'title' => $t('Calais RDF Parser'),
      'value' => $arc2 ? $t('ARC2 @version is installed', array('@version' => ARC2::getVersion())) : $t('ARC2 Not installed'),
      // NOTE(review): the message below is given @arc2, @download and
      // @handbook replacements but its text contains none of those
      // placeholders -- the link markup may have been lost; confirm.
      'description' => $arc2 ? '' : $t('ARC2 is not available. Please download the latest version of the library and install it in the RDF module. For more information please refer to the handbook.', array('@arc2' => 'http://arc.semsol.org/', '@download' => 'http://arc.semsol.org/download', '@handbook' => 'http://drupal.org/node/219852')),
      'severity' => $arc2 ? REQUIREMENT_OK : REQUIREMENT_ERROR,
    );
  }

  return $requirements;
}

/**
 * Implementation of hook_perm().
 *
 * @return
 *   A list holding the single permission defined by this module,
 *   'administer calais api'.
 */
function calais_api_perm() {
  return array('administer calais api');
}

/**
 * Implementation of hook_menu().
 *
 * Registers the Calais configuration page at admin/settings/calais, rendered
 * through drupal_get_form() with the calais_api_admin_settings form and
 * restricted to the 'administer calais api' permission, plus the
 * "Calais API Settings" default local task beneath it.
 *
 * @return
 *   The menu items array, keyed by path.
 */
function calais_api_menu() {
  $items = array();

  $items['admin/settings/calais'] = array(
    'title' => 'Calais Configuration',
    'description' => 'Configurations for Calais',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('calais_api_admin_settings'),
    'access arguments' => array('administer calais api'),
  );
  // Default tab of the page above.
  $items['admin/settings/calais/calais-api'] = array(
    'title' => 'Calais API Settings',
    'description' => 'Configurations for Calais API',
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );

  return $items;
}

/**
 * Build the admin settings form.
 *
 * Offers the Calais API key plus the "allow searching" and "allow
 * distribution" flags.
 *
 * @return
 *   The form array, as returned by system_settings_form().
*/
function calais_api_admin_settings() {
  $form = array();

  $calais_url = array(
    '!calaisurl' => l(t('Calais Website'), 'http://www.opencalais.com/user/register'),
  );

  $form['calais_api_key'] = array(
    '#type' => 'textfield',
    '#title' => t('Calais API Key'),
    '#default_value' => variable_get('calais_api_key', NULL),
    '#size' => 60,
    '#description' => t('You need to obtain an API Key from the !calaisurl first', $calais_url),
  );
  $form['calais_api_allow_searching'] = array(
    '#type' => 'checkbox',
    '#title' => t('Allow Calais Searching'),
    '#default_value' => variable_get('calais_api_allow_searching', NULL),
    '#description' => t('Indicates whether future searches can be performed on the extracted metadata by Calais'),
  );
  $form['calais_api_allow_distribution'] = array(
    '#type' => 'checkbox',
    '#title' => t('Allow Calais Distribution'),
    '#default_value' => variable_get('calais_api_allow_distribution', NULL),
    '#description' => t('Indicates whether the extracted metadata can be distributed by Calais'),
  );

  return system_settings_form($form);
}

/**
 * Analyze the content via Calais.
 *
 * Posts the content, the configured API key and the parameter XML to the
 * Calais web service and parses the RDF found in the response.
 *
 * @param $content
 *   The content to ship off to Calais for analysis.
 * @param $parameters
 *   Array of Calais parameters for overriding defaults.
 *
 * @return
 *   On success, the object built by calais_api_parse_rdf() (one array
 *   property per keyword type). An empty array when the API key is not set
 *   or when the HTTP request reported an error.
 *
 * @see calais_api_build_xml_params() for applicable parameter values.
 */
function calais_api_analyze($content, $parameters = array()) {
  $apikey = variable_get('calais_api_key', FALSE);
  if (empty($apikey)) {
    $link = l('admin/settings/calais', 'admin/settings/calais', array('attributes' => array('target' => '_blank')));
    // The message text (including its line break) is kept exactly as it was,
    // since it is the lookup key for translations.
    drupal_set_message(t("Calais semantic analysis skipped because Calais API Key is not set. \nPlease configure it at: !link", array('!link' => $link)), 'warning', FALSE);
    return array();
  }

  $host = "http://api.opencalais.com";
  $uri = "/enlighten/calais.asmx/Enlighten";
  $headers = array('Content-Type' => 'application/x-www-form-urlencoded');
  $data = array(
    'licenseID' => $apikey,
    'content' => $content,
    'paramsXML' => calais_api_xml_params($parameters),
  );

  $response = drupal_http_request($host . $uri, $headers, 'POST', http_build_query($data, '', '&'));

  if (isset($response->error)) {
    // A failed request may carry no response body at all.
    $body = isset($response->data) ? $response->data : '';
    drupal_set_message(t('Calais processing error: @msg', array('@msg' => $body)), 'error');
    watchdog('calais', 'Calais processing error: (@code - @error) @msg', array('@code' => $response->code, '@error' => $response->error, '@msg' => $body), WATCHDOG_ERROR);
    return array();
  }

  return calais_api_parse_rdf(calais_api_extract_rdf($response->data));
}

/**
 * Build the XML document request format expected by Calais.
 *
 * The three values are inserted verbatim; no XML escaping is applied here.
 *
 * NOTE(review): the element tags were missing from the reviewed copy of this
 * function (markup stripped). They are restored here as the Calais
 * DOCUMENT/TITLE/DATE/BODY input format -- confirm against the upstream
 * module.
 *
 * @param $title
 *   Document title.
 * @param $date
 *   Document date.
 * @param $body
 *   Document body.
 *
 * @return
 *   An XML string to be submitted to Calais.
 */
function calais_api_build_xml_request($title, $date, $body) {
  $req = "<DOCUMENT>";
  $req .= "<TITLE>$title</TITLE>";
  $req .= "<DATE>$date</DATE>";
  $req .= "<BODY>$body</BODY>";
  $req .= "</DOCUMENT>";
  return $req;
}

/**
 * Get the XML parameters required by the Calais web service.
 *
 * Starts from the site-wide "allow searching" / "allow distribution"
 * settings and lets the given parameters override them.
 *
 * @param $parameters
 *   Array of Calais parameters, keyed by parameter name.
 *
 * @return
 *   The parameter XML document built by calais_api_build_xml_params().
 */
function calais_api_xml_params($parameters) {
  $global_params = array(
    'allowSearch' => variable_get('calais_api_allow_searching', FALSE) ? 'true' : 'false',
    'allowDistribution' => variable_get('calais_api_allow_distribution', FALSE) ? 'true' : 'false',
  );
  return calais_api_build_xml_params(array_merge($global_params, $parameters));
}

/**
 * Build the XML Parameters required by the Calais Web-Service
 *
 * Valid parameters are specified in the options array as key/value pairs with the
 * parameter name being the key and the parameter setting being the value
 * e.g. array('allowSearch' => 'false')
 *
 * Values are inserted into the XML attributes verbatim (no escaping). When no
 * 'externalID' is supplied, the current Unix timestamp is used.
 *
 * NOTE(review): the XML body of the heredoc was missing from the reviewed copy
 * of this function (markup stripped, leaving a syntax error). It is restored
 * here following the Calais paramsXML format -- confirm against the upstream
 * module.
 *
 * @param $options
 *   Array of parameter overrides, keyed by parameter name.
 *
 * @return XML document of Calais parameters.
 * @see http://opencalais.mashery.com/page/documentation#inputparameters for valid parameters.
 */
function calais_api_build_xml_params($options) {
  $defaults = array(
    'contentType' => 'TEXT/HTML',
    'outputFormat' => 'XML/RDF',
    'calculateRelevanceScore' => 'true',
    'allowSearch' => 'false',
    'allowDistribution' => 'false',
    'externalID' => time(),
    'submitter' => "Drupal Calais",
  );
  $attributes = array_merge($defaults, $options);

  // The closing EOD marker has to stay at the start of its line.
  $ret = <<<EOD
<c:params xmlns:c="http://s.opencalais.com/1/pred/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
  <c:processingDirectives c:contentType="{$attributes['contentType']}" c:outputFormat="{$attributes['outputFormat']}" c:calculateRelevanceScore="{$attributes['calculateRelevanceScore']}"></c:processingDirectives>
  <c:userDirectives c:allowDistribution="{$attributes['allowDistribution']}" c:allowSearch="{$attributes['allowSearch']}" c:externalID="{$attributes['externalID']}" c:submitter="{$attributes['submitter']}"></c:userDirectives>
  <c:externalMetadata></c:externalMetadata>
</c:params>
EOD;

  return $ret;
}

/**
 * You need to understand how Calais responses are structured to parse its
 * RDF.
 *
 * Sample element:
 * [http://d.opencalais.com/pershash-1/fc0ac3c8-5d95-3625-bbd2-6e6fc3f1d703] => Array (
 *   'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' =>
 *     array (
 *       0 => 'http://s.opencalais.com/1/type/em/e/Person',
 *     ),
 *   'http://s.opencalais.com/1/pred/name' =>
 *     array (
 *       0 => 'Diana',
 *     )
 * )
 *
 * A resource contributes a keyword when its rdf:type entry is followed by a
 * name entry: the last path segment of the type URI (e.g. "Person") becomes
 * the property, and the first name value is appended to that property's
 * array.
 *
 * @param $rdf_xml
 *   RDF/XML string to parse.
 *
 * @return
 *   A stdClass with one array property per keyword type.
 */
function calais_api_parse_rdf($rdf_xml) {
  $parser = ARC2::getRDFXMLParser();
  $parser->parse(NULL, $rdf_xml);
  $indexed_triples = $parser->getSimpleIndex();

  $type_predicate = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
  $name_predicate = 'http://s.opencalais.com/1/pred/name';

  $keywords = new stdClass();
  foreach ($indexed_triples as $predicates) {
    // An empty type means "no type seen yet for this resource".
    $keyword_type = '';
    foreach ($predicates as $predicate => $values) {
      if ($predicate === $type_predicate) {
        // Take the last part of the type URI only. The name, if any, is
        // picked up in a later iteration.
        $keyword_type = (string) preg_replace('/.*\//ims', '', $values[0]);
      }
      elseif ($keyword_type !== '' && $predicate === $name_predicate) {
        if (!isset($keywords->{$keyword_type})) {
          $keywords->{$keyword_type} = array();
        }
        // Append through a dynamic property. The type and the value come from
        // the remote response, so they must never be passed through eval().
        $keywords->{$keyword_type}[] = $values[0];
        // Reset to avoid double-passes.
        $keyword_type = '';
      }
    }
  }

  return $keywords;
}

/**
 * For whatever reason Calais returns XML wrapped around the RDF, instead of
 * pure RDF-XML, so we need to remove it.
 *
 * NOTE(review): the pattern was empty in the reviewed copy of this function
 * (markup stripped). It is restored here to match the rdf:RDF element --
 * confirm against the upstream module.
 *
 * @param $data
 *   Raw response body with the RDF entity-encoded inside a wrapper.
 *
 * @return
 *   The RDF/XML document, or an empty string when the response contains none.
 */
function calais_api_extract_rdf($data) {
  $xml = html_entity_decode((string) $data);
  if (preg_match('/<rdf:RDF.*<\/rdf:RDF>/ims', $xml, $matches)) {
    return $matches[0];
  }
  return '';
}

/**
 * Get a list of the entities that Calais API defines:
 *    http://opencalais.mashery.com/page/calaissemanticmetadata
 *
 * TODO: When Calais updates to have a static list at a URL or via API call, return that instead.
 *
 * @return flat array listing of Calais entities
 */
function calais_api_get_all_entities() {
  return array(
    'Anniversary',
    'City',
    'Company',
    'Continent',
    'Country',
    'Currency',
    'EmailAddress',
    'EntertainmentAwardEvent',
    'Facility',
    'FaxNumber',
    'Holiday',
    'IndustryTerm',
    'MedicalCondition',
    'Movie',
    'MusicAlbum',
    'MusicGroup',
    'NaturalDisaster',
    'NaturalFeature',
    'Organization',
    'Person',
    'PhoneNumber',
    'ProvinceOrState',
    'PublishedMedium',
    'Region',
    'SportsEvent',
    'SportsGame',
    'Technology',
    'TVShow',
    'URL',
  );
}