l(t('Calais API Settings'), 'admin/settings/calais') ); $requirements['drupal'] = array('title' => $t('Calais API Key is not set'), 'description' => $t('Calais integration module is enabled, but Calais API Key is not set. Module will not function properly. Please obtain the key and and set it at !calaissetturi as soon as possible', $settings_uri), 'severity' => REQUIREMENT_ERROR); } } return $requirements; }

/**
 * Implementation of hook_perm().
 *
 * @return
 *   Array holding the single permission name defined by this module.
 */
function calais_api_perm() {
  $permissions = array();
  $permissions[] = 'administer calais api';
  return $permissions;
}

/**
 * Implementation of hook_menu().
 *
 * @param $may_cache
 *   All items of this module are only returned when this flag is set.
 *
 * @return
 *   Array of menu items: the settings page, its default local task and the
 *   test callback. Empty when $may_cache is not set.
 */
function calais_api_menu($may_cache) {
  // Nothing is defined for the non-cacheable pass.
  if (!$may_cache) {
    return array();
  }

  // Settings form, restricted to holders of the module's own permission.
  $settings_page = array(
    'path' => 'admin/settings/calais',
    'title' => t('Calais Settings'),
    'description' => t('Configurations for Calais'),
    'callback' => 'drupal_get_form',
    'callback arguments' => array('calais_api_admin_settings'),
    'access' => user_access('administer calais api'),
  );

  // Default tab shown on the settings page.
  $settings_tab = array(
    'path' => 'admin/settings/calais/calais-api',
    'title' => t('Calais API Settings'),
    'description' => t('Configurations for Calais API'),
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );

  // NOTE(review): 'access' => TRUE leaves this path open to every visitor,
  // and the 'calais_call_test' callback is not defined in the visible part of
  // this file -- confirm both are intended.
  $test_page = array(
    'path' => 'calais/tester',
    'title' => t('Calais Test'),
    'description' => t('Calais Call Test'),
    'callback' => 'calais_call_test',
    'type' => MENU_CALLBACK,
    'access' => TRUE,
  );

  return array($settings_page, $settings_tab, $test_page);
}

/**
 * Build the admin settings form.
*/ function calais_api_admin_settings() { $form = array(); $calais_url = array( '!calaisurl' => l(t('Calais Website'), 'http://www.opencalais.com/member/register') ); $form['calais_api_key'] = array( '#type' => 'textfield', '#title' => t('Calais API Key'), '#default_value' => variable_get('calais_api_key', NULL), '#size' => 60, '#description' => t('You need to obtain an API Key from the !calaisurl first', $calais_url), ); $form['calais_api_allow_searching'] = array( '#type' => 'checkbox', '#title' => t('Allow Calais Searching'), '#default_value' => variable_get('calais_api_allow_searching', NULL), '#description' => t('Indicates whether future searches can be performed on the extracted metadata by Calais'), ); $form['calais_api_allow_distribution'] = array( '#type' => 'checkbox', '#title' => t('Allow Calais Distribution'), '#default_value' => variable_get('calais_api_allow_distribution', NULL), '#description' => t('Indicates whether the extracted metadata can be distributed by Calais'), ); $form = system_settings_form($form); return $form; } /** * Analyze the content via Calais. * * @param $content The content to ship off to Calais for analysis * @param $node_type The node type for this content, if applicable. * @param $parameters Array of Calais parameters for overriding defaults. * @see calais_api_build_xml_params for applicable Parameter values. */ function calais_api_analyze($node, $content, $parameters = array()) { $HOST = "http://api.opencalais.com"; $URI = "/enlighten/calais.asmx/Enlighten"; $headers = array('Content-Type' => 'application/x-www-form-urlencoded'); $method = 'POST'; $data = array( 'licenseID' => variable_get('calais_api_key', NULL), 'content' => $content, 'paramsXML' => calais_api_xml_params($node->type, $parameters), ); $keywords = array(); $data_enc = http_build_query($data, '', '&'); $ret = drupal_http_request($HOST . 
$URI, $headers, $method, $data_enc); if (isset($ret->error)) { $msg = t('Calais processing error: @msg', array('@msg' => $ret->data)); $watchdog = t('Calais processing error: (@code - @error) @msg', array('@code' => $ret->code, '@error' => $ret->error, '@msg' => $ret->data)); drupal_set_message($msg, 'error'); watchdog('calais', $watchdog, WATCHDOG_ERROR); } else { $xml = $ret->data; $rdfxml = calais_api_extract_rdf($xml); //Allow Alternative Implementations of RDF Parsers $rdf_parsers = module_invoke_all('calais_parse_rdf', $rdfxml); // If no suitable implementation returns an object, use // the default (ours) ARC2 implementation // @TODO: allow for several non-overriding implementations. if (!is_array($rdf_parsers) || sizeof($rdf_parsers) == 0) { $keywords = calais_api_parse_rdf($rdfxml); } else { // Use the first one. $keywords = $rdf_parsers[0]; } } return $keywords; } /** * Get the XML Parameters required by the Calais Web-Service based on the node type. * * @return an XML Document */ function calais_api_xml_params($node_type, $parameters) { $global_params = array( 'allowSearch' => variable_get('calais_api_allow_searching', false) ? 'true' : 'false', 'allowDistribution' => variable_get('calais_api_allow_distribution', false) ? 'true' : 'false', ); $options = array_merge($global_params, $parameters); return calais_api_build_xml_params($options); } /** * Build the XML Parameters required by the Calais Web-Service * * Valid parameters are specified in the options array as key/value pairs with the * parameter name being the key and the parameter setting being the value * e.g. array('allowSearch' => 'false') * * @return XML document of Calais parameters. * @see http://opencalais.mashery.com/page/documentation#inputparameters for valid parameters. 
*/ function calais_api_build_xml_params($options) { $defaults = array( 'contentType' => 'TEXT/HTML', 'outputFormat' => 'XML/RDF', 'allowSearch' => 'false', 'allowDistribution' => 'false', 'externalID' => time(), 'submitter' => "Drupal Calais", ); $attributes = array_merge($defaults, $options); $ret = << EOD; return $ret; } /** * You need to understand how Calais responses are structured to parse its * RDF. * * Samle element: * [http://d.opencalais.com/pershash-1/fc0ac3c8-5d95-3625-bbd2-6e6fc3f1d703] => Array ( * 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' => * array ( * 0 => 'http://s.opencalais.com/1/type/em/e/Person', * ), * 'http://s.opencalais.com/1/pred/name' => * array ( * 0 => 'Diana', * ) * ) */ function calais_api_parse_rdf($rdfxml) { $parser = ARC2::getRDFXMLParser(); $parser->parse(null, $rdfxml); $indexedTriples = $parser->getSimpleIndex(); $keywords = new StdClass(); foreach ($indexedTriples as $indx) { $isKeyword = false; foreach ($indx as $key => $value) { if ($key == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type') { $keywordType = $value[0]; // take the last part only $keywordType = preg_replace('/.*\//ims', '', $keywordType); //$keywordType = strtolower($keywordType); // Value of keyword will come in the next iteration, sorry. $isKeyword = True; } if ($isKeyword == true && $key == 'http://s.opencalais.com/1/pred/name') { // nullify to avoid double-passes $isKeyword = False; $keywordValue = $value[0]; if (!is_array($keywords->$keywordType)) { $keywords->$keywordType = array(); } //Gotta do "eval" to be able to use $arr[] = $nealemenet notation; $keywordValue = db_escape_string($keywordValue); eval("\$keywords->$keywordType". "[] = '$keywordValue';"); } } } return $keywords; } /** * For whatever reason Calais returns XML wrapped around the RDF, instead of * pure RDF-XML, so we need to remove it. 
* * @return unknown */ function calais_api_extract_rdf($data) { $xml = html_entity_decode($data); $success = preg_match('//ims', $xml, $matches); $rdfxml = $matches[0]; return $rdfxml; } /** * Get a list of the entities that Calais API defines: * http://opencalais.mashery.com/page/calaissemanticmetadata * * TODO: When Calais updates to have a static list at a URL or via API call, return that instead. * * @return flat array listing of Calais entities */ function calais_api_get_all_entities() { return array( 'Anniversary', 'City', 'Company', 'Continent', 'Country', 'Currency', 'EmailAddress', 'Facility', 'FaxNumber', 'Holiday', 'IndustryTerm', 'MedicalCondition', 'NaturalDisaster', 'NaturalFeature', 'Organization', 'Person', 'PhoneNumber', 'ProvinceOrState', 'PublishedMedium', 'Region', 'SportsGame', 'Technology', 'URL', ); }