Merge pull request GoogleCloudPlatform#793 from GoogleCloudPlatform/analyze_all

gogasca · web-flow · commit a8ea24aa23f5 · 2018-12-17T15:12:54.000-08:00
[analyze_all] Migrate to new client LanguageServiceClient in 0.16.2
diff --git a/language/language.php b/language/language.php
@@ -79,7 +79,7 @@
         // Regex to match a Cloud Storage path as the first argument
         // e.g "gs://my-bucket/file_with_text.txt"
         if (preg_match('/^gs:\/\/([a-z0-9\._\-]+)\/(\S+)$/', $content, $matches)) {
-            analyze_all_from_file($matches[1], $matches[2], $projectId);
+            analyze_all_from_file($matches[0], $projectId);
         } else {
             analyze_all($content, $projectId);
         }
diff --git a/language/src/analyze_all.php b/language/src/analyze_all.php
@@ -24,7 +24,9 @@
 # [START analyze_all]
 namespace Google\Cloud\Samples\Language;
 
-use Google\Cloud\Language\LanguageClient;
+use Google\Cloud\Language\V1beta2\AnnotateTextRequest\Features;
+use Google\Cloud\Language\V1beta2\Document;
+use Google\Cloud\Language\V1beta2\LanguageServiceClient;
 
 /**
  * Find the everything in text.
@@ -39,58 +41,107 @@
 function analyze_all($text, $projectId = null)
 {
     // Create the Natural Language client
-    $language = new LanguageClient([
-        'projectId' => $projectId,
-    ]);
+    $languageServiceClient = new LanguageServiceClient(['projectId' => $projectId]);
 
-    // Call the annotateText function
-    $annotation = $language->annotateText($text, [
-        'features' => ['entities', 'syntax', 'sentiment']
-    ]);
+    // Entities, Mention and Tags mappings
+    $entity_types = [
+        0 => 'UNKNOWN',
+        1 => 'PERSON',
+        2 => 'LOCATION',
+        3 => 'ORGANIZATION',
+        4 => 'EVENT',
+        5 => 'WORK_OF_ART',
+        6 => 'CONSUMER_GOOD',
+        7 => 'OTHER',
+    ];
 
-    // Print out information about each entity
-    $entities = $annotation->entities();
-    foreach ($entities as $entity) {
-        printf('Name: %s' . PHP_EOL, $entity['name']);
-        printf('Type: %s' . PHP_EOL, $entity['type']);
-        printf('Salience: %s' . PHP_EOL, $entity['salience']);
-        if (array_key_exists('wikipedia_url', $entity['metadata'])) {
-            printf('Wikipedia URL: %s' . PHP_EOL, $entity['metadata']['wikipedia_url']);
+    $mention_type = [
+        0 => 'TYPE_UNKNOWN',
+        1 => 'PROPER',
+        2 => 'COMMON',
+    ];
+
+    $tag_types = [
+        0 => 'UNKNOWN',
+        1 => 'ADJ',
+        2 => 'ADP',
+        3 => 'ADV',
+        4 => 'CONJ',
+        5 => 'DET',
+        6 => 'NOUN',
+        7 => 'NUM',
+        8 => 'PRON',
+        9 => 'PRT',
+        10 => 'PUNCT',
+        11 => 'VERB',
+        12 => 'X',
+        13 => 'AFFIX',
+    ];
+
+    try {
+        // Create a new Document
+        $document = new Document();
+        // Set the inline text content and set document type to PLAIN_TEXT
+        $document->setContent($text)->setType(1);
+        // Define features we need to extract.
+        $features = new Features();
+        // Set Features to extract ['entities', 'syntax', 'sentiment']
+        $features->setExtractEntities(true);
+        $features->setExtractSyntax(true);
+        $features->setExtractDocumentSentiment(true);
+        // Collect annotations
+        $response = $languageServiceClient->annotateText($document, $features);
+        // Process Entities
+        $entities = $response->getEntities();
+        foreach ($entities as $entity) {
+            printf('Name: %s' . PHP_EOL, $entity->getName());
+            printf('Type: %s' . PHP_EOL, $entity_types[$entity->getType()]);
+            printf('Salience: %s' . PHP_EOL, $entity->getSalience());
+            if($entity->getMetadata()->offsetExists('wikipedia_url')) {
+                printf('Wikipedia URL: %s' . PHP_EOL, $entity->getMetadata()->offsetGet('wikipedia_url'));
+            }
+            if($entity->getMetadata()->offsetExists('mid')) {
+                printf('Knowledge Graph MID: %s' . PHP_EOL, $entity->getMetadata()->offsetGet('mid'));
+            }
+            printf('Mentions:' . PHP_EOL);
+            foreach ($entity->getMentions() as $mention) {
+                printf('  Begin Offset: %s' . PHP_EOL, $mention->getText()->getBeginOffset());
+                printf('  Content: %s' . PHP_EOL, $mention->getText()->getContent());
+                printf('  Mention Type: %s' . PHP_EOL, $mention_type[$mention->getType()]);
+                printf(PHP_EOL);
+            }
+            printf(PHP_EOL);
         }
-        if (array_key_exists('mid', $entity['metadata'])) {
-            printf('Knowledge Graph MID: %s' . PHP_EOL, $entity['metadata']['mid']);
+        // Process Sentiment
+        $document_sentiment = $response->getDocumentSentiment();
+        // Print document information
+        printf('Document Sentiment:' . PHP_EOL);
+        printf('  Magnitude: %s' . PHP_EOL, $document_sentiment->getMagnitude());
+        printf('  Score: %s' . PHP_EOL, $document_sentiment->getScore());
+        printf(PHP_EOL);
+        $sentences = $response->getSentences();
+        foreach ($sentences as $sentence) {
+            printf('Sentence: %s' . PHP_EOL, $sentence->getText()->getContent());
+            printf('Sentence Sentiment:' . PHP_EOL);
+            $sentiment = $sentence->getSentiment();
+            if ($sentiment) {
+                printf('Entity Magnitude: %s' . PHP_EOL, $sentiment->getMagnitude());
+                printf('Entity Score: %s' . PHP_EOL, $sentiment->getScore());
+            }
+            printf(PHP_EOL);
         }
-        printf('Mentions:' . PHP_EOL);
-        foreach ($entity['mentions'] as $mention) {
-            printf('  Begin Offset: %s' . PHP_EOL, $mention['text']['beginOffset']);
-            printf('  Content: %s' . PHP_EOL, $mention['text']['content']);
-            printf('  Mention Type: %s' . PHP_EOL, $mention['type']);
+        // Process Syntax
+        $tokens = $response->getTokens();
+        // Print out information about each token
+        foreach ($tokens as $token) {
+            printf('Token text: %s' . PHP_EOL, $token->getText()->getContent());
+            printf('Token part of speech: %s' . PHP_EOL, $tag_types[$token->getPartOfSpeech()->getTag()]);
             printf(PHP_EOL);
         }
-        printf(PHP_EOL);
-    }
 
-    // Print document and sentence sentiment information
-    $sentiment = $annotation->sentiment();
-    printf('Document Sentiment:' . PHP_EOL);
-    printf('  Magnitude: %s' . PHP_EOL, $sentiment['magnitude']);
-    printf('  Score: %s' . PHP_EOL, $sentiment['score']);
-    printf(PHP_EOL);
-    foreach ($annotation->sentences() as $sentence) {
-        printf('Sentence: %s' . PHP_EOL, $sentence['text']['content']);
-        printf('Sentence Sentiment:' . PHP_EOL);
-        printf('  Magnitude: %s' . PHP_EOL, $sentence['sentiment']['magnitude']);
-        printf('  Score: %s' . PHP_EOL, $sentence['sentiment']['score']);
-        printf(PHP_EOL);
+    } finally {
+        $languageServiceClient->close();
     }
 
-    // Print syntax information. See https://cloud.google.com/natural-language/docs/reference/rest/v1/Token
-    // to learn about more information you can extract from Token objects.
-    $tokens = $annotation->tokens();
-    foreach ($tokens as $token) {
-        printf('Token text: %s' . PHP_EOL, $token['text']['content']);
-        printf('Token part of speech: %s' . PHP_EOL, $token['partOfSpeech']['tag']);
-        printf(PHP_EOL);
-    }
 }
 # [END analyze_all]
diff --git a/language/src/analyze_all_from_file.php b/language/src/analyze_all_from_file.php
@@ -24,80 +24,123 @@
 # [START analyze_all_from_file]
 namespace Google\Cloud\Samples\Language;
 
-use Google\Cloud\Language\LanguageClient;
-use Google\Cloud\Storage\StorageClient;
+use Google\Cloud\Language\V1beta2\AnnotateTextRequest\Features;
+use Google\Cloud\Language\V1beta2\Document;
+use Google\Cloud\Language\V1beta2\LanguageServiceClient;
 
 /**
  * Find the everything in text stored in a Cloud Storage bucket.
  * ```
  * analyze_all_from_file('my-bucket', 'file_with_text.txt');;
  * ```
  *
- * @param string $bucketName The Cloud Storage bucket.
- * @param string $objectName The Cloud Storage object with text.
+ * @param string $gcsUri The Cloud Storage URI of the text file, e.g. "gs://my-bucket/file_with_text.txt"
  * @param string $projectId (optional) Your Google Cloud Project ID
  *
  */
-function analyze_all_from_file($bucketName, $objectName, $projectId = null)
+function analyze_all_from_file($gcsUri, $projectId = null)
 {
-    // Create the Cloud Storage object
-    $storage = new StorageClient();
-    $bucket = $storage->bucket($bucketName);
-    $storageObject = $bucket->object($objectName);
-
     // Create the Natural Language client
-    $language = new LanguageClient([
-        'projectId' => $projectId,
-    ]);
+    $languageServiceClient = new LanguageServiceClient(['projectId' => $projectId]);
+
+    // Entities, Mention and Tags mappings
+    $entity_types = [
+        0 => 'UNKNOWN',
+        1 => 'PERSON',
+        2 => 'LOCATION',
+        3 => 'ORGANIZATION',
+        4 => 'EVENT',
+        5 => 'WORK_OF_ART',
+        6 => 'CONSUMER_GOOD',
+        7 => 'OTHER',
+    ];
+
+    $mention_type = [
+        0 => 'TYPE_UNKNOWN',
+        1 => 'PROPER',
+        2 => 'COMMON',
+    ];
 
-    // Call the annotateText function
-    $annotation = $language->annotateText($storageObject, [
-        'features' => ['entities', 'syntax', 'sentiment']
-    ]);
+    $tag_types = [
+        0 => 'UNKNOWN',
+        1 => 'ADJ',
+        2 => 'ADP',
+        3 => 'ADV',
+        4 => 'CONJ',
+        5 => 'DET',
+        6 => 'NOUN',
+        7 => 'NUM',
+        8 => 'PRON',
+        9 => 'PRT',
+        10 => 'PUNCT',
+        11 => 'VERB',
+        12 => 'X',
+        13 => 'AFFIX',
+    ];
 
-    // Print out information about each entity
-    $entities = $annotation->entities();
-    foreach ($entities as $entity) {
-        printf('Name: %s' . PHP_EOL, $entity['name']);
-        printf('Type: %s' . PHP_EOL, $entity['type']);
-        printf('Salience: %s' . PHP_EOL, $entity['salience']);
-        if (array_key_exists('wikipedia_url', $entity['metadata'])) {
-            printf('Wikipedia URL: %s' . PHP_EOL, $entity['metadata']['wikipedia_url']);
+    try {
+        // Create a new Document
+        $document = new Document();
+        // Pass GCS URI and set document type to PLAIN_TEXT
+        $document->setGcsContentUri($gcsUri)->setType(1);
+        // Define features we need to extract.
+        $features = new Features();
+        // Set Features to extract ['entities', 'syntax', 'sentiment']
+        $features->setExtractEntities(true);
+        $features->setExtractSyntax(true);
+        $features->setExtractDocumentSentiment(true);
+        // Collect annotations
+        $response = $languageServiceClient->annotateText($document, $features);
+        // Process Entities
+        $entities = $response->getEntities();
+        foreach ($entities as $entity) {
+            printf('Name: %s' . PHP_EOL, $entity->getName());
+            printf('Type: %s' . PHP_EOL, $entity_types[$entity->getType()]);
+            printf('Salience: %s' . PHP_EOL, $entity->getSalience());
+            if($entity->getMetadata()->offsetExists('wikipedia_url')) {
+                printf('Wikipedia URL: %s' . PHP_EOL, $entity->getMetadata()->offsetGet('wikipedia_url'));
+            }
+            if($entity->getMetadata()->offsetExists('mid')) {
+                printf('Knowledge Graph MID: %s' . PHP_EOL, $entity->getMetadata()->offsetGet('mid'));
+            }
+            printf('Mentions:' . PHP_EOL);
+            foreach ($entity->getMentions() as $mention) {
+                printf('  Begin Offset: %s' . PHP_EOL, $mention->getText()->getBeginOffset());
+                printf('  Content: %s' . PHP_EOL, $mention->getText()->getContent());
+                printf('  Mention Type: %s' . PHP_EOL, $mention_type[$mention->getType()]);
+                printf(PHP_EOL);
+            }
+            printf(PHP_EOL);
         }
-        if (array_key_exists('mid', $entity['metadata'])) {
-            printf('Knowledge Graph MID: %s' . PHP_EOL, $entity['metadata']['mid']);
+        // Process Sentiment
+        $document_sentiment = $response->getDocumentSentiment();
+        // Print document information
+        printf('Document Sentiment:' . PHP_EOL);
+        printf('  Magnitude: %s' . PHP_EOL, $document_sentiment->getMagnitude());
+        printf('  Score: %s' . PHP_EOL, $document_sentiment->getScore());
+        printf(PHP_EOL);
+        $sentences = $response->getSentences();
+        foreach ($sentences as $sentence) {
+            printf('Sentence: %s' . PHP_EOL, $sentence->getText()->getContent());
+            printf('Sentence Sentiment:' . PHP_EOL);
+            $sentiment = $sentence->getSentiment();
+            if ($sentiment) {
+                printf('Entity Magnitude: %s' . PHP_EOL, $sentiment->getMagnitude());
+                printf('Entity Score: %s' . PHP_EOL, $sentiment->getScore());
+            }
+            printf(PHP_EOL);
         }
-        printf('Mentions:' . PHP_EOL);
-        foreach ($entity['mentions'] as $mention) {
-            printf('  Begin Offset: %s' . PHP_EOL, $mention['text']['beginOffset']);
-            printf('  Content: %s' . PHP_EOL, $mention['text']['content']);
-            printf('  Mention Type: %s' . PHP_EOL, $mention['type']);
+        // Process Syntax
+        $tokens = $response->getTokens();
+        // Print out information about each token
+        foreach ($tokens as $token) {
+            printf('Token text: %s' . PHP_EOL, $token->getText()->getContent());
+            printf('Token part of speech: %s' . PHP_EOL, $tag_types[$token->getPartOfSpeech()->getTag()]);
             printf(PHP_EOL);
         }
-        printf(PHP_EOL);
-    }
-
-    // Print document and sentence sentiment information
-    $sentiment = $annotation->sentiment();
-    printf('Document Sentiment:' . PHP_EOL);
-    printf('  Magnitude: %s' . PHP_EOL, $sentiment['magnitude']);
-    printf('  Score: %s' . PHP_EOL, $sentiment['score']);
-    printf(PHP_EOL);
-    foreach ($annotation->sentences() as $sentence) {
-        printf('Sentence: %s' . PHP_EOL, $sentence['text']['content']);
-        printf('Sentence Sentiment:' . PHP_EOL);
-        printf('  Magnitude: %s' . PHP_EOL, $sentence['sentiment']['magnitude']);
-        printf('  Score: %s' . PHP_EOL, $sentence['sentiment']['score']);
-        printf(PHP_EOL);
-    }
 
-    // Print syntax information. See https://cloud.google.com/natural-language/docs/reference/rest/v1/Token
-    // to learn about more information you can extract from Token objects.
-    $tokens = $annotation->tokens();
-    foreach ($tokens as $token) {
-        printf('Token text: %s' . PHP_EOL, $token['text']['content']);
-        printf('Token part of speech: %s' . PHP_EOL, $token['partOfSpeech']['tag']);
-        printf(PHP_EOL);
+    } finally {
+        $languageServiceClient->close();
     }
 }
 # [END analyze_all_from_file]

Original file line number	Diff line number	Diff line change
`@@ -79,7 +79,7 @@`
`79`	`79`	`// Regex to match a Cloud Storage path as the first argument`
`80`	`80`	`// e.g "gs://my-bucket/file_with_text.txt"`
`81`	`81`	`if (preg_match('/^gs:\/\/([a-z0-9\._\-]+)\/(\S+)$/', $content, $matches)) {`
`82`		`- analyze_all_from_file($matches[1], $matches[2], $projectId);`
	`82`	`+ analyze_all_from_file($matches[0], $projectId);`
`83`	`83`	`} else {`
`84`	`84`	`analyze_all($content, $projectId);`
`85`	`85`	`}`