Skip to content

Commit bfef34d

Browse files
xinjiez
authored and
Jon Wayne Parrott
committed
Changing cloud speech code samples to work with v1beta1 (GoogleCloudPlatform#399)
1 parent 15b7063 commit bfef34d

File tree

6 files changed

+50
-51
lines changed

6 files changed

+50
-51
lines changed

speech/api/grpc_auth.py

Whitespace-only changes.
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
gcloud==0.17.0
22
grpcio==0.14.0
33
PyAudio==0.2.9
4-
grpc-google-cloud-speech==1.0.4
4+
grpc-google-cloud-speech-v1beta1==1.0.0

speech/api/speech_gcs.py

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
import argparse
1919

2020
from gcloud.credentials import get_credentials
21-
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
21+
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
2222
from grpc.beta import implementations
2323

2424
# Keep the request alive for this many seconds
@@ -48,25 +48,23 @@ def make_channel(host, port):
4848
return implementations.secure_channel(host, port, composite_channel)
4949

5050

51-
def main(input_uri, output_uri, encoding, sample_rate):
51+
def main(input_uri, encoding, sample_rate):
5252
service = cloud_speech.beta_create_Speech_stub(
5353
make_channel('speech.googleapis.com', 443))
5454
# The method and parameters can be inferred from the proto from which the
5555
# grpc client lib was generated. See:
56-
# https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
57-
response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
58-
initial_request=cloud_speech.InitialRecognizeRequest(
56+
# https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
57+
response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest(
58+
config=cloud_speech.RecognitionConfig(
5959
encoding=encoding,
6060
sample_rate=sample_rate,
61-
output_uri=output_uri,
6261
),
63-
audio_request=cloud_speech.AudioRequest(
62+
audio=cloud_speech.RecognitionAudio(
6463
uri=input_uri,
6564
)
6665
), DEADLINE_SECS)
67-
# This shouldn't actually print anything, since the transcription is output
68-
# to the GCS uri specified
69-
print(response.responses)
66+
# Print the recognition results.
67+
print(response.results)
7068

7169

7270
def _gcs_uri(text):
@@ -77,16 +75,15 @@ def _gcs_uri(text):
7775

7876

7977
PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
80-
'google/cloud/speech/v1/cloud_speech.proto')
78+
'google/cloud/speech/v1beta1/cloud_speech.proto')
8179
if __name__ == '__main__':
8280
parser = argparse.ArgumentParser()
8381
parser.add_argument('input_uri', type=_gcs_uri)
84-
parser.add_argument('output_uri', type=_gcs_uri)
8582
parser.add_argument(
8683
'--encoding', default='FLAC', choices=[
8784
'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
8885
help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
8986
parser.add_argument('--sample_rate', default=16000)
9087

9188
args = parser.parse_args()
92-
main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
89+
main(args.input_uri, args.encoding, args.sample_rate)

speech/api/speech_gcs_test.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
# See the License for the specific language governing permissions and
1212
# limitations under the License.
1313

14+
import re
1415
import sys
1516

1617
import pytest
@@ -24,12 +25,11 @@
2425
'https://github.com/grpc/grpc/issues/282'))
2526
def test_main(cloud_config, capsys):
2627
input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket)
27-
output_uri = 'gs://{}/speech/audio.txt'.format(cloud_config.storage_bucket)
2828

29-
main(input_uri, output_uri, 'FLAC', 16000)
29+
main(input_uri, 'FLAC', 16000)
3030

3131
out, err = capsys.readouterr()
32-
assert '[]\n' == out
32+
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
3333

3434

3535
def test_gcs_uri():

speech/api/speech_rest.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ def get_speech_service():
4040
credentials.authorize(http)
4141

4242
return discovery.build(
43-
'speech', 'v1', http=http, discoveryServiceUrl=DISCOVERY_URL)
43+
'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL)
4444
# [END authenticating]
4545

4646

@@ -57,13 +57,13 @@ def main(speech_file):
5757
speech_content = base64.b64encode(speech.read())
5858

5959
service = get_speech_service()
60-
service_request = service.speech().recognize(
60+
service_request = service.speech().syncrecognize(
6161
body={
62-
'initialRequest': {
62+
'config': {
6363
'encoding': 'LINEAR16',
6464
'sampleRate': 16000
6565
},
66-
'audioRequest': {
66+
'audio': {
6767
'content': speech_content.decode('UTF-8')
6868
}
6969
})

speech/api/speech_streaming.py

Lines changed: 32 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -14,20 +14,22 @@
1414
# limitations under the License.
1515
"""Sample that streams audio to the Google Cloud Speech API via GRPC."""
1616

17+
from __future__ import division
18+
1719
import contextlib
1820
import re
1921
import threading
2022

2123
from gcloud.credentials import get_credentials
22-
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
24+
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
2325
from google.rpc import code_pb2
2426
from grpc.beta import implementations
2527
import pyaudio
2628

2729
# Audio recording parameters
2830
RATE = 16000
2931
CHANNELS = 1
30-
CHUNK = RATE // 10 # 100ms
32+
CHUNK = int(RATE / 10) # 100ms
3133

3234
# Keep the request alive for this many seconds
3335
DEADLINE_SECS = 8 * 60 * 60
@@ -43,15 +45,15 @@ def make_channel(host, port):
4345
creds = get_credentials().create_scoped([SPEECH_SCOPE])
4446
# Add a plugin to inject the creds into the header
4547
auth_header = (
46-
'Authorization',
47-
'Bearer ' + creds.get_access_token().access_token)
48+
'Authorization',
49+
'Bearer ' + creds.get_access_token().access_token)
4850
auth_plugin = implementations.metadata_call_credentials(
49-
lambda _, cb: cb([auth_header], None),
50-
name='google_creds')
51+
lambda _, cb: cb([auth_header], None),
52+
name='google_creds')
5153

5254
# compose the two together for both ssl and google auth
5355
composite_channel = implementations.composite_channel_credentials(
54-
ssl_channel, auth_plugin)
56+
ssl_channel, auth_plugin)
5557

5658
return implementations.secure_channel(host, port, composite_channel)
5759

@@ -75,41 +77,40 @@ def record_audio(channels, rate, chunk):
7577

7678

7779
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
78-
"""Yields `RecognizeRequest`s constructed from a recording audio stream.
80+
"""Yields `StreamingRecognizeRequest`s constructed from a recording audio
81+
stream.
7982
8083
Args:
8184
stop_audio: A threading.Event object stops the recording when set.
8285
channels: How many audio channels to record.
8386
rate: The sampling rate.
8487
chunk: Buffer audio into chunks of this size before sending to the api.
8588
"""
86-
with record_audio(channels, rate, chunk) as audio_stream:
87-
# The initial request must contain metadata about the stream, so the
88-
# server knows how to interpret it.
89-
metadata = cloud_speech.InitialRecognizeRequest(
90-
encoding='LINEAR16', sample_rate=rate,
91-
# Note that setting interim_results to True means that you'll
92-
# likely get multiple results for the same bit of audio, as the
93-
# system re-interprets audio in the context of subsequent audio.
94-
# However, this will give us quick results without having to tell
95-
# the server when to finalize a piece of audio.
96-
interim_results=True, continuous=False,
97-
)
98-
data = audio_stream.read(chunk)
99-
audio_request = cloud_speech.AudioRequest(content=data)
100-
101-
yield cloud_speech.RecognizeRequest(
102-
initial_request=metadata,
103-
audio_request=audio_request)
89+
# The initial request must contain metadata about the stream, so the
90+
# server knows how to interpret it.
91+
recognition_config = cloud_speech.RecognitionConfig(
92+
encoding='LINEAR16', sample_rate=rate)
93+
streaming_config = cloud_speech.StreamingRecognitionConfig(
94+
config=recognition_config,
95+
# Note that setting interim_results to True means that you'll likely
96+
# get multiple results for the same bit of audio, as the system
97+
# re-interprets audio in the context of subsequent audio. However, this
98+
# will give us quick results without having to tell the server when to
99+
# finalize a piece of audio.
100+
interim_results=True, single_utterance=True
101+
)
102+
103+
yield cloud_speech.StreamingRecognizeRequest(
104+
streaming_config=streaming_config)
104105

106+
with record_audio(channels, rate, chunk) as audio_stream:
105107
while not stop_audio.is_set():
106108
data = audio_stream.read(chunk)
107109
if not data:
108110
raise StopIteration()
109-
# Subsequent requests can all just have the content
110-
audio_request = cloud_speech.AudioRequest(content=data)
111111

112-
yield cloud_speech.RecognizeRequest(audio_request=audio_request)
112+
# Subsequent requests can all just have the content
113+
yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
113114

114115

115116
def listen_print_loop(recognize_stream):
@@ -136,7 +137,8 @@ def main():
136137
make_channel('speech.googleapis.com', 443)) as service:
137138
try:
138139
listen_print_loop(
139-
service.Recognize(request_stream(stop_audio), DEADLINE_SECS))
140+
service.StreamingRecognize(
141+
request_stream(stop_audio), DEADLINE_SECS))
140142
finally:
141143
# Stop the request stream once we're done with the loop - otherwise
142144
# it'll keep going in the thread that the grpc lib makes for it..

0 commit comments

Comments (0)