Skip to content

Commit bfef34d

Browse files
xinjiez
authored and
Jon Wayne Parrott
committed
Changing cloud speech code samples to work with v1beta1 (GoogleCloudPlatform#399)
1 parent 15b7063 commit bfef34d

File tree

6 files changed

+50
-51
lines changed

6 files changed

+50
-51
lines changed

speech/api/grpc_auth.py

Whitespace-only changes.
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
gcloud==0.17.0
22
grpcio==0.14.0
33
PyAudio==0.2.9
4-
grpc-google-cloud-speech==1.0.4
4+
grpc-google-cloud-speech-v1beta1==1.0.0

speech/api/speech_gcs.py

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
import argparse
1919

2020
from gcloud.credentials import get_credentials
21-
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
21+
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
2222
from grpc.beta import implementations
2323

2424
# Keep the request alive for this many seconds
@@ -48,25 +48,23 @@ def make_channel(host, port):
4848
return implementations.secure_channel(host, port, composite_channel)
4949

5050

51-
def main(input_uri, output_uri, encoding, sample_rate):
51+
def main(input_uri, encoding, sample_rate):
5252
service = cloud_speech.beta_create_Speech_stub(
5353
make_channel('speech.googleapis.com', 443))
5454
# The method and parameters can be inferred from the proto from which the
5555
# grpc client lib was generated. See:
56-
# https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
57-
response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
58-
initial_request=cloud_speech.InitialRecognizeRequest(
56+
# https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
57+
response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest(
58+
config=cloud_speech.RecognitionConfig(
5959
encoding=encoding,
6060
sample_rate=sample_rate,
61-
output_uri=output_uri,
6261
),
63-
audio_request=cloud_speech.AudioRequest(
62+
audio=cloud_speech.RecognitionAudio(
6463
uri=input_uri,
6564
)
6665
), DEADLINE_SECS)
67-
# This shouldn't actually print anything, since the transcription is output
68-
# to the GCS uri specified
69-
print(response.responses)
66+
# Print the recognition results.
67+
print(response.results)
7068

7169

7270
def _gcs_uri(text):
@@ -77,16 +75,15 @@ def _gcs_uri(text):
7775

7876

7977
PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
80-
'google/cloud/speech/v1/cloud_speech.proto')
78+
'google/cloud/speech/v1beta1/cloud_speech.proto')
8179
if __name__ == '__main__':
8280
parser = argparse.ArgumentParser()
8381
parser.add_argument('input_uri', type=_gcs_uri)
84-
parser.add_argument('output_uri', type=_gcs_uri)
8582
parser.add_argument(
8683
'--encoding', default='FLAC', choices=[
8784
'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
8885
help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
8986
parser.add_argument('--sample_rate', default=16000)
9087

9188
args = parser.parse_args()
92-
main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
89+
main(args.input_uri, args.encoding, args.sample_rate)

speech/api/speech_gcs_test.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
# See the License for the specific language governing permissions and
1212
# limitations under the License.
1313

14+
import re
1415
import sys
1516

1617
import pytest
@@ -24,12 +25,11 @@
2425
'https://github.com/grpc/grpc/issues/282'))
2526
def test_main(cloud_config, capsys):
2627
input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket)
27-
output_uri = 'gs://{}/speech/audio.txt'.format(cloud_config.storage_bucket)
2828

29-
main(input_uri, output_uri, 'FLAC', 16000)
29+
main(input_uri, 'FLAC', 16000)
3030

3131
out, err = capsys.readouterr()
32-
assert '[]\n' == out
32+
assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
3333

3434

3535
def test_gcs_uri():

speech/api/speech_rest.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ def get_speech_service():
4040
credentials.authorize(http)
4141

4242
return discovery.build(
43-
'speech', 'v1', http=http, discoveryServiceUrl=DISCOVERY_URL)
43+
'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL)
4444
# [END authenticating]
4545

4646

@@ -57,13 +57,13 @@ def main(speech_file):
5757
speech_content = base64.b64encode(speech.read())
5858

5959
service = get_speech_service()
60-
service_request = service.speech().recognize(
60+
service_request = service.speech().syncrecognize(
6161
body={
62-
'initialRequest': {
62+
'config': {
6363
'encoding': 'LINEAR16',
6464
'sampleRate': 16000
6565
},
66-
'audioRequest': {
66+
'audio': {
6767
'content': speech_content.decode('UTF-8')
6868
}
6969
})

speech/api/speech_streaming.py

Lines changed: 32 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -14,20 +14,22 @@
1414
# limitations under the License.
1515
"""Sample that streams audio to the Google Cloud Speech API via GRPC."""
1616

17+
from __future__ import division
18+
1719
import contextlib
1820
import re
1921
import threading
2022

2123
from gcloud.credentials import get_credentials
22-
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
24+
from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
2325
from google.rpc import code_pb2
2426
from grpc.beta import implementations
2527
import pyaudio
2628

2729
# Audio recording parameters
2830
RATE = 16000
2931
CHANNELS = 1
30-
CHUNK = RATE // 10 # 100ms
32+
CHUNK = int(RATE / 10) # 100ms
3133

3234
# Keep the request alive for this many seconds
3335
DEADLINE_SECS = 8 * 60 * 60
@@ -43,15 +45,15 @@ def make_channel(host, port):
4345
creds = get_credentials().create_scoped([SPEECH_SCOPE])
4446
# Add a plugin to inject the creds into the header
4547
auth_header = (
46-
'Authorization',
47-
'Bearer ' + creds.get_access_token().access_token)
48+
'Authorization',
49+
'Bearer ' + creds.get_access_token().access_token)
4850
auth_plugin = implementations.metadata_call_credentials(
49-
lambda _, cb: cb([auth_header], None),
50-
name='google_creds')
51+
lambda _, cb: cb([auth_header], None),
52+
name='google_creds')
5153

5254
# compose the two together for both ssl and google auth
5355
composite_channel = implementations.composite_channel_credentials(
54-
ssl_channel, auth_plugin)
56+
ssl_channel, auth_plugin)
5557

5658
return implementations.secure_channel(host, port, composite_channel)
5759

@@ -75,41 +77,40 @@ def record_audio(channels, rate, chunk):
7577

7678

7779
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
78-
"""Yields `RecognizeRequest`s constructed from a recording audio stream.
80+
"""Yields `StreamingRecognizeRequest`s constructed from a recording audio
81+
stream.
7982
8083
Args:
8184
stop_audio: A threading.Event object stops the recording when set.
8285
channels: How many audio channels to record.
8386
rate: The sampling rate.
8487
chunk: Buffer audio into chunks of this size before sending to the api.
8588
"""
86-
with record_audio(channels, rate, chunk) as audio_stream:
87-
# The initial request must contain metadata about the stream, so the
88-
# server knows how to interpret it.
89-
metadata = cloud_speech.InitialRecognizeRequest(
90-
encoding='LINEAR16', sample_rate=rate,
91-
# Note that setting interim_results to True means that you'll
92-
# likely get multiple results for the same bit of audio, as the
93-
# system re-interprets audio in the context of subsequent audio.
94-
# However, this will give us quick results without having to tell
95-
# the server when to finalize a piece of audio.
96-
interim_results=True, continuous=False,
97-
)
98-
data = audio_stream.read(chunk)
99-
audio_request = cloud_speech.AudioRequest(content=data)
100-
101-
yield cloud_speech.RecognizeRequest(
102-
initial_request=metadata,
103-
audio_request=audio_request)
89+
# The initial request must contain metadata about the stream, so the
90+
# server knows how to interpret it.
91+
recognition_config = cloud_speech.RecognitionConfig(
92+
encoding='LINEAR16', sample_rate=rate)
93+
streaming_config = cloud_speech.StreamingRecognitionConfig(
94+
config=recognition_config,
95+
# Note that setting interim_results to True means that you'll likely
96+
# get multiple results for the same bit of audio, as the system
97+
# re-interprets audio in the context of subsequent audio. However, this
98+
# will give us quick results without having to tell the server when to
99+
# finalize a piece of audio.
100+
interim_results=True, single_utterance=True
101+
)
102+
103+
yield cloud_speech.StreamingRecognizeRequest(
104+
streaming_config=streaming_config)
104105

106+
with record_audio(channels, rate, chunk) as audio_stream:
105107
while not stop_audio.is_set():
106108
data = audio_stream.read(chunk)
107109
if not data:
108110
raise StopIteration()
109-
# Subsequent requests can all just have the content
110-
audio_request = cloud_speech.AudioRequest(content=data)
111111

112-
yield cloud_speech.RecognizeRequest(audio_request=audio_request)
112+
# Subsequent requests can all just have the content
113+
yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
113114

114115

115116
def listen_print_loop(recognize_stream):
@@ -136,7 +137,8 @@ def main():
136137
make_channel('speech.googleapis.com', 443)) as service:
137138
try:
138139
listen_print_loop(
139-
service.Recognize(request_stream(stop_audio), DEADLINE_SECS))
140+
service.StreamingRecognize(
141+
request_stream(stop_audio), DEADLINE_SECS))
140142
finally:
141143
# Stop the request stream once we're done with the loop - otherwise
142144
# it'll keep going in the thread that the grpc lib makes for it..

0 commit comments

Comments (0)