Skip to content

Commit 086b47d

Browse files
authored
splits word-time-offset samples into their own files (GoogleCloudPlatform#416)
1 parent 22c00e2 commit 086b47d

11 files changed

+368
-103
lines changed

speech/README.md

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,53 @@ Configure your project using [Application Default Credentials][adc]
2222

2323
## Usage
2424

25+
To run the Speech Samples:
26+
27+
$ php speech.php
28+
29+
Cloud Speech
30+
31+
Usage:
32+
command [options] [arguments]
33+
34+
Options:
35+
-h, --help Display this help message
36+
-q, --quiet Do not output any message
37+
-V, --version Display this application version
38+
--ansi Force ANSI output
39+
--no-ansi Disable ANSI output
40+
-n, --no-interaction Do not ask any interactive question
41+
-v|vv|vvv, --verbose Increase the verbosity of messages: 1 for normal output, 2 for more verbose output and 3 for debug
42+
43+
Available commands:
44+
help Displays help for a command
45+
list Lists commands
46+
transcribe Transcribe an audio file using Google Cloud Speech API
47+
transcribe-async Transcribe an audio file asynchronously using Google Cloud Speech API
48+
transcribe-async-gcs Transcribe audio asynchronously from a Storage Object using Google Cloud Speech API
49+
transcribe-async-words Transcribe an audio file asynchronously and print word time offsets using Google Cloud Speech API
50+
transcribe-gcs Transcribe audio from a Storage Object using Google Cloud Speech API
51+
transcribe-stream Transcribe a stream of audio using Google Cloud Speech API
52+
transcribe-words Transcribe an audio file and print word time offsets using Google Cloud Speech API
53+
2554
Once you have a speech sample in the proper format, send it through the speech
2655
API using the transcribe command:
2756

2857
```sh
2958
php speech.php transcribe test/data/audio32KHz.raw --encoding LINEAR16 --sample-rate 32000
30-
php speech.php transcribe test/data/audio32KHz.flac --encoding FLAC --sample-rate 32000 --async
59+
php speech.php transcribe-async test/data/audio32KHz.flac --encoding FLAC --sample-rate 32000
60+
php speech.php transcribe-words test/data/audio32KHz.flac --encoding FLAC --sample-rate 32000
3161

3262
```
3363
## Troubleshooting
3464

65+
If you get the following error, set the environment variable `GCLOUD_PROJECT` to your project ID:
66+
67+
```
68+
[Google\Cloud\Core\Exception\GoogleException]
69+
No project ID was provided, and we were unable to detect a default project ID.
70+
```
71+
3572
If you have not set a timezone, you may get an error from PHP. This can be resolved by:
3673

3774
1. Finding where the php.ini is stored by running php -i | grep 'Configuration File'

speech/composer.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
"src/streaming_recognize.php",
1515
"src/transcribe_async.php",
1616
"src/transcribe_async_gcs.php",
17+
"src/transcribe_async_words.php",
1718
"src/transcribe_sync.php",
18-
"src/transcribe_sync_gcs.php"
19+
"src/transcribe_sync_gcs.php",
20+
"src/transcribe_sync_words.php"
1921
]
2022
},
2123
"require-dev": {

speech/speech.php

Lines changed: 153 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -21,90 +21,178 @@
2121
use Symfony\Component\Console\Application;
2222
use Symfony\Component\Console\Command\Command;
2323
use Symfony\Component\Console\Input\InputArgument;
24+
use Symfony\Component\Console\Input\InputDefinition;
2425
use Symfony\Component\Console\Input\InputInterface;
2526
use Symfony\Component\Console\Input\InputOption;
2627
use Symfony\Component\Console\Output\OutputInterface;
2728

29+
$inputDefinition = new InputDefinition([
30+
new InputArgument('audio-file', InputArgument::REQUIRED, 'The audio file to transcribe'),
31+
new InputOption('encoding', null, InputOption::VALUE_REQUIRED,
32+
'The encoding of the audio file. This is required if the encoding is ' .
33+
'unable to be determined. '
34+
),
35+
new InputOption('language-code', null, InputOption::VALUE_REQUIRED,
36+
'The language code for the language used in the source file. ',
37+
'en-US'
38+
),
39+
new InputOption('sample-rate', null, InputOption::VALUE_REQUIRED,
40+
'The sample rate of the audio file in hertz. This is required ' .
41+
'if the sample rate is unable to be determined. '
42+
),
43+
new InputOption('sample-rate', null, InputOption::VALUE_REQUIRED,
44+
'The sample rate of the audio file in hertz. This is required ' .
45+
'if the sample rate is unable to be determined. '
46+
),
47+
]);
48+
2849
$application = new Application('Cloud Speech');
2950
$application->add(new Command('transcribe'))
30-
->setDescription('Transcribe Audio using Google Cloud Speech API')
51+
->setDefinition($inputDefinition)
52+
->setDescription('Transcribe an audio file using Google Cloud Speech API')
3153
->setHelp(<<<EOF
32-
The %command.name% command transcribes audio using the Google Cloud Speech API.
54+
The %command.name% command transcribes audio from a file using the
55+
Google Cloud Speech API.
3356
3457
php %command.full_name% audio_file.wav
3558
3659
EOF
3760
)
38-
->addArgument(
39-
'audio-file',
40-
InputArgument::REQUIRED,
41-
'The audio file to transcribe'
42-
)
43-
->addOption(
44-
'encoding',
45-
null,
46-
InputOption::VALUE_REQUIRED,
47-
'The encoding of the audio file. This is required if the encoding is ' .
48-
'unable to be determined. '
49-
)
50-
->addOption(
51-
'language-code',
52-
null,
53-
InputOption::VALUE_REQUIRED,
54-
'The language code for the language used in the source file. ',
55-
'en-US'
56-
)
57-
->addOption(
58-
'sample-rate',
59-
null,
60-
InputOption::VALUE_REQUIRED,
61-
'The sample rate of the audio file in hertz. This is required ' .
62-
'if the sample rate is unable to be determined. '
61+
->setCode(function (InputInterface $input, OutputInterface $output) {
62+
$audioFile = $input->getArgument('audio-file');
63+
$languageCode = $input->getOption('language-code');
64+
transcribe_sync($audioFile, $languageCode, [
65+
'encoding' => $input->getOption('encoding'),
66+
'sampleRateHertz' => $input->getOption('sample-rate'),
67+
]);
68+
});
69+
70+
$application->add(new Command('transcribe-gcs'))
71+
->setDefinition($inputDefinition)
72+
->setDescription('Transcribe audio from a Storage Object using Google Cloud Speech API')
73+
->setHelp(<<<EOF
74+
The %command.name% command transcribes audio from a Cloud Storage
75+
Object using the Google Cloud Speech API.
76+
77+
php %command.full_name% gs://my-bucket/audio_file.wav
78+
79+
EOF
6380
)
64-
->addOption(
65-
'async',
66-
null,
67-
InputOption::VALUE_NONE,
68-
'Run the transcription asynchronously. '
81+
->setCode(function (InputInterface $input, OutputInterface $output) {
82+
$audioFile = $input->getArgument('audio-file');
83+
$languageCode = $input->getOption('language-code');
84+
if (!preg_match('/^gs:\/\/([a-z0-9\._\-]+)\/(\S+)$/', $audioFile, $matches)) {
85+
throw new \Exception('Invalid file name. Must be gs://[bucket]/[audiofile]');
86+
}
87+
list($bucketName, $objectName) = array_slice($matches, 1);
88+
transcribe_sync_gcs($bucketName, $objectName, $languageCode, [
89+
'encoding' => $input->getOption('encoding'),
90+
'sampleRateHertz' => $input->getOption('sample-rate'),
91+
]);
92+
});
93+
94+
$application->add(new Command('transcribe-words'))
95+
->setDefinition($inputDefinition)
96+
->setDescription('Transcribe an audio file and print word time offsets using Google Cloud Speech API')
97+
->setHelp(<<<EOF
98+
The %command.name% command transcribes audio from a file using the
99+
Google Cloud Speech API and prints word time offsets.
100+
101+
php %command.full_name% audio_file.wav
102+
103+
EOF
69104
)
70-
->addOption(
71-
'stream',
72-
null,
73-
InputOption::VALUE_NONE,
74-
'Stream the audio file.'
105+
->setCode(function (InputInterface $input, OutputInterface $output) {
106+
$audioFile = $input->getArgument('audio-file');
107+
$languageCode = $input->getOption('language-code');
108+
transcribe_sync_words($audioFile, $languageCode, [
109+
'encoding' => $input->getOption('encoding'),
110+
'sampleRateHertz' => $input->getOption('sample-rate'),
111+
]);
112+
});
113+
114+
$application->add(new Command('transcribe-async'))
115+
->setDefinition($inputDefinition)
116+
->setDescription('Transcribe an audio file asynchronously using Google Cloud Speech API')
117+
->setHelp(<<<EOF
118+
The %command.name% command transcribes audio from a file using the
119+
Google Cloud Speech API asynchronously.
120+
121+
php %command.full_name% audio_file.wav
122+
123+
EOF
75124
)
76125
->setCode(function (InputInterface $input, OutputInterface $output) {
77-
$encoding = $input->getOption('encoding');
126+
$audioFile = $input->getArgument('audio-file');
78127
$languageCode = $input->getOption('language-code');
79-
$sampleRate = $input->getOption('sample-rate');
128+
transcribe_async($audioFile, $languageCode, [
129+
'encoding' => $input->getOption('encoding'),
130+
'sampleRateHertz' => $input->getOption('sample-rate'),
131+
]);
132+
});
133+
134+
$application->add(new Command('transcribe-async-gcs'))
135+
->setDefinition($inputDefinition)
136+
->setDescription('Transcribe audio asynchronously from a Storage Object using Google Cloud Speech API')
137+
->setHelp(<<<EOF
138+
The %command.name% command transcribes audio from a Cloud Storage
139+
object asynchronously using the Google Cloud Speech API.
140+
141+
php %command.full_name% gs://my-bucket/audio_file.wav
142+
143+
EOF
144+
)
145+
->setCode(function (InputInterface $input, OutputInterface $output) {
80146
$audioFile = $input->getArgument('audio-file');
81-
$options = [
82-
'encoding' => $encoding,
83-
'languageCode' => $languageCode,
84-
'sampleRateHertz' => $sampleRate,
85-
];
86-
if ($isGcs = preg_match('/^gs:\/\/([a-z0-9\._\-]+)\/(\S+)$/', $audioFile, $matches)) {
87-
list($bucketName, $objectName) = array_slice($matches, 1);
88-
}
89-
if ($isGcs) {
90-
if ($input->getOption('stream')) {
91-
throw new LogicException('Cannot stream from a bucket!');
92-
}
93-
if ($input->getOption('async')) {
94-
transcribe_async_gcs($bucketName, $objectName, $languageCode, $options);
95-
} else {
96-
transcribe_sync_gcs($bucketName, $objectName, $languageCode, $options);
97-
}
98-
} else {
99-
if ($input->getOption('async')) {
100-
transcribe_async($audioFile, $languageCode, $options);
101-
} elseif ($input->getOption('stream')) {
102-
$encodingInt = constant("Google\Cloud\Speech\V1\RecognitionConfig_AudioEncoding::$encoding");
103-
streaming_recognize($audioFile, $languageCode, $encodingInt, $sampleRate);
104-
} else {
105-
transcribe_sync($audioFile, $languageCode, $options);
106-
}
147+
$languageCode = $input->getOption('language-code');
148+
if (!preg_match('/^gs:\/\/([a-z0-9\._\-]+)\/(\S+)$/', $audioFile, $matches)) {
149+
throw new \Exception('Invalid file name. Must be gs://[bucket]/[audiofile]');
107150
}
151+
list($bucketName, $objectName) = array_slice($matches, 1);
152+
transcribe_async_gcs($bucketName, $objectName, $languageCode, [
153+
'encoding' => $input->getOption('encoding'),
154+
'sampleRateHertz' => $input->getOption('sample-rate'),
155+
]);
156+
});
157+
158+
$application->add(new Command('transcribe-async-words'))
159+
->setDefinition($inputDefinition)
160+
->setDescription('Transcribe an audio file asynchronously and print word time offsets using Google Cloud Speech API')
161+
->setHelp(<<<EOF
162+
The %command.name% command transcribes audio from a file using the
163+
Google Cloud Speech API asynchronously and prints word time offsets.
164+
165+
php %command.full_name% audio_file.wav
166+
167+
EOF
168+
)
169+
->setCode(function (InputInterface $input, OutputInterface $output) {
170+
$audioFile = $input->getArgument('audio-file');
171+
$languageCode = $input->getOption('language-code');
172+
transcribe_async_words($audioFile, $languageCode, [
173+
'encoding' => $input->getOption('encoding'),
174+
'sampleRateHertz' => $input->getOption('sample-rate'),
175+
]);
176+
});
177+
178+
$application->add(new Command('transcribe-stream'))
179+
->setDefinition($inputDefinition)
180+
->setDescription('Transcribe a stream of audio using Google Cloud Speech API')
181+
->setHelp(<<<EOF
182+
The %command.name% command transcribes audio from a stream using
183+
the Google Cloud Speech API.
184+
185+
php %command.full_name% audio_file.wav
186+
187+
EOF
188+
)
189+
->setCode(function (InputInterface $input, OutputInterface $output) {
190+
streaming_recognize(
191+
$input->getArgument('audio-file'),
192+
$input->getOption('language-code'),
193+
$input->getOption('encoding'),
194+
$input->getOption('sample-rate')
195+
);
108196
});
109197

110198
// for testing

speech/src/streaming_recognize.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
use Google\Cloud\Speech\V1\RecognitionConfig;
2929
use Google\Cloud\Speech\V1\StreamingRecognitionConfig;
3030
use Google\Cloud\Speech\V1\StreamingRecognizeRequest;
31+
use Google\Cloud\Speech\V1\RecognitionConfig_AudioEncoding;
3132

3233
/**
3334
* Transcribe an audio file using Google Cloud Speech API
@@ -64,8 +65,10 @@ function streaming_recognize($audioFile, $languageCode, $encoding, $sampleRateHe
6465
try {
6566
$config = new RecognitionConfig();
6667
$config->setLanguageCode($languageCode);
67-
$config->setEncoding($encoding);
6868
$config->setSampleRateHertz($sampleRateHertz);
69+
// encoding must be an enum, convert from string
70+
$encodingEnum = constant(RecognitionConfig_AudioEncoding::class . '::' . $encoding);
71+
$config->setEncoding($encodingEnum);
6972

7073
$strmConfig = new StreamingRecognitionConfig();
7174
$strmConfig->setConfig($config);

speech/src/transcribe_async.php

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,6 @@ function transcribe_async($audioFile, $languageCode = 'en-US', $options = [])
4949
'languageCode' => $languageCode,
5050
]);
5151

52-
// When true, time offsets for every word will be included in the response.
53-
$options['enableWordTimeOffsets'] = true;
54-
5552
// Create the asynchronous recognize operation
5653
$operation = $speech->beginRecognizeOperation(
5754
fopen($audioFile, 'r'),
@@ -74,12 +71,6 @@ function transcribe_async($audioFile, $languageCode = 'en-US', $options = [])
7471
foreach ($alternatives as $alternative) {
7572
printf('Transcript: %s' . PHP_EOL, $alternative['transcript']);
7673
printf('Confidence: %s' . PHP_EOL, $alternative['confidence']);
77-
foreach ($alternative['words'] as $wordInfo) {
78-
printf(' Word: %s (start: %s, end: %s)' . PHP_EOL,
79-
$wordInfo['word'],
80-
$wordInfo['startTime'],
81-
$wordInfo['endTime']);
82-
}
8374
}
8475
}
8576
}

speech/src/transcribe_async_gcs.php

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,6 @@ function transcribe_async_gcs($bucketName, $objectName, $languageCode = 'en-US',
5555
$storage = new StorageClient();
5656
$object = $storage->bucket($bucketName)->object($objectName);
5757

58-
// When true, time offsets for every word will be included in the response.
59-
$options['enableWordTimeOffsets'] = true;
60-
6158
// Create the asynchronous recognize operation
6259
$operation = $speech->beginRecognizeOperation(
6360
$object,
@@ -80,12 +77,6 @@ function transcribe_async_gcs($bucketName, $objectName, $languageCode = 'en-US',
8077
foreach ($alternatives as $alternative) {
8178
printf('Transcript: %s' . PHP_EOL, $alternative['transcript']);
8279
printf('Confidence: %s' . PHP_EOL, $alternative['confidence']);
83-
foreach ($alternative['words'] as $wordInfo) {
84-
printf(' Word: %s (start: %s, end: %s)' . PHP_EOL,
85-
$wordInfo['word'],
86-
$wordInfo['startTime'],
87-
$wordInfo['endTime']);
88-
}
8980
}
9081
}
9182
}

0 commit comments

Comments
 (0)