Speech to text
Download all scripts: speech_to_text.zip
Set up a virtual environment
https://docs.python.org/3/tutorial/venv.html
# Create a new virtual environment
$ python3 -m venv venv-COURSE
# Activate the virtual environment
# macOS or Unix
$ source venv-COURSE/bin/activate
# Windows
$ venv-COURSE\Scripts\activate.bat
speech_to_text.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# This example is based on https://github.com/Uberi/speech_recognition/blob/master/examples/microphone_recognition.py
import speech_recognition # Requires PyAudio
# Language code passed to recognize_google below.
language = 'en-US'

# Record audio from the microphone while it is open as the audio source.
recognizer = speech_recognition.Recognizer()
with speech_recognition.Microphone() as source:
    print('Say something!')
    audio = recognizer.listen(source)

# Try to convert the recorded audio to text using Google Speech Recognition.
# A personal API key can be requested (for free) by registering at Google;
# the service also works without one, because Google provides a default API
# key for testing purposes. To use another key, call instead:
# text = recognizer.recognize_google(audio, language=language, key='GOOGLE_SPEECH_RECOGNITION_API_KEY')
try:
    # Only the service call is guarded; it uses Google's default API key.
    text = recognizer.recognize_google(audio, language=language)
except speech_recognition.UnknownValueError:
    print('Google Speech Recognition could not understand audio')
except speech_recognition.RequestError as error:
    print('Could not request results from Google Speech Recognition service; {}'.format(error))
else:
    # Reached only when recognize_google returned without raising.
    print("Google Speech Recognition thinks you said '{}'.".format(text))
'''
SpeechRecognition
https://pypi.python.org/pypi/SpeechRecognition/
https://github.com/Uberi/speech_recognition
PyAudio
https://pypi.python.org/pypi/PyAudio
http://people.csail.mit.edu/hubert/pyaudio/
'''
speech_to_text_multilingual.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# This example is based on https://github.com/Uberi/speech_recognition/blob/master/examples/microphone_recognition.py
import argparse # Used to parse command line arguments
import speech_recognition # Requires PyAudio
# Command-line interface: an optional verbosity switch and a language selector.
parser = argparse.ArgumentParser(description='Convert audio to text.')
parser.add_argument(
    '-v', '--verbose',
    action='store_true',
    default=False,
    help="Comment what's going on",
)
parser.add_argument(
    '-l', '--language',
    choices=['de', 'en'],
    default='en',
    help='Select the language',
)
args = parser.parse_args()

# Map the short command-line choice to the language code passed to
# recognize_google below.
languages = {'de': 'de-DE', 'en': 'en-US'}
language = languages[args.language]

# Record audio from the microphone while it is open as the audio source.
recognizer = speech_recognition.Recognizer()
with speech_recognition.Microphone() as source:
    print('Say something.')
    audio = recognizer.listen(source)

# Try to convert the recorded audio to text using Google Speech Recognition.
# A personal API key can be requested (for free) by registering at Google;
# the service also works without one, because Google provides a default API
# key for testing purposes. To use another key, call instead:
# text = recognizer.recognize_google(audio, language=language, key='GOOGLE_SPEECH_RECOGNITION_API_KEY')
try:
    # Only the service call is guarded; it uses Google's default API key.
    text = recognizer.recognize_google(audio, language=language)
except speech_recognition.UnknownValueError:
    print('Google Speech Recognition could not understand audio')
except speech_recognition.RequestError as error:
    print('Could not request results from Google Speech Recognition service; {}'.format(error))
else:
    # Verbose mode wraps the transcript in a sentence; otherwise only the
    # bare transcript is printed.
    print(
        "Google Speech Recognition thinks you said '{}'.".format(text)
        if args.verbose
        else text
    )
'''
SpeechRecognition
https://pypi.python.org/pypi/SpeechRecognition/
https://github.com/Uberi/speech_recognition
PyAudio
https://pypi.python.org/pypi/PyAudio
http://people.csail.mit.edu/hubert/pyaudio/
'''