Speech to text

Download all scripts: speech_to_text.zip

Set up a virtual environment

https://docs.python.org/3/tutorial/venv.html

# Create a new virtual environment
$ python3 -m venv venv-COURSE

# Activate the virtual environment
# macOS or Unix
$ source venv-COURSE/bin/activate

# Windows
$ venv-COURSE\Scripts\activate.bat

speech_to_text.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# This example is based on https://github.com/Uberi/speech_recognition/blob/master/examples/microphone_recognition.py

import speech_recognition  # Requires PyAudio
 

# Language code passed to the recognition service.
language = 'en-US'

# Message templates for the possible outcomes of the recognition request.
success_template = "Google Speech Recognition thinks you said '{}'."
request_error_template = 'Could not request results from Google Speech Recognition service; {}'

# Record audio from the microphone.
recognizer = speech_recognition.Recognizer()
with speech_recognition.Microphone() as source:
    print('Say something!')
    audio = recognizer.listen(source)

# Ask Google Speech Recognition to turn the recording into text.
#
# To use the service you have to register (for free) at Google and request an
# API key. It also works without one, since Google kindly provides a default
# API key for testing purposes; that default key is what is used here. To use
# another key, additionally pass key='GOOGLE_SPEECH_RECOGNITION_API_KEY'.
try:
    text = recognizer.recognize_google(audio, language=language)
except speech_recognition.UnknownValueError:
    print('Google Speech Recognition could not understand audio')
except speech_recognition.RequestError as error:
    print(request_error_template.format(error))
else:
    # Only reached when the request succeeded.
    print(success_template.format(text))

'''
SpeechRecognition
	https://pypi.python.org/pypi/SpeechRecognition/
	https://github.com/Uberi/speech_recognition

PyAudio
	https://pypi.python.org/pypi/PyAudio
	http://people.csail.mit.edu/hubert/pyaudio/

'''

speech_to_text_multilingual.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# This example is based on https://github.com/Uberi/speech_recognition/blob/master/examples/microphone_recognition.py

import argparse  # Used to parse command line arguments
import speech_recognition  # Requires PyAudio
 
# Short language codes accepted on the command line, mapped to the language
# codes passed to the recognition service. The --language choices below are
# derived from this table, so a new language only has to be added here.
languages = {
    'de': 'de-DE',
    'en': 'en-US',
}

# Setup argument parser
parser = argparse.ArgumentParser(description='Convert audio to text.')
parser.add_argument('-v', '--verbose', help='Comment what\'s going on', default=False, action='store_true')
parser.add_argument('-l', '--language', help='Select the language', default='en', choices=sorted(languages))
args = parser.parse_args()

language = languages[args.language]

# Record audio from the microphone
recognizer = speech_recognition.Recognizer()
with speech_recognition.Microphone() as source:
    print('Say something.')
    audio = recognizer.listen(source)

# Try to convert audio to text using Google Speech Recognition
try:
    # To use the service you have to register (for free) at Google and request an API key.
    # The service also works without an API key, since Google kindly provides a default API key for testing purposes.
    # To use another API key, additionally pass key='GOOGLE_SPEECH_RECOGNITION_API_KEY'.
    text = recognizer.recognize_google(audio, language=language)  # Using the service with Google's default API key
except speech_recognition.UnknownValueError:
    # Raising SystemExit with a string writes the message to stderr and ends
    # the script with exit status 1, so a failure is not mistaken for
    # recognized text by whoever reads this script's standard output.
    raise SystemExit('Google Speech Recognition could not understand audio')
except speech_recognition.RequestError as exception:
    raise SystemExit('Could not request results from Google Speech Recognition service; {}'.format(exception))

# Verbose mode wraps the result in a sentence; otherwise only the bare text is printed.
if args.verbose:
    print('Google Speech Recognition thinks you said \'{}\'.'.format(text))
else:
    print(text)

'''
SpeechRecognition
	https://pypi.python.org/pypi/SpeechRecognition/
	https://github.com/Uberi/speech_recognition

PyAudio
	https://pypi.python.org/pypi/PyAudio
	http://people.csail.mit.edu/hubert/pyaudio/

'''