#!/usr/bin/python

# Adds a lead-in message to each mp3 file of a directory storing the result in another directory.
# So - when played e.g. on a TonUINO - you first will hear the title of the track, then the track itself.


import argparse
import base64
import json
import os
import re
import subprocess
import sys
import text_to_speech


argFormatter = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=27, width=100)
argparser = text_to_speech.PatchedArgumentParser(
    description=
        'Adds a lead-in message to each mp3 file of a directory storing the result in another directory.\n' +
        'So - when played e.g. on a TonUINO - you first will hear the title of the track, then the track itself.\n\n' +
        text_to_speech.textToSpeechDescription,
    usage='%(prog)s -i my/source/dir -o my/output/dir [optional arguments...]',
    formatter_class=argFormatter)
argparser.add_argument('-i', '--input', type=str, required=True, help='The input directory or mp3 file to process (input won\'t be changed)')
argparser.add_argument('-o', '--output', type=str, required=True, help='The output directory where to write the mp3 files (will be created if not existing)')
text_to_speech.addArgumentsToArgparser(argparser)
argparser.add_argument('--file-regex', type=str, default=None, help="The regular expression to use for parsing the mp3 file name. If missing the whole file name except a leading number will be used as track title.")
argparser.add_argument('--title-pattern', type=str, default=None, help="The pattern to use as track title. May contain groups of `--file-regex`, e.g. '\\1'")
argparser.add_argument('--add-numbering', action='store_true', help='Whether to add a three-digit number to the mp3 files (suitable for DFPlayer Mini)')
argparser.add_argument('--dry-run', action='store_true', help='Dry run: Only prints what the script would do, without actually creating files')
args = argparser.parse_args()

text_to_speech.checkArgs(argparser, args)

# Default: drop leading digits from the file name and speak the remainder.
fileRegex = re.compile(args.file_regex if args.file_regex is not None else '\\d*(.*)')
titlePattern = args.title_pattern if args.title_pattern is not None else '\\1'

# Number of mp3 files numbered so far in the directory currently being processed
# (only used with `--add-numbering`).
mp3FileIndex = 0


def fail(msg):
    """Print `msg` as an error and terminate the script with exit status 1."""
    print('ERROR: ' + msg)
    sys.exit(1)


def addLeadInMessage(inputPath, outputPath):
    """Write a copy of `inputPath` to `outputPath` with a spoken title in front.

    If `inputPath` is a directory, it is processed recursively and `outputPath`
    is created as a directory (unless `--dry-run` is set). Files whose
    extension is not `.mp3` are ignored, and existing output files are skipped.
    The spoken title is derived from the input file name using `fileRegex` and
    `titlePattern`; underscores are spoken as spaces.
    """
    global mp3FileIndex

    if not os.path.exists(inputPath):
        fail('Input does not exist: ' + os.path.abspath(inputPath))

    if os.path.isdir(inputPath):
        if os.path.exists(outputPath):
            if not os.path.isdir(outputPath):
                fail('Input is a directory, but output isn\'t: ' + os.path.abspath(outputPath))
        elif not args.dry_run:
            os.mkdir(outputPath)

        # Numbering restarts in every directory.
        mp3FileIndex = 0
        for child in sorted(os.listdir(inputPath)):
            addLeadInMessage(os.path.join(inputPath, child), os.path.join(outputPath, child))

        return

    inputFileName, inputFileExt = os.path.splitext(os.path.basename(inputPath))

    if inputFileExt.lower() != '.mp3':
        print('Ignoring {} (no mp3 file)'.format(os.path.abspath(inputPath)))
        return

    if args.add_numbering:
        outputDir, outputFileName = os.path.split(outputPath)
        outputPath = os.path.join(outputDir, '{:0>3}_{}'.format(mp3FileIndex + 1, outputFileName))
        mp3FileIndex += 1

    if os.path.isfile(outputPath):
        print('Skipping {} (file already exists)'.format(os.path.abspath(outputPath)))
        return

    text = fileRegex.sub(titlePattern, inputFileName).replace('_', ' ').strip()
    print('Adding lead-in "{}" to {}'.format(text, os.path.abspath(outputPath)))

    if args.dry_run:
        return

    tempLeadInFile = 'temp-lead-in.mp3'
    tempLeadInFileAdjusted = 'temp-lead-in_adjusted.mp3'
    try:
        text_to_speech.textToSpeechUsingArgs(text=text, targetFile=tempLeadInFile, args=args)

        # Adjust sample rate and mono/stereo so both parts of the concatenation match.
        leadInFile = tempLeadInFile
        print('Detecting sample rate and channels')
        detectionInfo = detectAudioData(inputPath)
        if detectionInfo is None:
            # We can't adjust
            print('Detecting sample rate and channels failed -> Skipping adjustment')
        else:
            print('Adjust sample rate to {} and channels to {}'.format(detectionInfo['sampleRate'], detectionInfo['channels']))
            # `-y`: overwrite a stale temp file instead of letting ffmpeg prompt for it.
            adjustStatus = subprocess.call([ 'ffmpeg', '-y', '-i', tempLeadInFile, '-vn', '-ar', detectionInfo['sampleRate'], '-ac', detectionInfo['channels'], tempLeadInFileAdjusted ])
            if adjustStatus == 0:
                leadInFile = tempLeadInFileAdjusted
            else:
                print('Adjusting failed (ffmpeg exit status {}) -> Skipping adjustment'.format(adjustStatus))

        print('Concat')
        # NOTE(review): `-map_metadata` is given after the output file. ffmpeg documents options as
        # applying to the next specified file, so it probably has no effect here - confirm before moving it.
        concatStatus = subprocess.call([ 'ffmpeg', '-i', 'concat:{}|{}'.format(leadInFile, inputPath), '-acodec', 'copy', outputPath, '-map_metadata', '0:1' ])
        if concatStatus != 0:
            print('WARNING: ffmpeg exit status {} while creating {}'.format(concatStatus, os.path.abspath(outputPath)))
    finally:
        # Remove each temp file at most once, and only if it exists: the adjusted file
        # is never created when detection or adjustment fails.
        for tempFile in (tempLeadInFile, tempLeadInFileAdjusted):
            if os.path.exists(tempFile):
                os.remove(tempFile)

    print('\n')


def detectAudioData(mp3File):
    """Detect sample rate and channel count of `mp3File` using `ffmpeg -i`.

    Returns a dict with the string values `sampleRate` (digits, in Hz) and
    `channels` ('1' or '2'), or None if ffmpeg could not be run or its output
    contains no matching mp3 audio stream line.
    """
    try:
        output = subprocess.check_output([ 'ffmpeg', '-i', mp3File, '-hide_banner' ], stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # ffmpeg exits with an error because no output file is given,
        # but it has printed the stream information by then.
        output = e.output
    except OSError:
        # ffmpeg could not be started at all.
        return None

    if output is None:
        return None
    if isinstance(output, bytes):
        output = output.decode('utf-8', 'replace')

    match = re.match('.*Stream #\\d+:\\d+: Audio: mp3, (\\d+) Hz, (mono|stereo), .*', output, re.S)
    if match is None:
        return None

    return {
        'sampleRate': match.group(1),
        'channels': '2' if match.group(2) == 'stereo' else '1'
    }


if not os.path.exists(args.output) and not args.dry_run:
    outputParent = os.path.dirname(os.path.abspath(args.output))
    if not os.path.isdir(outputParent):
        fail('Parent of output is no directory: ' + os.path.abspath(outputParent))

addLeadInMessage(args.input, args.output)
import argparse
import os
import re
import shutil
import sys
import text_to_speech


if __name__ == '__main__':
    argFormatter = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=30, width=100)
    argparser = text_to_speech.PatchedArgumentParser(
        description=
            'Creates the audio messages needed by TonUINO.\n\n' +
            text_to_speech.textToSpeechDescription,
        usage='%(prog)s [optional arguments...]',
        formatter_class=argFormatter)
    argparser.add_argument('-i', '--input', type=str, default='.', help='The directory where `audio_messages_*.txt` files are located. (default: current directory)')
    argparser.add_argument('-o', '--output', type=str, default='sd-card', help='The directory where to create the audio messages. (default: `sd-card`)')
    text_to_speech.addArgumentsToArgparser(argparser)
    argparser.add_argument('--skip-numbers', action='store_true', help='If set, no number messages will be generated (`0001.mp3` - `0255.mp3`)')
    args = argparser.parse_args()

    text_to_speech.checkArgs(argparser, args)

    # One message file per language, e.g. `audio_messages_de.txt`.
    audioMessagesFile = '{}/audio_messages_{}.txt'.format(args.input, args.lang)
    if not os.path.isfile(audioMessagesFile):
        print('Input file does not exist: ' + os.path.abspath(audioMessagesFile))
        # `sys.exit` instead of the `exit` helper, which is only injected by the `site` module.
        sys.exit(1)

    # Refuse to write into an existing directory.
    targetDir = args.output
    if os.path.isdir(targetDir):
        print("Directory `" + targetDir + "` already exists.")
        sys.exit(1)

    advertDir = targetDir + '/advert'
    mp3Dir = targetDir + '/mp3'
    os.mkdir(targetDir)
    os.mkdir(advertDir)
    os.mkdir(mp3Dir)

    if not args.skip_numbers:
        # Spoken numbers 1..255; each is generated once and copied to the advert directory.
        for i in range(1, 256):
            targetFile1 = '{}/{:0>4}.mp3'.format(mp3Dir, i)
            targetFile2 = '{}/{:0>4}.mp3'.format(advertDir, i)
            text_to_speech.textToSpeechUsingArgs(text='{}'.format(i), targetFile=targetFile1, args=args)
            shutil.copy(targetFile1, targetFile2)

    # Each relevant line has the form `<file name>|<text to speak>`; other lines are skipped.
    lineRe = re.compile('^([^|]+)\\|(.*)$')
    with open(audioMessagesFile) as f:
        for line in f:
            match = lineRe.match(line.strip())
            if match is None:
                continue
            fileName = match.group(1)
            text = match.group(2)
            text_to_speech.textToSpeechUsingArgs(text=text, targetFile=mp3Dir + '/' + fileName, args=args)
#!/usr/bin/python

# Converts text into spoken language saved to an mp3 file.


import argparse
import base64
import json
import os
import subprocess
import sys
import urllib
from xml.sax.saxutils import escape as xmlEscape

try:
    # Python 3
    from urllib.parse import urlencode
except ImportError:
    # Python 2
    from urllib import urlencode


class PatchedArgumentParser(argparse.ArgumentParser):
    """ArgumentParser that prints the complete help after a usage error."""

    def error(self, message):
        """Write `message` to stderr, print the help and exit with status 2."""
        sys.stderr.write('error: %s\n\n' % message)
        self.print_help()
        sys.exit(2)


# Voice to use per engine, keyed by the value of `--lang`.
sayVoiceByLang = {
    'de': 'Anna',
    'en': 'Samantha',
}
googleVoiceByLang = {
    'de': { 'languageCode': 'de-DE', 'name': 'de-DE-Wavenet-C' },
    'en': { 'languageCode': 'en-US', 'name': 'en-US-Wavenet-D' },
}
amazonVoiceByLang = {
    # See: https://docs.aws.amazon.com/de_de/polly/latest/dg/voicelist.html
    'de': 'Vicki',
    'en': 'Joanna',
}


textToSpeechDescription = """
The following text-to-speech engines are supported:
- With `--use-say` the text-to-speech engine of MacOS is used (command `say`).
- With `--use-amazon` Amazon Polly is used. Requires the AWS CLI to be installed and configured. See: https://aws.amazon.com/cli/
- With `--use-google-key=ABCD` Google text-to-speech is used. See: https://cloud.google.com/text-to-speech/

Amazon Polly sounds best, Google text-to-speech is second, MacOS `say` sounds worst.
""".strip()


def addArgumentsToArgparser(argparser):
    """Add the language and engine selection options to `argparser`."""
    argparser.add_argument('--lang', choices=['de', 'en'], default='de', help='The language (default: de)')
    argparser.add_argument('--use-say', action='store_true', default=None, help="If set, the MacOS tool `say` will be used.")
    argparser.add_argument('--use-amazon', action='store_true', default=None, help="If set, Amazon Polly is used. If missing the MacOS tool `say` will be used.")
    argparser.add_argument('--use-google-key', type=str, default=None, help="The API key of the Google text-to-speech account to use.")


def checkArgs(argparser, args):
    """Exit with status 2 (after printing the help) if no engine was chosen.

    An empty `--use-google-key` counts as missing, because `textToSpeech`
    only uses Google when the key is non-empty.
    """
    if args.use_say or args.use_amazon or args.use_google_key:
        return

    print('ERROR: You have to provide one of the arguments `--use-say`, `--use-amazon` or `--use-google-key`\n')
    argparser.print_help()
    sys.exit(2)


def textToSpeechUsingArgs(text, targetFile, args):
    """Call `textToSpeech` with the engine options taken from parsed `args`."""
    textToSpeech(text, targetFile, lang=args.lang, useAmazon=args.use_amazon, useGoogleKey=args.use_google_key)


def _toSsml(text):
    """Return `text` as an SSML document for Amazon Polly."""
    # Text that already is an SSML document is passed through unchanged.
    if text.lstrip().startswith('<speak'):
        return text
    # SSML needs a <speak> root element, and `&`, `<`, `>` must be escaped in plain text.
    return '<speak>' + xmlEscape(text) + '</speak>'


def textToSpeech(text, targetFile, lang='de', useAmazon=False, useGoogleKey=None):
    """Speak `text` in language `lang` and save the result as mp3 to `targetFile`.

    Amazon Polly (via the `aws` command) is used if `useAmazon` is set,
    otherwise Google text-to-speech if `useGoogleKey` is a non-empty API key,
    otherwise the MacOS command `say` followed by an `ffmpeg` conversion.
    Raises KeyError if `lang` has no voice configured for the chosen engine.
    """
    print('\nGenerating: ' + targetFile + ' - ' + text)
    if useAmazon:
        # check_output captures what the aws command prints; that output is not needed.
        subprocess.check_output(['aws', 'polly', 'synthesize-speech', '--output-format', 'mp3',
            '--voice-id', amazonVoiceByLang[lang], '--text-type', 'ssml',
            '--text', _toSsml(text),
            targetFile])
    elif useGoogleKey:
        responseJson = postJson(
            'https://texttospeech.googleapis.com/v1beta1/text:synthesize?key=' + useGoogleKey,
            {
                'audioConfig': {
                    'audioEncoding': 'MP3',
                    'speakingRate': 1.0,
                    'pitch': 2.0, # Default is 0.0
                    'sampleRateHertz': 44100,
                    'effectsProfileId': [ 'small-bluetooth-speaker-class-device' ]
                },
                'voice': googleVoiceByLang[lang],
                'input': { 'text': text }
            }
        )

        # The audio is delivered base64 encoded inside the JSON response.
        mp3Data = base64.b64decode(responseJson['audioContent'])

        with open(targetFile, 'wb') as f:
            f.write(mp3Data)
    else:
        tempAiffFile = 'temp.aiff'
        try:
            subprocess.call([ 'say', '-v', sayVoiceByLang[lang], '-o', tempAiffFile, text ])
            subprocess.call([ 'ffmpeg', '-y', '-i', tempAiffFile, '-acodec', 'libmp3lame', '-ab', '128k', '-ac', '1', targetFile ])
        finally:
            # Don't leave the intermediate file behind, even if one of the steps raised.
            if os.path.exists(tempAiffFile):
                os.remove(tempAiffFile)


def postJson(url, postBody, headers = None):
    """POST `postBody` as JSON to `url` using `curl` and return the parsed JSON response.

    `headers` is an optional iterable of complete header lines (`Name: value`).
    """
    cmd = ['curl']
    if headers is not None:
        for header in headers:
            cmd.extend(['-H', header])
    cmd.extend(['-H', 'Content-Type: application/json; charset=utf-8', '--data', json.dumps(postBody).encode('utf-8'), url])
    response = subprocess.check_output(cmd)
    return json.loads(response)


def postForm(url, formData):
    """POST `formData` url-encoded to `url` using `curl` and return the parsed JSON response."""
    response = subprocess.check_output(['curl', '-H', 'Content-Type: application/x-www-form-urlencoded; charset=utf-8', '--data', urlencode(formData), url])
    return json.loads(response)


if __name__ == '__main__':
    argFormatter = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=30, width=100)
    argparser = PatchedArgumentParser(
        description=
            'Converts text into spoken language saved to an mp3 file.\n\n' +
            textToSpeechDescription,
        usage='%(prog)s -t "This is my text" -o my-output.mp3 [optional arguments...]',
        formatter_class=argFormatter)
    argparser.add_argument('-t', '--text', type=str, required=True, help='The text to convert into spoken language.')
    argparser.add_argument('-o', '--output', type=str, required=True, help='The output mp3 file to create')
    addArgumentsToArgparser(argparser)
    args = argparser.parse_args()

    checkArgs(argparser, args)

    # Never overwrite an existing file.
    if os.path.exists(args.output):
        print('ERROR: Output file already exists: ' + os.path.abspath(args.output))
        sys.exit(1)

    textToSpeechUsingArgs(text=args.text, targetFile=args.output, args=args)