#!/usr/bin/python

# Adds a lead-in message to each mp3 file of a directory storing the result in another directory.
# So - when played e.g. on a TonUINO - you first will hear the title of the track, then the track itself.
#
# Provenance: reconstructed from the patch "Added script for adding spoken lead-in messages to
# mp3 files" (commit 75fa9a53a8315fff282142755a4be1f145d19a32, Til Schneider, 2019-04-02),
# which creates add_lead_in_messages.py.
#
# External tools used: `curl` (Google text-to-speech), `say` (MacOS text-to-speech), `ffmpeg`.

import argparse
import base64
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile


class PatchedArgumentParser(argparse.ArgumentParser):
    """Argument parser that prints the full help text (not just the usage line) on errors."""

    def error(self, message):
        sys.stderr.write('error: %s\n\n' % message)
        self.print_help()
        sys.exit(2)


def buildArgParser():
    """Create the command line parser of this script.

    Returns:
        A `PatchedArgumentParser` knowing all options of the script.
    """
    argFormatter = lambda prog: argparse.HelpFormatter(prog, max_help_position=27, width=100)
    argparser = PatchedArgumentParser(
        description=
            'Adds a lead-in message to each mp3 file of a directory storing the result in another directory.\n' +
            'So - when played e.g. on a TonUINO - you first will hear the title of the track, then the track itself.',
        usage='%(prog)s -i my/source/dir -o my/output/dir [optional arguments...]',
        formatter_class=argFormatter)
    argparser.add_argument('-i', '--input', type=str, required=True, help='The input directory or mp3 file to process (input won\'t be changed)')
    argparser.add_argument('-o', '--output', type=str, required=True, help='The output directory where to write the mp3 files (will be created if not existing)')
    argparser.add_argument('--lang', choices=['de', 'en'], default='de', help='The language')
    argparser.add_argument('--google-key', type=str, default=None, help="The API key of the Google text-to-speech account to use. If missing the MacOS tool `say` will be used.")
    argparser.add_argument('--file-regex', type=str, default=None, help="The regular expression to use for parsing the mp3 file name. If missing the whole file name except a leading number will be used as track title.")
    argparser.add_argument('--title-pattern', type=str, default=None, help="The pattern to use as track title. May contain groups of `--file-regex`, e.g. '\\1'")
    argparser.add_argument('--add-numbering', action='store_true', help='Whether to add a three-digit number to the mp3 files (suitable for DFPlayer Mini)')
    argparser.add_argument('--dry-run', action='store_true', help='Dry run: Only prints what the script would do, without actually creating files')
    return argparser


googleVoiceByLang = {
    'de': { 'languageCode': 'de-DE', 'name': 'de-DE-Wavenet-C' },
    'en': { 'languageCode': 'en-US', 'name': 'en-US-Wavenet-D' },
}
sayVoiceByLang = {
    'de': 'Anna',
    'en': 'Samantha',
}

# Parsed command line arguments. Set by `main()`; read by `textToSpeech` and `addLeadInMessage`.
args = None

# Defaults: drop a leading number from the file name and use the rest as track title.
# Both are overridden by `main()` if `--file-regex` / `--title-pattern` are given.
fileRegex = re.compile('\\d*(.*)')
titlePattern = '\\1'

# Number of mp3 files seen so far in the directory currently being processed
# (only relevant for `--add-numbering`).
mp3FileIndex = 0


def fail(msg):
    """Print an error message to stderr and terminate the script with exit code 1."""
    sys.stderr.write('ERROR: ' + msg + '\n')
    sys.exit(1)


def runTool(command):
    """Run an external tool and abort the script with a readable message if it fails.

    Args:
        command: The command line as a list (program name first).
    """
    try:
        subprocess.check_call(command)
    except OSError as err:
        # E.g. the tool is not installed
        fail('Could not run `{}`: {}'.format(command[0], err))
    except subprocess.CalledProcessError as err:
        fail('`{}` failed with exit code {}'.format(command[0], err.returncode))


def postJson(url, postBody):
    """POST `postBody` as JSON to `url` using `curl` and return the parsed JSON response.

    Args:
        url: The URL to send the request to.
        postBody: A JSON-serializable object used as request body.

    Returns:
        The decoded JSON response.
    """
    # json.dumps() escapes non-ASCII characters by default, so the body is a plain (ASCII) string
    # which can be passed as command line argument as it is.
    command = ['curl', '--header', 'Content-Type: application/json; charset=utf-8', '--data', json.dumps(postBody), url]
    try:
        response = subprocess.check_output(command)
    except OSError as err:
        fail('Could not run `curl`: {}'.format(err))
    except subprocess.CalledProcessError as err:
        fail('`curl` failed with exit code {}'.format(err.returncode))

    # check_output() returns bytes - decode explicitly, so this works with every Python version
    return json.loads(response.decode('utf-8'))


def textToSpeech(text, targetFile):
    """Create an mp3 file containing the spoken `text`.

    Uses the Google text-to-speech API if `--google-key` was given, otherwise the MacOS tool `say`
    (whose output is converted to mp3 using `ffmpeg`).

    Args:
        text: The text to speak.
        targetFile: Path of the mp3 file to write.
    """
    if args.google_key:
        responseJson = postJson(
            'https://texttospeech.googleapis.com/v1beta1/text:synthesize?key=' + args.google_key,
            {
                'audioConfig': {
                    'audioEncoding': 'MP3',
                    'speakingRate': 1.0,
                    'pitch': 2.0, # Default is 0.0
                    'sampleRateHertz': 44100,
                    'effectsProfileId': [ 'small-bluetooth-speaker-class-device' ]
                },
                'voice': googleVoiceByLang[args.lang],
                'input': { 'text': text }
            }
        )

        if 'audioContent' not in responseJson:
            # The service answered, but not with audio data (e.g. invalid API key, quota exceeded)
            fail('Google text-to-speech returned no audio: ' + json.dumps(responseJson))

        mp3Data = base64.b64decode(responseJson['audioContent'])

        with open(targetFile, 'wb') as f:
            f.write(mp3Data)
    else:
        # Use a private temp directory, so no file of the user gets overwritten and nothing
        # is left behind if one of the tools fails.
        tempDir = tempfile.mkdtemp(prefix='lead-in-')
        try:
            aiffFile = os.path.join(tempDir, 'temp.aiff')
            runTool([ 'say', '-v', sayVoiceByLang[args.lang], '-o', aiffFile, text ])
            runTool([ 'ffmpeg', '-y', '-i', aiffFile, '-acodec', 'libmp3lame', '-ab', '128k', '-ac', '1', targetFile ])
        finally:
            shutil.rmtree(tempDir, ignore_errors=True)


def buildTrackTitle(fileName, regex, pattern):
    """Derive the spoken track title from a file name (without extension).

    Only the first match of `regex` is replaced by `pattern`. This is important for regular
    expressions which can match the empty string (like the default one): Otherwise the empty
    match at the end of the name would be replaced, too, and literal text of `pattern` would
    show up twice in the title.

    Args:
        fileName: The file name without directory and extension.
        regex: The compiled regular expression used for parsing the file name.
        pattern: The replacement pattern. May contain group references like '\\1'.

    Returns:
        The title with underscores replaced by spaces and surrounding whitespace removed.
    """
    return regex.sub(pattern, fileName, count=1).replace('_', ' ').strip()


def numberedPath(path, number):
    """Prefix the file name of `path` with the three-digit `number`, e.g. 'a/b.mp3' -> 'a/001_b.mp3'."""
    directory, fileName = os.path.split(path)
    return os.path.join(directory, '{:0>3}_{}'.format(number, fileName))


def outputFileFor(inputFile, outputPath):
    """Determine the output file to write when the input is a single mp3 file.

    `--output` is documented as a directory, so the result is put into it using the name of the
    input file. For backward compatibility an output path naming an mp3 file (or any existing
    file) is still used as it is.

    Args:
        inputFile: Path of the input file.
        outputPath: The value of `--output`.

    Returns:
        The path of the file to write.
    """
    namesMp3File = os.path.splitext(outputPath)[1].lower() == '.mp3'
    if os.path.isdir(outputPath) or not (namesMp3File or os.path.exists(outputPath)):
        return os.path.join(outputPath, os.path.basename(inputFile))
    return outputPath


def addLeadInMessage(inputPath, outputPath):
    """Add a spoken lead-in message to one mp3 file or (recursively) to all mp3 files of a directory.

    Non-mp3 files are ignored, already existing output files are skipped. In dry-run mode nothing
    is created, the actions are only printed.

    Args:
        inputPath: The mp3 file or directory to process.
        outputPath: Where to write the result (a file if the input is a file, otherwise a directory).
    """
    global mp3FileIndex

    if not os.path.exists(inputPath):
        fail('Input does not exist: ' + os.path.abspath(inputPath))

    if os.path.isdir(inputPath):
        if os.path.exists(outputPath):
            if not os.path.isdir(outputPath):
                fail('Input is a directory, but output isn\'t: ' + os.path.abspath(outputPath))
        elif not args.dry_run:
            os.mkdir(outputPath)

        # Every directory gets its own numbering. Restore the counter afterwards, so the
        # numbering of the parent directory continues where it was before descending.
        parentFileIndex = mp3FileIndex
        mp3FileIndex = 0
        try:
            for child in sorted(os.listdir(inputPath)):
                addLeadInMessage(os.path.join(inputPath, child), os.path.join(outputPath, child))
        finally:
            mp3FileIndex = parentFileIndex

        return

    inputFileNameSplit = os.path.splitext(os.path.basename(inputPath))
    inputFileName = inputFileNameSplit[0]
    inputFileExt = inputFileNameSplit[1].lower()

    if inputFileExt != '.mp3':
        print('Ignoring {} (no mp3 file)'.format(os.path.abspath(inputPath)))
        return

    if args.add_numbering:
        # Count before the "already exists" check, so numbers stay stable when re-running
        outputPath = numberedPath(outputPath, mp3FileIndex + 1)
        mp3FileIndex += 1

    if os.path.isfile(outputPath):
        print('Skipping {} (file already exists)'.format(os.path.abspath(outputPath)))
        return

    text = buildTrackTitle(inputFileName, fileRegex, titlePattern)
    print('Adding lead-in "{}" to {}'.format(text, os.path.abspath(outputPath)))

    if not args.dry_run:
        tempDir = tempfile.mkdtemp(prefix='lead-in-')
        try:
            tempLeadInFile = os.path.join(tempDir, 'temp-lead-in.mp3')
            textToSpeech(text, tempLeadInFile)

            # NOTE(review): `-map_metadata 0:1` is given after the output file, ffmpeg may ignore
            # such trailing options - confirm whether it has any effect before moving it.
            runTool([ 'ffmpeg', '-i', 'concat:{}|{}'.format(tempLeadInFile, inputPath), '-acodec', 'copy', outputPath, '-map_metadata', '0:1' ])
        finally:
            shutil.rmtree(tempDir, ignore_errors=True)
        print('\n')


def main(argv=None):
    """Entry point of the script.

    Args:
        argv: The command line arguments (without program name). Defaults to `sys.argv[1:]`.
    """
    global args, fileRegex, titlePattern

    args = buildArgParser().parse_args(argv)
    if args.file_regex is not None:
        fileRegex = re.compile(args.file_regex)
    if args.title_pattern is not None:
        titlePattern = args.title_pattern

    outputPath = args.output
    if os.path.isfile(args.input):
        outputPath = outputFileFor(args.input, args.output)

    if not os.path.exists(args.output) and not args.dry_run:
        # Resolve first: For a bare name like 'out' dirname() would be '' (which is no directory)
        outputParent = os.path.dirname(os.path.abspath(args.output))
        if not os.path.isdir(outputParent):
            fail('Parent of output is no directory: ' + outputParent)
        if outputPath != args.output:
            # Single input file and output is a directory - a directory input creates it itself
            os.mkdir(args.output)

    addLeadInMessage(args.input, outputPath)


if __name__ == '__main__':
    main()