From 63a53df6926717a9427e4a47aeaa18daf09cc32d Mon Sep 17 00:00:00 2001 From: Kedar Date: Thu, 27 Jul 2023 18:50:34 +0530 Subject: [PATCH] Update mysqldumpsplitter.sh First commit to v8 --- mysqldumpsplitter.sh | 405 ++++++++++++------------------------------- 1 file changed, 111 insertions(+), 294 deletions(-) diff --git a/mysqldumpsplitter.sh b/mysqldumpsplitter.sh index 863d39b..b6f0128 100644 --- a/mysqldumpsplitter.sh +++ b/mysqldumpsplitter.sh @@ -1,6 +1,6 @@ -#!/bin/sh +#!/bin/bash -# Current Version: 6.1 +# Current Version: 8 # Extracts database, table, all databases, all tables or tables matching on regular expression from the mysqldump. # Includes output compression options. # By: Kedar Vaijanapurkar @@ -9,30 +9,12 @@ # Follow GIT: https://github.com/kedarvj/mysqldumpsplitter/ ## Version Info: -# Ver. 1.0: Feb 11, 2010 -# ... Initial version extract table(s) based on name, regexp or all of them from database-dump. -# Ver. 2.0: Feb, 2015 -# ... Added database extract and compression -# Ver. 3.0: March, 2015 -# ... Complete rewrite. -# ... Extract all databases. -# Ver. 4.0: March, 2015 -# ... More validations and bug fixes. -# ... Support for config file. -# ... Detecting source dump types (compressed/sql). -# ... Support for compressed backup and bz2 format. -# Credit: Andrzej Wroblewski (andrzej.wroblewski@packetstorm.pl) for his inputs on compressed backup & bz2 support. -# Ver. 5.0: Apr, 2015 -# ... Describing the dump, listing all databases and tables -# ... Extracting one or more tables from single database -# Ver. 6.1: Oct, 2015 -# ... Bug fixing in REGEXP extraction functionlity -# ... Bug fixing in describe functionality -# ... Preserving time_zone & charset env settings in extracted sqls. -# Credit: @PeterTheDBA helped understanding the possible issues with environment variable settings included in first 17 lines of mysqldump. -## +# Ver. 8: Apr 30, 2023 +# ... 
Faster mysqldumpsplitter +# IMPORTANT +# This is still a work in progress and not all functionality is in place. The aim is to optimize the existing mysqldumpsplitter flow. +# -# ToDo: Work with straming input ## Formating Colour # Text color variables txtund=$(tput sgr 0 1) # Underline @@ -58,191 +40,98 @@ COMPRESSION='gzip'; DECOMPRESSION='cat'; VERSION=6.1 -## Usage Description -usage() + +# Create a directory to store the extracted databases and tables +mkdir -p extracted_data + +# Initialize variables to hold the current database and table names +current_db="" +current_table="" +ignore_db_filter=1 # 1 = skip lines of the current database (checked when EXTRACT is DB); 0 = write them +ignore_db_table_filter=1 # 1 = skip lines of the current table (checked when EXTRACT is DBTABLE); 0 = write them +dump_splitter() { - echo "\n\t\t\t\t\t\t\t${txtgrn}${txtund}************ Usage ************ \n"${txtrst}; - echo "${txtgrn}sh mysqldumpsplitter.sh --source filename --extract [DB|TABLE|DBTABLES|ALLDBS|ALLTABLES|REGEXP] --match_str string --compression [gzip|pigz|bzip2|xz|pxz|none] --decompression [gzip|pigz|bzip2|xz|pxz|none] --output_dir [path to output dir] [--config /path/to/config] ${txtrst}" - echo "${txtund} ${txtrst}" - echo "OPTIONS:" - echo "${txtund} ${txtrst}" - echo " --source: mysqldump filename to process. It could be a compressed or regular file." - echo " --desc: This option will list out all databases and tables." - echo " --extract: Specify what to extract. Possible values DB, TABLE, ALLDBS, ALLTABLES, REGEXP" - echo " --match_str: Specify match string for extract command option." - echo " --compression: gzip/pigz/bzip2/xz/pxz/none (default: gzip). Extracted file will be of this compression." - echo " --decompression: gzip/pigz/bzip2/xz/pxz/none (default: gzip). This will be used against input file." - echo " --output_dir: path to output dir. (default: ./out/)" - echo " --config: path to config file. You may use --config option to specify the config file that includes following variables."
- echo "\t\tSOURCE= -\t\tEXTRACT= -\t\tCOMPRESSION= -\t\tDECOMPRESSION= -\t\tOUTPUT_DIR= -\t\tMATCH_STR= -" - echo "${txtund} ${txtrst}" - echo "Ver. $VERSION" - exit 0; -} +# Loop through each line in the dump file (NOTE(review): $SOURCE is read with plain cat, so $DECOMPRESSION is not applied here - confirm compressed dumps are out of scope for now) +cat $SOURCE | while read -r line; do + # Check if the current line defines a new database + if echo "$line" | grep -Eqwi "^-- Current Database"; then + # Extract the database name from the line + db=$(echo $line | sed -E "s/.*\`(.+)\`.*/\1/") -## Parsing and processing input -parse_result() -{ + # Update the current database name + current_db=$db + + # Reset both filters to ON (1 = ignore) for the new database + ignore_db_table_filter=1 + ignore_db_filter=1 - ## Validate SOURCE is provided and exists - if [ -z $SOURCE ]; then - echo "${txtred}ERROR: Source file not specified or does not exist. (Entered: $SOURCE)${txtrst}" - echo "${txtgrn}* Make sure --source is first argument. ${txtrst}"; - exit 2; - elif [ ! -f $SOURCE ]; then - echo "${txtred}ERROR: Source file does not exist. (Entered: $SOURCE)${txtrst}" - exit 2; + echo "${txtwht}Current Database $current_db ${txtrst}" + echo "---------------------------------------" + + if [[ $EXTRACT == 'DB' ]]; then + if [[ $db != $MATCH_STR ]]; then + echo "${txtred}Ignoring Database $db ${txtrst}" + ignore_db_filter=1 + continue; + else + echo "${txtgrn}Extracting Database $db ${txtrst}" + ignore_db_filter=0 + fi + # If this database needs to be ignored, continue to next line + [[ $ignore_db_filter == 1 ]] && continue; fi - ## Parse Extract Operation - case $EXTRACT in - ALLDBS|ALLTABLES|DESCRIBE ) - if [ "$MATCH_STR" != '' ]; then - echo "${txtylw}Ignoring option --match_string.${txtrst}" - fi; - ;; - DB|TABLE|REGEXP|DBTABLE) - if [ "$MATCH_STR" = '' ]; then - echo "${txtred}ERROR: Expecting input for option --match_string.${txtrst}" - exit 1; - fi; - ;; - * ) echo "${txtred}ERROR: Please specify correct option for --extract.${txtrst}" - usage; - esac; - - ## Parse compression - if [ "$COMPRESSION" = 'none' ]; then - COMPRESSION='cat'; - EXT="sql" -
echo "${txtgrn}Setting no compression.${txtrst}"; - elif [ "$COMPRESSION" = 'pigz' ]; then - which $COMPRESSION &>/dev/null - if [ $? -ne 0 ]; then - echo "${txtred}WARNING:$COMPRESSION appears having issues, using default gzip.${txtrst}"; - COMPRESSION="gzip"; - fi; - echo "${txtgrn}Setting compression as $COMPRESSION.${txtrst}"; - EXT="sql.gz" - elif [ "$COMPRESSION" = 'bzip2' ]; then - which $COMPRESSION &>/dev/null - if [ $? -ne 0 ]; then - echo "${txtred}WARNING:$COMPRESSION appears having issues, using default gzip.${txtrst}"; - COMPRESSION="gzip"; - fi; - echo "${txtgrn}Setting compression as $COMPRESSION.${txtrst}"; - EXT="sql.bz2"; - elif [ "$COMPRESSION" = 'xz' ]; then - which $COMPRESSION &>/dev/null - if [ $? -ne 0 ]; then - echo "${txtred}WARNING:$COMPRESSION appears having issues, using default gzip.${txtrst}"; - COMPRESSION="gzip"; - fi; - echo "${txtgrn}Setting compression as $COMPRESSION.${txtrst}"; - EXT="sql.xz"; - elif [ "$COMPRESSION" = 'pxz' ]; then - which $COMPRESSION &>/dev/null - if [ $? -ne 0 ]; then - echo "${txtred}WARNING:$COMPRESSION appears having issues, using default gzip.${txtrst}"; - COMPRESSION="gzip"; - fi; - echo "${txtgrn}Setting compression as $COMPRESSION.${txtrst}"; - EXT="sql.xz"; - else - COMPRESSION='gzip'; - echo "${txtgrn}Setting compression $COMPRESSION (default).${txtrst}"; - EXT="sql.gz" - fi; + fi - ## Parse decompression - if [ "$DECOMPRESSION" = 'none' ]; then - DECOMPRESSION='cat'; - echo "${txtgrn}Setting no decompression.${txtrst}"; - elif [ "$DECOMPRESSION" = 'pigz' ]; then - which $DECOMPRESSION &>/dev/null - if [ $? -ne 0 ]; then - echo "${txtred}WARNING:$DECOMPRESSION appears having issues, using default gzip.${txtrst}"; - DECOMPRESSION="gzip -d -c"; - else - DECOMPRESSION="pigz -d -c"; - fi; - echo "${txtgrn}Setting decompression as $DECOMPRESSION.${txtrst}"; - elif [ "$DECOMPRESSION" = 'bzip2' ]; then - which $DECOMPRESSION &>/dev/null - if [ $?
-ne 0 ]; then - echo "${txtred}WARNING:$DECOMPRESSION appears having issues, using default gzip.${txtrst}"; - DECOMPRESSION="gzip -d -c"; - else - DECOMPRESSION="bzip2 -d -c"; - fi; - echo "${txtgrn}Setting decompression as $DECOMPRESSION.${txtrst}"; - elif [ "$DECOMPRESSION" = 'xz' ]; then - which $DECOMPRESSION &>/dev/null - if [ $? -ne 0 ]; then - echo "${txtred}WARNING:$DECOMPRESSION appears having issues, using default gzip.${txtrst}"; - DECOMPRESSION="gzip -d -c"; - else - DECOMPRESSION="xz -d -c"; - fi; - echo "${txtgrn}Setting decompression as $DECOMPRESSION.${txtrst}"; - elif [ "$DECOMPRESSION" = 'pxz' ]; then - which $DECOMPRESSION &>/dev/null - if [ $? -ne 0 ]; then - echo "${txtred}WARNING:$DECOMPRESSION appears having issues, using default gzip.${txtrst}"; - DECOMPRESSION="gzip -d -c"; - else - DECOMPRESSION="pxz -d -c"; - fi; - echo "${txtgrn}Setting decompression as $DECOMPRESSION.${txtrst}"; - else - DECOMPRESSION="gzip -d -c"; - echo "${txtgrn}Setting decompression $DECOMPRESSION (default).${txtrst}"; - fi; + # Check if the current line defines a new table + if echo "$line" | grep -Eqwi "^-- Table structure for table"; then + # Extract the table name from the line + table=$(echo $line | sed -E "s/-- Table structure for table.*\`(.+)\`.*/\1/") + # Set the current table name + current_table=$table - ## Verify file type: - filecommand=`file $SOURCE` - echo $filecommand | grep "compressed" 1>/dev/null - if [ `echo $?` -eq 0 ] - then - echo "${txtylw}File $SOURCE is a compressed dump.${txtrst}" - if [ "$DECOMPRESSION" = 'cat' ]; then - echo "${txtred} The input file $SOURCE appears to be a compressed dump. \n While the decompression is set to none.\n Please specify ${txtund}--decompression [gzip|bzip2|pigz|xz|pxz]${txtrst}${txtred} argument.${txtrst}"; - exit 1; - fi; - else - echo "${txtylw}File $SOURCE is a regular dump.${txtrst}" - if [ "$DECOMPRESSION" != 'cat' ]; then - echo "${txtred} Default decompression method for source is gzip.
\n The input file $SOURCE does not appear a compressed dump. \n ${txtylw}We will try using no decompression. Please consider specifying ${txtund}--decompression none${txtrst}${txtylw} argument.${txtrst}"; - DECOMPRESSION='cat'; ## Auto correct decompression to none for regular files. - fi; - fi; + if [[ $EXTRACT == 'DBTABLE' ]]; then + MATCH_DB=`echo $MATCH_STR | awk -F "." {'print $1'}` + MATCH_TBLS=`echo $MATCH_STR | awk -F "." {'print $2'}` + if [[ $db != $MATCH_DB ]]; then + ignore_db_filter=1 + ignore_db_table_filter=1 + echo "${txtred} Ignoring Table $db.$table ${txtrst} --- db=$ignore_db_filter dbtbl=$ignore_db_table_filter " + continue; + else + # Create a directory for the current database + mkdir -p extracted_data/$db + if [[ $table != $MATCH_TBLS ]]; then + ignore_db_table_filter=1 + ignore_db_filter=1 + echo "${txtred}Ignoring Table $db.$table ${txtrst} --- db=$ignore_db_filter dbtbl=$ignore_db_table_filter" + else + ignore_db_filter=0 + ignore_db_table_filter=0 + echo "${txtgrn}Extracting Table $db.$table ${txtrst}--- db=$ignore_db_filter dbtbl=$ignore_db_table_filter" + fi + fi + fi + fi + # Ignore if for DB, ignore_db_filter is ON (1) + [[ $ignore_db_filter == 1 && $EXTRACT == 'DB' ]] && continue; - # Output directory - if [ "$OUTPUT_DIR" = "" ]; then - OUTPUT_DIR="out"; - fi; - mkdir -p $OUTPUT_DIR - if [ $?
-eq 0 ]; then - echo "${txtgrn}Setting output directory: $OUTPUT_DIR.${txtrst}"; - else - echo "${txtred}ERROR:Issue while checking output directory: $OUTPUT_DIR.${txtrst}"; - exit 2; - fi; + # Ignore if for DBTABLE, ignore_db_table_filter is ON (1) + [[ $ignore_db_table_filter == 1 && $EXTRACT == 'DBTABLE' ]] && continue; -echo "${txtylw}Processing: Extract $EXTRACT $MATCH_STR from $SOURCE with compression option as $COMPRESSION and output location as $OUTPUT_DIR${txtrst}"; + # Write the current line to the SQL file for the current database and table (NOTE(review): extracted_data/$current_db is only created in the DBTABLE branch above, and $OUTPUT_DIR / $COMPRESSION are not used for this write - confirm) + echo "$line" >> extracted_data/$current_db/$current_table.sql + continue +done } + +## + # Include first 17 lines of full mysqldump - preserve time_zone/charset/environment variables. include_dump_info() { @@ -259,103 +148,33 @@ include_dump_info() echo "" | $COMPRESSION >> $OUTPUT_DIR/$MATCH_STR.$EXT } -## Actual dump splitting -dump_splitter() + +## Usage Description +usage() { - case $EXTRACT in - DB) - # Include first 17 lines of standard mysqldump to preserve time_zone and charset. - include_dump_info $MATCH_STR - - echo "Extracting Database: $MATCH_STR"; - $DECOMPRESSION $SOURCE | sed -n "/^-- Current Database: \`$MATCH_STR\`/,/^-- Current Database: /p" | $COMPRESSION >> $OUTPUT_DIR/$MATCH_STR.$EXT - echo "${txtbld} Database $MATCH_STR extracted from $SOURCE at $OUTPUT_DIR${txtrst}" - ;; - - TABLE) - # Include first 17 lines of standard mysqldump to preserve time_zone and charset. - include_dump_info $MATCH_STR - - #Loop for each tablename found in provided dumpfile - echo "Extracting $MATCH_STR."
- #Extract table specific dump to tablename.sql - $DECOMPRESSION $SOURCE | sed -n "/^-- Table structure for table \`$MATCH_STR\`/,/^-- Table structure for table/p" | $COMPRESSION >> $OUTPUT_DIR/$MATCH_STR.$EXT - echo "${txtbld} Table $MATCH_STR extracted from $SOURCE at $OUTPUT_DIR${txtrst}" - ;; - - ALLDBS) - for dbname in $($DECOMPRESSION $SOURCE | grep -aE "^-- Current Database: " | awk -F"\`" {'print $2'}) - do - # Include first 17 lines of standard mysqldump to preserve time_zone and charset. - include_dump_info $dbname - - echo "Extracting Database $dbname..." - #Extract database specific dump to database.sql.gz - $DECOMPRESSION $SOURCE | sed -n "/^-- Current Database: \`$dbname\`/,/^-- Current Database: /p" | $COMPRESSION >> $OUTPUT_DIR/$dbname.$EXT - DB_COUNT=$((DB_COUNT+1)) - echo "${txtbld}Database $dbname extracted from $SOURCE at $OUTPUT_DIR/$dbname.$EXT${txtrst}" - done; - echo "${txtbld}Total $DB_COUNT databases extracted.${txtrst}" - ;; - - ALLTABLES) - - for tablename in $($DECOMPRESSION $SOURCE | grep -a "Table structure for table " | awk -F"\`" {'print $2'}) - do - # Include first 17 lines of standard mysqldump to preserve time_zone and charset. - include_dump_info $tablename - - #Extract table specific dump to tablename.sql - $DECOMPRESSION $SOURCE | sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" | $COMPRESSION >> $OUTPUT_DIR/$tablename.$EXT - TABLE_COUNT=$((TABLE_COUNT+1)) - echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUTPUT_DIR/$tablename.$EXT${txtrst}" - done; - echo "${txtbld}Total $TABLE_COUNT tables extracted.${txtrst}" - ;; - REGEXP) - - TABLE_COUNT=0; - for tablename in $($DECOMPRESSION $SOURCE | grep -aE "Table structure for table \`$MATCH_STR" | awk -F"\`" {'print $2'}) - do - # Include first 17 lines of standard mysqldump to preserve time_zone and charset. - include_dump_info $tablename - - echo "Extracting $tablename..." 
- #Extract table specific dump to tablename.sql - $DECOMPRESSION $SOURCE | sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" | $COMPRESSION >> $OUTPUT_DIR/$tablename.$EXT - echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUTPUT_DIR/$tablename.$EXT${txtrst}" - TABLE_COUNT=$((TABLE_COUNT+1)) - done; - echo "${txtbld}Total $TABLE_COUNT tables extracted.${txtrst}" - ;; - - DBTABLE) - - MATCH_DB=`echo $MATCH_STR | awk -F "." {'print $1'}` - MATCH_TBLS=`echo $MATCH_STR | awk -F "." {'print $2'}` - if [ "$MATCH_TBLS" = "*" ]; then - MATCH_TBLS=''; - fi; - TABLE_COUNT=0; - - for tablename in $( $DECOMPRESSION $SOURCE | sed -n "/^-- Current Database: \`$MATCH_DB\`/,/^-- Current Database: /p" | grep -aE "^-- Table structure for table \`$MATCH_TBLS" | awk -F '\`' {'print $2'} ) - do - echo "Extracting $tablename..." - #Extract table specific dump to tablename.sql - # Include first 17 lines of standard mysqldump to preserve time_zone and charset. 
- include_dump_info $tablename - - $DECOMPRESSION $SOURCE | sed -n "/^-- Current Database: \`$MATCH_DB\`/,/^-- Current Database: /p" | sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" | $COMPRESSION >> $OUTPUT_DIR/$tablename.$EXT - echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUTPUT_DIR/$tablename.$EXT${txtrst}" - TABLE_COUNT=$((TABLE_COUNT+1)) - done; - echo "${txtbld}Total $TABLE_COUNT tables extracted from $MATCH_DB.${txtrst}" - ;; - - *) echo "Wrong option, exiting."; - usage; - exit 1;; - esac + echo "\n\t\t\t\t\t\t\t${txtgrn}${txtund}************ Usage ************ \n"${txtrst}; + echo "${txtgrn}sh mysqldumpsplitter.sh --source filename --extract [DB|TABLE|DBTABLES|ALLDBS|ALLTABLES|REGEXP] --match_str string --compression [gzip|pigz|bzip2|xz|pxz|none] --decompression [gzip|pigz|bzip2|xz|pxz|none] --output_dir [path to output dir] [--config /path/to/config] ${txtrst}" + echo "${txtund} ${txtrst}" + echo "OPTIONS:" + echo "${txtund} ${txtrst}" + echo " --source: mysqldump filename to process. It could be a compressed or regular file." + echo " --desc: This option will list out all databases and tables." + echo " --extract: Specify what to extract. Possible values DB, TABLE, ALLDBS, ALLTABLES, REGEXP" + echo " --match_str: Specify match string for extract command option." + echo " --compression: gzip/pigz/bzip2/xz/pxz/none (default: gzip). Extracted file will be of this compression." + echo " --decompression: gzip/pigz/bzip2/xz/pxz/none (default: gzip). This will be used against input file." + echo " --output_dir: path to output dir. (default: ./out/)" + echo " --config: path to config file. You may use --config option to specify the config file that includes following variables." + echo -e "\t\tSOURCE= +\t\tEXTRACT= +\t\tCOMPRESSION= +\t\tDECOMPRESSION= +\t\tOUTPUT_DIR= +\t\tMATCH_STR= +" + echo "${txtund} ${txtrst}" + echo "Ver. 
$VERSION" + exit 0; } missing_arg() @@ -409,7 +228,7 @@ while [ "$1" != "" ]; do echo "-------------------------------"; echo "Database\t\tTables"; echo "-------------------------------"; - $DECOMPRESSION $SOURCE | grep -aE "(^-- Current Database:|^-- Table structure for table)" | sed 's/-- Current Database: /-------------------------------\n/' | sed 's/-- Table structure for table /\t\t/'| sed 's/`//g' ; + $DECOMPRESSION $SOURCE | grep -aE "(^-- Current Database:|^-- Table structure for table)" | sed "s/-- Current Database: /-------------------------------\n/" | sed 's/-- Table structure for table /\t\t/' | sed 's/`//g' ; echo "-------------------------------"; exit 0; ;; @@ -432,6 +251,4 @@ while [ "$1" != "" ]; do shift done -parse_result dump_splitter -exit 0;