diff --git a/mysqldumpsplitter.sh b/mysqldumpsplitter.sh index a54af68..8cfc5ca 100644 --- a/mysqldumpsplitter.sh +++ b/mysqldumpsplitter.sh @@ -1,6 +1,6 @@ #!/bin/sh -# Current Version: 3.0 +# Current Version: 4.0 # Extracts database, table, all databases, all tables or tables matching on regular expression from the mysqldump. # Includes output compression options. # By: Kedar Vaijanapurkar @@ -16,6 +16,13 @@ # Ver. 3.0: March, 2015 # ... Complete rewrite. # ... Extract all databases. +# Ver. 4.0: March, 2015 +# ... More validations and bug fixes. +# ... Detecting source dump types (compressed/sql). +# ... Configuration file support. +# ... Support for compressed backup and bz2 format. +# Credit: Andrzej Wroblewski (andrzej.wroblewski@packetstorm.pl) for his inputs on compressed backup & bz2 support. +# ## # ToDo: Work with straming input @@ -33,30 +40,41 @@ txtwht=$(tput setaf 7) # White txtrst=$(tput sgr0) # Text reset ## Variable Declaration -SOURCE_DUMP=''; -OBJECT_NAME=''; +SOURCE=''; +MATCH_STR=''; EXTRACT=''; -OUT_DIR='out'; +OUTPUT_DIR='out'; EXT="sql.gz"; TABLE_NAME=''; DB_NAME=''; - +COMPRESSION='gzip'; +DECOMPRESSION='cat'; +VERSION=4.0 ## Usage Description usage() { - echo "${txtgrn}${txtund}************ Usage ************ "${txtrst}; - echo "${txtgrn}sh mysqldumpsplitter.sh --source filename --extract [DB|TABLE|ALLDBS|ALLTABLES|REGEXP] --match_str string --compression=[gzip|pigz|none] --output_dir=[path to output dir] ${txtrst}" + echo "\n\t\t\t\t\t\t\t${txtgrn}${txtund}************ Usage ************ \n"${txtrst}; + echo "${txtgrn}sh mysqldumpsplitter.sh --source filename --extract [DB|TABLE|ALLDBS|ALLTABLES|REGEXP] --match_str string --compression [gzip|pigz|bzip2|none] --decompression [gzip|pigz|bzip2|none] --output_dir [path to output dir] [--config /path/to/config] ${txtrst}" echo "${txtund} ${txtrst}" echo "OPTIONS:" echo "${txtund} ${txtrst}" - echo " --source: mysqldump filename (with path) to process" + echo " --source: mysqldump filename to process. It could be a compressed or regular file." echo " --extract: Specify what to extract. Possible values DB, TABLE, ALLDBS, ALLTABLES, REGEXP" echo " --match_str: Specify match string for extract command option." - echo " --compression: gzip/pigz/no_compression (default: gzip) " + echo " --compression: gzip/pigz/bzip2/none (default: gzip). Extracted file will be of this compression." + echo " --decompression: gzip/pigz/bzip2/none (default: gzip). This will be used against input file." echo " --output_dir: path to output dir. (default: ./out/)" - echo "" - echo "${txtund} ${txtrst}" + echo " --config: path to config file. You may use --config option to specify the config file that includes following variables." + echo "\t\tSOURCE= +\t\tEXTRACT= +\t\tCOMPRESSION= +\t\tDECOMPRESSION= +\t\tOUTPUT_DIR= +\t\tMATCH_STR= +" + echo "${txtund} ${txtrst}" + echo "Ver. $VERSION" exit 0; } @@ -64,19 +82,24 @@ usage() parse_result() { - ## Validate SOURCE_DUMP is provided and exists - if [ -z $SOURCE_DUMP ]; then - echo "${txtred}ERROR: Source file not specified or does not exist. (Entered: $SOURCE_DUMP)${txtrst}" - elif [ ! -f $SOURCE_DUMP ]; then - echo "${txtred}ERROR: Source file does not exist. (Entered: $SOURCE_DUMP)${txtrst}" + + ## Validate SOURCE is provided and exists + if [ -z $SOURCE ]; then + echo "${txtred}ERROR: Source file not specified or does not exist. (Entered: $SOURCE)${txtrst}" + elif [ ! -f $SOURCE ]; then + echo "${txtred}ERROR: Source file does not exist. (Entered: $SOURCE)${txtrst}" exit 2; fi ## Parse Extract Operation case $EXTRACT in - ALLDBS|ALLTABLES|REGEXP ) ;; + ALLDBS|ALLTABLES ) + if [ "$MATCH_STR" != '' ]; then + echo "${txtylw}Ignoring option --match_string.${txtrst}" + fi; + ;; DB|TABLE|REGEXP) - if [ "$OBJECT_NAME" = '' ]; then + if [ "$MATCH_STR" = '' ]; then echo "${txtred}ERROR: Expecting input for option --match_string.${txtrst}" exit 1; fi; @@ -91,29 +114,88 @@ parse_result() EXT="sql" echo "${txtgrn}Setting no compression.${txtrst}"; elif [ "$COMPRESSION" = 'pigz' ]; then - which $COMPRESSION + which $COMPRESSION &>/dev/null if [ $? -ne 0 ]; then echo "${txtred}WARNING:$COMPRESSION appears having issues, using default gzip.${txtrst}"; COMPRESSION="gzip"; fi; echo "${txtgrn}Setting compression as $COMPRESSION.${txtrst}"; EXT="sql.gz" + elif [ "$COMPRESSION" = 'bzip2' ]; then + which $COMPRESSION &>/dev/null + if [ $? -ne 0 ]; then + echo "${txtred}WARNING:$COMPRESSION appears having issues, using default gzip.${txtrst}"; + COMPRESSION="gzip"; + fi; + echo "${txtgrn}Setting compression as $COMPRESSION.${txtrst}"; + EXT="sql.bz2"; else COMPRESSION='gzip'; - echo "${txtgrn}Using default compression $COMPRESSION.${txtrst}"; + echo "${txtgrn}Setting compression $COMPRESSION (default).${txtrst}"; EXT="sql.gz" fi; - # Output directory - mkdir -p $OUT_DIR - if [ $? -eq 0 ]; then - echo "${txtgrn}Settingup output directory: $OUT_DIR.${txtrst}"; + + ## Parse decompression + if [ "$DECOMPRESSION" = 'none' ]; then + DECOMPRESSION='cat'; + echo "${txtgrn}Setting no decompression.${txtrst}"; + elif [ "$DECOMPRESSION" = 'pigz' ]; then + which $DECOMPRESSION &>/dev/null + if [ $? -ne 0 ]; then + echo "${txtred}WARNING:$DECOMPRESSION appears having issues, using default gzip.${txtrst}"; + DECOMPRESSION="gzip -d -c"; + else + DECOMPRESSION="pigz -d -c"; + fi; + echo "${txtgrn}Setting decompression as $DECOMPRESSION.${txtrst}"; + elif [ "$DECOMPRESSION" = 'bzip2' ]; then + which $DECOMPRESSION &>/dev/null + if [ $? -ne 0 ]; then + echo "${txtred}WARNING:$DECOMPRESSION appears having issues, using default gzip.${txtrst}"; + DECOMPRESSION="gzip -d -c"; + else + DECOMPRESSION="bzip2 -d -c"; + fi; + echo "${txtgrn}Setting decompression as $DECOMPRESSION.${txtrst}"; + else + DECOMPRESSION="gzip -d -c"; + echo "${txtgrn}Setting decompression $DECOMPRESSION (default).${txtrst}"; + fi; + + + ## Verify file type: + filecommand=`file $SOURCE` + echo $filecommand | grep "compressed" 1>/dev/null + if [ `echo $?` -eq 0 ] + then + echo "${txtylw}File $SOURCE is a compressed dump.${txtrst}" + if [ "$DECOMPRESSION" = 'cat' ]; then + echo "${txtred} The input file $SOURCE appears to be a compressed dump. \n While the decompression is set to none.\n Please specify ${txtund}--decompression [gzip|bzip2|pigz]${txtrst}${txtred} argument.${txtrst}"; + exit 1; + fi; else - echo "${txtred}ERROR:Issue while checking output directory: $OUT_DIR.${txtrst}"; + echo "${txtylw}File $SOURCE is a regular dump.${txtrst}" + if [ "$DECOMPRESSION" != 'cat' ]; then + echo "${txtred} Default decompression method for source is gzip. \n The input file $SOURCE does not appear a compressed dump. \n ${txtylw}We will try using no decompression. Please consider specifying ${txtund}--decompression none${txtrst}${txtylw} argument.${txtrst}"; + DECOMPRESSION='cat'; ## Auto correct decompression to none for regular files. + fi; + fi; + + + # Output directory + if [ "$OUTPUT_DIR" = "" ]; then + OUTPUT_DIR="out"; + fi; + mkdir -p $OUTPUT_DIR + if [ $? -eq 0 ]; then + echo "${txtgrn}Setting output directory: $OUTPUT_DIR.${txtrst}"; + else + echo "${txtred}ERROR:Issue while checking output directory: $OUTPUT_DIR.${txtrst}"; exit 2; fi; -echo "${txtylw}Processing: Extract $OBJECT_NAME from $SOURCE_DUMP with compression option as $COMPRESSION and output location as $OUT_DIR${txtrst}"; +echo "${txtylw}Processing: Extract $EXTRACT $MATCH_STR from $SOURCE with compression option as $COMPRESSION and output location as $OUTPUT_DIR${txtrst}"; } @@ -122,55 +204,57 @@ dump_splitter() { case $EXTRACT in DB) - echo "Extracting Database: $OBJECT_NAME"; - sed -n "/^-- Current Database: \`$OBJECT_NAME\`/,/^-- Current Database: /p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$OBJECT_NAME.$EXT - echo "${txtbld} Database $OBJECT_NAME extracted from $SOURCE_DUMP at $OUTFILE${txtrst}" + echo "Extracting Database: $MATCH_STR"; + $DECOMPRESSION $SOURCE | sed -n "/^-- Current Database: \`$MATCH_STR\`/,/^-- Current Database: /p" | $COMPRESSION > $OUTPUT_DIR/$MATCH_STR.$EXT + echo "${txtbld} Database $MATCH_STR extracted from $SOURCE at $OUTPUT_DIR${txtrst}" ;; TABLE) #Loop for each tablename found in provided dumpfile - echo "Extracting $OBJECT_NAME." + echo "Extracting $MATCH_STR." #Extract table specific dump to tablename.sql - sed -n "/^-- Table structure for table \`$OBJECT_NAME\`/,/^-- Table structure for table/p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$OBJECT_NAME.$EXT - echo "${txtbld} Table $OBJECT_NAME extracted from $SOURCE_DUMP at $OUTFILE${txtrst}" + $DECOMPRESSION $SOURCE | sed -n "/^-- Table structure for table \`$MATCH_STR\`/,/^-- Table structure for table/p" | $COMPRESSION > $OUTPUT_DIR/$MATCH_STR.$EXT + echo "${txtbld} Table $MATCH_STR extracted from $SOURCE at $OUTPUT_DIR${txtrst}" ;; ALLDBS) - for dbname in $(grep -E "^-- Current Database: " $SOURCE_DUMP| awk -F"\`" {'print $2'}) + for dbname in $($DECOMPRESSION $SOURCE | grep -E "^-- Current Database: " | awk -F"\`" {'print $2'}) do echo "Extracting Database $dbname..." #Extract database specific dump to database.sql.gz - sed -n "/^-- Current Database: \`$dbname\`/,/^-- Current Database: /p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$dbname.$EXT + $DECOMPRESSION $SOURCE | sed -n "/^-- Current Database: \`$dbname\`/,/^-- Current Database: /p" | $COMPRESSION > $OUTPUT_DIR/$dbname.$EXT DB_COUNT=$((DB_COUNT+1)) - echo "${txtbld}Database $dbname extracted from $DUMP_FILE at $OUT_DIR/$dbname.$EXT${txtrst}" + echo "${txtbld}Database $dbname extracted from $SOURCE at $OUTPUT_DIR/$dbname.$EXT${txtrst}" done; echo "${txtbld}Total $DB_COUNT databases extracted.${txtrst}" ;; ALLTABLES) - for tablename in $(grep "Table structure for table " $SOURCE_DUMP | awk -F"\`" {'print $2'}) + for tablename in $($DECOMPRESSION $SOURCE | grep "Table structure for table " | awk -F"\`" {'print $2'}) do #Extract table specific dump to tablename.sql - sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$tablename.$EXT + $DECOMPRESSION $SOURCE | sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" | $COMPRESSION > $OUTPUT_DIR/$tablename.$EXT TABLE_COUNT=$((TABLE_COUNT+1)) - echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUT_DIR/$tablename.$EXT${txtrst}" + echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUTPUT_DIR/$tablename.$EXT${txtrst}" done; echo "${txtbld}Total $TABLE_COUNT tables extracted.${txtrst}" ;; REGEXP) TABLE_COUNT=0; - for tablename in $(grep -E "Table structure for table \`$OBJECT_NAME" $SOURCE_DUMP| awk -F"\`" {'print $2'}) + for tablename in $(grep -E "Table structure for table \`$MATCH_STR" $SOURCE| awk -F"\`" {'print $2'}) do echo "Extracting $tablename..." #Extract table specific dump to tablename.sql - sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$tablename.$EXT - echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUT_DIR/$tablename.$EXT${txtrst}" + $DECOMPRESSION $SOURCE | sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" | $COMPRESSION > $OUTPUT_DIR/$tablename.$EXT + echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUTPUT_DIR/$tablename.$EXT${txtrst}" TABLE_COUNT=$((TABLE_COUNT+1)) done; echo "${txtbld}Total $TABLE_COUNT tables extracted.${txtrst}" ;; - *) echo "Wrong option, exiting.";; + *) echo "Wrong option, exiting."; + usage; + exit 1;; esac } @@ -180,6 +264,10 @@ missing_arg() exit 1; } +if [ "$#" -eq 0 ]; then + usage; + exit 1; +fi # Accepts Parameters while [ "$1" != "" ]; do @@ -188,7 +276,7 @@ while [ "$1" != "" ]; do if [ -z $1 ]; then missing_arg --source fi; - SOURCE_DUMP=$1 ;; + SOURCE=$1 ;; --extract|-E ) shift if [ -z $1 ]; then missing_arg --extract @@ -196,21 +284,35 @@ while [ "$1" != "" ]; do EXTRACT=$1 ;; --compression|-C ) shift if [ -z $1 ]; then - missing_arg --source + missing_arg --compression fi; COMPRESSION=$1 ;; + --decompression|-D) shift + if [ -z $1 ]; then + missing_arg --decompression + fi; + DECOMPRESSION=$1 ;; --output_dir|-O ) shift if [ -z $1 ]; then - missing_arg --source + missing_arg --output_dir fi; - OUT_DIR=$1 ;; + OUTPUT_DIR=$1 ;; --match_str|-M ) shift if [ -z $1 ]; then missing_arg --match_str fi; - OBJECT_NAME=$1 ;; + MATCH_STR=$1 ;; + --config ) shift; + if [ -z $1 ]; then + missing_arg --config + fi; + if [ ! -f $1 ]; then + echo "${txtred}ERROR: Config file $1 does not exist.${txtrst}" + exit 2; + fi; +. ./$1 ;; -h ) usage - exit ;; + exit ;; * ) echo ""; usage exit 1 @@ -220,4 +322,4 @@ done parse_result dump_splitter -exit 0; \ No newline at end of file +exit 0;