Compressed backup, bz2 & config file support.

More validations and bug fixes. Detecting source dump types (compressed/sql). Configuration file support. Support for compressed backup and bz2 format.
2025-08-21 15:52:00 +02:00 · 2015-03-15 17:14:33 +05:30
parent 4e696ae411
commit d26b0a4dad
1 changed files with 151 additions and 49 deletions
--- a/mysqldumpsplitter.sh
+++ b/mysqldumpsplitter.sh
@@ -1,6 +1,6 @@
 #!/bin/sh

-# Current Version: 3.0
+# Current Version: 4.0
 # Extracts database, table, all databases, all tables or tables matching on regular expression from the mysqldump.
 # Includes output compression options.
 # By: Kedar Vaijanapurkar
@@ -16,6 +16,13 @@
 # Ver. 3.0: March, 2015
 # ... Complete rewrite. 
 # ... Extract all databases.
+# Ver. 4.0: March, 2015
+# ... More validations and bug fixes.
+# ... Detecting source dump types (compressed/sql).
+# ... Configuration file support.
+# ... Support for compressed backup and bz2 format.
+# Credit: Andrzej Wroblewski (andrzej.wroblewski@packetstorm.pl) for his inputs on compressed backup & bz2 support.
+# 
 ##

 # ToDo: Work with straming input
@@ -33,30 +40,41 @@ txtwht=$(tput setaf 7)    # White
 txtrst=$(tput sgr0)       # Text reset

 ## Variable Declaration
-SOURCE_DUMP='';
-OBJECT_NAME='';
+SOURCE='';
+MATCH_STR='';
 EXTRACT='';
-OUT_DIR='out';
+OUTPUT_DIR='out';
 EXT="sql.gz";
 TABLE_NAME='';
 DB_NAME='';
-
+COMPRESSION='gzip';
+DECOMPRESSION='cat';
+VERSION=4.0

 ## Usage Description
 usage()
 {
-	echo "${txtgrn}${txtund}************ Usage ************ "${txtrst};
-	echo "${txtgrn}sh mysqldumpsplitter.sh --source filename --extract [DB|TABLE|ALLDBS|ALLTABLES|REGEXP] --match_str string --compression=[gzip|pigz|none] --output_dir=[path to output dir] ${txtrst}" 
+	echo "\n\t\t\t\t\t\t\t${txtgrn}${txtund}************ Usage ************ \n"${txtrst};
+	echo "${txtgrn}sh mysqldumpsplitter.sh --source filename --extract [DB|TABLE|ALLDBS|ALLTABLES|REGEXP] --match_str string --compression [gzip|pigz|bzip2|none] --decompression [gzip|pigz|bzip2|none] --output_dir [path to output dir] [--config /path/to/config] ${txtrst}" 
 	echo "${txtund}                                                    ${txtrst}"	
 	echo "OPTIONS:"
 	echo "${txtund}                                                    ${txtrst}"	
-	echo "	--source: mysqldump filename (with path) to process"
+	echo "	--source: mysqldump filename to process. It could be a compressed or regular file."
 	echo "	--extract: Specify what to extract. Possible values DB, TABLE, ALLDBS, ALLTABLES, REGEXP"
 	echo "	--match_str: Specify match string for extract command option."
-	echo "	--compression: gzip/pigz/no_compression (default: gzip) "
+	echo "	--compression: gzip/pigz/bzip2/none (default: gzip). Extracted file will be of this compression."
+	echo "	--decompression: gzip/pigz/bzip2/none (default: gzip). This will be used against input file."
 	echo "	--output_dir: path to output dir. (default: ./out/)"
-	echo ""
-	echo "${txtund}                                                    ${txtrst}"	
+	echo "	--config: path to config file. You may use --config option to specify the config file that includes following variables."
+	echo "\t\tSOURCE=
+\t\tEXTRACT=
+\t\tCOMPRESSION=
+\t\tDECOMPRESSION=
+\t\tOUTPUT_DIR=
+\t\tMATCH_STR=
+"
+	echo "${txtund}                                                    ${txtrst}"
+	echo "Ver. $VERSION"	
 	exit 0;
 }

@@ -64,19 +82,24 @@ usage()
 parse_result()
 {

-	## Validate SOURCE_DUMP is provided and exists
-	if [ -z $SOURCE_DUMP ]; then
-	    echo "${txtred}ERROR: Source file not specified or does not exist. (Entered: $SOURCE_DUMP)${txtrst}"
-	elif [ ! -f $SOURCE_DUMP ]; then
-	    echo "${txtred}ERROR: Source file does not exist. (Entered: $SOURCE_DUMP)${txtrst}"
+
+	## Validate SOURCE is provided and exists
+	if [ -z $SOURCE ]; then
+	    echo "${txtred}ERROR: Source file not specified or does not exist. (Entered: $SOURCE)${txtrst}"
+	elif [ ! -f $SOURCE ]; then
+	    echo "${txtred}ERROR: Source file does not exist. (Entered: $SOURCE)${txtrst}"
 	    exit 2;
 	fi

 	## Parse Extract Operation
 	case $EXTRACT in
-		ALLDBS|ALLTABLES|REGEXP ) ;;
+		ALLDBS|ALLTABLES ) 
+			if [ "$MATCH_STR" != '' ]; then
+			    echo "${txtylw}Ignoring option --match_string.${txtrst}"
+			fi;
+				;;
 		DB|TABLE|REGEXP)
-			if [ "$OBJECT_NAME" = '' ]; then
+			if [ "$MATCH_STR" = '' ]; then
 			    echo "${txtred}ERROR: Expecting input for option --match_string.${txtrst}"
 			    exit 1;
 			fi;
@@ -91,29 +114,88 @@ parse_result()
 		EXT="sql"
 		echo "${txtgrn}Setting no compression.${txtrst}";
 	elif [ "$COMPRESSION" = 'pigz' ]; then
-		which $COMPRESSION
+		which $COMPRESSION &>/dev/null
 		if [ $? -ne 0 ]; then 
 			echo "${txtred}WARNING:$COMPRESSION appears having issues, using default gzip.${txtrst}";
 			COMPRESSION="gzip";
 		fi;
 		echo "${txtgrn}Setting compression as $COMPRESSION.${txtrst}";
 		EXT="sql.gz"
+	elif [ "$COMPRESSION" = 'bzip2' ]; then
+                which $COMPRESSION &>/dev/null
+                if [ $? -ne 0 ]; then
+                        echo "${txtred}WARNING:$COMPRESSION appears having issues, using default gzip.${txtrst}";
+                        COMPRESSION="gzip";
+                fi;
+                echo "${txtgrn}Setting compression as $COMPRESSION.${txtrst}";
+                EXT="sql.bz2";
 	else
 		COMPRESSION='gzip';
-		echo "${txtgrn}Using default compression $COMPRESSION.${txtrst}";
+		echo "${txtgrn}Setting compression $COMPRESSION (default).${txtrst}";
 		EXT="sql.gz"
 	fi;

-	# Output directory
-	mkdir -p $OUT_DIR
-	if [ $? -eq 0 ]; then
-		echo "${txtgrn}Settingup output directory: $OUT_DIR.${txtrst}";
+
+	## Parse  decompression
+        if [ "$DECOMPRESSION" = 'none' ]; then
+                DECOMPRESSION='cat';
+                echo "${txtgrn}Setting no decompression.${txtrst}";
+        elif [ "$DECOMPRESSION" = 'pigz' ]; then
+                which $DECOMPRESSION &>/dev/null
+                if [ $? -ne 0 ]; then
+                        echo "${txtred}WARNING:$DECOMPRESSION appears having issues, using default gzip.${txtrst}";
+                        DECOMPRESSION="gzip -d -c";
+                else
+                        DECOMPRESSION="pigz -d -c";
+                fi;
+                echo "${txtgrn}Setting decompression as $DECOMPRESSION.${txtrst}";
+       elif [ "$DECOMPRESSION" = 'bzip2' ]; then
+                which $DECOMPRESSION &>/dev/null
+                if [ $? -ne 0 ]; then
+                        echo "${txtred}WARNING:$DECOMPRESSION appears having issues, using default gzip.${txtrst}";
+                        DECOMPRESSION="gzip -d -c";
+                else
+                        DECOMPRESSION="bzip2 -d -c";
+                fi;
+                echo "${txtgrn}Setting decompression as $DECOMPRESSION.${txtrst}";
+        else
+                DECOMPRESSION="gzip -d -c";
+                echo "${txtgrn}Setting decompression $DECOMPRESSION (default).${txtrst}";
+        fi;
+
+
+	## Verify file type:
+	filecommand=`file $SOURCE`
+	echo $filecommand | grep "compressed"  1>/dev/null 
+	if [ `echo $?` -eq 0 ]
+	then
+		echo "${txtylw}File $SOURCE is a compressed dump.${txtrst}"
+		if [ "$DECOMPRESSION" = 'cat' ]; then
+			echo "${txtred} The input file $SOURCE appears to be a compressed dump. \n While the decompression is set to none.\n Please specify ${txtund}--decompression [gzip|bzip2|pigz]${txtrst}${txtred} argument.${txtrst}";
+			exit 1;
+		fi;
 	else
-		echo "${txtred}ERROR:Issue while checking output directory: $OUT_DIR.${txtrst}";
+		echo "${txtylw}File $SOURCE is a regular dump.${txtrst}"
+		if [ "$DECOMPRESSION" != 'cat' ]; then
+			echo "${txtred} Default decompression method for source is gzip. \n The input file $SOURCE does not appear a compressed dump. \n ${txtylw}We will try using no decompression. Please consider specifying ${txtund}--decompression none${txtrst}${txtylw} argument.${txtrst}";
+			DECOMPRESSION='cat'; ## Auto correct decompression to none for regular files.
+		fi;
+	fi;
+
+
+	# Output directory
+	if [ "$OUTPUT_DIR" = "" ]; then
+		OUTPUT_DIR="out";
+	fi;
+	mkdir -p $OUTPUT_DIR
+	if [ $? -eq 0 ]; then
+		echo "${txtgrn}Setting output directory: $OUTPUT_DIR.${txtrst}";
+	else
+		echo "${txtred}ERROR:Issue while checking output directory: $OUTPUT_DIR.${txtrst}";
 		exit 2;
 	fi;

-echo "${txtylw}Processing: Extract $OBJECT_NAME from $SOURCE_DUMP with compression option as $COMPRESSION and output location as $OUT_DIR${txtrst}";
+echo "${txtylw}Processing: Extract $EXTRACT $MATCH_STR from $SOURCE with compression option as $COMPRESSION and output location as $OUTPUT_DIR${txtrst}";

 }

@@ -122,55 +204,57 @@ dump_splitter()
 {
 	case $EXTRACT in
 		DB) 
-			echo "Extracting Database: $OBJECT_NAME";
-			sed -n "/^-- Current Database: \`$OBJECT_NAME\`/,/^-- Current Database: /p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$OBJECT_NAME.$EXT
-			echo "${txtbld} Database $OBJECT_NAME  extracted from $SOURCE_DUMP at $OUTFILE${txtrst}"
+			echo "Extracting Database: $MATCH_STR";
+			$DECOMPRESSION $SOURCE | sed -n "/^-- Current Database: \`$MATCH_STR\`/,/^-- Current Database: /p" | $COMPRESSION > $OUTPUT_DIR/$MATCH_STR.$EXT
+			echo "${txtbld} Database $MATCH_STR  extracted from $SOURCE at $OUTPUT_DIR${txtrst}"
 			;;

 		TABLE) 
 			#Loop for each tablename found in provided dumpfile
-		        echo "Extracting $OBJECT_NAME."
+		        echo "Extracting $MATCH_STR."
 		        #Extract table specific dump to tablename.sql
-		        sed -n "/^-- Table structure for table \`$OBJECT_NAME\`/,/^-- Table structure for table/p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$OBJECT_NAME.$EXT
-			echo "${txtbld} Table $OBJECT_NAME  extracted from $SOURCE_DUMP at $OUTFILE${txtrst}"
+		        $DECOMPRESSION  $SOURCE | sed -n "/^-- Table structure for table \`$MATCH_STR\`/,/^-- Table structure for table/p" | $COMPRESSION > $OUTPUT_DIR/$MATCH_STR.$EXT
+			echo "${txtbld} Table $MATCH_STR  extracted from $SOURCE at $OUTPUT_DIR${txtrst}"
 		 	;;

 		ALLDBS) 
-		        for dbname in $(grep -E "^-- Current Database: " $SOURCE_DUMP| awk -F"\`" {'print $2'})
+		        for dbname in $($DECOMPRESSION $SOURCE | grep -E "^-- Current Database: " | awk -F"\`" {'print $2'})
 		        do
 		                echo "Extracting Database $dbname..."
 		                #Extract database specific dump to database.sql.gz
-		                sed -n "/^-- Current Database: \`$dbname\`/,/^-- Current Database: /p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$dbname.$EXT
+		                $DECOMPRESSION $SOURCE | sed -n "/^-- Current Database: \`$dbname\`/,/^-- Current Database: /p" | $COMPRESSION > $OUTPUT_DIR/$dbname.$EXT
 		                DB_COUNT=$((DB_COUNT+1))
-				echo "${txtbld}Database $dbname extracted from $DUMP_FILE at $OUT_DIR/$dbname.$EXT${txtrst}"
+				echo "${txtbld}Database $dbname extracted from $SOURCE at $OUTPUT_DIR/$dbname.$EXT${txtrst}"
 		        done;
 			echo "${txtbld}Total $DB_COUNT databases extracted.${txtrst}"
 			;;

 		ALLTABLES) 
-			for tablename in $(grep "Table structure for table " $SOURCE_DUMP | awk -F"\`" {'print $2'})
+			for tablename in $($DECOMPRESSION $SOURCE | grep "Table structure for table " | awk -F"\`" {'print $2'})
 			do
 				#Extract table specific dump to tablename.sql
-				sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$tablename.$EXT
+				$DECOMPRESSION $SOURCE | sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" | $COMPRESSION > $OUTPUT_DIR/$tablename.$EXT
 				TABLE_COUNT=$((TABLE_COUNT+1))
-				echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUT_DIR/$tablename.$EXT${txtrst}"
+				echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUTPUT_DIR/$tablename.$EXT${txtrst}"
 			done;
 				echo "${txtbld}Total $TABLE_COUNT tables extracted.${txtrst}"
 			;;
 		REGEXP) 
 			TABLE_COUNT=0;
-		        for tablename in $(grep -E "Table structure for table \`$OBJECT_NAME" $SOURCE_DUMP| awk -F"\`" {'print $2'})
+		        for tablename in $(grep -E "Table structure for table \`$MATCH_STR" $SOURCE| awk -F"\`" {'print $2'})
 		        do
 		                echo "Extracting $tablename..."
 		                #Extract table specific dump to tablename.sql
-		                sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" $SOURCE_DUMP | $COMPRESSION > $OUT_DIR/$tablename.$EXT
-				echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUT_DIR/$tablename.$EXT${txtrst}"
+		                $DECOMPRESSION $SOURCE | sed -n "/^-- Table structure for table \`$tablename\`/,/^-- Table structure for table/p" | $COMPRESSION > $OUTPUT_DIR/$tablename.$EXT
+				echo "${txtbld}Table $tablename extracted from $DUMP_FILE at $OUTPUT_DIR/$tablename.$EXT${txtrst}"
 		                TABLE_COUNT=$((TABLE_COUNT+1))
 		        done;
 			echo "${txtbld}Total $TABLE_COUNT tables extracted.${txtrst}"
 			;;

-		*) echo "Wrong option, exiting.";;
+		*)	echo "Wrong option, exiting.";
+			usage; 
+			exit 1;;
 	esac
 }

@@ -180,6 +264,10 @@ missing_arg()
 	exit 1;
 }

+if [ "$#" -eq 0 ]; then
+	usage;
+	exit 1;
+fi

 # Accepts Parameters
 while [ "$1" != "" ]; do
@@ -188,7 +276,7 @@ while [ "$1" != "" ]; do
 		if [ -z $1 ]; then 
 			missing_arg --source
 		fi;
-		SOURCE_DUMP=$1 ;;
+		SOURCE=$1 ;;
        --extract|-E  )   shift	
 		if [ -z $1 ]; then 
 			missing_arg --extract
@@ -196,21 +284,35 @@ while [ "$1" != "" ]; do
 		EXTRACT=$1 ;;
        --compression|-C  )   shift
 		if [ -z $1 ]; then 
-			missing_arg --source
+			missing_arg --compression
 		fi;
 		COMPRESSION=$1 ;;
+	--decompression|-D) shift
+		if [ -z $1 ]; then 
+			missing_arg --decompression
+		fi;
+		DECOMPRESSION=$1 ;;		
 	--output_dir|-O  ) shift
 		if [ -z $1 ]; then 
-			missing_arg --source
+			missing_arg --output_dir
 		fi;
-		OUT_DIR=$1 ;;
+		OUTPUT_DIR=$1 ;;
 	--match_str|-M ) shift
 		if [ -z $1 ]; then 
 			missing_arg --match_str
 		fi;
-		OBJECT_NAME=$1 ;;
+		MATCH_STR=$1 ;;
+	--config	) shift;
+		if [ -z $1 ]; then 
+			missing_arg --config
+		fi;
+		if [ ! -f $1 ]; then
+		    echo "${txtred}ERROR: Config file $1 does not exist.${txtrst}"
+		    exit 2;
+		fi;
+. ./$1 ;;
        -h  )   usage
-                exit ;;
+		exit ;;
        * )     echo "";
 		usage
                exit 1
@@ -220,4 +322,4 @@ done

 parse_result
 dump_splitter
-exit 0;
+exit 0;