MDL-41838 Files: Implement .tar.gz packer (1 of 2)

A new packer for .tar.gz files (MIME type application/x-gzip) has been
implemented, including unit tests.

This packer is intended for use during backup/restore only and is not
otherwise exposed in the user interface at present. However, it is
supposed to follow the (ancient) POSIX .tar standard; files created
with this packer will open in GNU tar.

There are restrictions in support corresponding to the POSIX standard:

- Non-ASCII filenames are not supported.
- Very long file/path names are not supported.
- Individual files within the .tar may not be more than 8GB.

Some of these limitations might need to be addressed in future changes
if the packer were to be used in areas other than backup/restore.
This commit is contained in:
sam marshall 2013-09-18 17:58:27 +01:00
parent 56cc9b387e
commit c858655dde
9 changed files with 1938 additions and 6 deletions

View File

@ -221,6 +221,7 @@ $string['duplicateroleshortname'] = 'There is already a role with this short nam
$string['duplicateusername'] = 'Duplicate username - skipping record';
$string['emailfail'] = 'Emailing failed';
$string['error'] = 'Error occurred';
$string['errorprocessingarchive'] = 'Error processing archive file';
$string['errorcleaningdirectory'] = 'Error cleaning directory "{$a}"';
$string['errorcopyingfiles'] = 'Error copying files';
$string['errorcreatingdirectory'] = 'Error creating directory "{$a}"';

View File

@ -35,7 +35,19 @@ abstract class file_packer {
/**
* Archive files and store the result in file storage.
*
* @param array $files array with zip paths as keys (archivepath=>ospathname or archivepath=>stored_file)
* The key of the $files array is always the path within the archive, e.g.
* 'folder/subfolder/file.txt'. There are several options for the values of
* the array:
* - null = this entry represents a directory, so no file
* - string = full path to file within operating system filesystem
* - stored_file = file within Moodle filesystem
* - array with one string element = use in-memory string for file content
*
* For the string (OS path) and stored_file (Moodle filesystem) cases, you
* can specify a directory instead of a file to recursively include all files
* within this directory.
*
* @param array $files Array of files to archive
* @param int $contextid context ID
* @param string $component component
* @param string $filearea file area
@ -54,6 +66,18 @@ abstract class file_packer {
/**
* Archive files and store the result in os file.
*
* The key of the $files array is always the path within the archive, e.g.
* 'folder/subfolder/file.txt'. There are several options for the values of
* the array:
* - null = this entry represents a directory, so no file
* - string = full path to file within operating system filesystem
* - stored_file = file within Moodle filesystem
* - array with one string element = use in-memory string for file content
*
* For the string (OS path) and stored_file (Moodle filesystem) cases, you
* can specify a directory instead of a file to recursively include all files
* within this directory.
*
* @param array $files Array of files to archive
* @param string $archivefile path to target archive file
* @param bool $ignoreinvalidfiles true means ignore missing or invalid files, false means abort on any error

View File

@ -38,14 +38,32 @@ interface file_progress {
* This function will be called periodically during the operation, assuming
* it is successful.
*
* The $max value will be the same for each call to progress() within an
* operation.
*
* If numbers (rather than INDETERMINATE) are provided, then:
* - The $progress value will either be the same as last call, or increased
* by some value (not necessarily 1).
* - The $progress value will be less than or equal to the $max value.
*
* There is no guarantee that this function will be called for every value
* in the range, or that it will be called with $progress == $max.
*
* The function may be called very frequently (e.g. after each file) or
* quite rarely (e.g. after each large file).
*
* When creating an implementation of this function, you probably want to
* do the following:
*
* 1. Check the current time and do not do anything if it's less than a
* second since the last time you reported something.
* 2. Update the PHP timeout (i.e. set it back to 2 minutes or whatever)
* so that the system will not time out.
* 3. If the progress is unchanged since last second, still display some
* output to the user. (Setting PHP timeout is not sufficient; some
* front-end servers require that data is output to the browser every
* minute or so, or they will time out on their own.)
*
* @param int $progress Current progress, or INDETERMINATE if unknown
* @param int $max Max progress, or INDETERMINATE if unknown
*/

View File

@ -49,6 +49,17 @@ class stored_file {
/** @var repository repository plugin instance */
private $repository;
/**
* @var int Indicates a file handle of the type returned by fopen.
*/
const FILE_HANDLE_FOPEN = 0;
/**
* @var int Indicates a file handle of the type returned by gzopen.
*/
const FILE_HANDLE_GZOPEN = 1;
/**
* Constructor, this constructor should be called ONLY from the file_storage class!
*
@ -381,16 +392,26 @@ class stored_file {
*
* When you want to modify a file, create a new file and delete the old one.
*
* @param int $type Type of file handle (FILE_HANDLE_xx constant)
* @return resource file handle
*/
public function get_content_file_handle($type = self::FILE_HANDLE_FOPEN) {
    // $type selects how the content file is opened (FILE_HANDLE_xx constant);
    // it defaults to a plain fopen handle so existing callers are unaffected.
    $path = $this->get_content_file_location();
    if (!is_readable($path)) {
        // Give the file storage one chance to recover the content before giving up.
        if (!$this->fs->try_content_recovery($this) or !is_readable($path)) {
            throw new file_exception('storedfilecannotread', '', $path);
        }
    }
    switch ($type) {
        case self::FILE_HANDLE_FOPEN:
            // Binary reading.
            return fopen($path, 'rb');
        case self::FILE_HANDLE_GZOPEN:
            // Binary reading of file in gz format.
            return gzopen($path, 'rb');
        default:
            throw new coding_exception('Unexpected file handle type');
    }
}
/**

View File

@ -0,0 +1,431 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Unit tests for /lib/filestorage/tgz_packer.php and tgz_extractor.php.
*
* @package core_files
* @copyright 2013 The Open University
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
defined('MOODLE_INTERNAL') || die();
global $CFG;
require_once($CFG->libdir . '/filestorage/file_progress.php');
class core_files_tgz_packer_testcase extends advanced_testcase implements file_progress {
/**
* @var array Progress information passed to the progress reporter
*/
protected $progress;
/**
 * Writes a file and then forces its modification time to a known value.
 *
 * @param string $path Full OS path of the file to write
 * @param string $contents Data to store in the file
 * @param int $mtime Modification time to apply (Unix timestamp)
 */
protected static function file_put_contents_at_time($path, $contents, $mtime) {
    // The timestamp must be applied last, because writing the file updates it.
    file_put_contents($path, $contents);
    touch($path, $mtime);
}
/**
 * Builds a source list containing one entry of every kind the packer accepts.
 *
 * The entries are, in order: an OS file, an OS directory, a stored_file,
 * a stored_file directory, an in-memory string and an explicit directory.
 *
 * @return array Files to archive, keyed by their path within the archive
 */
protected function prepare_file_list() {
    global $CFG;
    $this->resetAfterTest(true);

    $tempdir = $CFG->tempdir;

    // Plain file on disk.
    $plainfile = $tempdir . '/file1.txt';
    self::file_put_contents_at_time($plainfile, 'File 1', 1377993601);
    $filelist = array('out1.txt' => $plainfile);

    // Directory on disk containing a file and a subdirectory with another file.
    $diskdir = $tempdir . '/dir1';
    check_dir_exists($diskdir . '/dir2');
    self::file_put_contents_at_time($diskdir . '/file2.txt', 'File 2', 1377993602);
    self::file_put_contents_at_time($diskdir . '/dir2/file3.txt', 'File 3', 1377993603);
    $filelist['out2'] = $diskdir;

    // Single file held in the Moodle file storage.
    $context = context_system::instance();
    $record = array(
        'contextid' => $context->id,
        'component' => 'phpunit',
        'filearea' => 'data',
        'itemid' => 0,
        'filepath' => '/',
        'filename' => 'file4.txt',
        'timemodified' => 1377993604,
    );
    $fs = get_file_storage();
    $filelist['out3.txt'] = $fs->create_file_from_string($record, 'File 4');

    // Directory tree held in the Moodle file storage (separate item id).
    $record['itemid'] = 1;

    $record['filepath'] = '/dir1/';
    $record['filename'] = 'file5.txt';
    $record['timemodified'] = 1377993605;
    $fs->create_file_from_string($record, 'File 5');

    $record['filepath'] = '/dir1/dir2/';
    $record['filename'] = 'file6.txt';
    $record['timemodified'] = 1377993606;
    $fs->create_file_from_string($record, 'File 6');

    // This file sits in the root of the item rather than under /dir1/, and
    // the tests expect it to be left out of the archive.
    $record['filepath'] = '/';
    $record['filename'] = 'excluded.txt';
    $fs->create_file_from_string($record, 'Excluded');

    $filelist['out4'] = $fs->get_file($context->id, 'phpunit', 'data', 1, '/dir1/', '.');

    // Content supplied directly as a string.
    $filelist['out5.txt'] = array('File 7');

    // Explicit directory entry with nothing inside it.
    $filelist['out6'] = null;

    return $filelist;
}
/**
 * Checks that the gzip MIME type resolves to the .tar.gz packer.
 */
public function test_get_packer() {
    $this->assertInstanceOf('tgz_packer', get_file_packer('application/x-gzip'));
}
/**
 * Archives to an OS path, extracts to an OS directory and verifies both the
 * reported result and the extracted content.
 */
public function test_to_normal_files() {
    global $CFG;
    $packer = get_file_packer('application/x-gzip');

    // Build the archive on disk.
    $files = $this->prepare_file_list();
    $archivefile = $CFG->tempdir . '/test.tar.gz';
    $packer->archive_to_pathname($files, $archivefile);

    // Unpack it again into a fresh directory.
    $outdir = $CFG->tempdir . '/out';
    check_dir_exists($outdir);
    $result = $packer->extract_to_pathname($archivefile, $outdir);

    // The result lists every file, every implicit directory and the
    // explicitly requested directory.
    $expectedpaths = array(
        'out1.txt',
        'out2/', 'out2/dir2/', 'out2/dir2/file3.txt', 'out2/file2.txt',
        'out3.txt',
        'out4/', 'out4/dir2/', 'out4/file5.txt', 'out4/dir2/file6.txt',
        'out5.txt',
        'out6/',
    );
    sort($expectedpaths);
    $actualpaths = array_keys($result);
    sort($actualpaths);
    $this->assertEquals($expectedpaths, $actualpaths);

    // Every entry must be reported as successful.
    foreach ($result as $status) {
        $this->assertTrue($status);
    }

    // The extracted files must hold the original content.
    $expectedcontents = array(
        'out1.txt' => 'File 1',
        'out2/file2.txt' => 'File 2',
        'out2/dir2/file3.txt' => 'File 3',
        'out3.txt' => 'File 4',
        'out4/file5.txt' => 'File 5',
        'out4/dir2/file6.txt' => 'File 6',
        'out5.txt' => 'File 7',
    );
    foreach ($expectedcontents as $relativepath => $content) {
        $this->assertEquals($content, file_get_contents($outdir . '/' . $relativepath));
    }
    $this->assertTrue(is_dir($outdir . '/out6'));
}
/**
 * Tests archive and extract to Moodle file system.
 *
 * The archive is created in file storage, extracted both to disk and back
 * into file storage, and finally both operations are repeated to check that
 * overwriting existing files does not raise errors.
 */
public function test_to_stored_files() {
    global $CFG;
    $packer = get_file_packer('application/x-gzip');

    // Archive files into file storage.
    $files = $this->prepare_file_list();
    $context = context_system::instance();
    $sf = $packer->archive_to_storage($files,
            $context->id, 'phpunit', 'archive', 1, '/', 'archive.tar.gz');
    $this->assertInstanceOf('stored_file', $sf);

    // Extract (from storage) to disk.
    $outdir = $CFG->tempdir . '/out';
    check_dir_exists($outdir);
    $packer->extract_to_pathname($sf, $outdir);

    // Check the files on disk are as expected.
    $diskcontents = array(
        'out1.txt' => 'File 1',
        'out2/file2.txt' => 'File 2',
        'out2/dir2/file3.txt' => 'File 3',
        'out3.txt' => 'File 4',
        'out4/file5.txt' => 'File 5',
        'out4/dir2/file6.txt' => 'File 6',
        'out5.txt' => 'File 7',
    );
    foreach ($diskcontents as $relativepath => $content) {
        $this->assertEquals($content, file_get_contents($outdir . '/' . $relativepath));
    }
    $this->assertTrue(is_dir($outdir . '/out6'));

    // Extract to Moodle storage.
    $packer->extract_to_storage($sf, $context->id, 'phpunit', 'data', 2, '/out/');
    $fs = get_file_storage();

    // Each entry is (filepath, filename, expected content).
    $storedcontents = array(
        array('/out/', 'out1.txt', 'File 1'),
        array('/out/out2/', 'file2.txt', 'File 2'),
        array('/out/out2/dir2/', 'file3.txt', 'File 3'),
        array('/out/', 'out3.txt', 'File 4'),
        array('/out/out4/', 'file5.txt', 'File 5'),
        array('/out/out4/dir2/', 'file6.txt', 'File 6'),
        array('/out/', 'out5.txt', 'File 7'),
    );
    foreach ($storedcontents as $expected) {
        list($filepath, $filename, $content) = $expected;
        $out = $fs->get_file($context->id, 'phpunit', 'data', 2, $filepath, $filename);
        $this->assertNotEmpty($out);
        $this->assertEquals($content, $out->get_content());
    }

    // The explicit empty directory must exist as a directory record.
    $out = $fs->get_file($context->id, 'phpunit', 'data', 2, '/out/out6/', '.');
    $this->assertNotEmpty($out);
    $this->assertTrue($out->is_directory());

    // These functions are supposed to overwrite existing files; test they
    // don't give errors when run twice.
    $sf = $packer->archive_to_storage($files,
            $context->id, 'phpunit', 'archive', 1, '/', 'archive.tar.gz');
    $this->assertInstanceOf('stored_file', $sf);
    $packer->extract_to_storage($sf, $context->id, 'phpunit', 'data', 2, '/out/');
}
/**
 * Extracts a chosen subset of the archive and checks that nothing else is
 * reported or written.
 */
public function test_only_specified_files() {
    global $CFG;
    $packer = get_file_packer('application/x-gzip');

    // Build the archive on disk.
    $files = $this->prepare_file_list();
    $archivefile = $CFG->tempdir . '/test.tar.gz';
    $packer->archive_to_pathname($files, $archivefile);

    // Extract only a file, a directory and a file nested in a directory.
    $outdir = $CFG->tempdir . '/out';
    check_dir_exists($outdir);
    $wanted = array('out3.txt', 'out6/', 'out4/file5.txt');
    $result = $packer->extract_to_pathname($archivefile, $outdir, $wanted);

    // The result must report exactly the requested entries.
    $expectedpaths = $wanted;
    sort($expectedpaths);
    $actualpaths = array_keys($result);
    sort($actualpaths);
    $this->assertEquals($expectedpaths, $actualpaths);

    // Unrequested files are absent; requested ones have the right content.
    $this->assertFalse(file_exists($outdir . '/out1.txt'));
    $this->assertEquals('File 4', file_get_contents($outdir . '/out3.txt'));
    $this->assertEquals('File 5', file_get_contents($outdir . '/out4/file5.txt'));
    $this->assertTrue(is_dir($outdir . '/out6'));
}
/**
 * Verifies that every archive and extract entry point reports progress
 * through the file_progress interface and that the reports are well formed.
 */
public function test_file_progress() {
    global $CFG;

    // Set up.
    $filelist = $this->prepare_file_list();
    $packer = get_file_packer('application/x-gzip');
    $archive = "$CFG->tempdir/archive.tgz";
    $context = context_system::instance();
    $filecount = count($filelist);

    // Archive to pathname: expect at least one report per source entry.
    $this->progress = array();
    $result = $packer->archive_to_pathname($filelist, $archive, true, $this);
    $this->assertTrue($result);
    $this->assertGreaterThanOrEqual($filecount, count($this->progress));
    $this->check_progress_toward_max();

    // Archive to storage: same expectation.
    $this->progress = array();
    $archivefile = $packer->archive_to_storage($filelist, $context->id,
            'phpunit', 'test', 0, '/', 'archive.tgz', null, true, $this);
    $this->assertInstanceOf('stored_file', $archivefile);
    $this->assertGreaterThanOrEqual($filecount, count($this->progress));
    $this->check_progress_toward_max();

    // Extract to pathname. Progress is only output once per block and this
    // archive is small, so a single report is enough.
    $this->progress = array();
    $target = "$CFG->tempdir/test/";
    check_dir_exists($target);
    $result = $packer->extract_to_pathname($archive, $target, null, $this);
    remove_dir($target);
    $this->assertGreaterThanOrEqual(1, count($this->progress));
    $this->check_progress_toward_max();

    // Extract to storage (from storage).
    $this->progress = array();
    $result = $packer->extract_to_storage($archivefile, $context->id,
            'phpunit', 'target', 0, '/', null, $this);
    $this->assertGreaterThanOrEqual(1, count($this->progress));
    $this->check_progress_toward_max();

    // Extract to storage (from path).
    $this->progress = array();
    $result = $packer->extract_to_storage($archive, $context->id,
            'phpunit', 'target', 0, '/', null, $this);
    $this->assertGreaterThanOrEqual(1, count($this->progress));
    $this->check_progress_toward_max();

    // Wipe created disk file.
    unlink($archive);
}
/**
 * Tests the list_files function with and without an index file.
 *
 * Both variants must produce the same listing, while the archives
 * themselves must differ (proving the index really was included or left out).
 */
public function test_list_files() {
    global $CFG;

    // Set up. This test passes itself as the progress sink, so start from an
    // empty log rather than relying on whatever the property held before.
    $this->progress = array();
    $filelist = $this->prepare_file_list();
    $packer = get_file_packer('application/x-gzip');
    $archive = "$CFG->tempdir/archive.tgz";

    // Archive with an index (default).
    $result = $packer->archive_to_pathname($filelist, $archive, true, $this);
    $this->assertTrue($result);
    $hashwith = sha1_file($archive);

    // List files.
    $files = $packer->list_files($archive);

    // Check they match expected: (pathname, mtime, is_directory, size).
    $expectedinfo = array(
        array('out1.txt', 1377993601, false, 6),
        array('out2/', tgz_packer::DEFAULT_TIMESTAMP, true, 0),
        array('out2/dir2/', tgz_packer::DEFAULT_TIMESTAMP, true, 0),
        array('out2/dir2/file3.txt', 1377993603, false, 6),
        array('out2/file2.txt', 1377993602, false, 6),
        array('out3.txt', 1377993604, false, 6),
        array('out4/', tgz_packer::DEFAULT_TIMESTAMP, true, 0),
        array('out4/dir2/', tgz_packer::DEFAULT_TIMESTAMP, true, 0),
        array('out4/dir2/file6.txt', 1377993606, false, 6),
        array('out4/file5.txt', 1377993605, false, 6),
        array('out5.txt', tgz_packer::DEFAULT_TIMESTAMP, false, 6),
        array('out6/', tgz_packer::DEFAULT_TIMESTAMP, true, 0),
    );
    $this->assertEquals($expectedinfo, self::convert_info_for_assert($files));

    // Archive with no index. Should have same result.
    $this->progress = array();
    $packer->set_include_index(false);
    $result = $packer->archive_to_pathname($filelist, $archive, true, $this);
    $this->assertTrue($result);
    $hashwithout = sha1_file($archive);
    $files = $packer->list_files($archive);
    $this->assertEquals($expectedinfo, self::convert_info_for_assert($files));

    // Check it actually is different (does have index in)!
    $this->assertNotEquals($hashwith, $hashwithout);

    // Put the index back on in case of future tests.
    $packer->set_include_index(true);
}
/**
 * Flattens a list_files result into plain arrays sorted by pathname, so that
 * it can be compared with a literal expectation.
 *
 * @param array $files Array from list_files result
 * @return array List of array(pathname, mtime, is_directory, size), ordered by pathname
 */
protected static function convert_info_for_assert(array $files) {
    $rows = array_map(function($entry) {
        return array($entry->pathname, $entry->mtime, $entry->is_directory, $entry->size);
    }, $files);
    // usort() re-indexes the list from zero while ordering it by pathname.
    usort($rows, function($left, $right) {
        return strcmp($left[0], $right[0]);
    });
    return $rows;
}
/**
 * Checks that is_tgz_file accepts a .tar.gz archive and rejects a zip
 * archive built from the same source files.
 */
public function test_is_tgz_file() {
    global $CFG;

    // Set up.
    $filelist = $this->prepare_file_list();
    $tgzpacker = get_file_packer('application/x-gzip');
    $zippacker = get_file_packer('application/zip');
    $zippath = "$CFG->tempdir/archive.zip";

    // Create the tgz archive in file storage and the zip archive on disk.
    $context = context_system::instance();
    $tgzstored = $tgzpacker->archive_to_storage($filelist, $context->id,
            'phpunit', 'test', 0, '/', 'archive.tgz', null, true, $this);
    $this->assertInstanceOf('stored_file', $tgzstored);
    $zipresult = $zippacker->archive_to_pathname($filelist, $zippath);
    $this->assertTrue($zipresult);

    // The first check reads from storage, the second from the filesystem.
    $this->assertTrue(tgz_packer::is_tgz_file($tgzstored));
    $this->assertFalse(tgz_packer::is_tgz_file($zippath));
}
/**
 * Asserts that the recorded progress calls are numeric (not indeterminate),
 * share one maximum, never exceed it and never go backwards.
 */
protected function check_progress_toward_max() {
    $previousvalue = -1;
    $firstmax = -1;
    foreach ($this->progress as $report) {
        list($value, $max) = $report;

        // The maximum is fixed by the first report; later ones must match it.
        if ($firstmax == -1) {
            $firstmax = $max;
        } else {
            $this->assertEquals($max, $firstmax);
        }

        $this->assertTrue(is_int($value));
        $this->assertTrue(is_int($max));
        $this->assertNotEquals(file_progress::INDETERMINATE, $max);
        $this->assertTrue($value <= $max);
        $this->assertTrue($value >= $previousvalue);
        $previousvalue = $value;
    }
}
/**
 * Implements file_progress by recording each call for later inspection.
 *
 * @param int $progress Current progress value
 * @param int $max Maximum progress value
 */
public function progress($progress = file_progress::INDETERMINATE, $max = file_progress::INDETERMINATE) {
    // Append with [] so that a not-yet-initialised log is created on first use.
    $report = array($progress, $max);
    $this->progress[] = $report;
}
}

View File

@ -0,0 +1,563 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Implementation of .tar.gz extractor. Handles extraction of .tar.gz files.
* Do not call directly; use methods in tgz_packer.
*
* @see tgz_packer
* @package core_files
* @copyright 2013 The Open University
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
defined('MOODLE_INTERNAL') || die();
/**
* Extracts .tar.gz files (POSIX format).
*/
class tgz_extractor {
/**
* @var int When writing data, the system writes blocks of this size.
*/
const WRITE_BLOCK_SIZE = 65536;
/**
* @var int When reading data, the system reads blocks of this size.
*/
const READ_BLOCK_SIZE = 65536;
/**
* @var stored_file File object for archive.
*/
protected $storedfile;
/**
* @var string OS path for archive.
*/
protected $ospath;
/**
* @var int Number of files (-1 if not known).
*/
protected $numfiles;
/**
* @var int Number of files processed so far.
*/
protected $donefiles;
/**
* @var string Current file path within archive.
*/
protected $currentarchivepath;
/**
* @var string Full path to current file.
*/
protected $currentfile;
/**
* @var int Size of current file in bytes.
*/
protected $currentfilesize;
/**
* @var int Number of bytes of current file already written into buffer.
*/
protected $currentfileprocessed;
/**
* @var resource File handle to current file.
*/
protected $currentfp;
/**
* @var int Modified time of current file.
*/
protected $currentmtime;
/**
* @var string Buffer containing file data awaiting write.
*/
protected $filebuffer;
/**
* @var int Current length of buffer in bytes.
*/
protected $filebufferlength;
/**
* @var array Results array of all files processed.
*/
protected $results;
/**
* @var array In list mode, content of the list; outside list mode, null.
*/
protected $listresults = null;
/**
* @var int Whether listing or extracting.
*/
protected $mode = self::MODE_EXTRACT;
/**
* @var int If extracting (default).
*/
const MODE_EXTRACT = 0;
/**
* @var int Listing contents.
*/
const MODE_LIST = 1;
/**
* @var int Listing contents; list now complete.
*/
const MODE_LIST_COMPLETE = 2;
/**
 * Remembers where the archive lives: in Moodle file storage or on disk.
 *
 * @param stored_file|string $archivefile Moodle file or OS path to archive
 */
public function __construct($archivefile) {
    if ($archivefile instanceof stored_file) {
        $this->storedfile = $archivefile;
        return;
    }
    // Anything that is not a stored_file is kept as an OS path.
    $this->ospath = $archivefile;
}
/**
 * Extracts the archive.
 *
 * @param tgz_extractor_handler $handler Will be called for extracted files
 * @param file_progress $progress Optional progress reporting
 * @return array Array from archive path => true of processed files (empty
 *   if nothing was processed)
 * @throws moodle_exception If there is any error processing the archive
 */
public function extract(tgz_extractor_handler $handler, file_progress $progress = null) {
    $this->mode = self::MODE_EXTRACT;
    // Start from an empty array so that an archive without entries still
    // yields the documented array return value rather than null.
    $this->results = array();
    $this->extract_or_list($handler, $progress);
    $results = $this->results;
    // Drop our copy by resetting the property; unset() would remove the
    // declared property itself and make later reads raise notices.
    $this->results = array();
    return $results;
}
/**
 * Extracts or lists the archive depending on $this->mode.
 *
 * The archive is decompressed in READ_BLOCK_SIZE chunks and then walked one
 * tar block at a time; each block is either a header or part of the file
 * currently being read. Progress is reported once per chunk read.
 *
 * @param tgz_extractor_handler $handler Optional handler
 * @param file_progress $progress Optional progress reporting
 * @throws moodle_exception If there is any error processing the archive
 */
protected function extract_or_list(tgz_extractor_handler $handler = null, file_progress $progress = null) {
    // Open archive.
    if ($this->storedfile) {
        $gz = $this->storedfile->get_content_file_handle(stored_file::FILE_HANDLE_GZOPEN);
        // Estimate number of read-buffers (64KB) in file. Guess that the
        // uncompressed size is 2x compressed size. Add one just to ensure
        // it's non-zero.
        $estimatedbuffers = ($this->storedfile->get_filesize() * 2 / self::READ_BLOCK_SIZE) + 1;
    } else {
        $gz = gzopen($this->ospath, 'rb');
        $estimatedbuffers = (filesize($this->ospath) * 2 / self::READ_BLOCK_SIZE) + 1;
    }
    if (!$gz) {
        throw new moodle_exception('errorprocessingarchive', '', '', null,
                'Failed to open gzip file');
    }

    // Calculate how much progress to report per buffer read.
    $progressperbuffer = (int)(tgz_packer::PROGRESS_MAX / $estimatedbuffers);

    // Process archive in 512-byte blocks (but reading 64KB at a time).
    $buffer = '';
    $bufferpos = 0;
    $bufferlength = 0;
    $this->numfiles = -1;
    $done = 0;
    $beforeprogress = -1;
    try {
        while (true) {
            if ($bufferpos == $bufferlength) {
                $buffer = gzread($gz, self::READ_BLOCK_SIZE);
                $bufferpos = 0;
                $bufferlength = strlen($buffer);
                if ($bufferlength == 0) {
                    // EOF.
                    break;
                }

                // Report progress if enabled (exactly once per buffer).
                if ($progress) {
                    if ($this->numfiles === -1) {
                        // If we don't know the number of files, do an estimate based
                        // on number of buffers read.
                        $done += $progressperbuffer;
                        if ($done >= tgz_packer::PROGRESS_MAX) {
                            $done = tgz_packer::PROGRESS_MAX - 1;
                        }
                    } else {
                        // Once we know the number of files, use this.
                        if ($beforeprogress === -1) {
                            $beforeprogress = $done;
                        }
                        // Calculate progress as whatever progress we reported
                        // before we knew how many files there were (might be 0)
                        // plus a proportion of the number of files out of the
                        // remaining progress value.
                        $done = $beforeprogress + (int)(($this->donefiles / $this->numfiles) *
                                (tgz_packer::PROGRESS_MAX - $beforeprogress));
                    }
                    $progress->progress($done, tgz_packer::PROGRESS_MAX);
                }
            }

            $block = substr($buffer, $bufferpos, tgz_packer::TAR_BLOCK_SIZE);
            if ($this->currentfile) {
                $this->process_file_block($block, $handler);
            } else {
                $this->process_header($block, $handler);
            }

            // When listing, if we read an index file, we abort archive processing.
            if ($this->mode === self::MODE_LIST_COMPLETE) {
                break;
            }

            $bufferpos += tgz_packer::TAR_BLOCK_SIZE;
        }
    } catch (Exception $e) {
        // Do not leak the gzip handle when block processing fails.
        gzclose($gz);
        throw $e;
    }

    // Close archive and finish.
    gzclose($gz);
}
/**
 * Lists the files in the archive.
 *
 * The archive is walked in list mode; processing stops early once the mode
 * has switched to MODE_LIST_COMPLETE (the case where an index file was
 * read), otherwise the whole archive is read to build the list.
 *
 * @return array Array of file listing results
 */
public function list_files() {
    $this->mode = self::MODE_LIST;
    $this->listresults = array();
    $this->extract_or_list();

    // Hand the list over and leave list mode's storage cleared again.
    $found = $this->listresults;
    $this->listresults = null;
    return $found;
}
/**
 * Interprets one 512-byte tar header block and dispatches on the entry type.
 *
 * Blocks consisting only of NUL bytes are skipped. Directory entries are
 * either added to the listing (list mode) or passed to the handler; entries
 * with type flags 1-4, 6 and 7 are ignored; anything else is treated as a
 * normal file and handed to start_current_file().
 *
 * @param string $block Tar block
 * @param tgz_extractor_handler $handler Will be called for extracted files
 * @throws moodle_exception If the block lacks the ustar magic string
 */
protected function process_header($block, $handler) {
    // A block of nothing but nulls carries no entry. (This happens twice at
    // end of archive.)
    if ($block === str_repeat("\0", tgz_packer::TAR_BLOCK_SIZE)) {
        return;
    }

    // Field layout of struct header_posix_ustar (name[size] @offset):
    // name[100] @0, mode[8] @100, uid[8] @108, gid[8] @116, size[12] @124,
    // mtime[12] @136, checksum[8] @148, typeflag[1] @156, linkname[100] @157,
    // magic[6] @257, version[2] @263, uname[32] @265, gname[32] @297,
    // devmajor[8] @329, devminor[8] @337, prefix[155] @345, pad[12] @500.
    $name = rtrim(substr($block, 0, 100), "\0");
    $filesize = octdec(substr($block, 124, 11));
    $mtime = octdec(substr($block, 136, 11));
    $typeflag = substr($block, 156, 1);
    $magic = substr($block, 257, 6);

    // Accept the correct POSIX magic and the variant written by GNU tar's
    // default format.
    if (!in_array($magic, array("ustar\0", "ustar "), true)) {
        throw new moodle_exception('errorprocessingarchive', '', '', null,
                'Header does not have POSIX ustar magic string');
    }

    $prefix = rtrim(substr($block, 345, 155), "\0");
    $archivepath = ltrim($prefix . '/' . $name, '/');

    // For security, ensure there is no .. folder in the archivepath.
    $archivepath = clean_param($archivepath, PARAM_PATH);

    // Special entry types are ignored.
    if (in_array($typeflag, array('1', '2', '3', '4', '6', '7'), true)) {
        return;
    }

    // Directory.
    if ($typeflag === '5') {
        if ($this->mode === self::MODE_LIST) {
            $this->listresults[] = (object)array(
                    'original_pathname' => $archivepath,
                    'pathname' => $archivepath,
                    'mtime' => $mtime,
                    'is_directory' => true,
                    'size' => 0);
        } else if ($handler->tgz_directory($archivepath, $mtime)) {
            $this->results[$archivepath] = true;
        }
        return;
    }

    // All other values treated as normal file.
    $this->start_current_file($archivepath, $filesize, $mtime, $handler);
}
/**
 * Processes one 512-byte block of an existing file.
 *
 * The data is collected in a buffer that is flushed to the output file once
 * it reaches WRITE_BLOCK_SIZE or the file is complete. When the current file
 * is being skipped ($this->currentfile === true) the data is discarded.
 *
 * @param string $block Data block
 * @param tgz_extractor_handler $handler Will be called for extracted files
 * @throws moodle_exception If the buffer cannot be written out completely
 */
protected function process_file_block($block, tgz_extractor_handler $handler = null) {
    // Write block into buffer.
    $blocksize = tgz_packer::TAR_BLOCK_SIZE;
    if ($this->currentfileprocessed + tgz_packer::TAR_BLOCK_SIZE > $this->currentfilesize) {
        // Partial block at end of file: the remainder of the block is padding.
        $blocksize = $this->currentfilesize - $this->currentfileprocessed;
        $this->filebuffer .= substr($block, 0, $blocksize);
    } else {
        // Full-length block.
        $this->filebuffer .= $block;
    }
    $this->filebufferlength += $blocksize;
    $this->currentfileprocessed += $blocksize;

    // Write block to file if necessary.
    $eof = $this->currentfileprocessed == $this->currentfilesize;
    if ($this->filebufferlength >= self::WRITE_BLOCK_SIZE || $eof) {
        // Except when skipping the file, write it out.
        if ($this->currentfile !== true) {
            $written = fwrite($this->currentfp, $this->filebuffer);
            // Treat a short write (for example when the disk is full) as a
            // failure too, otherwise the extracted file is silently truncated.
            if (!$written || $written < strlen($this->filebuffer)) {
                throw new moodle_exception('errorprocessingarchive', '', '', null,
                        'Failed to write buffer to output file: ' . $this->currentfile);
            }
        }
        $this->filebuffer = '';
        $this->filebufferlength = 0;
    }

    // If file is finished, close it.
    if ($eof) {
        $this->close_current_file($handler);
    }
}
/**
 * Begins processing a file entry found in the archive.
 *
 * Decides where the data goes: the archive index is written to a temporary
 * file, list mode records the entry and discards its data, and otherwise the
 * handler chooses the target (null from the handler means "skip this file").
 * A target of true marks a file whose data is discarded.
 *
 * @param string $archivepath Path inside archive
 * @param int $filesize Size in bytes
 * @param int $mtime File-modified time
 * @param tgz_extractor_handler $handler Will be called for extracted files
 * @throws moodle_exception If the output file cannot be opened
 */
protected function start_current_file($archivepath, $filesize, $mtime,
        tgz_extractor_handler $handler = null) {
    global $CFG;

    $this->currentarchivepath = $archivepath;
    $this->currentmtime = $mtime;
    $this->currentfilesize = $filesize;
    $this->currentfileprocessed = 0;

    if ($archivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
        // For index file, store in temp directory.
        $tempfolder = $CFG->tempdir . '/core_files';
        check_dir_exists($tempfolder);
        $this->currentfile = tempnam($tempfolder, '.index');
    } else if ($this->mode === self::MODE_LIST) {
        // If listing, add to list and discard the file data.
        $this->listresults[] = (object)array(
                'original_pathname' => $archivepath,
                'pathname' => $archivepath,
                'mtime' => $mtime,
                'is_directory' => false,
                'size' => $filesize);
        $this->currentfile = true;
    } else {
        // For other files, ask handler for location; null indicates that
        // we are discarding the current file.
        $target = $handler->tgz_start_file($archivepath);
        $this->currentfile = ($target === null) ? true : $target;
    }

    $this->filebuffer = '';
    $this->filebufferlength = 0;

    // Open the output file, unless the data is being discarded.
    if ($this->currentfile === true) {
        $this->currentfp = null;
    } else {
        $this->currentfp = fopen($this->currentfile, 'wb');
        if (!$this->currentfp) {
            throw new moodle_exception('errorprocessingarchive', '', '', null,
                    'Failed to open output file: ' . $this->currentfile);
        }
    }

    // If it has no size, no data blocks follow, so close it right away.
    if ($filesize == 0) {
        $this->close_current_file($handler);
    }
}
/**
 * Closes the current file, calls handler, and sets up data.
 *
 * For the special archive index file the handler is not called. Instead:
 * - in list mode, the index is parsed into the list results and, if it is
 *   valid, the mode changes to MODE_LIST_COMPLETE;
 * - otherwise only the file count is read from the first line.
 * The temporary index file is deleted in both cases.
 *
 * For every other file, the handler's tgz_end_file is called (unless the
 * file was skipped) and the count of processed files is increased. The
 * value returned by tgz_end_file is not inspected here.
 *
 * @param tgz_extractor_handler $handler Will be called for extracted files
 * @throws moodle_exception If there is an error closing it
 */
protected function close_current_file($handler) {
    if ($this->currentfp !== null) {
        if (!fclose($this->currentfp)) {
            throw new moodle_exception('errorprocessingarchive', '', '', null,
                    'Failed to close output file: ' . $this->currentfile);
        }

        // Update modified time.
        touch($this->currentfile, $this->currentmtime);
    }

    if ($this->currentarchivepath === tgz_packer::ARCHIVE_INDEX_FILE) {
        if ($this->mode === self::MODE_LIST) {
            // When listing, use the archive index to produce the list. The
            // entries are collected separately and only added to the results
            // if the whole index is valid, so that a damaged index never
            // leaves partial or garbage entries behind.
            $lines = file($this->currentfile);
            $headerpattern = '~^' . preg_quote(tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX, '~') . '~';
            $ok = is_array($lines) && isset($lines[0]) && preg_match($headerpattern, $lines[0]);
            $entries = array();
            if ($ok) {
                // Skip the first line (header); remaining lines are
                // tab-separated: path, type, size, mtime.
                foreach (array_slice($lines, 1) as $line) {
                    $values = explode("\t", trim($line));
                    if (count($values) < 4) {
                        // Not valid, better ignore the file.
                        $ok = false;
                        break;
                    }
                    $entries[] = (object)array(
                            'original_pathname' => $values[0],
                            'pathname' => $values[0],
                            'mtime' => ($values[3] === '?' ? tgz_packer::DEFAULT_TIMESTAMP : (int)$values[3]),
                            'is_directory' => $values[1] === 'd',
                            'size' => (int)$values[2]);
                }
            }
            if ($ok) {
                foreach ($entries as $entry) {
                    $this->listresults[] = $entry;
                }
                $this->mode = self::MODE_LIST_COMPLETE;
            }
            unlink($this->currentfile);
        } else {
            // For index file, get number of files and delete temp file. Only
            // the start of the file is needed. Explicit false/0 are passed
            // because these parameters are not nullable.
            $contents = file_get_contents($this->currentfile, false, null, 0, 128);
            $matches = array();
            if (preg_match('~^' . preg_quote(tgz_packer::ARCHIVE_INDEX_COUNT_PREFIX, '~') .
                    '([0-9]+)~', $contents, $matches)) {
                $this->numfiles = (int)$matches[1];
            }
            unlink($this->currentfile);
        }
    } else {
        // Report to handler and put in results (skipped files have no file
        // pointer and are not reported).
        if ($this->currentfp !== null) {
            $handler->tgz_end_file($this->currentarchivepath, $this->currentfile);
            $this->results[$this->currentarchivepath] = true;
        }
        $this->donefiles++;
    }

    // No longer have a current file.
    $this->currentfp = null;
    $this->currentfile = null;
    $this->currentarchivepath = null;
}
}
/**
 * Interface for callback from tgz_extractor::extract.
 *
 * The file functions will be called (in pairs tgz_start_file, tgz_end_file) for
 * each file in the archive. (There is only one exception, the special
 * .ARCHIVE_INDEX file which is not reported to the handler.)
 *
 * If tgz_start_file returns null, the file is skipped and the matching
 * tgz_end_file call is not made.
 *
 * The directory function is called whenever the archive contains a directory
 * entry.
 */
interface tgz_extractor_handler {
/**
 * Called when the system begins to extract a file. At this point, the
 * handler must decide where on disk the extracted file should be located.
 * This can be a temporary location or final target, as preferred.
 *
 * The handler can request for files to be skipped, in which case no data
 * will be written and tgz_end_file will not be called.
 *
 * The extractor opens the returned location for writing ('wb'), so any
 * parent directory needs to exist by the time this function returns.
 *
 * @param string $archivepath Path and name of file within archive
 * @return string Location for output file in filesystem, or null to skip file
 */
public function tgz_start_file($archivepath);
/**
 * Called when the system has finished extracting a file. The handler can
 * now process the extracted file if required.
 *
 * By this point the extractor has closed the output file and set its
 * modified time.
 *
 * NOTE(review): the visible caller (tgz_extractor::close_current_file) does
 * not inspect the return value, so returning false does not appear to abort
 * extraction at present - confirm before relying on it.
 *
 * @param string $archivepath Path and name of file within archive
 * @param string $realpath Path in filesystem (from tgz_start_file return)
 * @return bool True to continue processing, false to abort archive extract
 */
public function tgz_end_file($archivepath, $realpath);
/**
 * Called when a directory entry is found in the archive.
 *
 * The handler can create a corresponding directory if required.
 *
 * @param string $archivepath Path and name of directory within archive
 * @param int $mtime Modified time of directory
 * @return bool True if directory was created, false if skipped
 */
public function tgz_directory($archivepath, $mtime);
}

View File

@ -0,0 +1,867 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Implementation of .tar.gz packer.
*
* A limited subset of the .tar format is supported. This packer can open files
* that it wrote, but may not be able to open files from other sources,
* especially if they use extensions. There are restrictions on file
* length and character set of filenames.
*
* We generate POSIX-compliant ustar files. As a result, the following
* restrictions apply to archive paths:
*
* - Filename may not be more than 100 characters.
* - Total of path + filename may not be more than 256 characters.
* - For path more than 155 characters it may or may not work.
* - May not contain non-ASCII characters.
*
* @package core_files
* @copyright 2013 The Open University
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
defined('MOODLE_INTERNAL') || die();
require_once("$CFG->libdir/filestorage/file_packer.php");
require_once("$CFG->libdir/filestorage/tgz_extractor.php");
/**
* Utility class - handles all packing/unpacking of .tar.gz files.
*
* @package core_files
* @category files
* @copyright 2013 The Open University
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class tgz_packer extends file_packer {
/**
* @var int Default timestamp used where unknown (Jan 1st 2013 00:00)
*/
const DEFAULT_TIMESTAMP = 1356998400;
/**
* @var string Name of special archive index file added by Moodle.
*/
const ARCHIVE_INDEX_FILE = '.ARCHIVE_INDEX';
/**
* @var string Required text at start of archive index file before file count.
*/
const ARCHIVE_INDEX_COUNT_PREFIX = 'Moodle archive file index. Count: ';
/**
* @var bool If true, includes .ARCHIVE_INDEX file in root of tar file.
*/
protected $includeindex = true;
/**
* @var int Max value for total progress.
*/
const PROGRESS_MAX = 1000000;
/**
* @var int Tar files have a fixed block size of 512 bytes.
*/
const TAR_BLOCK_SIZE = 512;
/**
 * Archive files and store the result in file storage.
 *
 * The archive is first built in a temporary file (see archive_to_pathname)
 * and then copied into file storage. Any existing file at that location
 * will be overwritten. The temporary file is removed whether or not the
 * operation succeeds.
 *
 * @param array $files array from archive path => pathname or stored_file
 * @param int $contextid context ID
 * @param string $component component
 * @param string $filearea file area
 * @param int $itemid item ID
 * @param string $filepath file path
 * @param string $filename file name
 * @param int $userid user ID
 * @param bool $ignoreinvalidfiles true means ignore missing or invalid files, false means abort on any error
 * @param file_progress $progress Progress indicator callback or null if not required
 * @return stored_file|bool false if error stored_file instance if ok
 * @throws file_exception If file operations fail
 * @throws coding_exception If any archive paths do not meet the restrictions
 */
public function archive_to_storage(array $files, $contextid,
        $component, $filearea, $itemid, $filepath, $filename,
        $userid = null, $ignoreinvalidfiles = true, file_progress $progress = null) {
    global $CFG;

    // Set up a temporary location for the file.
    $tempfolder = $CFG->tempdir . '/core_files';
    check_dir_exists($tempfolder);
    $tempfile = tempnam($tempfolder, '.tgz');

    try {
        // Archive to the given path.
        $result = $this->archive_to_pathname($files, $tempfile, $ignoreinvalidfiles, $progress);
        if ($result) {
            $fs = get_file_storage();
            $filerecord = array('contextid' => $contextid, 'component' => $component,
                    'filearea' => $filearea, 'itemid' => $itemid, 'filepath' => $filepath,
                    'filename' => $filename, 'userid' => $userid, 'mimetype' => 'application/x-tgz');

            // If there is an existing file, delete it (single lookup), then
            // create the new one from the temporary archive.
            self::delete_existing_file_record($fs, $filerecord);
            $result = $fs->create_file_from_pathname($filerecord, $tempfile);
        }
    } catch (Exception $e) {
        // Do not leave the temporary archive behind if anything fails.
        @unlink($tempfile);
        throw $e;
    }

    // Delete the temporary file (if created) and return. Errors are
    // suppressed because archive_to_pathname may already have removed it.
    @unlink($tempfile);
    return $result;
}
/**
 * Removes the stored file matching the given file record, if there is one.
 *
 * Handy immediately before creating a file at the same location, so that
 * the create call does not clash with an existing entry.
 *
 * @param file_storage $fs File storage
 * @param array $filerecord File record in same format used to create file
 */
public static function delete_existing_file_record(file_storage $fs, array $filerecord) {
    $current = $fs->get_file(
            $filerecord['contextid'],
            $filerecord['component'],
            $filerecord['filearea'],
            $filerecord['itemid'],
            $filerecord['filepath'],
            $filerecord['filename']);
    if (!$current) {
        // Nothing stored there yet.
        return;
    }
    $current->delete();
}
/**
 * Controls whether the special index file is written into new archives.
 *
 * By default, the .tar file includes a .ARCHIVE_INDEX file as its first
 * entry. This makes list_files much faster and allows for better progress
 * reporting.
 *
 * If you need to disable the inclusion of this file, call this function
 * before calling archive_to_pathname or archive_to_storage; the setting is
 * read by archive_to_pathname when it writes the archive.
 *
 * @param bool $includeindex If true, includes index
 */
public function set_include_index($includeindex) {
$this->includeindex = $includeindex;
}
/**
 * Archive files and store the result in an OS file.
 *
 * Each value in $files may be:
 * - null = directory entry;
 * - string = path of a file or directory on disk (directories are expanded);
 * - array = in-memory content (the first element is used as the data);
 * - stored_file = file or directory within Moodle file storage.
 *
 * If an exception occurs, or a directory on disk cannot be read, the
 * partly-written archive file is deleted.
 *
 * Note: $ignoreinvalidfiles is accepted for interface compatibility but is
 * not consulted by this implementation.
 *
 * @param array $files array from archive path => pathname or stored_file
 * @param string $archivefile path to target .tar.gz file
 * @param bool $ignoreinvalidfiles true means ignore missing or invalid files, false means abort on any error
 * @param file_progress $progress Progress indicator callback or null if not required
 * @return bool true if file created, false if not
 * @throws coding_exception If any archive paths do not meet the restrictions
 */
public function archive_to_pathname(array $files, $archivefile,
        $ignoreinvalidfiles=true, file_progress $progress = null) {
    // Open .gz file.
    if (!($gz = gzopen($archivefile, 'wb'))) {
        return false;
    }
    try {
        // Because we update how we calculate progress after we already
        // analyse the directory list, we can't just use a number of files
        // as progress. Instead, progress always goes to PROGRESS_MAX
        // and we do estimates as a proportion of that. To begin with,
        // assume that counting files will be 10% of the work, so allocate
        // one-tenth of PROGRESS_MAX to the total of all files.
        if ($files) {
            $progressperfile = (int)(self::PROGRESS_MAX / (count($files) * 10));
        } else {
            // If there are no files, avoid divide by zero.
            $progressperfile = 1;
        }
        $done = 0;

        // Expand the provided files into a complete list of single files.
        $expandedfiles = array();
        foreach ($files as $archivepath => $file) {
            // Update progress if required.
            if ($progress) {
                $progress->progress($done, self::PROGRESS_MAX);
            }
            $done += $progressperfile;

            if (is_null($file)) {
                // Empty directory record. Ensure it ends in a /.
                if (!preg_match('~/$~', $archivepath)) {
                    $archivepath .= '/';
                }
                $expandedfiles[$archivepath] = null;
            } else if (is_string($file)) {
                // File specified as path on disk.
                if (!$this->list_files_path($expandedfiles, $archivepath, $file,
                        $progress, $done)) {
                    gzclose($gz);
                    unlink($archivefile);
                    return false;
                }
            } else if (is_array($file)) {
                // File specified as raw content in array.
                $expandedfiles[$archivepath] = $file;
            } else {
                // File specified as stored_file object.
                $this->list_files_stored($expandedfiles, $archivepath, $file);
            }
        }

        // Store the list of files as a special file that is first in the
        // archive. This contains enough information to implement list_files
        // if required later.
        $list = self::ARCHIVE_INDEX_COUNT_PREFIX . count($expandedfiles) . "\n";
        $sizes = array();
        $mtimes = array();
        foreach ($expandedfiles as $archivepath => $file) {
            // Check archivepath doesn't contain any non-ASCII characters.
            // ASCII is the byte range 0x00-0x7f; anything above that (for
            // example any byte of a multi-byte UTF-8 character) is rejected.
            if (!preg_match('~^[\x00-\x7f]*$~', $archivepath)) {
                throw new coding_exception(
                        'Non-ASCII paths not supported: ' . $archivepath);
            }

            // Build up the details.
            $type = 'f';
            $mtime = '?';
            if (is_null($file)) {
                $type = 'd';
                $size = 0;
            } else if (is_string($file)) {
                $stat = stat($file);
                $mtime = (int)$stat['mtime'];
                $size = (int)$stat['size'];
            } else if (is_array($file)) {
                $size = (int)strlen(reset($file));
            } else {
                $mtime = (int)$file->get_timemodified();
                $size = (int)$file->get_filesize();
            }
            $sizes[$archivepath] = $size;
            $mtimes[$archivepath] = $mtime;

            // Write a line in the index.
            $list .= "$archivepath\t$type\t$size\t$mtime\n";
        }

        // The index file is optional; only write into archive if needed.
        if ($this->includeindex) {
            // Put the index file into the archive.
            $this->write_tar_entry($gz, self::ARCHIVE_INDEX_FILE, null, strlen($list), '?', $list);
        }

        // Update progress ready for main stage.
        $done = (int)(self::PROGRESS_MAX / 10);
        if ($progress) {
            $progress->progress($done, self::PROGRESS_MAX);
        }
        if ($expandedfiles) {
            // The remaining 9/10ths of progress represents these files.
            $progressperfile = (int)((9 * self::PROGRESS_MAX) / (10 * count($expandedfiles)));
        } else {
            $progressperfile = 1;
        }

        // Actually write entries for each file/directory.
        foreach ($expandedfiles as $archivepath => $file) {
            if (is_null($file)) {
                // Null entry indicates a directory.
                $this->write_tar_entry($gz, $archivepath, null,
                        $sizes[$archivepath], $mtimes[$archivepath]);
            } else if (is_string($file)) {
                // String indicates an OS file.
                $this->write_tar_entry($gz, $archivepath, $file,
                        $sizes[$archivepath], $mtimes[$archivepath], null, $progress, $done);
            } else if (is_array($file)) {
                // Array indicates in-memory data.
                $data = reset($file);
                $this->write_tar_entry($gz, $archivepath, null,
                        $sizes[$archivepath], $mtimes[$archivepath], $data, $progress, $done);
            } else {
                // Stored_file object.
                $this->write_tar_entry($gz, $archivepath, $file->get_content_file_handle(),
                        $sizes[$archivepath], $mtimes[$archivepath], null, $progress, $done);
            }
            $done += $progressperfile;
            if ($progress) {
                $progress->progress($done, self::PROGRESS_MAX);
            }
        }

        // Finish tar file with two empty 512-byte records.
        gzwrite($gz, str_pad('', 2 * self::TAR_BLOCK_SIZE, "\x00"));
        gzclose($gz);
        return true;
    } catch (Exception $e) {
        // If there is an exception, delete the in-progress file.
        gzclose($gz);
        unlink($archivefile);
        throw $e;
    }
}
/**
 * Writes a single tar entry to the archive, including its header record and
 * then the file contents (padded to a whole number of tar blocks).
 *
 * The data comes from exactly one of: $content (in-memory string), $file
 * (path on disk, or an already-open file handle), or neither (directory).
 * An entry is treated as a directory only when the size is 0 and both
 * $file and $content are null, so that empty in-memory files are still
 * written as files.
 *
 * A file given by path is opened before the header is written; if it
 * cannot be opened, nothing is written for this entry.
 *
 * @param resource $gz Gzip file
 * @param string $archivepath Full path of file within archive
 * @param string|resource $file Full path of file on disk or file handle or null if none
 * @param int $size Size or 0 for directories
 * @param int|string $mtime Time or ? if unknown
 * @param string $content Actual content of file to write (null if using $file)
 * @param file_progress $progress Progress indicator or null if none
 * @param int $done Value for progress indicator
 * @return bool True if OK, false if the file on disk could not be opened
 * @throws coding_exception If names aren't valid or sizes do not match
 */
protected function write_tar_entry($gz, $archivepath, $file, $size, $mtime, $content = null,
        file_progress $progress = null, $done = 0) {
    // Header based on documentation of POSIX ustar format from:
    // http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current .

    // For directories, ensure name ends in a slash.
    $directory = false;
    if ($size === 0 && is_null($file) && is_null($content)) {
        $directory = true;
        if (!preg_match('~/$~', $archivepath)) {
            $archivepath .= '/';
        }
        $mode = '755';
    } else {
        $mode = '644';
    }

    // Split archivepath into name and prefix. Leading path components are
    // moved into the prefix one at a time until the name fits in 100 bytes.
    $name = $archivepath;
    $prefix = '';
    while (strlen($name) > 100) {
        $slash = strpos($name, '/');
        if ($slash === false) {
            throw new coding_exception(
                    'Name cannot fit length restrictions (> 100 characters): ' . $archivepath);
        }

        if ($prefix !== '') {
            $prefix .= '/';
        }
        $prefix .= substr($name, 0, $slash);
        $name = substr($name, $slash + 1);
        if (strlen($prefix) > 155) {
            throw new coding_exception(
                    'Name cannot fit length restrictions (path too long): ' . $archivepath);
        }
    }

    // Checksum performance is a bit slow because of having to call 'ord'
    // lots of times (it takes about 1/3 the time of the actual gzwrite
    // call). To improve performance of checksum calculation, we will
    // store all the non-zero, non-fixed bytes that need adding to the
    // checksum, and checksum only those bytes.
    $forchecksum = $name;

    // struct header_posix_ustar {
    //    char name[100];
    $header = str_pad($name, 100, "\x00");

    //    char mode[8];
    //    char uid[8];
    //    char gid[8];
    $header .= '0000' . $mode . "\x000000000\x000000000\x00";
    $forchecksum .= $mode;

    //    char size[12];
    $octalsize = decoct($size);
    if (strlen($octalsize) > 11) {
        throw new coding_exception(
                'File too large for .tar file: ' . $archivepath . ' (' . $size . ' bytes)');
    }
    $paddedsize = str_pad($octalsize, 11, '0', STR_PAD_LEFT);
    $forchecksum .= $paddedsize;
    $header .= $paddedsize . "\x00";

    //    char mtime[12];
    if ($mtime === '?') {
        // Use a default timestamp rather than zero; GNU tar outputs
        // warnings about zeroes here.
        $mtime = self::DEFAULT_TIMESTAMP;
    }
    $octaltime = decoct($mtime);
    $paddedtime = str_pad($octaltime, 11, '0', STR_PAD_LEFT);
    $forchecksum .= $paddedtime;
    $header .= $paddedtime . "\x00";

    //    char checksum[8];
    // Checksum needs to be completed later. While it is being calculated
    // the field counts as eight spaces, so reserve exactly eight bytes.
    $header .= str_repeat(' ', 8);

    //    char typeflag[1];
    $typeflag = $directory ? '5' : '0';
    $forchecksum .= $typeflag;
    $header .= $typeflag;

    //    char linkname[100];
    $header .= str_pad('', 100, "\x00");

    //    char magic[6];
    //    char version[2];
    $header .= "ustar\x0000";

    //    char uname[32];
    //    char gname[32];
    //    char devmajor[8];
    //    char devminor[8];
    $header .= str_pad('', 80, "\x00");

    //    char prefix[155];
    //    char pad[12];
    $header .= str_pad($prefix, 167, "\x00");
    $forchecksum .= $prefix;
    // };

    // We have now calculated the header, but without the checksum. To work
    // out the checksum, sum all the bytes that aren't fixed or zero, and add
    // to a standard value that contains all the fixed bytes.

    // The fixed non-zero bytes are:
    //
    // - 18 '0' characters (mode except its 3 digits, uid, gid) = 864
    // - 8 spaces (checksum field placeholder)                  = 256
    // - 'ustar' (magic number)                                 = 559
    // - '00' (version)                                         =  96
    //
    // The total of these byte values is 1775.
    $checksum = 1775 + self::calculate_checksum($forchecksum);

    // The checksum field is 6 octal digits, a NUL and a space (8 bytes).
    $octalchecksum = str_pad(decoct($checksum), 6, '0', STR_PAD_LEFT) . "\x00 ";

    // Slot it into place in the header.
    $header = substr($header, 0, 148) . $octalchecksum . substr($header, 156);

    if (strlen($header) != self::TAR_BLOCK_SIZE) {
        throw new coding_exception('Header block wrong size!!!!!');
    }

    // Special pre-handler for OS filename. This is done before anything is
    // written, so that a file which cannot be opened does not leave a
    // header in the archive with no data following it.
    if (is_string($file)) {
        $file = fopen($file, 'rb');
        if (!$file) {
            return false;
        }
    }

    // Awesome, now write out the header.
    gzwrite($gz, $header);

    if ($content !== null) {
        // Write in-memory content if any.
        if (strlen($content) !== $size) {
            throw new coding_exception('Mismatch between provided sizes: ' . $archivepath);
        }
        gzwrite($gz, $content);
    } else if ($file !== null) {
        // Write file content if any, using a 64KB buffer.
        $written = 0;
        $chunks = 0;
        while (true) {
            $data = fread($file, 65536);
            if ($data === false || strlen($data) == 0) {
                break;
            }
            $written += gzwrite($gz, $data);

            // After every megabyte of large files, update the progress
            // tracker (so there are no long gaps without progress).
            $chunks++;
            if ($chunks == 16) {
                $chunks = 0;
                if ($progress) {
                    // This call always has the same values, but that gives
                    // the tracker a chance to indicate indeterminate
                    // progress and output something to avoid timeouts.
                    $progress->progress($done, self::PROGRESS_MAX);
                }
            }
        }
        fclose($file);

        if ($written !== $size) {
            throw new coding_exception('Mismatch between provided sizes: ' . $archivepath .
                    ' (was ' . $written . ', expected ' . $size . ')');
        }
    } else if ($size != 0) {
        throw new coding_exception('Missing data file handle for non-empty file');
    }

    // Pad out final block in file, if the data did not end on a block boundary.
    $remainder = $size % self::TAR_BLOCK_SIZE;
    if ($remainder !== 0) {
        gzwrite($gz, str_pad('', self::TAR_BLOCK_SIZE - $remainder, "\x00"));
    }
    return true;
}
/**
 * Works out the tar-style checksum of a binary string: the total of all
 * its byte values, each taken as an unsigned number (0-255).
 *
 * @param string $str Input string
 * @return int Checksum
 */
protected static function calculate_checksum($str) {
    // Mode 1 of count_chars gives byte value => number of occurrences for
    // the bytes that actually appear, so each distinct byte is added once,
    // weighted by how often it occurs.
    $total = 0;
    foreach (count_chars($str, 1) as $bytevalue => $occurrences) {
        $total += $bytevalue * $occurrences;
    }
    return $total;
}
/**
 * Based on an OS path, adds either that path (if it's a file) or
 * all its children (if it's a directory) into the list of files to
 * archive.
 *
 * If a progress indicator is supplied and if this corresponds to a
 * directory, then it will be repeatedly called with the same values. This
 * allows the progress handler to respond in some way to avoid timeouts
 * if required.
 *
 * The directory handle is always closed before returning, including when
 * a nested directory cannot be read.
 *
 * @param array $expandedfiles List of all files to archive (output)
 * @param string $archivepath Current path within archive
 * @param string $path OS path on disk
 * @param file_progress $progress Progress indicator or null if none
 * @param int $done Value for progress indicator
 * @return bool True if successful, false if a directory could not be opened
 */
protected function list_files_path(array &$expandedfiles, $archivepath, $path,
        file_progress $progress = null, $done) {
    if (is_dir($path)) {
        // Unless we're using this directory as archive root, add a
        // directory entry.
        if ($archivepath != '') {
            // Add directory-creation record.
            $expandedfiles[$archivepath . '/'] = null;
        }

        // Loop through directory contents and recurse.
        if (!$handle = opendir($path)) {
            return false;
        }
        while (false !== ($entry = readdir($handle))) {
            if ($entry === '.' || $entry === '..') {
                continue;
            }
            $result = $this->list_files_path($expandedfiles,
                    $archivepath . '/' . $entry, $path . '/' . $entry,
                    $progress, $done);
            if (!$result) {
                // Release the handle before giving up, so that a failure
                // deep in the tree does not leak one handle per level.
                closedir($handle);
                return false;
            }
            if ($progress) {
                $progress->progress($done, self::PROGRESS_MAX);
            }
        }
        closedir($handle);
    } else {
        // Just add it to list.
        $expandedfiles[$archivepath] = $path;
    }
    return true;
}
/**
 * Adds a stored_file to the list of files to archive.
 *
 * A plain file is added as a single entry. A directory is added as a
 * directory record followed by every file and sub-directory beneath it
 * (at any depth), with paths rewritten to sit under $archivepath.
 *
 * @param array $expandedfiles List of all files to archive (output)
 * @param string $archivepath Current path within archive
 * @param stored_file $file File object
 */
protected function list_files_stored(array &$expandedfiles, $archivepath, stored_file $file) {
    if (!$file->is_directory()) {
        // Single file: goes straight into the list.
        $expandedfiles[$archivepath] = $file;
        return;
    }

    // Directory-creation record for the directory itself.
    $expandedfiles[$archivepath . '/'] = null;

    // Fetch everything below the directory in one go; this is a recursive
    // collection of all descendants, not just the immediate children.
    $fs = get_file_storage();
    $prefixlength = strlen($file->get_filepath());
    $descendants = $fs->get_directory_files(
            $file->get_contextid(), $file->get_component(), $file->get_filearea(), $file->get_itemid(),
            $file->get_filepath(), true, true);

    foreach ($descendants as $descendant) {
        // Path of the descendant relative to the original directory,
        // re-rooted at the archive path.
        $relative = substr($descendant->get_filepath(), $prefixlength);
        $target = $archivepath . '/' . $relative;

        if ($descendant->is_directory()) {
            // Sub-directories are stored as null entries.
            $expandedfiles[$target] = null;
        } else {
            $expandedfiles[$target . $descendant->get_filename()] = $descendant;
        }
    }
}
/**
 * Unpacks an archive into a directory on the real OS filesystem; files
 * that already exist there are overwritten.
 *
 * @param stored_file|string $archivefile full pathname of .tar.gz file or stored_file instance
 * @param string $pathname target directory
 * @param array $onlyfiles only extract files present in the array
 * @param file_progress $progress Progress indicator callback or null if not required
 * @return array list of processed files (name=>true)
 * @throws moodle_exception If error
 */
public function extract_to_pathname($archivefile, $pathname,
        array $onlyfiles = null, file_progress $progress = null) {
    // The extractor reads the archive; the handler decides where each
    // extracted file is placed on disk.
    $reader = new tgz_extractor($archivefile);
    $target = new tgz_packer_extract_to_pathname($pathname, $onlyfiles);
    return $reader->extract($target, $progress);
}
/**
 * Unpacks an archive into Moodle file storage, replacing any files that
 * already exist at the same locations.
 *
 * @param string|stored_file $archivefile full pathname of .tar.gz file or stored_file instance
 * @param int $contextid context ID
 * @param string $component component
 * @param string $filearea file area
 * @param int $itemid item ID
 * @param string $pathbase file path
 * @param int $userid user ID
 * @param file_progress $progress Progress indicator callback or null if not required
 * @return array list of processed files (name=>true)
 * @throws moodle_exception If error
 */
public function extract_to_storage($archivefile, $contextid,
        $component, $filearea, $itemid, $pathbase, $userid = null,
        file_progress $progress = null) {
    // The extractor reads the archive; the handler moves each extracted
    // file into file storage under the given location.
    $reader = new tgz_extractor($archivefile);
    $target = new tgz_packer_extract_to_storage(
            $contextid, $component, $filearea, $itemid, $pathbase, $userid);
    return $reader->extract($target, $progress);
}
/**
 * Lists the contents of an archive without extracting it.
 *
 * @param string|stored_file $archivefile
 * @return array of file infos
 */
public function list_files($archivefile) {
    // All the work is delegated to the extractor.
    $reader = new tgz_extractor($archivefile);
    $listing = $reader->list_files();
    return $listing;
}
/**
 * Checks whether a file appears to be a .tar.gz file.
 *
 * Only the two-byte gzip magic number (0x1f 0x8b) at the start of the file
 * is examined; the tar content inside is not checked. A file that cannot
 * be opened, or that is shorter than two bytes, is reported as not being a
 * .tar.gz file.
 *
 * @param string|stored_file $archivefile
 * @return bool True if file contains the gzip magic number
 */
public static function is_tgz_file($archivefile) {
    if ($archivefile instanceof stored_file) {
        $fp = $archivefile->get_content_file_handle();
    } else {
        $fp = fopen($archivefile, 'rb');
    }
    if (!$fp) {
        // Could not open it, so it cannot be confirmed as a gzip file.
        return false;
    }
    $firstbytes = fread($fp, 2);
    fclose($fp);

    // Comparing the whole string also copes with short or failed reads,
    // where indexing individual bytes would not be valid.
    return $firstbytes === "\x1f\x8b";
}
}
/**
 * Handles extraction to pathname.
 *
 * Files are written directly to their final location beneath the target
 * directory, so nothing further is needed once each file is complete.
 *
 * NOTE(review): archive paths are joined to the target directory as-is; no
 * check is made here for '..' components - confirm that archives from
 * untrusted sources are validated elsewhere.
 */
class tgz_packer_extract_to_pathname implements tgz_extractor_handler {
    /**
     * @var string Target directory for extract.
     */
    protected $pathname;
    /**
     * @var array Array of files to extract (other files are skipped).
     */
    protected $onlyfiles;

    /**
     * Constructor.
     *
     * @param string $pathname target directory
     * @param array $onlyfiles only extract files present in the array
     */
    public function __construct($pathname, array $onlyfiles = null) {
        $this->pathname = $pathname;
        $this->onlyfiles = $onlyfiles;
    }

    /**
     * Checks an archive path against the 'only these files' restriction.
     *
     * Paths are compared as exact strings. A loose comparison would treat
     * different numeric-looking names (e.g. '10' and '1e1') as equal.
     *
     * @param string $archivepath Path within archive
     * @return bool True if the entry should be extracted
     */
    private function is_wanted($archivepath) {
        if ($this->onlyfiles === null) {
            return true;
        }
        // Convert values to strings first, so that names which PHP has
        // turned into integers (e.g. via array keys) still match.
        return in_array((string)$archivepath, array_map('strval', $this->onlyfiles), true);
    }

    /**
     * Chooses the output location for a file, creating its parent directory.
     *
     * @see tgz_extractor_handler::tgz_start_file()
     * @param string $archivepath Path and name of file within archive
     * @return string|null Full path for the file, or null to skip it
     */
    public function tgz_start_file($archivepath) {
        // Check file restriction.
        if (!$this->is_wanted($archivepath)) {
            return null;
        }
        // Ensure directory exists and prepare filename.
        $fullpath = $this->pathname . '/' . $archivepath;
        check_dir_exists(dirname($fullpath));
        return $fullpath;
    }

    /**
     * Nothing to do; the file is already in its final location.
     *
     * @see tgz_extractor_handler::tgz_end_file()
     * @param string $archivepath Path and name of file within archive
     * @param string $realpath Path in filesystem
     * @return bool Always true (continue processing)
     */
    public function tgz_end_file($archivepath, $realpath) {
        return true;
    }

    /**
     * Creates the directory beneath the target directory, unless excluded.
     *
     * @see tgz_extractor_handler::tgz_directory()
     * @param string $archivepath Path and name of directory within archive
     * @param int $mtime Modified time of directory
     * @return bool True if directory was created, false if skipped
     */
    public function tgz_directory($archivepath, $mtime) {
        // Check file restriction.
        if (!$this->is_wanted($archivepath)) {
            return false;
        }
        // Ensure directory exists.
        $fullpath = $this->pathname . '/' . $archivepath;
        check_dir_exists($fullpath);
        return true;
    }
}
/**
 * Handles extraction to file storage.
 *
 * Every file is first extracted to one shared temporary file, then copied
 * into file storage (replacing any existing file at that location), after
 * which the temporary file is deleted.
 */
class tgz_packer_extract_to_storage implements tgz_extractor_handler {
    /**
     * @var string Path to temp file.
     */
    protected $tempfile;
    /**
     * @var int Context id for files.
     */
    protected $contextid;
    /**
     * @var string Component name for files.
     */
    protected $component;
    /**
     * @var string File area for files.
     */
    protected $filearea;
    /**
     * @var int Item ID for files.
     */
    protected $itemid;
    /**
     * @var string Base path for files (subfolders will go inside this).
     */
    protected $pathbase;
    /**
     * @var int User id for files or null if none.
     */
    protected $userid;

    /**
     * Constructor.
     *
     * @param int $contextid Context id for files.
     * @param string $component Component name for files.
     * @param string $filearea File area for files.
     * @param int $itemid Item ID for files.
     * @param string $pathbase Base path for files (subfolders will go inside this).
     * @param int $userid User id for files or null if none.
     */
    public function __construct($contextid, $component, $filearea, $itemid, $pathbase, $userid) {
        global $CFG;

        // Store all data.
        $this->contextid = $contextid;
        $this->component = $component;
        $this->filearea = $filearea;
        $this->itemid = $itemid;
        $this->pathbase = $pathbase;
        $this->userid = $userid;

        // Obtain temp filename.
        $tempfolder = $CFG->tempdir . '/core_files';
        check_dir_exists($tempfolder);
        $this->tempfile = tempnam($tempfolder, '.dat');
    }

    /**
     * Directs every file to the shared temporary file.
     *
     * @see tgz_extractor_handler::tgz_start_file()
     * @param string $archivepath Path and name of file within archive
     * @return string Path of the temporary file
     */
    public function tgz_start_file($archivepath) {
        // All files are stored in the same filename.
        return $this->tempfile;
    }

    /**
     * Moves the just-extracted temporary file into file storage.
     *
     * @see tgz_extractor_handler::tgz_end_file()
     * @param string $archivepath Path and name of file within archive
     * @param string $realpath Path in filesystem (the temporary file)
     * @return bool Always true (continue processing)
     */
    public function tgz_end_file($archivepath, $realpath) {
        // Place temp file into storage.
        $fs = get_file_storage();
        $filerecord = array('contextid' => $this->contextid, 'component' => $this->component,
                'filearea' => $this->filearea, 'itemid' => $this->itemid);

        // dirname() returns '.' for a file in the root of the archive. Such
        // files belong directly in the base path; using './' would give a
        // file path that does not match how directories are stored (see
        // tgz_directory), so the existing-file lookup below would miss.
        $directory = dirname($archivepath);
        if ($directory === '.') {
            $filerecord['filepath'] = $this->pathbase;
        } else {
            $filerecord['filepath'] = $this->pathbase . $directory . '/';
        }
        $filerecord['filename'] = basename($archivepath);
        if ($this->userid) {
            $filerecord['userid'] = $this->userid;
        }

        // Delete existing file (if any) and create new one.
        tgz_packer::delete_existing_file_record($fs, $filerecord);
        $fs->create_file_from_pathname($filerecord, $this->tempfile);
        unlink($this->tempfile);
        return true;
    }

    /**
     * Creates the directory record in file storage if it is not there yet.
     *
     * @see tgz_extractor_handler::tgz_directory()
     * @param string $archivepath Path and name of directory within archive
     * @param int $mtime Modified time of directory (not used)
     * @return bool Always true
     */
    public function tgz_directory($archivepath, $mtime) {
        // Standardise path.
        if (!preg_match('~/$~', $archivepath)) {
            $archivepath .= '/';
        }
        // Create directory if it doesn't already exist.
        $fs = get_file_storage();
        if (!$fs->file_exists($this->contextid, $this->component, $this->filearea, $this->itemid,
                $this->pathbase . $archivepath, '.')) {
            $fs->create_directory($this->contextid, $this->component, $this->filearea, $this->itemid,
                    $this->pathbase . $archivepath);
        }
        return true;
    }
}

View File

@ -6153,8 +6153,10 @@ function get_file_packer($mimetype='application/zip') {
case 'application/vnd.moodle.profiling':
$classname = 'zip_packer';
break;
case 'application/x-tar':
// One day we hope to support tar - for the time being it is a pipe dream.
case 'application/x-gzip' :
$classname = 'tgz_packer';
break;
default:
return false;
}

View File

@ -41,6 +41,11 @@ information provided here is intended especially for developers.
* Each plugin should include version information in version.php.
* Module and block tables do not contain version column any more, use get_config('xx_yy', 'version') instead.
* $USER->password field is intentionally unset so that session data does not contain password hashes.
* New file packer for .tar.gz files; obtain by calling get_file_packer('application/x-gzip'). Intended initially
for use in backup/restore only, as there are limitations on supported filenames. Also new packer for
backups which supports both compression formats; get_file_packer('application/vnd.moodle.backup').
* New optional parameter to stored_file::get_content_file_handle to open file handle with 'gzopen' instead
of 'fopen' to read gzip-compressed files if required.
DEPRECATIONS:
Various previously deprecated functions have now been altered to throw DEBUG_DEVELOPER debugging notices