MDL-70631 files: Fix performance of zip_packer::extract_to_pathname()

The original implementation was based on ZipArchive::getStream() which
turns out to be very slow and if the archive contains many files, the
unzipping performance is very slow.

The patch changes the implementation to use ZipArchive::extractTo()
unless the extracted entry path contains a folder name ending with dot
(such as some/path./to/file.txt). There is a known upstream bug in the
PHP ZIP extension #77214 (also #74619 and #69477) so that we fall back
to keep using the stream in those cases.
This commit is contained in:
David Mudrák 2021-01-14 21:33:44 +01:00
parent 9dabd071fe
commit b227b6ea45
3 changed files with 112 additions and 16 deletions

View File

@ -254,6 +254,55 @@ class core_files_zip_packer_testcase extends advanced_testcase implements file_p
}
}
/**
* Test functionality of {@see zip_packer} for entries with folders ending with dots.
*
* @link https://bugs.php.net/bug.php?id=77214
*/
public function test_zip_entry_path_having_folder_ending_with_dot() {
$this->resetAfterTest(false);
$packer = get_file_packer('application/zip');
$tmp = make_request_directory();
$now = time();
// Create a test archive containing a folder ending with dot.
$zippath = $tmp . '/test_archive.zip';
$zipcontents = [
'HOW.TO' => ['Just run tests.'],
'README.' => ['This is a test ZIP file'],
'./Current time' => [$now],
'Data/sub1./sub2/1221' => ['1221'],
'Data/sub1./sub2./Příliš žluťoučký kůň úpěl Ďábelské Ódy.txt' => [''],
];
// Check that the archive can be created.
$result = $packer->archive_to_pathname($zipcontents, $zippath, false);
$this->assertTrue($result);
// Check list of files.
$listfiles = $packer->list_files($zippath);
$this->assertEquals(count($zipcontents), count($listfiles));
foreach ($listfiles as $fileinfo) {
$this->assertSame($fileinfo->pathname, $fileinfo->original_pathname);
$this->assertArrayHasKey($fileinfo->pathname, $zipcontents);
}
// Check actual extracting.
$targetpath = $tmp . '/target';
check_dir_exists($targetpath);
$result = $packer->extract_to_pathname($zippath, $targetpath, null, null, true);
$this->assertTrue($result);
foreach ($zipcontents as $filename => $filecontents) {
$filecontents = reset($filecontents);
$this->assertTrue(is_readable($targetpath . '/' . $filename));
$this->assertEquals($filecontents, file_get_contents($targetpath . '/' . $filename));
}
}
/**
* @depends test_archive_to_storage
*/

View File

@ -254,6 +254,28 @@ class zip_archive extends file_archive {
return $this->za->getStream($name);
}
/**
* Extract the archive contents to the given location.
*
* @param string $destination Path to the location where to extract the files.
* @param int $index Index of the archive entry.
* @return bool true on success or false on failure
*/
public function extract_to($destination, $index) {
if (!isset($this->za)) {
return false;
}
$name = $this->za->getNameIndex($index);
if ($name === false) {
return false;
}
return $this->za->extractTo($destination, $name);
}
/**
* Returns file information.
*

View File

@ -341,33 +341,58 @@ class zip_packer extends file_packer {
}
$newfile = "$newdir/$filename";
if (!$fp = fopen($newfile, 'wb')) {
$processed[$name] = 'Can not write target file'; // TODO: localise
$success = false;
continue;
}
if (!$fz = $ziparch->get_stream($info->index)) {
$processed[$name] = 'Can not read file from zip archive'; // TODO: localise
$success = false;
if (strpos($newfile, './') > 1) {
// The path to the entry contains a directory ending with dot. We cannot use extract_to() due to
// upstream PHP bugs #69477, #74619 and #77214. Extract the file from its stream which is slower but
// should work even in this case.
if (!$fp = fopen($newfile, 'wb')) {
$processed[$name] = 'Can not write target file'; // TODO: localise.
$success = false;
continue;
}
if (!$fz = $ziparch->get_stream($info->index)) {
$processed[$name] = 'Can not read file from zip archive'; // TODO: localise.
$success = false;
fclose($fp);
continue;
}
while (!feof($fz)) {
$content = fread($fz, 262143);
fwrite($fp, $content);
}
fclose($fz);
fclose($fp);
} else {
if (!$fz = $ziparch->extract_to($pathname, $info->index)) {
$processed[$name] = 'Can not read file from zip archive'; // TODO: localise.
$success = false;
continue;
}
}
// Check that the file was correctly created in the destination.
if (!file_exists($newfile)) {
$processed[$name] = 'Unknown error during zip extraction (file not created).'; // TODO: localise.
$success = false;
continue;
}
while (!feof($fz)) {
$content = fread($fz, 262143);
fwrite($fp, $content);
}
fclose($fz);
fclose($fp);
// Check that the size of extracted file matches the expectation.
if (filesize($newfile) !== $size) {
$processed[$name] = 'Unknown error during zip extraction'; // TODO: localise
$processed[$name] = 'Unknown error during zip extraction (file size mismatch).'; // TODO: localise.
$success = false;
// something went wrong :-(
@unlink($newfile);
continue;
}
$processed[$name] = true;
}
$ziparch->close();
if ($returnbool) {