mirror of
https://github.com/moodle/moodle.git
synced 2025-01-18 05:58:34 +01:00
MDL-70038 assign: add support for pdftoppm tool
pdftoppm (from the poppler-utils package) is several orders of magnitude quicker than ghostscript at extracting PNG images from PDF documents. We add support for this tool and use it whenever it is set up, falling back to gs as before if it is missing. Revisited code after peer review comments. In particular: 1. Make the default path for pdftoppm an empty string. 2. Fix a typo in the method description for get_gs_command_for_image. 3. Add more information about why pdftoppm is useful to the string pathtopdftoppm_help. 4. Check that the path for pdftoppm is executable to prevent errors. Otherwise, use gs.
This commit is contained in:
parent
41037efa7a
commit
96557076a2
@ -38,6 +38,8 @@ if ($hassiteconfig) {
|
||||
new lang_string('pathtodot_help', 'admin'), ''));
|
||||
$temp->add(new admin_setting_configexecutable('pathtogs', new lang_string('pathtogs', 'admin'),
|
||||
new lang_string('pathtogs_help', 'admin'), '/usr/bin/gs'));
|
||||
$temp->add(new admin_setting_configexecutable('pathtopdftoppm', new lang_string('pathtopdftoppm', 'admin'),
|
||||
new lang_string('pathtopdftoppm_help', 'admin'), ''));
|
||||
$temp->add(new admin_setting_configexecutable('pathtopython', new lang_string('pathtopython', 'admin'),
|
||||
new lang_string('pathtopythondesc', 'admin'), ''));
|
||||
$ADMIN->add('server', $temp);
|
||||
|
@ -949,6 +949,8 @@ $string['pathtophp'] = 'Path to PHP CLI';
|
||||
$string['pathtodu'] = 'Path to du';
|
||||
$string['pathtogs'] = 'Path to ghostscript';
|
||||
$string['pathtogs_help'] = 'On most Linux installs, this can be left as \'/usr/bin/gs\'. On Windows it will be something like \'c:\\gs\\bin\\gswin32c.exe\' (make sure there are no spaces in the path - if necessary copy the files \'gswin32c.exe\' and \'gsdll32.dll\' to a new folder without a space in the path)';
|
||||
$string['pathtopdftoppm'] = 'Path to pdftoppm';
|
||||
$string['pathtopdftoppm_help'] = '\'pdftoppm\' is a tool that converts PDF pages to PNG at least as fast as \'gs\' does. However, you will probably have a better performance when converting large documents. If present, \'pdftoppm\' will be used instead of \'gs\' for this task. On most Linux installs, this can be left as \'/usr/bin/pdftoppm\'. If not present, install the poppler-utils or poppler package, depending on the Linux distribution. On Windows it will be provided by Cygwin installs. See <a href="https://poppler.freedesktop.org/" target="_blank">Poppler project</a> for more details.';
|
||||
$string['pathtopgdump'] = 'Path to pg_dump';
|
||||
$string['pathtopgdumpdesc'] = 'This is only necessary to enter if you have more than one pg_dump on your system (for example if you have more than one version of postgresql installed)';
|
||||
$string['pathtopgdumpinvalid'] = 'Invalid path to pg_dump - either wrong path or not executable';
|
||||
|
@ -536,8 +536,6 @@ class pdf extends TcpdfFpdi {
|
||||
* @return string the filename of the generated image
|
||||
*/
|
||||
public function get_image($pageno) {
|
||||
global $CFG;
|
||||
|
||||
if (!$this->filename) {
|
||||
throw new \coding_exception('Attempting to generate a page image without first setting the PDF filename');
|
||||
}
|
||||
@ -560,15 +558,7 @@ class pdf extends TcpdfFpdi {
|
||||
}
|
||||
|
||||
if ($generate) {
|
||||
// Use ghostscript to generate an image of the specified page.
|
||||
$gsexec = \escapeshellarg($CFG->pathtogs);
|
||||
$imageres = \escapeshellarg(100);
|
||||
$imagefilearg = \escapeshellarg($imagefile);
|
||||
$filename = \escapeshellarg($this->filename);
|
||||
$pagenoinc = \escapeshellarg($pageno + 1);
|
||||
$command = "$gsexec -q -sDEVICE=png16m -dSAFER -dBATCH -dNOPAUSE -r$imageres -dFirstPage=$pagenoinc -dLastPage=$pagenoinc ".
|
||||
"-dDOINTERPOLATE -dGraphicsAlphaBits=4 -dTextAlphaBits=4 -sOutputFile=$imagefilearg $filename";
|
||||
|
||||
$command = $this->get_command_for_image($pageno, $imagefile);
|
||||
$output = null;
|
||||
$result = exec($command, $output);
|
||||
if (!file_exists($imagefile)) {
|
||||
@ -585,6 +575,61 @@ class pdf extends TcpdfFpdi {
|
||||
return self::IMAGE_PAGE . $pageno . '.png';
|
||||
}
|
||||
|
||||
/**
 * Gets the command to use to extract as image the given $pageno page number
 * from a PDF document into the $imagefile file.
 *
 * The pdftoppm command is returned when $CFG->pathtopdftoppm is set and points to an
 * executable regular file; in every other case the ghostscript (gs) command is returned.
 *
 * @param int $pageno Page number to extract from document.
 * @param string $imagefile Target filename for the PNG image as absolute path.
 * @return string The command to use to extract a page as PNG image.
 */
private function get_command_for_image(int $pageno, string $imagefile): string {
    global $CFG;

    // First, quickest conversion option.
    // The is_executable() check alone can also succeed for a directory (search permission),
    // so a regular file is required as well; otherwise a misconfigured path would select
    // pdftoppm and the gs fallback would never be reached.
    if (!empty($CFG->pathtopdftoppm) && is_file($CFG->pathtopdftoppm) && is_executable($CFG->pathtopdftoppm)) {
        return $this->get_pdftoppm_command_for_image($pageno, $imagefile);
    }

    // Otherwise, rely on default behaviour.
    return $this->get_gs_command_for_image($pageno, $imagefile);
}
|
||||
|
||||
/**
 * Builds the pdftoppm command line used to extract the given $pageno page number
 * from the PDF document ($this->filename) as a PNG image.
 *
 * @param int $pageno Page number to extract from document; the command receives $pageno + 1.
 * @param string $imagefile Target filename for the PNG image as absolute path.
 * @return string The pdftoppm command to use to extract a page as PNG image.
 */
private function get_pdftoppm_command_for_image(int $pageno, string $imagefile): string {
    global $CFG;

    // Pdftoppm tool automatically adds the file extension, so the last four characters
    // of the target filename are dropped before it is passed on.
    $outputprefix = \escapeshellarg(substr($imagefile, 0, -4));

    // The same page is used for both the first (-f) and last (-l) page options.
    $page = \escapeshellarg($pageno + 1);

    $parts = [
        \escapeshellarg($CFG->pathtopdftoppm),
        '-q',
        '-r', \escapeshellarg(100),
        '-f', $page,
        '-l', $page,
        '-png',
        '-singlefile',
        \escapeshellarg($this->filename),
        $outputprefix,
    ];

    return implode(' ', $parts);
}
|
||||
|
||||
/**
 * Builds the ghostscript (gs) command line used to extract the given $pageno page number
 * from the PDF document ($this->filename) into the $imagefile file.
 *
 * @param int $pageno Page number to extract from document; the command receives $pageno + 1.
 * @param string $imagefile Target filename for the PNG image as absolute path.
 * @return string The ghostscript (gs) command to use to extract a page as PNG image.
 */
private function get_gs_command_for_image(int $pageno, string $imagefile): string {
    global $CFG;

    // The same page is used for both -dFirstPage and -dLastPage.
    $page = \escapeshellarg($pageno + 1);

    $parts = [
        \escapeshellarg($CFG->pathtogs),
        '-q',
        '-sDEVICE=png16m',
        '-dSAFER',
        '-dBATCH',
        '-dNOPAUSE',
        '-r' . \escapeshellarg(100),
        '-dFirstPage=' . $page,
        '-dLastPage=' . $page,
        '-dDOINTERPOLATE',
        '-dGraphicsAlphaBits=4',
        '-dTextAlphaBits=4',
        '-sOutputFile=' . \escapeshellarg($imagefile),
        \escapeshellarg($this->filename),
    ];

    return implode(' ', $parts);
}
|
||||
|
||||
/**
|
||||
* Check to see if PDF is version 1.4 (or below); if not: use ghostscript to convert it
|
||||
*
|
||||
|
Loading…
x
Reference in New Issue
Block a user