diff --git a/lib/spout/readme_moodle.txt b/lib/spout/readme_moodle.txt index 350f79c6821..91ace6be81d 100644 --- a/lib/spout/readme_moodle.txt +++ b/lib/spout/readme_moodle.txt @@ -1,9 +1,14 @@ -Description of Spout library import 2.6.0 +Description of Spout library import 2.7.3 ========================================= * Download / Clone from https://github.com/box/spout/ * Only include the src/Spout directory. * Update lib/thirdpartylibs.xml with the latest version. +2017/10/10 +---------- +Updated to v2.7.3 (MDL-60288) +by Ankit Agarwal + 2016/09/20 ---------- Updated to v2.6.0 (MDL-56012) diff --git a/lib/spout/src/Spout/Common/Escaper/ODS.php b/lib/spout/src/Spout/Common/Escaper/ODS.php index 86caeb39521..4ffd766ef4f 100644 --- a/lib/spout/src/Spout/Common/Escaper/ODS.php +++ b/lib/spout/src/Spout/Common/Escaper/ODS.php @@ -22,7 +22,29 @@ class ODS implements EscaperInterface */ public function escape($string) { - return htmlspecialchars($string, ENT_QUOTES); + if (defined('ENT_DISALLOWED')) { + // 'ENT_DISALLOWED' ensures that invalid characters in the given document type are replaced. + // Otherwise control characters like a vertical tab "\v" will make the XML document unreadable by the XML processor + // @link https://github.com/box/spout/issues/329 + $replacedString = htmlspecialchars($string, ENT_NOQUOTES | ENT_DISALLOWED); + } else { + // We are on hhvm or any other engine that does not support ENT_DISALLOWED. + // + // @NOTE: Using ENT_NOQUOTES as only XML entities ('<', '>', '&') need to be encoded. + // Single and double quotes can be left as is. + $escapedString = htmlspecialchars($string, ENT_NOQUOTES); + + // control characters values are from 0 to 1F (hex values) in the ASCII table + // some characters should not be escaped though: "\t", "\r" and "\n". + $regexPattern = '[\x00-\x08' . + // skipping "\t" (0x9) and "\n" (0xA) + '\x0B-\x0C' . + // skipping "\r" (0xD) + '\x0E-\x1F]'; + $replacedString = preg_replace("/$regexPattern/", '�', $escapedString); + } + + return $replacedString; } /** @@ -33,6 +55,12 @@ class ODS implements EscaperInterface */ public function unescape($string) { - return htmlspecialchars_decode($string, ENT_QUOTES); + // ============== + // = WARNING = + // ============== + // It is assumed that the given string has already had its XML entities decoded. + // This is true if the string is coming from a DOMNode (as DOMNode already decode XML entities on creation). + // Therefore there is no need to call "htmlspecialchars_decode()". + return $string; } } diff --git a/lib/spout/src/Spout/Common/Escaper/XLSX.php b/lib/spout/src/Spout/Common/Escaper/XLSX.php index 8ca317fce52..7c6c61b37be 100644 --- a/lib/spout/src/Spout/Common/Escaper/XLSX.php +++ b/lib/spout/src/Spout/Common/Escaper/XLSX.php @@ -42,7 +42,9 @@ class XLSX implements EscaperInterface public function escape($string) { $escapedString = $this->escapeControlCharacters($string); - $escapedString = htmlspecialchars($escapedString, ENT_QUOTES); + // @NOTE: Using ENT_NOQUOTES as only XML entities ('<', '>', '&') need to be encoded. + // Single and double quotes can be left as is. + $escapedString = htmlspecialchars($escapedString, ENT_NOQUOTES); return $escapedString; } @@ -55,8 +57,13 @@ class XLSX implements EscaperInterface */ public function unescape($string) { - $unescapedString = htmlspecialchars_decode($string, ENT_QUOTES); - $unescapedString = $this->unescapeControlCharacters($unescapedString); + // ============== + // = WARNING = + // ============== + // It is assumed that the given string has already had its XML entities decoded. + // This is true if the string is coming from a DOMNode (as DOMNode already decode XML entities on creation). + // Therefore there is no need to call "htmlspecialchars_decode()". + $unescapedString = $this->unescapeControlCharacters($string); return $unescapedString; } diff --git a/lib/spout/src/Spout/Common/Helper/FileSystemHelper.php b/lib/spout/src/Spout/Common/Helper/FileSystemHelper.php index 4d4f0be6880..3145be73b5f 100644 --- a/lib/spout/src/Spout/Common/Helper/FileSystemHelper.php +++ b/lib/spout/src/Spout/Common/Helper/FileSystemHelper.php @@ -13,15 +13,15 @@ use Box\Spout\Common\Exception\IOException; */ class FileSystemHelper { - /** @var string Path of the base folder where all the I/O can occur */ - protected $baseFolderPath; + /** @var string Real path of the base folder where all the I/O can occur */ + protected $baseFolderRealPath; /** * @param string $baseFolderPath The path of the base folder where all the I/O can occur */ public function __construct($baseFolderPath) { - $this->baseFolderPath = $baseFolderPath; + $this->baseFolderRealPath = realpath($baseFolderPath); } /** @@ -124,9 +124,10 @@ class FileSystemHelper */ protected function throwIfOperationNotInBaseFolder($operationFolderPath) { - $isInBaseFolder = (strpos($operationFolderPath, $this->baseFolderPath) === 0); + $operationFolderRealPath = realpath($operationFolderPath); + $isInBaseFolder = (strpos($operationFolderRealPath, $this->baseFolderRealPath) === 0); if (!$isInBaseFolder) { - throw new IOException("Cannot perform I/O operation outside of the base folder: {$this->baseFolderPath}"); + throw new IOException("Cannot perform I/O operation outside of the base folder: {$this->baseFolderRealPath}"); } } } diff --git a/lib/spout/src/Spout/Reader/AbstractReader.php b/lib/spout/src/Spout/Reader/AbstractReader.php index cb476abab34..880efbd5d0d 100644 --- a/lib/spout/src/Spout/Reader/AbstractReader.php +++ b/lib/spout/src/Spout/Reader/AbstractReader.php @@ -19,8 +19,15 @@ abstract class AbstractReader implements ReaderInterface /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; - /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ - protected $shouldFormatDates = false; + /** @var \Box\Spout\Reader\Common\ReaderOptions Reader's customized options */ + protected $options; + + /** + * Returns the reader's current options + * + * @return \Box\Spout\Reader\Common\ReaderOptions + */ + abstract protected function getOptions(); /** * Returns whether stream wrappers are supported @@ -42,7 +49,7 @@ abstract class AbstractReader implements ReaderInterface * * @return \Iterator To iterate over sheets */ - abstract public function getConcreteSheetIterator(); + abstract protected function getConcreteSheetIterator(); /** * Closes the reader. To be used after reading the file. @@ -64,12 +71,26 @@ abstract class AbstractReader implements ReaderInterface /** * Sets whether date/time values should be returned as PHP objects or be formatted as strings. * + * @api * @param bool $shouldFormatDates * @return AbstractReader */ public function setShouldFormatDates($shouldFormatDates) { - $this->shouldFormatDates = $shouldFormatDates; + $this->getOptions()->setShouldFormatDates($shouldFormatDates); + return $this; + } + + /** + * Sets whether empty rows should be returned or skipped. + * + * @api + * @param bool $shouldPreserveEmptyRows + * @return AbstractReader + */ + public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows) + { + $this->getOptions()->setShouldPreserveEmptyRows($shouldPreserveEmptyRows); return $this; } diff --git a/lib/spout/src/Spout/Reader/CSV/Reader.php b/lib/spout/src/Spout/Reader/CSV/Reader.php index ab887ef73af..648a12d36a2 100644 --- a/lib/spout/src/Spout/Reader/CSV/Reader.php +++ b/lib/spout/src/Spout/Reader/CSV/Reader.php @@ -4,7 +4,6 @@ namespace Box\Spout\Reader\CSV; use Box\Spout\Reader\AbstractReader; use Box\Spout\Common\Exception\IOException; -use Box\Spout\Common\Helper\EncodingHelper; /** * Class Reader @@ -20,20 +19,21 @@ class Reader extends AbstractReader /** @var SheetIterator To iterator over the CSV unique "sheet" */ protected $sheetIterator; - /** @var string Defines the character used to delimit fields (one character only) */ - protected $fieldDelimiter = ','; + /** @var string Original value for the "auto_detect_line_endings" INI value */ + protected $originalAutoDetectLineEndings; - /** @var string Defines the character used to enclose fields (one character only) */ - protected $fieldEnclosure = '"'; - - /** @var string Encoding of the CSV file to be read */ - protected $encoding = EncodingHelper::ENCODING_UTF8; - - /** @var string Defines the End of line */ - protected $endOfLineCharacter = "\n"; - - /** @var string */ - protected $autoDetectLineEndings; + /** + * Returns the reader's current options + * + * @return ReaderOptions + */ + protected function getOptions() + { + if (!isset($this->options)) { + $this->options = new ReaderOptions(); + } + return $this->options; + } /** * Sets the field delimiter for the CSV. @@ -44,7 +44,7 @@ class Reader extends AbstractReader */ public function setFieldDelimiter($fieldDelimiter) { - $this->fieldDelimiter = $fieldDelimiter; + $this->getOptions()->setFieldDelimiter($fieldDelimiter); return $this; } @@ -57,7 +57,7 @@ class Reader extends AbstractReader */ public function setFieldEnclosure($fieldEnclosure) { - $this->fieldEnclosure = $fieldEnclosure; + $this->getOptions()->setFieldEnclosure($fieldEnclosure); return $this; } @@ -70,7 +70,7 @@ class Reader extends AbstractReader */ public function setEncoding($encoding) { - $this->encoding = $encoding; + $this->getOptions()->setEncoding($encoding); return $this; } @@ -83,7 +83,7 @@ class Reader extends AbstractReader */ public function setEndOfLineCharacter($endOfLineCharacter) { - $this->endOfLineCharacter = $endOfLineCharacter; + $this->getOptions()->setEndOfLineCharacter($endOfLineCharacter); return $this; } @@ -107,7 +107,7 @@ class Reader extends AbstractReader */ protected function openReader($filePath) { - $this->autoDetectLineEndings = ini_get('auto_detect_line_endings'); + $this->originalAutoDetectLineEndings = ini_get('auto_detect_line_endings'); ini_set('auto_detect_line_endings', '1'); $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r'); @@ -117,10 +117,7 @@ class Reader extends AbstractReader $this->sheetIterator = new SheetIterator( $this->filePointer, - $this->fieldDelimiter, - $this->fieldEnclosure, - $this->encoding, - $this->endOfLineCharacter, + $this->getOptions(), $this->globalFunctionsHelper ); } @@ -130,7 +127,7 @@ class Reader extends AbstractReader * * @return SheetIterator To iterate over sheets */ - public function getConcreteSheetIterator() + protected function getConcreteSheetIterator() { return $this->sheetIterator; } @@ -147,6 +144,6 @@ class Reader extends AbstractReader $this->globalFunctionsHelper->fclose($this->filePointer); } - ini_set('auto_detect_line_endings', $this->autoDetectLineEndings); + ini_set('auto_detect_line_endings', $this->originalAutoDetectLineEndings); } } diff --git a/lib/spout/src/Spout/Reader/CSV/ReaderOptions.php b/lib/spout/src/Spout/Reader/CSV/ReaderOptions.php new file mode 100644 index 00000000000..9a1adb89d94 --- /dev/null +++ b/lib/spout/src/Spout/Reader/CSV/ReaderOptions.php @@ -0,0 +1,110 @@ +fieldDelimiter; + } + + /** + * Sets the field delimiter for the CSV. + * Needs to be called before opening the reader. + * + * @param string $fieldDelimiter Character that delimits fields + * @return ReaderOptions + */ + public function setFieldDelimiter($fieldDelimiter) + { + $this->fieldDelimiter = $fieldDelimiter; + return $this; + } + + /** + * @return string + */ + public function getFieldEnclosure() + { + return $this->fieldEnclosure; + } + + /** + * Sets the field enclosure for the CSV. + * Needs to be called before opening the reader. + * + * @param string $fieldEnclosure Character that enclose fields + * @return ReaderOptions + */ + public function setFieldEnclosure($fieldEnclosure) + { + $this->fieldEnclosure = $fieldEnclosure; + return $this; + } + + /** + * @return string + */ + public function getEncoding() + { + return $this->encoding; + } + + /** + * Sets the encoding of the CSV file to be read. + * Needs to be called before opening the reader. + * + * @param string $encoding Encoding of the CSV file to be read + * @return ReaderOptions + */ + public function setEncoding($encoding) + { + $this->encoding = $encoding; + return $this; + } + + /** + * @return string EOL for the CSV + */ + public function getEndOfLineCharacter() + { + return $this->endOfLineCharacter; + } + + /** + * Sets the EOL for the CSV. + * Needs to be called before opening the reader. + * + * @param string $endOfLineCharacter used to properly get lines from the CSV file. + * @return ReaderOptions + */ + public function setEndOfLineCharacter($endOfLineCharacter) + { + $this->endOfLineCharacter = $endOfLineCharacter; + return $this; + } +} diff --git a/lib/spout/src/Spout/Reader/CSV/RowIterator.php b/lib/spout/src/Spout/Reader/CSV/RowIterator.php index 39b38a890dc..a2a6672c775 100644 --- a/lib/spout/src/Spout/Reader/CSV/RowIterator.php +++ b/lib/spout/src/Spout/Reader/CSV/RowIterator.php @@ -14,10 +14,9 @@ use Box\Spout\Common\Helper\EncodingHelper; class RowIterator implements IteratorInterface { /** - * If no value is given to fgetcsv(), it defaults to 8192 (which may be too low). - * Alignement with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421 + * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accomodates for very long lines). */ - const MAX_READ_BYTES_PER_LINE = 32768; + const MAX_READ_BYTES_PER_LINE = 0; /** @var resource Pointer to the CSV file to read */ protected $filePointer; @@ -40,6 +39,12 @@ class RowIterator implements IteratorInterface /** @var string Encoding of the CSV file to be read */ protected $encoding; + /** @var string End of line delimiter, given by the user as input. */ + protected $inputEOLDelimiter; + + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; @@ -49,24 +54,19 @@ class RowIterator implements IteratorInterface /** @var string End of line delimiter, encoded using the same encoding as the CSV */ protected $encodedEOLDelimiter; - /** @var string End of line delimiter, given by the user as input. */ - protected $inputEOLDelimiter; - /** * @param resource $filePointer Pointer to the CSV file to read - * @param string $fieldDelimiter Character that delimits fields - * @param string $fieldEnclosure Character that enclose fields - * @param string $encoding Encoding of the CSV file to be read - * @param string $endOfLineDelimiter End of line delimiter + * @param \Box\Spout\Reader\CSV\ReaderOptions $options * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper) + public function __construct($filePointer, $options, $globalFunctionsHelper) { $this->filePointer = $filePointer; - $this->fieldDelimiter = $fieldDelimiter; - $this->fieldEnclosure = $fieldEnclosure; - $this->encoding = $encoding; - $this->inputEOLDelimiter = $endOfLineDelimiter; + $this->fieldDelimiter = $options->getFieldDelimiter(); + $this->fieldEnclosure = $options->getFieldEnclosure(); + $this->encoding = $options->getEncoding(); + $this->inputEOLDelimiter = $options->getEndOfLineCharacter(); + $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows(); $this->globalFunctionsHelper = $globalFunctionsHelper; $this->encodingHelper = new EncodingHelper($globalFunctionsHelper); @@ -106,7 +106,7 @@ class RowIterator implements IteratorInterface * Checks if current position is valid * @link http://php.net/manual/en/iterator.valid.php * - * @return boolean + * @return bool */ public function valid() { @@ -114,7 +114,7 @@ class RowIterator implements IteratorInterface } /** - * Move forward to next element. Empty rows are skipped. + * Move forward to next element. Reads data for the next unprocessed row. * @link http://php.net/manual/en/iterator.next.php * * @return void @@ -124,25 +124,48 @@ class RowIterator implements IteratorInterface { $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); - if ($this->hasReachedEndOfFile) { - return; + if (!$this->hasReachedEndOfFile) { + $this->readDataForNextRow(); } + } + /** + * @return void + * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 + */ + protected function readDataForNextRow() + { do { $rowData = $this->getNextUTF8EncodedRow(); - $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); - } while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData)); + } while ($this->shouldReadNextRow($rowData)); if ($rowData !== false) { - $this->rowDataBuffer = $rowData; + // str_replace will replace NULL values by empty strings + $this->rowDataBuffer = str_replace(null, null, $rowData); $this->numReadRows++; } else { // If we reach this point, it means end of file was reached. // This happens when the last lines are empty lines. - $this->hasReachedEndOfFile = $hasNowReachedEndOfFile; + $this->hasReachedEndOfFile = true; } } + /** + * @param array|bool $currentRowData + * @return bool Whether the data for the current row can be returned or if we need to keep reading + */ + protected function shouldReadNextRow($currentRowData) + { + $hasSuccessfullyFetchedRowData = ($currentRowData !== false); + $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); + $isEmptyLine = $this->isEmptyLine($currentRowData); + + return ( + (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) || + (!$this->shouldPreserveEmptyRows && $isEmptyLine) + ); + } + /** * Returns the next row, converted if necessary to UTF-8. * As fgetcsv() does not manage correctly encoding for non UTF-8 data, @@ -154,7 +177,7 @@ class RowIterator implements IteratorInterface protected function getNextUTF8EncodedRow() { $encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure); - if (false === $encodedRowData) { + if ($encodedRowData === false) { return false; } @@ -195,7 +218,7 @@ class RowIterator implements IteratorInterface } /** - * @param array $lineData Array containing the cells value for the line + * @param array|bool $lineData Array containing the cells value for the line * @return bool Whether the given line is empty */ protected function isEmptyLine($lineData) diff --git a/lib/spout/src/Spout/Reader/CSV/Sheet.php b/lib/spout/src/Spout/Reader/CSV/Sheet.php index b9c66c70f94..9a688db0da9 100644 --- a/lib/spout/src/Spout/Reader/CSV/Sheet.php +++ b/lib/spout/src/Spout/Reader/CSV/Sheet.php @@ -16,14 +16,12 @@ class Sheet implements SheetInterface /** * @param resource $filePointer Pointer to the CSV file to read - * @param string $fieldDelimiter Character that delimits fields - * @param string $fieldEnclosure Character that enclose fields - * @param string $encoding Encoding of the CSV file to be read + * @param \Box\Spout\Reader\CSV\ReaderOptions $options * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper) + public function __construct($filePointer, $options, $globalFunctionsHelper) { - $this->rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper); + $this->rowIterator = new RowIterator($filePointer, $options, $globalFunctionsHelper); } /** @@ -34,4 +32,31 @@ class Sheet implements SheetInterface { return $this->rowIterator; } + + /** + * @api + * @return int Index of the sheet + */ + public function getIndex() + { + return 0; + } + + /** + * @api + * @return string Name of the sheet - empty string since CSV does not support that + */ + public function getName() + { + return ''; + } + + /** + * @api + * @return bool Always TRUE as there is only one sheet + */ + public function isActive() + { + return true; + } } diff --git a/lib/spout/src/Spout/Reader/CSV/SheetIterator.php b/lib/spout/src/Spout/Reader/CSV/SheetIterator.php index 8ee2e99b728..58a9480c4b5 100644 --- a/lib/spout/src/Spout/Reader/CSV/SheetIterator.php +++ b/lib/spout/src/Spout/Reader/CSV/SheetIterator.php @@ -20,14 +20,12 @@ class SheetIterator implements IteratorInterface /** * @param resource $filePointer - * @param string $fieldDelimiter Character that delimits fields - * @param string $fieldEnclosure Character that enclose fields - * @param string $encoding Encoding of the CSV file to be read + * @param \Box\Spout\Reader\CSV\ReaderOptions $options * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper */ - public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper) + public function __construct($filePointer, $options, $globalFunctionsHelper) { - $this->sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper); + $this->sheet = new Sheet($filePointer, $options, $globalFunctionsHelper); } /** @@ -45,7 +43,7 @@ class SheetIterator implements IteratorInterface * Checks if current position is valid * @link http://php.net/manual/en/iterator.valid.php * - * @return boolean + * @return bool */ public function valid() { diff --git a/lib/spout/src/Spout/Reader/Common/ReaderOptions.php b/lib/spout/src/Spout/Reader/Common/ReaderOptions.php new file mode 100644 index 00000000000..4ab7a07f48d --- /dev/null +++ b/lib/spout/src/Spout/Reader/Common/ReaderOptions.php @@ -0,0 +1,58 @@ +shouldFormatDates; + } + + /** + * Sets whether date/time values should be returned as PHP objects or be formatted as strings. + * + * @param bool $shouldFormatDates + * @return ReaderOptions + */ + public function setShouldFormatDates($shouldFormatDates) + { + $this->shouldFormatDates = $shouldFormatDates; + return $this; + } + + /** + * @return bool Whether empty rows should be returned or skipped. + */ + public function shouldPreserveEmptyRows() + { + return $this->shouldPreserveEmptyRows; + } + + /** + * Sets whether empty rows should be returned or skipped. + * + * @param bool $shouldPreserveEmptyRows + * @return ReaderOptions + */ + public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows) + { + $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows; + return $this; + } +} diff --git a/lib/spout/src/Spout/Reader/Common/XMLProcessor.php b/lib/spout/src/Spout/Reader/Common/XMLProcessor.php new file mode 100644 index 00000000000..d8a1da8d279 --- /dev/null +++ b/lib/spout/src/Spout/Reader/Common/XMLProcessor.php @@ -0,0 +1,152 @@ +xmlReader = $xmlReader; + } + + /** + * @param string $nodeName A callback may be triggered when a node with this name is read + * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END] + * @param callable $callback Callback to execute when the read node has the given name and type + * @return XMLProcessor + */ + public function registerCallback($nodeName, $nodeType, $callback) + { + $callbackKey = $this->getCallbackKey($nodeName, $nodeType); + $this->callbacks[$callbackKey] = $this->getInvokableCallbackData($callback); + + return $this; + } + + /** + * @param string $nodeName Name of the node + * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END] + * @return string Key used to store the associated callback + */ + private function getCallbackKey($nodeName, $nodeType) + { + return "$nodeName$nodeType"; + } + + /** + * Because the callback can be a "protected" function, we don't want to use call_user_func() directly + * but instead invoke the callback using Reflection. This allows the invocation of "protected" functions. + * Since some functions can be called a lot, we pre-process the callback to only return the elements that + * will be needed to invoke the callback later. + * + * @param callable $callback Array reference to a callback: [OBJECT, METHOD_NAME] + * @return array Associative array containing the elements needed to invoke the callback using Reflection + */ + private function getInvokableCallbackData($callback) + { + $callbackObject = $callback[0]; + $callbackMethodName = $callback[1]; + $reflectionMethod = new \ReflectionMethod(get_class($callbackObject), $callbackMethodName); + $reflectionMethod->setAccessible(true); + + return [ + self::CALLBACK_REFLECTION_METHOD => $reflectionMethod, + self::CALLBACK_REFLECTION_OBJECT => $callbackObject, + ]; + } + + /** + * Resumes the reading of the XML file where it was left off. + * Stops whenever a callback indicates that reading should stop or at the end of the file. + * + * @return void + * @throws \Box\Spout\Reader\Exception\XMLProcessingException + */ + public function readUntilStopped() + { + while ($this->xmlReader->read()) { + $nodeType = $this->xmlReader->nodeType; + $nodeNamePossiblyWithPrefix = $this->xmlReader->name; + $nodeNameWithoutPrefix = $this->xmlReader->localName; + + $callbackData = $this->getRegisteredCallbackData($nodeNamePossiblyWithPrefix, $nodeNameWithoutPrefix, $nodeType); + + if ($callbackData !== null) { + $callbackResponse = $this->invokeCallback($callbackData, [$this->xmlReader]); + + if ($callbackResponse === self::PROCESSING_STOP) { + // stop reading + break; + } + } + } + } + + /** + * @param string $nodeNamePossiblyWithPrefix Name of the node, possibly prefixed + * @param string $nodeNameWithoutPrefix Name of the same node, un-prefixed + * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END] + * @return array|null Callback data to be used for execution when a node of the given name/type is read or NULL if none found + */ + private function getRegisteredCallbackData($nodeNamePossiblyWithPrefix, $nodeNameWithoutPrefix, $nodeType) + { + // With prefixed nodes, we should match if (by order of preference): + // 1. the callback was registered with the prefixed node name (e.g. "x:worksheet") + // 2. the callback was registered with the un-prefixed node name (e.g. "worksheet") + $callbackKeyForPossiblyPrefixedName = $this->getCallbackKey($nodeNamePossiblyWithPrefix, $nodeType); + $callbackKeyForUnPrefixedName = $this->getCallbackKey($nodeNameWithoutPrefix, $nodeType); + $hasPrefix = ($nodeNamePossiblyWithPrefix !== $nodeNameWithoutPrefix); + + $callbackKeyToUse = $callbackKeyForUnPrefixedName; + if ($hasPrefix && isset($this->callbacks[$callbackKeyForPossiblyPrefixedName])) { + $callbackKeyToUse = $callbackKeyForPossiblyPrefixedName; + } + + // Using isset here because it is way faster than array_key_exists... + return isset($this->callbacks[$callbackKeyToUse]) ? $this->callbacks[$callbackKeyToUse] : null; + } + + /** + * @param array $callbackData Associative array containing data to invoke the callback using Reflection + * @param array $args Arguments to pass to the callback + * @return int Callback response + */ + private function invokeCallback($callbackData, $args) + { + $reflectionMethod = $callbackData[self::CALLBACK_REFLECTION_METHOD]; + $callbackObject = $callbackData[self::CALLBACK_REFLECTION_OBJECT]; + + return $reflectionMethod->invokeArgs($callbackObject, $args); + } +} diff --git a/lib/spout/src/Spout/Reader/ODS/Helper/CellValueFormatter.php b/lib/spout/src/Spout/Reader/ODS/Helper/CellValueFormatter.php index 99d8563a805..0c9141096d0 100644 --- a/lib/spout/src/Spout/Reader/ODS/Helper/CellValueFormatter.php +++ b/lib/spout/src/Spout/Reader/ODS/Helper/CellValueFormatter.php @@ -26,7 +26,7 @@ class CellValueFormatter const XML_NODE_A = 'text:a'; const XML_NODE_SPAN = 'text:span'; - /** Definition of XML attribute used to parse data */ + /** Definition of XML attributes used to parse data */ const XML_ATTRIBUTE_TYPE = 'office:value-type'; const XML_ATTRIBUTE_VALUE = 'office:value'; const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value'; diff --git a/lib/spout/src/Spout/Reader/ODS/Helper/SettingsHelper.php b/lib/spout/src/Spout/Reader/ODS/Helper/SettingsHelper.php new file mode 100644 index 00000000000..a5388ef4b6f --- /dev/null +++ b/lib/spout/src/Spout/Reader/ODS/Helper/SettingsHelper.php @@ -0,0 +1,51 @@ +openFileInZip($filePath, self::SETTINGS_XML_FILE_PATH) === false) { + return null; + } + + $activeSheetName = null; + + try { + while ($xmlReader->readUntilNodeFound(self::XML_NODE_CONFIG_ITEM)) { + if ($xmlReader->getAttribute(self::XML_ATTRIBUTE_CONFIG_NAME) === self::XML_ATTRIBUTE_VALUE_ACTIVE_TABLE) { + $activeSheetName = $xmlReader->readString(); + break; + } + } + } catch (XMLProcessingException $exception) { + // do nothing + } + + $xmlReader->close(); + + return $activeSheetName; + } +} diff --git a/lib/spout/src/Spout/Reader/ODS/Reader.php b/lib/spout/src/Spout/Reader/ODS/Reader.php index a52bafaa333..dbdc47b8dba 100644 --- a/lib/spout/src/Spout/Reader/ODS/Reader.php +++ b/lib/spout/src/Spout/Reader/ODS/Reader.php @@ -19,6 +19,19 @@ class Reader extends AbstractReader /** @var SheetIterator To iterator over the ODS sheets */ protected $sheetIterator; + /** + * Returns the reader's current options + * + * @return ReaderOptions + */ + protected function getOptions() + { + if (!isset($this->options)) { + $this->options = new ReaderOptions(); + } + return $this->options; + } + /** * Returns whether stream wrappers are supported * @@ -42,7 +55,7 @@ class Reader extends AbstractReader $this->zip = new \ZipArchive(); if ($this->zip->open($filePath) === true) { - $this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates); + $this->sheetIterator = new SheetIterator($filePath, $this->getOptions()); } else { throw new IOException("Could not open $filePath for reading."); } @@ -53,7 +66,7 @@ class Reader extends AbstractReader * * @return SheetIterator To iterate over sheets */ - public function getConcreteSheetIterator() + protected function getConcreteSheetIterator() { return $this->sheetIterator; } diff --git a/lib/spout/src/Spout/Reader/ODS/ReaderOptions.php b/lib/spout/src/Spout/Reader/ODS/ReaderOptions.php new file mode 100644 index 00000000000..2d29640f3e7 --- /dev/null +++ b/lib/spout/src/Spout/Reader/ODS/ReaderOptions.php @@ -0,0 +1,14 @@ +" element - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param \Box\Spout\Reader\ODS\ReaderOptions $options Reader's current options */ - public function __construct($xmlReader, $shouldFormatDates) + public function __construct($xmlReader, $options) { $this->xmlReader = $xmlReader; - $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates); + $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows(); + $this->cellValueFormatter = new CellValueFormatter($options->shouldFormatDates()); + + // Register all callbacks to process different nodes when reading the XML file + $this->xmlProcessor = new XMLProcessor($this->xmlReader); + $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']); } /** @@ -71,7 +106,8 @@ class RowIterator implements IteratorInterface } $this->hasAlreadyBeenRewound = true; - $this->numReadRows = 0; + $this->lastRowIndexProcessed = 0; + $this->nextRowIndexToBeProcessed = 1; $this->rowDataBuffer = null; $this->hasReachedEndOfFile = false; @@ -82,7 +118,7 @@ class RowIterator implements IteratorInterface * Checks if current position is valid * @link http://php.net/manual/en/iterator.valid.php * - * @return boolean + * @return bool */ public function valid() { @@ -99,77 +135,156 @@ class RowIterator implements IteratorInterface */ public function next() { - $rowData = []; - $cellValue = null; - $numColumnsRepeated = 1; - $numCellsRead = 0; - $hasAlreadyReadOneCell = false; + if ($this->doesNeedDataForNextRowToBeProcessed()) { + $this->readDataForNextRow(); + } + + $this->lastRowIndexProcessed++; + } + + /** + * Returns whether we need data for the next row to be processed. + * We DO need to read data if: + * - we have not read any rows yet + * OR + * - the next row to be processed immediately follows the last read row + * + * @return bool Whether we need data for the next row to be processed. + */ + protected function doesNeedDataForNextRowToBeProcessed() + { + $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); + + return ( + !$hasReadAtLeastOneRow || + $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1 + ); + } + + /** + * @return void + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML + */ + protected function readDataForNextRow() + { + $this->currentlyProcessedRowData = []; try { - while ($this->xmlReader->read()) { - if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { - // Start of a cell description - $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode(); - - $node = $this->xmlReader->expand(); - $currentCellValue = $this->getCellValue($node); - - // process cell N only after having read cell N+1 (see below why) - if ($hasAlreadyReadOneCell) { - for ($i = 0; $i < $numColumnsRepeated; $i++) { - $rowData[] = $cellValue; - } - } - - $cellValue = $currentCellValue; - $numColumnsRepeated = $currentNumColumnsRepeated; - - $numCellsRead++; - $hasAlreadyReadOneCell = true; - - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { - // End of the row description - $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue)); - if ($isEmptyRow) { - // skip empty rows - $this->next(); - return; - } - - // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. - // The current count of read columns is determined by counting the values in $rowData. - // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" - // with a number-columns-repeated value equals to the number of (supported columns - used columns). - // In Excel, the number of supported columns is 16384, but we don't want to returns rows with - // always 16384 cells. - if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { - for ($i = 0; $i < $numColumnsRepeated; $i++) { - $rowData[] = $cellValue; - } - $this->numReadRows++; - } - break; - - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) { - // The closing "" marks the end of the file - $this->hasReachedEndOfFile = true; - break; - } - } - + $this->xmlProcessor->readUntilStopped(); } catch (XMLProcessingException $exception) { throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]"); } - $this->rowDataBuffer = $rowData; + $this->rowDataBuffer = $this->currentlyProcessedRowData; } /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return int A return code that indicates what action should the processor take next + */ + protected function processRowStartingNode($xmlReader) + { + // Reset data from current row + $this->hasAlreadyReadOneCellInCurrentRow = false; + $this->lastProcessedCellValue = null; + $this->numColumnsRepeated = 1; + $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader); + + return XMLProcessor::PROCESSING_CONTINUE; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return int A return code that indicates what action should the processor take next + */ + protected function processCellStartingNode($xmlReader) + { + $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader); + + // NOTE: expand() will automatically decode all XML entities of the child nodes + $node = $xmlReader->expand(); + $currentCellValue = $this->getCellValue($node); + + // process cell N only after having read cell N+1 (see below why) + if ($this->hasAlreadyReadOneCellInCurrentRow) { + for ($i = 0; $i < $this->numColumnsRepeated; $i++) { + $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue; + } + } + + $this->hasAlreadyReadOneCellInCurrentRow = true; + $this->lastProcessedCellValue = $currentCellValue; + $this->numColumnsRepeated = $currentNumColumnsRepeated; + + return XMLProcessor::PROCESSING_CONTINUE; + } + + /** + * @return int A return code that indicates what action should the processor take next + */ + protected function processRowEndingNode() + { + $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRowData, $this->lastProcessedCellValue); + + // if the fetched row is empty and we don't want to preserve it... + if (!$this->shouldPreserveEmptyRows && $isEmptyRow) { + // ... skip it + return XMLProcessor::PROCESSING_CONTINUE; + } + + // if the row is empty, we don't want to return more than one cell + $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1; + + // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. + // The current count of read columns is determined by counting the values in "$this->currentlyProcessedRowData". + // This is to avoid creating a lot of empty cells, as Excel adds a last empty "" + // with a number-columns-repeated value equals to the number of (supported columns - used columns). + // In Excel, the number of supported columns is 16384, but we don't want to returns rows with + // always 16384 cells. + if ((count($this->currentlyProcessedRowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { + for ($i = 0; $i < $actualNumColumnsRepeated; $i++) { + $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue; + } + } + + // If we are processing row N and the row is repeated M times, + // then the next row to be processed will be row (N+M). + $this->nextRowIndexToBeProcessed += $this->numRowsRepeated; + + // at this point, we have all the data we need for the row + // so that we can populate the buffer + return XMLProcessor::PROCESSING_STOP; + } + + /** + * @return int A return code that indicates what action should the processor take next + */ + protected function processTableEndingNode() + { + // The closing "" marks the end of the file + $this->hasReachedEndOfFile = true; + + return XMLProcessor::PROCESSING_STOP; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing + */ + protected function getNumRowsRepeatedForCurrentNode($xmlReader) + { + $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED); + return ($numRowsRepeated !== null) ? intval($numRowsRepeated) : 1; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing */ - protected function getNumColumnsRepeatedForCurrentNode() + protected function getNumColumnsRepeatedForCurrentNode($xmlReader) { - $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); + $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1; } @@ -185,14 +300,21 @@ class RowIterator implements IteratorInterface } /** - * empty() replacement that honours 0 as a valid value + * After finishing processing each cell, a row is considered empty if it contains + * no cells or if the value of the last read cell is an empty string. + * After finishing processing each cell, the last read cell is not part of the + * row data yet (as we still need to apply the "num-columns-repeated" attribute). * - * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value - * @return bool + * @param array $rowData + * @param string|int|float|bool|\DateTime|\DateInterval|null The value of the last read cell + * @return bool Whether the row is empty */ - protected function isEmptyCellValue($value) + protected function isEmptyRow($rowData, $lastReadCellValue) { - return (!isset($value) || trim($value) === ''); + return ( + count($rowData) === 0 && + (!isset($lastReadCellValue) || trim($lastReadCellValue) === '') + ); } /** @@ -214,7 +336,7 @@ class RowIterator implements IteratorInterface */ public function key() { - return $this->numReadRows; + return $this->lastRowIndexProcessed; } diff --git a/lib/spout/src/Spout/Reader/ODS/Sheet.php b/lib/spout/src/Spout/Reader/ODS/Sheet.php index 98d00b1424f..794ad3a3aea 100644 --- a/lib/spout/src/Spout/Reader/ODS/Sheet.php +++ b/lib/spout/src/Spout/Reader/ODS/Sheet.php @@ -25,17 +25,22 @@ class Sheet implements SheetInterface /** @var string Name of the sheet */ protected $name; + /** @var bool Whether the sheet was the active one */ + protected $isActive; + /** * @param XMLReader $xmlReader XML Reader, positioned on the "" element - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet + * @param bool $isSheetActive Whether the sheet was defined as active + * @param \Box\Spout\Reader\ODS\ReaderOptions $options Reader's current options */ - public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName) + public function __construct($xmlReader, $sheetIndex, $sheetName, $isSheetActive, $options) { - $this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates); + $this->rowIterator = new RowIterator($xmlReader, $options); $this->index = $sheetIndex; $this->name = $sheetName; + $this->isActive = $isSheetActive; } /** @@ -64,4 +69,13 @@ class Sheet implements SheetInterface { return $this->name; } + + /** + * @api + * @return bool Whether the sheet was defined as active + */ + public function isActive() + { + return $this->isActive; + } } diff --git a/lib/spout/src/Spout/Reader/ODS/SheetIterator.php b/lib/spout/src/Spout/Reader/ODS/SheetIterator.php index f6cfdbef45e..995c13601a4 100644 --- a/lib/spout/src/Spout/Reader/ODS/SheetIterator.php +++ b/lib/spout/src/Spout/Reader/ODS/SheetIterator.php @@ -5,6 +5,7 @@ namespace Box\Spout\Reader\ODS; use Box\Spout\Common\Exception\IOException; use Box\Spout\Reader\Exception\XMLProcessingException; use Box\Spout\Reader\IteratorInterface; +use Box\Spout\Reader\ODS\Helper\SettingsHelper; use Box\Spout\Reader\Wrapper\XMLReader; /** @@ -24,8 +25,8 @@ class SheetIterator implements IteratorInterface /** @var string $filePath Path of the file to be read */ protected $filePath; - /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ - protected $shouldFormatDates; + /** @var \Box\Spout\Reader\ODS\ReaderOptions Reader's current options */ + protected $options; /** @var XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; @@ -39,19 +40,25 @@ class SheetIterator implements IteratorInterface /** @var int The index of the sheet being read (zero-based) */ protected $currentSheetIndex; + /** @var string The name of the sheet that was defined as active */ + protected $activeSheetName; + /** * @param string $filePath Path of the file to be read - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings + * @param \Box\Spout\Reader\ODS\ReaderOptions $options Reader's current options * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $shouldFormatDates) + public function __construct($filePath, $options) { $this->filePath = $filePath; - $this->shouldFormatDates = $shouldFormatDates; + $this->options = $options; $this->xmlReader = new XMLReader(); /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ $this->escaper = \Box\Spout\Common\Escaper\ODS::getInstance(); + + $settingsHelper = new SettingsHelper(); + $this->activeSheetName = $settingsHelper->getActiveSheetName($filePath); } /** @@ -83,7 +90,7 @@ class SheetIterator implements IteratorInterface * Checks if current position is valid * @link http://php.net/manual/en/iterator.valid.php * - * @return boolean + * @return bool */ public function valid() { @@ -115,8 +122,27 @@ class SheetIterator implements IteratorInterface { $escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME); $sheetName = $this->escaper->unescape($escapedSheetName); + $isActiveSheet = $this->isActiveSheet($sheetName, $this->currentSheetIndex, $this->activeSheetName); - return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex); + return new Sheet($this->xmlReader, $this->currentSheetIndex, $sheetName, $isActiveSheet, $this->options); + } + + /** + * Returns whether the current sheet was defined as the active one + * + * @param string $sheetName Name of the current sheet + * @param int $sheetIndex Index of the current sheet + * @param string|null Name of the sheet that was defined as active or NULL if none defined + * @return bool Whether the current sheet was defined as the active one + */ + private function isActiveSheet($sheetName, $sheetIndex, $activeSheetName) + { + // The given sheet is active if its name matches the defined active sheet's name + // or if no information about the active sheet was found, it defaults to the first sheet. + return ( + ($activeSheetName === null && $sheetIndex === 0) || + ($activeSheetName === $sheetName) + ); } /** diff --git a/lib/spout/src/Spout/Reader/Wrapper/SimpleXMLElement.php b/lib/spout/src/Spout/Reader/Wrapper/SimpleXMLElement.php deleted file mode 100644 index 2bd836df7dd..00000000000 --- a/lib/spout/src/Spout/Reader/Wrapper/SimpleXMLElement.php +++ /dev/null @@ -1,175 +0,0 @@ -useXMLInternalErrors(); - - try { - $this->simpleXMLElement = new \SimpleXMLElement($xmlData); - } catch (\Exception $exception) { - // if the data is invalid, the constructor will throw an Exception - $this->resetXMLInternalErrorsSetting(); - throw new XMLProcessingException($this->getLastXMLErrorMessage()); - } - - $this->resetXMLInternalErrorsSetting(); - } - - /** - * Returns the attribute for the given name. - * - * @param string $name Attribute name - * @param string|null|void $namespace An optional namespace for the retrieved attributes - * @return string|null The attribute value or NULL if attribute not found - */ - public function getAttribute($name, $namespace = null) - { - $isPrefix = ($namespace !== null); - $attributes = $this->simpleXMLElement->attributes($namespace, $isPrefix); - $attributeValue = $attributes->{$name}; - - return ($attributeValue !== null) ? (string) $attributeValue : null; - } - - /** - * Creates a prefix/ns context for the next XPath query - * @see \SimpleXMLElement::registerXPathNamespace - * - * @param string $prefix The namespace prefix to use in the XPath query for the namespace given in "namespace". - * @param string $namespace The namespace to use for the XPath query. This must match a namespace in - * use by the XML document or the XPath query using "prefix" will not return any results. - * @return bool TRUE on success or FALSE on failure. - */ - public function registerXPathNamespace($prefix, $namespace) - { - return $this->simpleXMLElement->registerXPathNamespace($prefix, $namespace); - } - - /** - * Runs XPath query on XML data - * @see \SimpleXMLElement::xpath - * - * @param string $path An XPath path - * @return SimpleXMLElement[]|bool an array of SimpleXMLElement objects or FALSE in case of an error. - */ - public function xpath($path) - { - $elements = $this->simpleXMLElement->xpath($path); - - if ($elements !== false) { - $wrappedElements = []; - foreach ($elements as $element) { - $wrappedElement = $this->wrapSimpleXMLElement($element); - - if ($wrappedElement !== null) { - $wrappedElements[] = $this->wrapSimpleXMLElement($element); - } - } - - $elements = $wrappedElements; - } - - return $elements; - } - - /** - * Wraps the given element into an instance of the wrapper - * - * @param \SimpleXMLElement $element Element to be wrapped - * @return SimpleXMLElement|null The wrapped element or NULL if the given element is invalid - */ - protected function wrapSimpleXMLElement(\SimpleXMLElement $element) - { - $wrappedElement = null; - $elementAsXML = $element->asXML(); - - if ($elementAsXML !== false) { - $wrappedElement = new SimpleXMLElement($elementAsXML); - } - - return $wrappedElement; - } - - /** - * Remove all nodes matching the given XPath query. - * It does not map to any \SimpleXMLElement function. - * - * @param string $path An XPath path - * @return void - */ - public function removeNodesMatchingXPath($path) - { - $nodesToRemove = $this->simpleXMLElement->xpath($path); - - foreach ($nodesToRemove as $nodeToRemove) { - unset($nodeToRemove[0]); - } - } - - /** - * Returns the first child matching the given tag name - * - * @param string $tagName - * @return SimpleXMLElement|null The first child matching the tag name or NULL if none found - */ - public function getFirstChildByTagName($tagName) - { - $doesElementExist = isset($this->simpleXMLElement->{$tagName}); - - /** @var \SimpleXMLElement $realElement */ - $realElement = $this->simpleXMLElement->{$tagName}; - - return $doesElementExist ? $this->wrapSimpleXMLElement($realElement) : null; - } - - /** - * Returns the immediate children. - * - * @return array The children - */ - public function children() - { - $children = []; - - foreach ($this->simpleXMLElement->children() as $child) { - $children[] = $this->wrapSimpleXMLElement($child); - } - - return $children; - } - - /** - * @return string - */ - public function __toString() - { - return $this->simpleXMLElement->__toString(); - } -} diff --git a/lib/spout/src/Spout/Reader/Wrapper/XMLReader.php b/lib/spout/src/Spout/Reader/Wrapper/XMLReader.php index c979819996d..08e99fc656e 100644 --- a/lib/spout/src/Spout/Reader/Wrapper/XMLReader.php +++ b/lib/spout/src/Spout/Reader/Wrapper/XMLReader.php @@ -1,6 +1,7 @@ getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath); - // HHVM does not check if file exists within zip file - // @link https://github.com/facebook/hhvm/issues/5779 - if ($this->isRunningHHVM()) { - if ($this->fileExistsWithinZip($realPathURI)) { - $wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET); - } - } else { + // We need to check first that the file we are trying to read really exist because: + // - PHP emits a warning when trying to open a file that does not exist. + // - HHVM does not check if file exists within zip file (@link https://github.com/facebook/hhvm/issues/5779) + if ($this->fileExistsWithinZip($realPathURI)) { $wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET); } @@ -54,16 +52,6 @@ class XMLReader extends \XMLReader return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPath); } - /** - * Returns whether the current environment is HHVM - * - * @return bool TRUE if running on HHVM, FALSE otherwise - */ - protected function isRunningHHVM() - { - return defined('HHVM_VERSION'); - } - /** * Returns whether the file at the given location exists * @@ -176,4 +164,12 @@ class XMLReader extends \XMLReader return ($this->nodeType === $nodeType && $currentNodeName === $nodeName); } + + /** + * @return string The name of the current node, un-prefixed + */ + public function getCurrentNodeName() + { + return $this->localName; + } } diff --git a/lib/spout/src/Spout/Reader/XLSX/Helper/CellHelper.php b/lib/spout/src/Spout/Reader/XLSX/Helper/CellHelper.php index c2f21f7fdf1..60778394197 100644 --- a/lib/spout/src/Spout/Reader/XLSX/Helper/CellHelper.php +++ b/lib/spout/src/Spout/Reader/XLSX/Helper/CellHelper.php @@ -31,6 +31,9 @@ class CellHelper */ public static function fillMissingArrayIndexes($dataArray, $fillValue = '') { + if (empty($dataArray)) { + return []; + } $existingIndexes = array_keys($dataArray); $newIndexes = array_fill_keys(range(0, max($existingIndexes)), $fillValue); diff --git a/lib/spout/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php b/lib/spout/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php index 2a9d3985679..b4c625689c7 100644 --- a/lib/spout/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php +++ b/lib/spout/src/Spout/Reader/XLSX/Helper/CellValueFormatter.php @@ -228,8 +228,8 @@ class CellValueFormatter $dateObj->setTime($hours, $minutes, $seconds); if ($this->shouldFormatDates) { - $styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId); - $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat); + $styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId); + $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode); return $dateObj->format($phpDateFormat); } else { return $dateObj; @@ -257,8 +257,8 @@ class CellValueFormatter $dateObj->modify('+' . $secondsRemainder . 'seconds'); if ($this->shouldFormatDates) { - $styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId); - $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat); + $styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId); + $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode); return $dateObj->format($phpDateFormat); } else { return $dateObj; diff --git a/lib/spout/src/Spout/Reader/XLSX/Helper/DateFormatHelper.php b/lib/spout/src/Spout/Reader/XLSX/Helper/DateFormatHelper.php index 4acbef76d7e..9dba4c685ae 100644 --- a/lib/spout/src/Spout/Reader/XLSX/Helper/DateFormatHelper.php +++ b/lib/spout/src/Spout/Reader/XLSX/Helper/DateFormatHelper.php @@ -62,7 +62,9 @@ class DateFormatHelper public static function toPHPDateFormat($excelDateFormat) { // Remove brackets potentially present at the beginning of the format string - $dateFormat = preg_replace('/^(\[\$[^\]]+?\])/i', '', $excelDateFormat); + // and text portion of the format at the end of it (starting with ";") + // See §18.8.31 of ECMA-376 for more detail. + $dateFormat = preg_replace('/^(?:\[\$[^\]]+?\])?([^;]*).*/', '$1', $excelDateFormat); // Double quotes are used to escape characters that must not be interpreted. // For instance, ["Day " dd] should result in "Day 13" and we should not try to interpret "D", "a", "y" diff --git a/lib/spout/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php b/lib/spout/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php index 0f41e900253..fc04c794dc9 100644 --- a/lib/spout/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php +++ b/lib/spout/src/Spout/Reader/XLSX/Helper/SharedStringsHelper.php @@ -4,7 +4,6 @@ namespace Box\Spout\Reader\XLSX\Helper; use Box\Spout\Common\Exception\IOException; use Box\Spout\Reader\Exception\XMLProcessingException; -use Box\Spout\Reader\Wrapper\SimpleXMLElement; use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory; use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyInterface; @@ -23,6 +22,18 @@ class SharedStringsHelper /** Main namespace for the sharedStrings.xml file */ const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; + /** Definition of XML nodes names used to parse data */ + const XML_NODE_SST = 'sst'; + const XML_NODE_SI = 'si'; + const XML_NODE_R = 'r'; + const XML_NODE_T = 't'; + + /** Definition of XML attributes used to parse data */ + const XML_ATTRIBUTE_COUNT = 'count'; + const XML_ATTRIBUTE_UNIQUE_COUNT = 'uniqueCount'; + const XML_ATTRIBUTE_XML_SPACE = 'xml:space'; + const XML_ATTRIBUTE_VALUE_PRESERVE = 'preserve'; + /** @var string Path of the XLSX file being read */ protected $filePath; @@ -34,7 +45,7 @@ class SharedStringsHelper /** * @param string $filePath Path of the XLSX file being read - * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored + * @param string|null|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored */ public function __construct($filePath, $tempFolder = null) { @@ -69,8 +80,6 @@ class SharedStringsHelper * * The XML file can be really big with sheets containing a lot of data. That is why * we need to use a XML reader that provides streaming like the XMLReader library. - * Please note that SimpleXML does not provide such a functionality but since it is faster - * and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose. * * @return void * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read @@ -79,11 +88,8 @@ class SharedStringsHelper { $xmlReader = new XMLReader(); $sharedStringIndex = 0; - /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ - $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance(); - $sharedStringsFilePath = $this->getSharedStringsFilePath(); - if ($xmlReader->open($sharedStringsFilePath) === false) { + if ($xmlReader->openFileInZip($this->filePath, self::SHARED_STRINGS_XML_FILE_PATH) === false) { throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".'); } @@ -91,58 +97,25 @@ class SharedStringsHelper $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader); $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount); - $xmlReader->readUntilNodeFound('si'); - - while ($xmlReader->name === 'si') { - $node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader); - $node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML); - - // removes nodes that should not be read, like the pronunciation of the Kanji characters - $cleanNode = $this->removeSuperfluousTextNodes($node); - - // find all text nodes "t"; there can be multiple if the cell contains formatting - $textNodes = $cleanNode->xpath('//ns:t'); - - $textValue = ''; - foreach ($textNodes as $nodeIndex => $textNode) { - if ($nodeIndex !== 0) { - // add a space between each "t" node - $textValue .= ' '; - } - - if ($this->shouldPreserveWhitespace($textNode)) { - $textValue .= $textNode->__toString(); - } else { - $textValue .= trim($textNode->__toString()); - } - } - - $unescapedTextValue = $escaper->unescape($textValue); - $this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex); + $xmlReader->readUntilNodeFound(self::XML_NODE_SI); + while ($xmlReader->getCurrentNodeName() === self::XML_NODE_SI) { + $this->processSharedStringsItem($xmlReader, $sharedStringIndex); $sharedStringIndex++; - // jump to the next 'si' tag - $xmlReader->next('si'); + // jump to the next '' tag + $xmlReader->next(self::XML_NODE_SI); } + $this->cachingStrategy->closeCache(); + } catch (XMLProcessingException $exception) { throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]"); } - $this->cachingStrategy->closeCache(); - $xmlReader->close(); } - /** - * @return string The path to the shared strings XML file - */ - protected function getSharedStringsFilePath() - { - return 'zip://' . $this->filePath . '#' . self::SHARED_STRINGS_XML_FILE_PATH; - } - /** * Returns the shared strings unique count, as specified in tag. * @@ -152,19 +125,19 @@ class SharedStringsHelper */ protected function getSharedStringsUniqueCount($xmlReader) { - $xmlReader->next('sst'); + $xmlReader->next(self::XML_NODE_SST); // Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE) - while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== XMLReader::ELEMENT) { + while ($xmlReader->getCurrentNodeName() === self::XML_NODE_SST && $xmlReader->nodeType !== XMLReader::ELEMENT) { $xmlReader->read(); } - $uniqueCount = $xmlReader->getAttribute('uniqueCount'); + $uniqueCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_UNIQUE_COUNT); // some software do not add the "uniqueCount" attribute but only use the "count" one // @see https://github.com/box/spout/issues/254 if ($uniqueCount === null) { - $uniqueCount = $xmlReader->getAttribute('count'); + $uniqueCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_COUNT); } return ($uniqueCount !== null) ? intval($uniqueCount) : null; @@ -183,58 +156,56 @@ class SharedStringsHelper } /** - * Returns a SimpleXMLElement node from the current node in the given XMLReader instance. - * This is to simplify the parsing of the subtree. + * Processes the shared strings item XML node which the given XML reader is positioned on. * - * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader - * @return \Box\Spout\Reader\Wrapper\SimpleXMLElement - * @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on a "" node + * @param int $sharedStringIndex Index of the processed shared strings item + * @return void */ - protected function getSimpleXmlElementNodeFromXMLReader($xmlReader) + protected function processSharedStringsItem($xmlReader, $sharedStringIndex) { - $node = null; - try { - $node = new SimpleXMLElement($xmlReader->readOuterXml()); - } catch (XMLProcessingException $exception) { - throw new IOException("The sharedStrings.xml file contains unreadable data [{$exception->getMessage()}]."); + $sharedStringValue = ''; + + // NOTE: expand() will automatically decode all XML entities of the child nodes + $siNode = $xmlReader->expand(); + $textNodes = $siNode->getElementsByTagName(self::XML_NODE_T); + + foreach ($textNodes as $textNode) { + if ($this->shouldExtractTextNodeValue($textNode)) { + $textNodeValue = $textNode->nodeValue; + $shouldPreserveWhitespace = $this->shouldPreserveWhitespace($textNode); + + $sharedStringValue .= ($shouldPreserveWhitespace) ? $textNodeValue : trim($textNodeValue); + } } - return $node; + $this->cachingStrategy->addStringForIndex($sharedStringValue, $sharedStringIndex); } /** - * Removes nodes that should not be read, like the pronunciation of the Kanji characters. - * By keeping them, their text content would be added to the read string. + * Not all text nodes' values must be extracted. + * Some text nodes are part of a node describing the pronunciation for instance. + * We'll only consider the nodes whose parents are "" or "". * - * @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $parentNode Parent node that may contain nodes to remove - * @return \Box\Spout\Reader\Wrapper\SimpleXMLElement Cleaned parent node + * @param \DOMElement $textNode Text node to check + * @return bool Whether the given text node's value must be extracted */ - protected function removeSuperfluousTextNodes($parentNode) + protected function shouldExtractTextNodeValue($textNode) { - $tagsToRemove = [ - 'rPh', // Pronunciation of the text - 'pPr', // Paragraph Properties / Previous Paragraph Properties - 'rPr', // Run Properties for the Paragraph Mark / Previous Run Properties for the Paragraph Mark - ]; - - foreach ($tagsToRemove as $tagToRemove) { - $xpath = '//ns:' . $tagToRemove; - $parentNode->removeNodesMatchingXPath($xpath); - } - - return $parentNode; + $parentTagName = $textNode->parentNode->localName; + return ($parentTagName === self::XML_NODE_SI || $parentTagName === self::XML_NODE_R); } /** * If the text node has the attribute 'xml:space="preserve"', then preserve whitespace. * - * @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $textNode The text node element () whitespace may be preserved + * @param \DOMElement $textNode The text node element () whose whitespace may be preserved * @return bool Whether whitespace should be preserved */ protected function shouldPreserveWhitespace($textNode) { - $spaceValue = $textNode->getAttribute('space', 'xml'); - return ($spaceValue === 'preserve'); + $spaceValue = $textNode->getAttribute(self::XML_ATTRIBUTE_XML_SPACE); + return ($spaceValue === self::XML_ATTRIBUTE_VALUE_PRESERVE); } /** diff --git a/lib/spout/src/Spout/Reader/XLSX/Helper/SheetHelper.php b/lib/spout/src/Spout/Reader/XLSX/Helper/SheetHelper.php index a6ff909aad3..b74ba01bc36 100644 --- a/lib/spout/src/Spout/Reader/XLSX/Helper/SheetHelper.php +++ b/lib/spout/src/Spout/Reader/XLSX/Helper/SheetHelper.php @@ -17,30 +17,43 @@ class SheetHelper const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels'; const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml'; + /** Definition of XML node names used to parse data */ + const XML_NODE_WORKBOOK_VIEW = 'workbookView'; + const XML_NODE_SHEET = 'sheet'; + const XML_NODE_SHEETS = 'sheets'; + const XML_NODE_RELATIONSHIP = 'Relationship'; + + /** Definition of XML attributes used to parse data */ + const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab'; + const XML_ATTRIBUTE_R_ID = 'r:id'; + const XML_ATTRIBUTE_NAME = 'name'; + const XML_ATTRIBUTE_ID = 'Id'; + const XML_ATTRIBUTE_TARGET = 'Target'; + /** @var string Path of the XLSX file being read */ protected $filePath; + /** @var \Box\Spout\Reader\XLSX\ReaderOptions Reader's current options */ + protected $options; + /** @var \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings */ protected $sharedStringsHelper; /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; - /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ - protected $shouldFormatDates; - /** * @param string $filePath Path of the XLSX file being read + * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) + public function __construct($filePath, $options, $sharedStringsHelper, $globalFunctionsHelper) { $this->filePath = $filePath; + $this->options = $options; $this->sharedStringsHelper = $sharedStringsHelper; $this->globalFunctionsHelper = $globalFunctionsHelper; - $this->shouldFormatDates = $shouldFormatDates; } /** @@ -53,14 +66,20 @@ class SheetHelper { $sheets = []; $sheetIndex = 0; + $activeSheetIndex = 0; // By default, the first sheet is active $xmlReader = new XMLReader(); if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) { while ($xmlReader->read()) { - if ($xmlReader->isPositionedOnStartingNode('sheet')) { - $sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex); + if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_WORKBOOK_VIEW)) { + // The "workbookView" node is located before "sheet" nodes, ensuring that + // the active sheet is known before parsing sheets data. + $activeSheetIndex = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB); + } else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_SHEET)) { + $isSheetActive = ($sheetIndex === $activeSheetIndex); + $sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex, $isSheetActive); $sheetIndex++; - } else if ($xmlReader->isPositionedOnEndingNode('sheets')) { + } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_SHEETS)) { // stop reading once all sheets have been read break; } @@ -79,12 +98,13 @@ class SheetHelper * * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml" * @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based) + * @param bool $isSheetActive Whether this sheet was defined as active * @return \Box\Spout\Reader\XLSX\Sheet Sheet instance */ - protected function getSheetFromSheetXMLNode($xmlReaderOnSheetNode, $sheetIndexZeroBased) + protected function getSheetFromSheetXMLNode($xmlReaderOnSheetNode, $sheetIndexZeroBased, $isSheetActive) { - $sheetId = $xmlReaderOnSheetNode->getAttribute('r:id'); - $escapedSheetName = $xmlReaderOnSheetNode->getAttribute('name'); + $sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID); + $escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME); /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ $escaper = \Box\Spout\Common\Escaper\XLSX::getInstance(); @@ -92,7 +112,11 @@ class SheetHelper $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId); - return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName); + return new Sheet( + $this->filePath, $sheetDataXMLFilePath, + $sheetIndexZeroBased, $sheetName, $isSheetActive, + $this->options, $this->sharedStringsHelper + ); } /** @@ -107,13 +131,13 @@ class SheetHelper $xmlReader = new XMLReader(); if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) { while ($xmlReader->read()) { - if ($xmlReader->isPositionedOnStartingNode('Relationship')) { - $relationshipSheetId = $xmlReader->getAttribute('Id'); + if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) { + $relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID); if ($relationshipSheetId === $sheetId) { // In workbook.xml.rels, it is only "worksheets/sheet1.xml" // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml" - $sheetDataXMLFilePath = $xmlReader->getAttribute('Target'); + $sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET); // sometimes, the sheet data file path already contains "/xl/"... if (strpos($sheetDataXMLFilePath, '/xl/') !== 0) { diff --git a/lib/spout/src/Spout/Reader/XLSX/Helper/StyleHelper.php b/lib/spout/src/Spout/Reader/XLSX/Helper/StyleHelper.php index 85278bf41a3..000adab8877 100644 --- a/lib/spout/src/Spout/Reader/XLSX/Helper/StyleHelper.php +++ b/lib/spout/src/Spout/Reader/XLSX/Helper/StyleHelper.php @@ -29,6 +29,8 @@ class StyleHelper /** By convention, default style ID is 0 */ const DEFAULT_STYLE_ID = 0; + const NUMBER_FORMAT_GENERAL = 'General'; + /** * @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx * @var array Mapping between built-in numFmtId and the associated format - for dates only @@ -51,18 +53,48 @@ class StyleHelper /** @var string Path of the XLSX file being read */ protected $filePath; + /** @var array Array containing the IDs of built-in number formats indicating a date */ + protected $builtinNumFmtIdIndicatingDates; + /** @var array Array containing a mapping NUM_FMT_ID => FORMAT_CODE */ protected $customNumberFormats; /** @var array Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */ protected $stylesAttributes; + /** @var array Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. Used to avoid lots of recalculations */ + protected $numFmtIdToIsDateFormatCache = []; + /** * @param string $filePath Path of the XLSX file being read */ public function __construct($filePath) { $this->filePath = $filePath; + $this->builtinNumFmtIdIndicatingDates = array_keys(self::$builtinNumFmtIdToNumFormatMapping); + } + + /** + * Returns whether the style with the given ID should consider + * numeric values as timestamps and format the cell as a date. + * + * @param int $styleId Zero-based style ID + * @return bool Whether the cell with the given cell should display a date instead of a numeric value + */ + public function shouldFormatNumericValueAsDate($styleId) + { + $stylesAttributes = $this->getStylesAttributes(); + + // Default style (0) does not format numeric values as timestamps. Only custom styles do. + // Also if the style ID does not exist in the styles.xml file, format as numeric value. + // Using isset here because it is way faster than array_key_exists... + if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) { + return false; + } + + $styleAttributes = $stylesAttributes[$styleId]; + + return $this->doesStyleIndicateDate($styleAttributes); } /** @@ -125,9 +157,15 @@ class StyleHelper { while ($xmlReader->read()) { if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) { + $numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID); + $normalizedNumFmtId = ($numFmtId !== null) ? intval($numFmtId) : null; + + $applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT); + $normalizedApplyNumberFormat = ($applyNumberFormat !== null) ? !!$applyNumberFormat : null; + $this->stylesAttributes[] = [ - self::XML_ATTRIBUTE_NUM_FMT_ID => intval($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID)), - self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => !!($xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT)), + self::XML_ATTRIBUTE_NUM_FMT_ID => $normalizedNumFmtId, + self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $normalizedApplyNumberFormat, ]; } else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) { // Once done reading "cellXfs" node's children @@ -161,86 +199,92 @@ class StyleHelper } /** - * Returns whether the style with the given ID should consider - * numeric values as timestamps and format the cell as a date. - * - * @param int $styleId Zero-based style ID - * @return bool Whether the cell with the given cell should display a date instead of a numeric value + * @param array $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId") + * @return bool Whether the style with the given attributes indicates that the number is a date */ - public function shouldFormatNumericValueAsDate($styleId) + protected function doesStyleIndicateDate($styleAttributes) { - $stylesAttributes = $this->getStylesAttributes(); - - // Default style (0) does not format numeric values as timestamps. Only custom styles do. - // Also if the style ID does not exist in the styles.xml file, format as numeric value. - // Using isset here because it is way faster than array_key_exists... - if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) { - return false; - } - - $styleAttributes = $stylesAttributes[$styleId]; - $applyNumberFormat = $styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT]; - if (!$applyNumberFormat) { + $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID]; + + // A style may apply a date format if it has: + // - "applyNumberFormat" attribute not set to "false" + // - "numFmtId" attribute set + // This is a preliminary check, as having "numFmtId" set just means the style should apply a specific number format, + // but this is not necessarily a date. + if ($applyNumberFormat === false || $numFmtId === null) { return false; } - $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID]; return $this->doesNumFmtIdIndicateDate($numFmtId); } /** + * Returns whether the number format ID indicates that the number is a date. + * The result is cached to avoid recomputing the same thing over and over, as + * "numFmtId" attributes can be shared between multiple styles. + * * @param int $numFmtId - * @return bool Whether the number format ID indicates that the number is a timestamp + * @return bool Whether the number format ID indicates that the number is a date */ protected function doesNumFmtIdIndicateDate($numFmtId) { - return ( - !$this->doesNumFmtIdIndicateGeneralFormat($numFmtId) && - ( + if (!isset($this->numFmtIdToIsDateFormatCache[$numFmtId])) { + $formatCode = $this->getFormatCodeForNumFmtId($numFmtId); + + $this->numFmtIdToIsDateFormatCache[$numFmtId] = ( $this->isNumFmtIdBuiltInDateFormat($numFmtId) || - $this->isNumFmtIdCustomDateFormat($numFmtId) - ) - ); + $this->isFormatCodeCustomDateFormat($formatCode) + ); + } + + return $this->numFmtIdToIsDateFormatCache[$numFmtId]; } /** * @param int $numFmtId - * @return bool Whether the number format ID indicates the "General" format (0 by convention) + * @return string|null The custom number format or NULL if none defined for the given numFmtId */ - protected function doesNumFmtIdIndicateGeneralFormat($numFmtId) - { - return ($numFmtId === 0); - } - - /** - * @param int $numFmtId - * @return bool Whether the number format ID indicates that the number is a timestamp - */ - protected function isNumFmtIdBuiltInDateFormat($numFmtId) - { - $builtInDateFormatIds = array_keys(self::$builtinNumFmtIdToNumFormatMapping); - return in_array($numFmtId, $builtInDateFormatIds); - } - - /** - * @param int $numFmtId - * @return bool Whether the number format ID indicates that the number is a timestamp - */ - protected function isNumFmtIdCustomDateFormat($numFmtId) + protected function getFormatCodeForNumFmtId($numFmtId) { $customNumberFormats = $this->getCustomNumberFormats(); // Using isset here because it is way faster than array_key_exists... - if (!isset($customNumberFormats[$numFmtId])) { + return (isset($customNumberFormats[$numFmtId])) ? $customNumberFormats[$numFmtId] : null; + } + + /** + * @param int $numFmtId + * @return bool Whether the number format ID indicates that the number is a date + */ + protected function isNumFmtIdBuiltInDateFormat($numFmtId) + { + return in_array($numFmtId, $this->builtinNumFmtIdIndicatingDates); + } + + /** + * @param string|null $formatCode + * @return bool Whether the given format code indicates that the number is a date + */ + protected function isFormatCodeCustomDateFormat($formatCode) + { + // if no associated format code or if using the default "General" format + if ($formatCode === null || strcasecmp($formatCode, self::NUMBER_FORMAT_GENERAL) === 0) { return false; } - $customNumberFormat = $customNumberFormats[$numFmtId]; + return $this->isFormatCodeMatchingDateFormatPattern($formatCode); + } + /** + * @param string $formatCode + * @return bool Whether the given format code matches a date format pattern + */ + protected function isFormatCodeMatchingDateFormatPattern($formatCode) + { // Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\") $pattern = '((?getStylesAttributes(); $styleAttributes = $stylesAttributes[$styleId]; $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID]; if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) { - $numberFormat = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId]; + $numberFormatCode = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId]; } else { $customNumberFormats = $this->getCustomNumberFormats(); - $numberFormat = $customNumberFormats[$numFmtId]; + $numberFormatCode = $customNumberFormats[$numFmtId]; } - return $numberFormat; + return $numberFormatCode; } } diff --git a/lib/spout/src/Spout/Reader/XLSX/Reader.php b/lib/spout/src/Spout/Reader/XLSX/Reader.php index bcf02cca97a..76e8e325cf1 100644 --- a/lib/spout/src/Spout/Reader/XLSX/Reader.php +++ b/lib/spout/src/Spout/Reader/XLSX/Reader.php @@ -14,9 +14,6 @@ use Box\Spout\Reader\XLSX\Helper\SharedStringsHelper; */ class Reader extends AbstractReader { - /** @var string Temporary folder where the temporary files will be created */ - protected $tempFolder; - /** @var \ZipArchive */ protected $zip; @@ -27,13 +24,26 @@ class Reader extends AbstractReader protected $sheetIterator; + /** + * Returns the reader's current options + * + * @return ReaderOptions + */ + protected function getOptions() + { + if (!isset($this->options)) { + $this->options = new ReaderOptions(); + } + return $this->options; + } + /** * @param string $tempFolder Temporary folder where the temporary files will be created * @return Reader */ public function setTempFolder($tempFolder) { - $this->tempFolder = $tempFolder; + $this->getOptions()->setTempFolder($tempFolder); return $this; } @@ -62,14 +72,14 @@ class Reader extends AbstractReader $this->zip = new \ZipArchive(); if ($this->zip->open($filePath) === true) { - $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder); + $this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->getOptions()->getTempFolder()); if ($this->sharedStringsHelper->hasSharedStrings()) { // Extracts all the strings from the sheets for easy access in the future $this->sharedStringsHelper->extractSharedStrings(); } - $this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates); + $this->sheetIterator = new SheetIterator($filePath, $this->getOptions(), $this->sharedStringsHelper, $this->globalFunctionsHelper); } else { throw new IOException("Could not open $filePath for reading."); } @@ -80,7 +90,7 @@ class Reader extends AbstractReader * * @return SheetIterator To iterate over sheets */ - public function getConcreteSheetIterator() + protected function getConcreteSheetIterator() { return $this->sheetIterator; } diff --git a/lib/spout/src/Spout/Reader/XLSX/ReaderOptions.php b/lib/spout/src/Spout/Reader/XLSX/ReaderOptions.php new file mode 100644 index 00000000000..5f78c5d280a --- /dev/null +++ b/lib/spout/src/Spout/Reader/XLSX/ReaderOptions.php @@ -0,0 +1,33 @@ +tempFolder; + } + + /** + * @param string|null $tempFolder Temporary folder where the temporary files will be created + * @return ReaderOptions + */ + public function setTempFolder($tempFolder) + { + $this->tempFolder = $tempFolder; + return $this; + } +} diff --git a/lib/spout/src/Spout/Reader/XLSX/RowIterator.php b/lib/spout/src/Spout/Reader/XLSX/RowIterator.php index a7c70e69725..e70c6174337 100644 --- a/lib/spout/src/Spout/Reader/XLSX/RowIterator.php +++ b/lib/spout/src/Spout/Reader/XLSX/RowIterator.php @@ -9,6 +9,7 @@ use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\Reader\XLSX\Helper\CellHelper; use Box\Spout\Reader\XLSX\Helper\CellValueFormatter; use Box\Spout\Reader\XLSX\Helper\StyleHelper; +use Box\Spout\Reader\Common\XMLProcessor; /** * Class RowIterator @@ -26,6 +27,7 @@ class RowIterator implements IteratorInterface /** Definition of XML attributes used to parse data */ const XML_ATTRIBUTE_REF = 'ref'; const XML_ATTRIBUTE_SPANS = 'spans'; + const XML_ATTRIBUTE_ROW_INDEX = 'r'; const XML_ATTRIBUTE_CELL_INDEX = 'r'; /** @var string Path of the XLSX file being read */ @@ -37,15 +39,24 @@ class RowIterator implements IteratorInterface /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; + /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */ + protected $xmlProcessor; + /** @var Helper\CellValueFormatter Helper to format cell values */ protected $cellValueFormatter; /** @var Helper\StyleHelper $styleHelper Helper to work with styles */ protected $styleHelper; - /** @var int Number of read rows */ + /** + * TODO: This variable can be deleted when row indices get preserved + * @var int Number of read rows + */ protected $numReadRows = 0; + /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */ + protected $currentlyProcessedRowData = []; + /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ protected $rowDataBuffer = null; @@ -55,16 +66,25 @@ class RowIterator implements IteratorInterface /** @var int The number of columns the sheet has (0 meaning undefined) */ protected $numColumns = 0; + /** @var bool Whether empty rows should be returned or skipped */ + protected $shouldPreserveEmptyRows; + + /** @var int Last row index processed (one-based) */ + protected $lastRowIndexProcessed = 0; + + /** @var int Row index to be processed next (one-based) */ + protected $nextRowIndexToBeProcessed = 0; + /** @var int Last column index processed (zero-based) */ protected $lastColumnIndexProcessed = -1; /** * @param string $filePath Path of the XLSX file being read * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml + * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates) + public function __construct($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper) { $this->filePath = $filePath; $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath); @@ -72,7 +92,17 @@ class RowIterator implements IteratorInterface $this->xmlReader = new XMLReader(); $this->styleHelper = new StyleHelper($filePath); - $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates); + $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $options->shouldFormatDates()); + + $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows(); + + // Register all callbacks to process different nodes when reading the XML file + $this->xmlProcessor = new XMLProcessor($this->xmlReader); + $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']); + $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']); } /** @@ -98,12 +128,13 @@ class RowIterator implements IteratorInterface { $this->xmlReader->close(); - $sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath; - if ($this->xmlReader->open($sheetDataFilePath) === false) { + if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) { throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\"."); } $this->numReadRows = 0; + $this->lastRowIndexProcessed = 0; + $this->nextRowIndexToBeProcessed = 0; $this->rowDataBuffer = null; $this->hasReachedEndOfFile = false; $this->numColumns = 0; @@ -115,7 +146,7 @@ class RowIterator implements IteratorInterface * Checks if current position is valid * @link http://php.net/manual/en/iterator.valid.php * - * @return boolean + * @return bool */ public function valid() { @@ -123,7 +154,7 @@ class RowIterator implements IteratorInterface } /** - * Move forward to next element. Empty rows will be skipped. + * Move forward to next element. Reads data describing the next unprocessed row. * @link http://php.net/manual/en/iterator.next.php * * @return void @@ -132,69 +163,165 @@ class RowIterator implements IteratorInterface */ public function next() { - $rowData = []; + $this->nextRowIndexToBeProcessed++; + + if ($this->doesNeedDataForNextRowToBeProcessed()) { + $this->readDataForNextRow(); + } + } + + /** + * Returns whether we need data for the next row to be processed. + * We don't need to read data if: + * we have already read at least one row + * AND + * we need to preserve empty rows + * AND + * the last row that was read is not the row that need to be processed + * (i.e. if we need to return empty rows) + * + * @return bool Whether we need data for the next row to be processed. + */ + protected function doesNeedDataForNextRowToBeProcessed() + { + $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); + + return ( + !$hasReadAtLeastOneRow || + !$this->shouldPreserveEmptyRows || + $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed + ); + } + + /** + * @return void + * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found + * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML + */ + protected function readDataForNextRow() + { + $this->currentlyProcessedRowData = []; try { - while ($this->xmlReader->read()) { - if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) { - // Read dimensions of the sheet - $dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) - if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) { - $lastCellIndex = $matches[1]; - $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1; - } - - } else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) { - // Start of the row description - - // Reset index of the last processed column - $this->lastColumnIndexProcessed = -1; - - // Read spans info if present - $numberOfColumnsForRow = $this->numColumns; - $spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance - if ($spans) { - list(, $numberOfColumnsForRow) = explode(':', $spans); - $numberOfColumnsForRow = intval($numberOfColumnsForRow); - } - $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; - - } else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) { - // Start of a cell description - $currentColumnIndex = $this->getCellIndex($this->xmlReader); - - $node = $this->xmlReader->expand(); - $rowData[$currentColumnIndex] = $this->getCellValue($node); - - $this->lastColumnIndexProcessed = $currentColumnIndex; - - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) { - // End of the row description - // If needed, we fill the empty cells - $rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData); - $this->numReadRows++; - break; - - } else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) { - // The closing "" marks the end of the file - $this->hasReachedEndOfFile = true; - break; - } - } - + $this->xmlProcessor->readUntilStopped(); } catch (XMLProcessingException $exception) { throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]"); } - $this->rowDataBuffer = $rowData; + $this->rowDataBuffer = $this->currentlyProcessedRowData; } /** - * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" tag - * @return int + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return int A return code that indicates what action should the processor take next + */ + protected function processDimensionStartingNode($xmlReader) + { + // Read dimensions of the sheet + $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) + if (preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) { + $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1; + } + + return XMLProcessor::PROCESSING_CONTINUE; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return int A return code that indicates what action should the processor take next + */ + protected function processRowStartingNode($xmlReader) + { + // Reset index of the last processed column + $this->lastColumnIndexProcessed = -1; + + // Mark the last processed row as the one currently being read + $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader); + + // Read spans info if present + $numberOfColumnsForRow = $this->numColumns; + $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance + if ($spans) { + list(, $numberOfColumnsForRow) = explode(':', $spans); + $numberOfColumnsForRow = intval($numberOfColumnsForRow); + } + + $this->currentlyProcessedRowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; + + return XMLProcessor::PROCESSING_CONTINUE; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" starting node + * @return int A return code that indicates what action should the processor take next + */ + protected function processCellStartingNode($xmlReader) + { + $currentColumnIndex = $this->getColumnIndex($xmlReader); + + // NOTE: expand() will automatically decode all XML entities of the child nodes + $node = $xmlReader->expand(); + $this->currentlyProcessedRowData[$currentColumnIndex] = $this->getCellValue($node); + $this->lastColumnIndexProcessed = $currentColumnIndex; + + return XMLProcessor::PROCESSING_CONTINUE; + } + + /** + * @return int A return code that indicates what action should the processor take next + */ + protected function processRowEndingNode() + { + // if the fetched row is empty and we don't want to preserve it.., + if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData)) { + // ... skip it + return XMLProcessor::PROCESSING_CONTINUE; + } + + $this->numReadRows++; + + // If needed, we fill the empty cells + if ($this->numColumns === 0) { + $this->currentlyProcessedRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData); + } + + // at this point, we have all the data we need for the row + // so that we can populate the buffer + return XMLProcessor::PROCESSING_STOP; + } + + /** + * @return int A return code that indicates what action should the processor take next + */ + protected function processWorksheetEndingNode() + { + // The closing "" marks the end of the file + $this->hasReachedEndOfFile = true; + + return XMLProcessor::PROCESSING_STOP; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" node + * @return int Row index * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid */ - protected function getCellIndex($xmlReader) + protected function getRowIndex($xmlReader) + { + // Get "r" attribute if present (from something like + $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX); + + return ($currentRowIndex !== null) ? + intval($currentRowIndex) : + $this->lastRowIndexProcessed + 1; + } + + /** + * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "" node + * @return int Column index + * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid + */ + protected function getColumnIndex($xmlReader) { // Get "r" attribute if present (from something like $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX); @@ -216,25 +343,53 @@ class RowIterator implements IteratorInterface } /** - * Return the current element, from the buffer. + * @param array $rowData + * @return bool Whether the given row is empty + */ + protected function isEmptyRow($rowData) + { + return (count($rowData) === 1 && key($rowData) === ''); + } + + /** + * Return the current element, either an empty row or from the buffer. * @link http://php.net/manual/en/iterator.current.php * * @return array|null */ public function current() { - return $this->rowDataBuffer; + $rowDataForRowToBeProcessed = $this->rowDataBuffer; + + if ($this->shouldPreserveEmptyRows) { + // when we need to preserve empty rows, we will either return + // an empty row or the last row read. This depends whether the + // index of last row that was read matches the index of the last + // row whose value should be returned. + if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) { + // return empty row if mismatch between last processed row + // and the row that needs to be returned + $rowDataForRowToBeProcessed = ['']; + } + } + + return $rowDataForRowToBeProcessed; } /** - * Return the key of the current element + * Return the key of the current element. Here, the row index. * @link http://php.net/manual/en/iterator.key.php * * @return int */ public function key() { - return $this->numReadRows; + // TODO: This should return $this->nextRowIndexToBeProcessed + // but to avoid a breaking change, the return value for + // this function has been kept as the number of rows read. + return $this->shouldPreserveEmptyRows ? + $this->nextRowIndexToBeProcessed : + $this->numReadRows; } diff --git a/lib/spout/src/Spout/Reader/XLSX/Sheet.php b/lib/spout/src/Spout/Reader/XLSX/Sheet.php index a1c7d9519d0..9baaef247f1 100644 --- a/lib/spout/src/Spout/Reader/XLSX/Sheet.php +++ b/lib/spout/src/Spout/Reader/XLSX/Sheet.php @@ -21,19 +21,24 @@ class Sheet implements SheetInterface /** @var string Name of the sheet */ protected $name; + /** @var bool Whether the sheet was the active one */ + protected $isActive; + /** * @param string $filePath Path of the XLSX file being read * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml - * @param Helper\SharedStringsHelper Helper to work with shared strings - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) * @param string $sheetName Name of the sheet + * @param bool $isSheetActive Whether the sheet was defined as active + * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options + * @param Helper\SharedStringsHelper Helper to work with shared strings */ - public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName) + public function __construct($filePath, $sheetDataXMLFilePath, $sheetIndex, $sheetName, $isSheetActive, $options, $sharedStringsHelper) { - $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates); + $this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper); $this->index = $sheetIndex; $this->name = $sheetName; + $this->isActive = $isSheetActive; } /** @@ -62,4 +67,13 @@ class Sheet implements SheetInterface { return $this->name; } + + /** + * @api + * @return bool Whether the sheet was defined as active + */ + public function isActive() + { + return $this->isActive; + } } diff --git a/lib/spout/src/Spout/Reader/XLSX/SheetIterator.php b/lib/spout/src/Spout/Reader/XLSX/SheetIterator.php index f7a3f599906..7ba07d34286 100644 --- a/lib/spout/src/Spout/Reader/XLSX/SheetIterator.php +++ b/lib/spout/src/Spout/Reader/XLSX/SheetIterator.php @@ -22,15 +22,15 @@ class SheetIterator implements IteratorInterface /** * @param string $filePath Path of the file to be read + * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options * @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper - * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file */ - public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates) + public function __construct($filePath, $options, $sharedStringsHelper, $globalFunctionsHelper) { // Fetch all available sheets - $sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates); + $sheetHelper = new SheetHelper($filePath, $options, $sharedStringsHelper, $globalFunctionsHelper); $this->sheets = $sheetHelper->getSheets(); if (count($this->sheets) === 0) { @@ -53,7 +53,7 @@ class SheetIterator implements IteratorInterface * Checks if current position is valid * @link http://php.net/manual/en/iterator.valid.php * - * @return boolean + * @return bool */ public function valid() { diff --git a/lib/spout/src/Spout/Writer/AbstractWriter.php b/lib/spout/src/Spout/Writer/AbstractWriter.php index 49939eee029..83e190f5f35 100644 --- a/lib/spout/src/Spout/Writer/AbstractWriter.php +++ b/lib/spout/src/Spout/Writer/AbstractWriter.php @@ -4,6 +4,8 @@ namespace Box\Spout\Writer; use Box\Spout\Common\Exception\IOException; use Box\Spout\Common\Exception\InvalidArgumentException; +use Box\Spout\Common\Exception\SpoutException; +use Box\Spout\Common\Helper\FileSystemHelper; use Box\Spout\Writer\Exception\WriterAlreadyOpenedException; use Box\Spout\Writer\Exception\WriterNotOpenedException; use Box\Spout\Writer\Style\StyleBuilder; @@ -199,13 +201,23 @@ abstract class AbstractWriter implements WriterInterface * @return AbstractWriter * @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If this function is called before opening the writer * @throws \Box\Spout\Common\Exception\IOException If unable to write data + * @throws \Box\Spout\Common\Exception\SpoutException If anything else goes wrong while writing data */ public function addRow(array $dataRow) { if ($this->isWriterOpened) { // empty $dataRow should not add an empty line if (!empty($dataRow)) { - $this->addRowToWriter($dataRow, $this->rowStyle); + try { + $this->addRowToWriter($dataRow, $this->rowStyle); + } catch (SpoutException $e) { + // if an exception occurs while writing data, + // close the writer and remove all files created so far. + $this->closeAndAttemptToCleanupAllFiles(); + + // re-throw the exception to alert developers of the error + throw $e; + } } } else { throw new WriterNotOpenedException('The writer needs to be opened before adding row.'); @@ -338,6 +350,10 @@ abstract class AbstractWriter implements WriterInterface */ public function close() { + if (!$this->isWriterOpened) { + return; + } + $this->closeWriter(); if (is_resource($this->filePointer)) { @@ -346,5 +362,23 @@ abstract class AbstractWriter implements WriterInterface $this->isWriterOpened = false; } -} + /** + * Closes the writer and attempts to cleanup all files that were + * created during the writing process (temp files & final file). + * + * @return void + */ + private function closeAndAttemptToCleanupAllFiles() + { + // close the writer, which should remove all temp files + $this->close(); + + // remove output file if it was created + if ($this->globalFunctionsHelper->file_exists($this->outputFilePath)) { + $outputFolderPath = dirname($this->outputFilePath); + $fileSystemHelper = new FileSystemHelper($outputFolderPath); + $fileSystemHelper->deleteFile($this->outputFilePath); + } + } +} diff --git a/lib/spout/src/Spout/Writer/Common/Helper/AbstractStyleHelper.php b/lib/spout/src/Spout/Writer/Common/Helper/AbstractStyleHelper.php index 7cf0eedda15..fa5e26745ef 100644 --- a/lib/spout/src/Spout/Writer/Common/Helper/AbstractStyleHelper.php +++ b/lib/spout/src/Spout/Writer/Common/Helper/AbstractStyleHelper.php @@ -122,7 +122,7 @@ abstract class AbstractStyleHelper protected function applyWrapTextIfCellContainsNewLine($style, $dataRow) { // if the "wrap text" option is already set, no-op - if ($style->shouldWrapText()) { + if ($style->hasSetWrapText()) { return $style; } diff --git a/lib/spout/src/Spout/Writer/Common/Helper/CellHelper.php b/lib/spout/src/Spout/Writer/Common/Helper/CellHelper.php index b32e9ac0977..50ead93be42 100644 --- a/lib/spout/src/Spout/Writer/Common/Helper/CellHelper.php +++ b/lib/spout/src/Spout/Writer/Common/Helper/CellHelper.php @@ -46,6 +46,15 @@ class CellHelper return self::$columnIndexToCellIndexCache[$originalColumnIndex]; } + /** + * @param $value + * @return bool Whether the given value is considered "empty" + */ + public static function isEmpty($value) + { + return ($value === null || $value === ''); + } + /** * @param $value * @return bool Whether the given value is a non empty string diff --git a/lib/spout/src/Spout/Writer/Common/Internal/AbstractWorkbook.php b/lib/spout/src/Spout/Writer/Common/Internal/AbstractWorkbook.php index c8f9f9fb937..e852e1a84d2 100644 --- a/lib/spout/src/Spout/Writer/Common/Internal/AbstractWorkbook.php +++ b/lib/spout/src/Spout/Writer/Common/Internal/AbstractWorkbook.php @@ -16,6 +16,9 @@ abstract class AbstractWorkbook implements WorkbookInterface /** @var bool Whether new sheets should be automatically created when the max rows limit per sheet is reached */ protected $shouldCreateNewSheetsAutomatically; + /** @var string Timestamp based unique ID identifying the workbook */ + protected $internalId; + /** @var WorksheetInterface[] Array containing the workbook's sheets */ protected $worksheets = []; @@ -30,6 +33,7 @@ abstract class AbstractWorkbook implements WorkbookInterface public function __construct($shouldCreateNewSheetsAutomatically, $defaultRowStyle) { $this->shouldCreateNewSheetsAutomatically = $shouldCreateNewSheetsAutomatically; + $this->internalId = uniqid(); } /** diff --git a/lib/spout/src/Spout/Writer/Common/Sheet.php b/lib/spout/src/Spout/Writer/Common/Sheet.php index f6a966ba51a..0d8c63bf251 100644 --- a/lib/spout/src/Spout/Writer/Common/Sheet.php +++ b/lib/spout/src/Spout/Writer/Common/Sheet.php @@ -7,7 +7,7 @@ use Box\Spout\Writer\Exception\InvalidSheetNameException; /** * Class Sheet - * External representation of a worksheet within a ODS file + * External representation of a worksheet * * @package Box\Spout\Writer\Common */ @@ -21,12 +21,15 @@ class Sheet /** @var array Invalid characters that cannot be contained in the sheet name */ private static $INVALID_CHARACTERS_IN_SHEET_NAME = ['\\', '/', '?', '*', ':', '[', ']']; - /** @var array Associative array [SHEET_INDEX] => [SHEET_NAME] keeping track of sheets' name to enforce uniqueness */ + /** @var array Associative array [WORKBOOK_ID] => [[SHEET_INDEX] => [SHEET_NAME]] keeping track of sheets' name to enforce uniqueness per workbook */ protected static $SHEETS_NAME_USED = []; /** @var int Index of the sheet, based on order in the workbook (zero-based) */ protected $index; + /** @var string ID of the sheet's associated workbook. Used to restrict sheet name uniqueness enforcement to a single workbook */ + protected $associatedWorkbookId; + /** @var string Name of the sheet */ protected $name; @@ -35,10 +38,16 @@ class Sheet /** * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based) + * @param string $associatedWorkbookId ID of the sheet's associated workbook */ - public function __construct($sheetIndex) + public function __construct($sheetIndex, $associatedWorkbookId) { $this->index = $sheetIndex; + $this->associatedWorkbookId = $associatedWorkbookId; + if (!isset(self::$SHEETS_NAME_USED[$associatedWorkbookId])) { + self::$SHEETS_NAME_USED[$associatedWorkbookId] = []; + } + $this->stringHelper = new StringHelper(); $this->setName(self::DEFAULT_SHEET_NAME_PREFIX . ($sheetIndex + 1)); } @@ -75,43 +84,58 @@ class Sheet */ public function setName($name) { - if (!$this->isNameValid($name)) { - $errorMessage = "The sheet's name is invalid. It did not meet at least one of these requirements:\n"; - $errorMessage .= " - It should not be blank\n"; - $errorMessage .= " - It should not exceed 31 characters\n"; - $errorMessage .= " - It should not contain these characters: \\ / ? * : [ or ]\n"; - $errorMessage .= " - It should be unique"; - throw new InvalidSheetNameException($errorMessage); - } + $this->throwIfNameIsInvalid($name); $this->name = $name; - self::$SHEETS_NAME_USED[$this->index] = $name; + self::$SHEETS_NAME_USED[$this->associatedWorkbookId][$this->index] = $name; return $this; } /** - * Returns whether the given sheet's name is valid. + * Throws an exception if the given sheet's name is not valid. * @see Sheet::setName for validity rules. * * @param string $name - * @return bool TRUE if the name is valid, FALSE otherwise. + * @return void + * @throws \Box\Spout\Writer\Exception\InvalidSheetNameException If the sheet's name is invalid. */ - protected function isNameValid($name) + protected function throwIfNameIsInvalid($name) { if (!is_string($name)) { - return false; + $actualType = gettype($name); + $errorMessage = "The sheet's name is invalid. It must be a string ($actualType given)."; + throw new InvalidSheetNameException($errorMessage); } + $failedRequirements = []; $nameLength = $this->stringHelper->getStringLength($name); - return ( - $nameLength > 0 && - $nameLength <= self::MAX_LENGTH_SHEET_NAME && - !$this->doesContainInvalidCharacters($name) && - $this->isNameUnique($name) && - !$this->doesStartOrEndWithSingleQuote($name) - ); + if (!$this->isNameUnique($name)) { + $failedRequirements[] = 'It should be unique'; + } else { + if ($nameLength === 0) { + $failedRequirements[] = 'It should not be blank'; + } else { + if ($nameLength > self::MAX_LENGTH_SHEET_NAME) { + $failedRequirements[] = 'It should not exceed 31 characters'; + } + + if ($this->doesContainInvalidCharacters($name)) { + $failedRequirements[] = 'It should not contain these characters: \\ / ? * : [ or ]'; + } + + if ($this->doesStartOrEndWithSingleQuote($name)) { + $failedRequirements[] = 'It should not start or end with a single quote'; + } + } + } + + if (count($failedRequirements) !== 0) { + $errorMessage = "The sheet's name (\"$name\") is invalid. It did not respect these rules:\n - "; + $errorMessage .= implode("\n - ", $failedRequirements); + throw new InvalidSheetNameException($errorMessage); + } } /** @@ -148,7 +172,7 @@ class Sheet */ protected function isNameUnique($name) { - foreach (self::$SHEETS_NAME_USED as $sheetIndex => $sheetName) { + foreach (self::$SHEETS_NAME_USED[$this->associatedWorkbookId] as $sheetIndex => $sheetName) { if ($sheetIndex !== $this->index && $sheetName === $name) { return false; } diff --git a/lib/spout/src/Spout/Writer/ODS/Helper/FileSystemHelper.php b/lib/spout/src/Spout/Writer/ODS/Helper/FileSystemHelper.php index 06cad41c323..34ace0ad13e 100644 --- a/lib/spout/src/Spout/Writer/ODS/Helper/FileSystemHelper.php +++ b/lib/spout/src/Spout/Writer/ODS/Helper/FileSystemHelper.php @@ -75,7 +75,7 @@ class FileSystemHelper extends \Box\Spout\Common\Helper\FileSystemHelper */ protected function createRootFolder() { - $this->rootFolder = $this->createFolder($this->baseFolderPath, uniqid('ods')); + $this->rootFolder = $this->createFolder($this->baseFolderRealPath, uniqid('ods')); return $this; } diff --git a/lib/spout/src/Spout/Writer/ODS/Helper/StyleHelper.php b/lib/spout/src/Spout/Writer/ODS/Helper/StyleHelper.php index 9a0eeee9fb6..f5ad3bc38e5 100644 --- a/lib/spout/src/Spout/Writer/ODS/Helper/StyleHelper.php +++ b/lib/spout/src/Spout/Writer/ODS/Helper/StyleHelper.php @@ -214,64 +214,143 @@ EOD; */ protected function getStyleSectionContent($style) { - $defaultStyle = $this->getDefaultStyle(); $styleIndex = $style->getId() + 1; // 1-based $content = ''; - if ($style->shouldApplyFont()) { - $content .= 'getFontColor(); - if ($fontColor !== $defaultStyle->getFontColor()) { - $content .= ' fo:color="#' . $fontColor . '"'; - } - - $fontName = $style->getFontName(); - if ($fontName !== $defaultStyle->getFontName()) { - $content .= ' style:font-name="' . $fontName . '" style:font-name-asian="' . $fontName . '" style:font-name-complex="' . $fontName . '"'; - } - - $fontSize = $style->getFontSize(); - if ($fontSize !== $defaultStyle->getFontSize()) { - $content .= ' fo:font-size="' . $fontSize . 'pt" style:font-size-asian="' . $fontSize . 'pt" style:font-size-complex="' . $fontSize . 'pt"'; - } - - if ($style->isFontBold()) { - $content .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"'; - } - if ($style->isFontItalic()) { - $content .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"'; - } - if ($style->isFontUnderline()) { - $content .= ' style:text-underline-style="solid" style:text-underline-type="single"'; - } - if ($style->isFontStrikethrough()) { - $content .= ' style:text-line-through-style="solid"'; - } - - $content .= '/>'; - } - - if ($style->shouldWrapText()) { - $content .= ''; - } - - if ($style->shouldApplyBorder()) { - $borderProperty = ''; - $borders = array_map(function (BorderPart $borderPart) { - return BorderHelper::serializeBorderPart($borderPart); - }, $style->getBorder()->getParts()); - $content .= sprintf($borderProperty, implode(' ', $borders)); - } - - if ($style->shouldApplyBackgroundColor()) { - $content .= sprintf(' - ', $style->getBackgroundColor()); - } + $content .= $this->getTextPropertiesSectionContent($style); + $content .= $this->getTableCellPropertiesSectionContent($style); $content .= ''; return $content; } + + /** + * Returns the contents of the "" section, inside "" section + * + * @param \Box\Spout\Writer\Style\Style $style + * @return string + */ + private function getTextPropertiesSectionContent($style) + { + $content = ''; + + if ($style->shouldApplyFont()) { + $content .= $this->getFontSectionContent($style); + } + + return $content; + } + + /** + * Returns the contents of the "" section, inside "" section + * + * @param \Box\Spout\Writer\Style\Style $style + * @return string + */ + private function getFontSectionContent($style) + { + $defaultStyle = $this->getDefaultStyle(); + + $content = 'getFontColor(); + if ($fontColor !== $defaultStyle->getFontColor()) { + $content .= ' fo:color="#' . $fontColor . '"'; + } + + $fontName = $style->getFontName(); + if ($fontName !== $defaultStyle->getFontName()) { + $content .= ' style:font-name="' . $fontName . '" style:font-name-asian="' . $fontName . '" style:font-name-complex="' . $fontName . '"'; + } + + $fontSize = $style->getFontSize(); + if ($fontSize !== $defaultStyle->getFontSize()) { + $content .= ' fo:font-size="' . $fontSize . 'pt" style:font-size-asian="' . $fontSize . 'pt" style:font-size-complex="' . $fontSize . 'pt"'; + } + + if ($style->isFontBold()) { + $content .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"'; + } + if ($style->isFontItalic()) { + $content .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"'; + } + if ($style->isFontUnderline()) { + $content .= ' style:text-underline-style="solid" style:text-underline-type="single"'; + } + if ($style->isFontStrikethrough()) { + $content .= ' style:text-line-through-style="solid"'; + } + + $content .= '/>'; + + return $content; + } + + /** + * Returns the contents of the "" section, inside "" section + * + * @param \Box\Spout\Writer\Style\Style $style + * @return string + */ + private function getTableCellPropertiesSectionContent($style) + { + $content = ''; + + if ($style->shouldWrapText()) { + $content .= $this->getWrapTextXMLContent(); + } + + if ($style->shouldApplyBorder()) { + $content .= $this->getBorderXMLContent($style); + } + + if ($style->shouldApplyBackgroundColor()) { + $content .= $this->getBackgroundColorXMLContent($style); + } + + return $content; + } + + /** + * Returns the contents of the wrap text definition for the "" section + * + * @return string + */ + private function getWrapTextXMLContent() + { + return ''; + } + + /** + * Returns the contents of the borders definition for the "" section + * + * @param \Box\Spout\Writer\Style\Style $style + * @return string + */ + private function getBorderXMLContent($style) + { + $borderProperty = ''; + + $borders = array_map(function (BorderPart $borderPart) { + return BorderHelper::serializeBorderPart($borderPart); + }, $style->getBorder()->getParts()); + + return sprintf($borderProperty, implode(' ', $borders)); + } + + /** + * Returns the contents of the background color definition for the "" section + * + * @param \Box\Spout\Writer\Style\Style $style + * @return string + */ + private function getBackgroundColorXMLContent($style) + { + return sprintf( + '', + $style->getBackgroundColor() + ); + } } diff --git a/lib/spout/src/Spout/Writer/ODS/Internal/Workbook.php b/lib/spout/src/Spout/Writer/ODS/Internal/Workbook.php index ce24f2f62d5..fc64ada8242 100644 --- a/lib/spout/src/Spout/Writer/ODS/Internal/Workbook.php +++ b/lib/spout/src/Spout/Writer/ODS/Internal/Workbook.php @@ -69,7 +69,7 @@ class Workbook extends AbstractWorkbook public function addNewSheet() { $newSheetIndex = count($this->worksheets); - $sheet = new Sheet($newSheetIndex); + $sheet = new Sheet($newSheetIndex, $this->internalId); $sheetsContentTempFolder = $this->fileSystemHelper->getSheetsContentTempFolder(); $worksheet = new Worksheet($sheet, $sheetsContentTempFolder); diff --git a/lib/spout/src/Spout/Writer/ODS/Internal/Worksheet.php b/lib/spout/src/Spout/Writer/ODS/Internal/Worksheet.php index 5d6ce42f8b5..0920b6dbaab 100644 --- a/lib/spout/src/Spout/Writer/ODS/Internal/Worksheet.php +++ b/lib/spout/src/Spout/Writer/ODS/Internal/Worksheet.php @@ -40,7 +40,7 @@ class Worksheet implements WorksheetInterface /** * @param \Box\Spout\Writer\Common\Sheet $externalSheet The associated "external" sheet - * @param string $worksheetFilesFolder Temporary folder where the files to create the XLSX will be stored + * @param string $worksheetFilesFolder Temporary folder where the files to create the ODS will be stored * @throws \Box\Spout\Common\Exception\IOException If the sheet data file cannot be opened for writing */ public function __construct($externalSheet, $worksheetFilesFolder) diff --git a/lib/spout/src/Spout/Writer/ODS/Writer.php b/lib/spout/src/Spout/Writer/ODS/Writer.php index 4b35dfdd881..6571d0071d2 100644 --- a/lib/spout/src/Spout/Writer/ODS/Writer.php +++ b/lib/spout/src/Spout/Writer/ODS/Writer.php @@ -20,7 +20,7 @@ class Writer extends AbstractMultiSheetsWriter /** @var string Temporary folder where the files to create the ODS will be stored */ protected $tempFolder; - /** @var Internal\Workbook The workbook for the XLSX file */ + /** @var Internal\Workbook The workbook for the ODS file */ protected $book; /** diff --git a/lib/spout/src/Spout/Writer/Style/Style.php b/lib/spout/src/Spout/Writer/Style/Style.php index 45057802607..b408ad3ca11 100644 --- a/lib/spout/src/Spout/Writer/Style/Style.php +++ b/lib/spout/src/Spout/Writer/Style/Style.php @@ -106,6 +106,7 @@ class Style /** * @param Border $border + * @return Style */ public function setBorder(Border $border) { @@ -115,7 +116,7 @@ class Style } /** - * @return boolean + * @return bool */ public function shouldApplyBorder() { @@ -123,7 +124,7 @@ class Style } /** - * @return boolean + * @return bool */ public function isFontBold() { @@ -142,7 +143,7 @@ class Style } /** - * @return boolean + * @return bool */ public function isFontItalic() { @@ -161,7 +162,7 @@ class Style } /** - * @return boolean + * @return bool */ public function isFontUnderline() { @@ -180,7 +181,7 @@ class Style } /** - * @return boolean + * @return bool */ public function isFontStrikethrough() { @@ -261,7 +262,7 @@ class Style } /** - * @return boolean + * @return bool */ public function shouldWrapText() { @@ -269,15 +270,24 @@ class Style } /** + * @param bool|void $shouldWrap Should the text be wrapped * @return Style */ - public function setShouldWrapText() + public function setShouldWrapText($shouldWrap = true) { - $this->shouldWrapText = true; + $this->shouldWrapText = $shouldWrap; $this->hasSetWrapText = true; return $this; } + /** + * @return bool + */ + public function hasSetWrapText() + { + return $this->hasSetWrapText; + } + /** * @return bool Whether specific font properties should be applied */ @@ -350,37 +360,67 @@ class Style { $mergedStyle = clone $this; - if (!$this->hasSetFontBold && $baseStyle->isFontBold()) { - $mergedStyle->setFontBold(); - } - if (!$this->hasSetFontItalic && $baseStyle->isFontItalic()) { - $mergedStyle->setFontItalic(); - } - if (!$this->hasSetFontUnderline && $baseStyle->isFontUnderline()) { - $mergedStyle->setFontUnderline(); - } - if (!$this->hasSetFontStrikethrough && $baseStyle->isFontStrikethrough()) { - $mergedStyle->setFontStrikethrough(); - } - if (!$this->hasSetFontSize && $baseStyle->getFontSize() !== self::DEFAULT_FONT_SIZE) { - $mergedStyle->setFontSize($baseStyle->getFontSize()); - } - if (!$this->hasSetFontColor && $baseStyle->getFontColor() !== self::DEFAULT_FONT_COLOR) { - $mergedStyle->setFontColor($baseStyle->getFontColor()); - } - if (!$this->hasSetFontName && $baseStyle->getFontName() !== self::DEFAULT_FONT_NAME) { - $mergedStyle->setFontName($baseStyle->getFontName()); - } - if (!$this->hasSetWrapText && $baseStyle->shouldWrapText()) { - $mergedStyle->setShouldWrapText(); - } - if (!$this->getBorder() && $baseStyle->shouldApplyBorder()) { - $mergedStyle->setBorder($baseStyle->getBorder()); - } - if (!$this->hasSetBackgroundColor && $baseStyle->shouldApplyBackgroundColor()) { - $mergedStyle->setBackgroundColor($baseStyle->getBackgroundColor()); - } + $this->mergeFontStyles($mergedStyle, $baseStyle); + $this->mergeOtherFontProperties($mergedStyle, $baseStyle); + $this->mergeCellProperties($mergedStyle, $baseStyle); return $mergedStyle; } + + /** + * @param Style $styleToUpdate (passed as reference) + * @param Style $baseStyle + * @return void + */ + private function mergeFontStyles($styleToUpdate, $baseStyle) + { + if (!$this->hasSetFontBold && $baseStyle->isFontBold()) { + $styleToUpdate->setFontBold(); + } + if (!$this->hasSetFontItalic && $baseStyle->isFontItalic()) { + $styleToUpdate->setFontItalic(); + } + if (!$this->hasSetFontUnderline && $baseStyle->isFontUnderline()) { + $styleToUpdate->setFontUnderline(); + } + if (!$this->hasSetFontStrikethrough && $baseStyle->isFontStrikethrough()) { + $styleToUpdate->setFontStrikethrough(); + } + } + + /** + * @param Style $styleToUpdate Style to update (passed as reference) + * @param Style $baseStyle + * @return void + */ + private function mergeOtherFontProperties($styleToUpdate, $baseStyle) + { + if (!$this->hasSetFontSize && $baseStyle->getFontSize() !== self::DEFAULT_FONT_SIZE) { + $styleToUpdate->setFontSize($baseStyle->getFontSize()); + } + if (!$this->hasSetFontColor && $baseStyle->getFontColor() !== self::DEFAULT_FONT_COLOR) { + $styleToUpdate->setFontColor($baseStyle->getFontColor()); + } + if (!$this->hasSetFontName && $baseStyle->getFontName() !== self::DEFAULT_FONT_NAME) { + $styleToUpdate->setFontName($baseStyle->getFontName()); + } + } + + /** + * @param Style $styleToUpdate Style to update (passed as reference) + * @param Style $baseStyle + * @return void + */ + private function mergeCellProperties($styleToUpdate, $baseStyle) + { + if (!$this->hasSetWrapText && $baseStyle->shouldWrapText()) { + $styleToUpdate->setShouldWrapText(); + } + if (!$this->getBorder() && $baseStyle->shouldApplyBorder()) { + $styleToUpdate->setBorder($baseStyle->getBorder()); + } + if (!$this->hasSetBackgroundColor && $baseStyle->shouldApplyBackgroundColor()) { + $styleToUpdate->setBackgroundColor($baseStyle->getBackgroundColor()); + } + } } diff --git a/lib/spout/src/Spout/Writer/Style/StyleBuilder.php b/lib/spout/src/Spout/Writer/Style/StyleBuilder.php index d620de428fa..2676cbe6fc3 100644 --- a/lib/spout/src/Spout/Writer/Style/StyleBuilder.php +++ b/lib/spout/src/Spout/Writer/Style/StyleBuilder.php @@ -109,15 +109,15 @@ class StyleBuilder } /** - * Makes the text wrap in the cell if it's too long or - * on multiple lines. + * Makes the text wrap in the cell if requested * * @api + * @param bool $shouldWrap Should the text be wrapped * @return StyleBuilder */ - public function setShouldWrapText() + public function setShouldWrapText($shouldWrap = true) { - $this->style->setShouldWrapText(); + $this->style->setShouldWrapText($shouldWrap); return $this; } @@ -138,7 +138,7 @@ class StyleBuilder * * @api * @param string $color ARGB color (@see Color) - * @return StyleBuilder + * @return StyleBuilder */ public function setBackgroundColor($color) { diff --git a/lib/spout/src/Spout/Writer/WriterInterface.php b/lib/spout/src/Spout/Writer/WriterInterface.php index e2d9f8d291b..93a6ee24b6c 100644 --- a/lib/spout/src/Spout/Writer/WriterInterface.php +++ b/lib/spout/src/Spout/Writer/WriterInterface.php @@ -35,7 +35,7 @@ interface WriterInterface * @param array $dataRow Array containing data to be streamed. * Example $dataRow = ['data1', 1234, null, '', 'data5']; * @return WriterInterface - * @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If the writer has not been opened yetthe writer + * @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If the writer has not been opened yet * @throws \Box\Spout\Common\Exception\IOException If unable to write data */ public function addRow(array $dataRow); diff --git a/lib/spout/src/Spout/Writer/XLSX/Helper/FileSystemHelper.php b/lib/spout/src/Spout/Writer/XLSX/Helper/FileSystemHelper.php index 786e62e4a6d..86515f30c55 100644 --- a/lib/spout/src/Spout/Writer/XLSX/Helper/FileSystemHelper.php +++ b/lib/spout/src/Spout/Writer/XLSX/Helper/FileSystemHelper.php @@ -94,7 +94,7 @@ class FileSystemHelper extends \Box\Spout\Common\Helper\FileSystemHelper */ protected function createRootFolder() { - $this->rootFolder = $this->createFolder($this->baseFolderPath, uniqid('xlsx', true)); + $this->rootFolder = $this->createFolder($this->baseFolderRealPath, uniqid('xlsx', true)); return $this; } diff --git a/lib/spout/src/Spout/Writer/XLSX/Internal/Workbook.php b/lib/spout/src/Spout/Writer/XLSX/Internal/Workbook.php index bdf027fdae9..bcdce7f725b 100644 --- a/lib/spout/src/Spout/Writer/XLSX/Internal/Workbook.php +++ b/lib/spout/src/Spout/Writer/XLSX/Internal/Workbook.php @@ -83,7 +83,7 @@ class Workbook extends AbstractWorkbook public function addNewSheet() { $newSheetIndex = count($this->worksheets); - $sheet = new Sheet($newSheetIndex); + $sheet = new Sheet($newSheetIndex, $this->internalId); $worksheetFilesFolder = $this->fileSystemHelper->getXlWorksheetsFolder(); $worksheet = new Worksheet($sheet, $worksheetFilesFolder, $this->sharedStringsHelper, $this->styleHelper, $this->shouldUseInlineStrings); diff --git a/lib/spout/src/Spout/Writer/XLSX/Internal/Worksheet.php b/lib/spout/src/Spout/Writer/XLSX/Internal/Worksheet.php index 72aa41903ab..0bd909d1845 100644 --- a/lib/spout/src/Spout/Writer/XLSX/Internal/Worksheet.php +++ b/lib/spout/src/Spout/Writer/XLSX/Internal/Worksheet.php @@ -4,6 +4,7 @@ namespace Box\Spout\Writer\XLSX\Internal; use Box\Spout\Common\Exception\InvalidArgumentException; use Box\Spout\Common\Exception\IOException; +use Box\Spout\Common\Helper\StringHelper; use Box\Spout\Writer\Common\Helper\CellHelper; use Box\Spout\Writer\Common\Internal\WorksheetInterface; @@ -16,6 +17,14 @@ use Box\Spout\Writer\Common\Internal\WorksheetInterface; */ class Worksheet implements WorksheetInterface { + /** + * Maximum number of characters a cell can contain + * @see https://support.office.com/en-us/article/Excel-specifications-and-limits-16c69c74-3d6a-4aaf-ba35-e6eb276e8eaa [Excel 2007] + * @see https://support.office.com/en-us/article/Excel-specifications-and-limits-1672b34d-7043-467e-8e27-269d656771c3 [Excel 2010] + * @see https://support.office.com/en-us/article/Excel-specifications-and-limits-ca36e2dc-1f09-4620-b726-67c00b05040f [Excel 2013/2016] + */ + const MAX_CHARACTERS_PER_CELL = 32767; + const SHEET_XML_FILE_HEADER = << @@ -39,6 +48,9 @@ EOD; /** @var \Box\Spout\Common\Escaper\XLSX Strings escaper */ protected $stringsEscaper; + /** @var \Box\Spout\Common\Helper\StringHelper String helper */ + protected $stringHelper; + /** @var Resource Pointer to the sheet data file (e.g. xl/worksheets/sheet1.xml) */ protected $sheetFilePointer; @@ -62,6 +74,7 @@ EOD; /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ $this->stringsEscaper = \Box\Spout\Common\Escaper\XLSX::getInstance(); + $this->stringHelper = new StringHelper(); $this->worksheetFilePath = $worksheetFilesFolder . '/' . strtolower($this->externalSheet->getName()) . '.xml'; $this->startSheet(); @@ -131,6 +144,39 @@ EOD; * @throws \Box\Spout\Common\Exception\InvalidArgumentException If a cell value's type is not supported */ public function addRow($dataRow, $style) + { + if (!$this->isEmptyRow($dataRow)) { + $this->addNonEmptyRow($dataRow, $style); + } + + $this->lastWrittenRowIndex++; + } + + /** + * Returns whether the given row is empty + * + * @param array $dataRow Array containing data to be written. Cannot be empty. + * Example $dataRow = ['data1', 1234, null, '', 'data5']; + * @return bool Whether the given row is empty + */ + protected function isEmptyRow($dataRow) + { + $numCells = count($dataRow); + // using "reset()" instead of "$dataRow[0]" because $dataRow can be an associative array + return ($numCells === 1 && CellHelper::isEmpty(reset($dataRow))); + } + + /** + * Adds non empty row to the worksheet. + * + * @param array $dataRow Array containing data to be written. Cannot be empty. + * Example $dataRow = ['data1', 1234, null, '', 'data5']; + * @param \Box\Spout\Writer\Style\Style $style Style to be applied to the row. NULL means use default style. + * @return void + * @throws \Box\Spout\Common\Exception\IOException If the data cannot be written + * @throws \Box\Spout\Common\Exception\InvalidArgumentException If a cell value's type is not supported + */ + protected function addNonEmptyRow($dataRow, $style) { $cellNumber = 0; $rowIndex = $this->lastWrittenRowIndex + 1; @@ -149,9 +195,6 @@ EOD; if ($wasWriteSuccessful === false) { throw new IOException("Unable to write data in {$this->worksheetFilePath}"); } - - // only update the count if the write worked - $this->lastWrittenRowIndex++; } /** @@ -162,21 +205,16 @@ EOD; * @param mixed $cellValue * @param int $styleId * @return string - * @throws InvalidArgumentException + * @throws InvalidArgumentException If the given value cannot be processed */ - private function getCellXML($rowIndex, $cellNumber, $cellValue, $styleId) + protected function getCellXML($rowIndex, $cellNumber, $cellValue, $styleId) { $columnIndex = CellHelper::getCellIndexFromColumnIndex($cellNumber); $cellXML = 'shouldUseInlineStrings) { - $cellXML .= ' t="inlineStr">' . $this->stringsEscaper->escape($cellValue) . ''; - } else { - $sharedStringId = $this->sharedStringsHelper->writeString($cellValue); - $cellXML .= ' t="s">' . $sharedStringId . ''; - } + $cellXML .= $this->getCellXMLFragmentForNonEmptyString($cellValue); } else if (CellHelper::isBoolean($cellValue)) { $cellXML .= ' t="b">' . intval($cellValue) . ''; } else if (CellHelper::isNumeric($cellValue)) { @@ -196,6 +234,29 @@ EOD; return $cellXML; } + /** + * Returns the XML fragment for a cell containing a non empty string + * + * @param string $cellValue The cell value + * @return string The XML fragment representing the cell + * @throws InvalidArgumentException If the string exceeds the maximum number of characters allowed per cell + */ + protected function getCellXMLFragmentForNonEmptyString($cellValue) + { + if ($this->stringHelper->getStringLength($cellValue) > self::MAX_CHARACTERS_PER_CELL) { + throw new InvalidArgumentException('Trying to add a value that exceeds the maximum number of characters allowed in a cell (32,767)'); + } + + if ($this->shouldUseInlineStrings) { + $cellXMLFragment = ' t="inlineStr">' . $this->stringsEscaper->escape($cellValue) . ''; + } else { + $sharedStringId = $this->sharedStringsHelper->writeString($cellValue); + $cellXMLFragment = ' t="s">' . $sharedStringId . ''; + } + + return $cellXMLFragment; + } + /** * Closes the worksheet * diff --git a/lib/thirdpartylibs.xml b/lib/thirdpartylibs.xml index ce97f483e9b..d0ce19b57db 100644 --- a/lib/thirdpartylibs.xml +++ b/lib/thirdpartylibs.xml @@ -263,7 +263,7 @@ spout Spout Apache - 2.6.0 + 2.7.3 2.0