MDL-60288 libraries: Upgrade spout to 2.7.3

This commit is contained in:
Ankit Agarwal 2017-10-06 01:01:17 +05:30
parent d8e9a23c48
commit de6ad82f2c
49 changed files with 1730 additions and 737 deletions

View File

@ -1,9 +1,14 @@
Description of Spout library import 2.6.0
Description of Spout library import 2.7.3
=========================================
* Download / Clone from https://github.com/box/spout/
* Only include the src/Spout directory.
* Update lib/thirdpartylibs.xml with the latest version.
2017/10/10
----------
Updated to v2.7.3 (MDL-60288)
by Ankit Agarwal <ankit.agrr@gmail.com>
2016/09/20
----------
Updated to v2.6.0 (MDL-56012)

View File

@ -22,7 +22,29 @@ class ODS implements EscaperInterface
*/
public function escape($string)
{
return htmlspecialchars($string, ENT_QUOTES);
if (defined('ENT_DISALLOWED')) {
// 'ENT_DISALLOWED' ensures that invalid characters in the given document type are replaced.
// Otherwise control characters like a vertical tab "\v" will make the XML document unreadable by the XML processor
// @link https://github.com/box/spout/issues/329
$replacedString = htmlspecialchars($string, ENT_NOQUOTES | ENT_DISALLOWED);
} else {
// We are on hhvm or any other engine that does not support ENT_DISALLOWED.
//
// @NOTE: Using ENT_NOQUOTES as only XML entities ('<', '>', '&') need to be encoded.
// Single and double quotes can be left as is.
$escapedString = htmlspecialchars($string, ENT_NOQUOTES);
// control characters values are from 0 to 1F (hex values) in the ASCII table
// some characters should not be escaped though: "\t", "\r" and "\n".
$regexPattern = '[\x00-\x08' .
// skipping "\t" (0x9) and "\n" (0xA)
'\x0B-\x0C' .
// skipping "\r" (0xD)
'\x0E-\x1F]';
$replacedString = preg_replace("/$regexPattern/", '<27>', $escapedString);
}
return $replacedString;
}
/**
@ -33,6 +55,12 @@ class ODS implements EscaperInterface
*/
public function unescape($string)
{
return htmlspecialchars_decode($string, ENT_QUOTES);
// ==============
// = WARNING =
// ==============
// It is assumed that the given string has already had its XML entities decoded.
// This is true if the string is coming from a DOMNode (as DOMNode already decodes XML entities on creation).
// Therefore there is no need to call "htmlspecialchars_decode()".
return $string;
}
}

View File

@ -42,7 +42,9 @@ class XLSX implements EscaperInterface
public function escape($string)
{
$escapedString = $this->escapeControlCharacters($string);
$escapedString = htmlspecialchars($escapedString, ENT_QUOTES);
// @NOTE: Using ENT_NOQUOTES as only XML entities ('<', '>', '&') need to be encoded.
// Single and double quotes can be left as is.
$escapedString = htmlspecialchars($escapedString, ENT_NOQUOTES);
return $escapedString;
}
@ -55,8 +57,13 @@ class XLSX implements EscaperInterface
*/
public function unescape($string)
{
$unescapedString = htmlspecialchars_decode($string, ENT_QUOTES);
$unescapedString = $this->unescapeControlCharacters($unescapedString);
// ==============
// = WARNING =
// ==============
// It is assumed that the given string has already had its XML entities decoded.
// This is true if the string is coming from a DOMNode (as DOMNode already decodes XML entities on creation).
// Therefore there is no need to call "htmlspecialchars_decode()".
$unescapedString = $this->unescapeControlCharacters($string);
return $unescapedString;
}

View File

@ -13,15 +13,15 @@ use Box\Spout\Common\Exception\IOException;
*/
class FileSystemHelper
{
/** @var string Path of the base folder where all the I/O can occur */
protected $baseFolderPath;
/** @var string Real path of the base folder where all the I/O can occur */
protected $baseFolderRealPath;
/**
* @param string $baseFolderPath The path of the base folder where all the I/O can occur
*/
public function __construct($baseFolderPath)
{
$this->baseFolderPath = $baseFolderPath;
$this->baseFolderRealPath = realpath($baseFolderPath);
}
/**
@ -124,9 +124,10 @@ class FileSystemHelper
*/
protected function throwIfOperationNotInBaseFolder($operationFolderPath)
{
$isInBaseFolder = (strpos($operationFolderPath, $this->baseFolderPath) === 0);
$operationFolderRealPath = realpath($operationFolderPath);
$isInBaseFolder = (strpos($operationFolderRealPath, $this->baseFolderRealPath) === 0);
if (!$isInBaseFolder) {
throw new IOException("Cannot perform I/O operation outside of the base folder: {$this->baseFolderPath}");
throw new IOException("Cannot perform I/O operation outside of the base folder: {$this->baseFolderRealPath}");
}
}
}

View File

@ -19,8 +19,15 @@ abstract class AbstractReader implements ReaderInterface
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates = false;
/** @var \Box\Spout\Reader\Common\ReaderOptions Reader's customized options */
protected $options;
/**
* Returns the reader's current options
*
* @return \Box\Spout\Reader\Common\ReaderOptions
*/
abstract protected function getOptions();
/**
* Returns whether stream wrappers are supported
@ -42,7 +49,7 @@ abstract class AbstractReader implements ReaderInterface
*
* @return \Iterator To iterate over sheets
*/
abstract public function getConcreteSheetIterator();
abstract protected function getConcreteSheetIterator();
/**
* Closes the reader. To be used after reading the file.
@ -64,12 +71,26 @@ abstract class AbstractReader implements ReaderInterface
/**
* Sets whether date/time values should be returned as PHP objects or be formatted as strings.
*
* @api
* @param bool $shouldFormatDates
* @return AbstractReader
*/
public function setShouldFormatDates($shouldFormatDates)
{
$this->shouldFormatDates = $shouldFormatDates;
$this->getOptions()->setShouldFormatDates($shouldFormatDates);
return $this;
}
/**
* Sets whether empty rows should be returned or skipped.
*
* @api
* @param bool $shouldPreserveEmptyRows
* @return AbstractReader
*/
public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
{
$this->getOptions()->setShouldPreserveEmptyRows($shouldPreserveEmptyRows);
return $this;
}

View File

@ -4,7 +4,6 @@ namespace Box\Spout\Reader\CSV;
use Box\Spout\Reader\AbstractReader;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Common\Helper\EncodingHelper;
/**
* Class Reader
@ -20,20 +19,21 @@ class Reader extends AbstractReader
/** @var SheetIterator To iterate over the CSV unique "sheet" */
protected $sheetIterator;
/** @var string Defines the character used to delimit fields (one character only) */
protected $fieldDelimiter = ',';
/** @var string Original value for the "auto_detect_line_endings" INI value */
protected $originalAutoDetectLineEndings;
/** @var string Defines the character used to enclose fields (one character only) */
protected $fieldEnclosure = '"';
/** @var string Encoding of the CSV file to be read */
protected $encoding = EncodingHelper::ENCODING_UTF8;
/** @var string Defines the End of line */
protected $endOfLineCharacter = "\n";
/** @var string */
protected $autoDetectLineEndings;
/**
 * Gives access to the options object of this reader.
 * The object is created the first time it is requested and reused afterwards.
 *
 * @return ReaderOptions
 */
protected function getOptions()
{
    if (isset($this->options)) {
        return $this->options;
    }

    $this->options = new ReaderOptions();

    return $this->options;
}
/**
* Sets the field delimiter for the CSV.
@ -44,7 +44,7 @@ class Reader extends AbstractReader
*/
public function setFieldDelimiter($fieldDelimiter)
{
$this->fieldDelimiter = $fieldDelimiter;
$this->getOptions()->setFieldDelimiter($fieldDelimiter);
return $this;
}
@ -57,7 +57,7 @@ class Reader extends AbstractReader
*/
public function setFieldEnclosure($fieldEnclosure)
{
$this->fieldEnclosure = $fieldEnclosure;
$this->getOptions()->setFieldEnclosure($fieldEnclosure);
return $this;
}
@ -70,7 +70,7 @@ class Reader extends AbstractReader
*/
public function setEncoding($encoding)
{
$this->encoding = $encoding;
$this->getOptions()->setEncoding($encoding);
return $this;
}
@ -83,7 +83,7 @@ class Reader extends AbstractReader
*/
public function setEndOfLineCharacter($endOfLineCharacter)
{
$this->endOfLineCharacter = $endOfLineCharacter;
$this->getOptions()->setEndOfLineCharacter($endOfLineCharacter);
return $this;
}
@ -107,7 +107,7 @@ class Reader extends AbstractReader
*/
protected function openReader($filePath)
{
$this->autoDetectLineEndings = ini_get('auto_detect_line_endings');
$this->originalAutoDetectLineEndings = ini_get('auto_detect_line_endings');
ini_set('auto_detect_line_endings', '1');
$this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r');
@ -117,10 +117,7 @@ class Reader extends AbstractReader
$this->sheetIterator = new SheetIterator(
$this->filePointer,
$this->fieldDelimiter,
$this->fieldEnclosure,
$this->encoding,
$this->endOfLineCharacter,
$this->getOptions(),
$this->globalFunctionsHelper
);
}
@ -130,7 +127,7 @@ class Reader extends AbstractReader
*
* @return SheetIterator To iterate over sheets
*/
public function getConcreteSheetIterator()
protected function getConcreteSheetIterator()
{
return $this->sheetIterator;
}
@ -147,6 +144,6 @@ class Reader extends AbstractReader
$this->globalFunctionsHelper->fclose($this->filePointer);
}
ini_set('auto_detect_line_endings', $this->autoDetectLineEndings);
ini_set('auto_detect_line_endings', $this->originalAutoDetectLineEndings);
}
}

View File

@ -0,0 +1,110 @@
<?php
namespace Box\Spout\Reader\CSV;
use Box\Spout\Common\Helper\EncodingHelper;
/**
 * Class ReaderOptions
 * Stores the CSV-specific settings of a reader (field delimiter, field enclosure,
 * encoding and end of line character) in addition to the inherited common options.
 *
 * All setters return the options object itself so that calls can be chained.
 *
 * @package Box\Spout\Reader\CSV
 */
class ReaderOptions extends \Box\Spout\Reader\Common\ReaderOptions
{
    /** @var string Character separating two fields (one character only) */
    protected $fieldDelimiter = ',';

    /** @var string Character wrapping a field (one character only) */
    protected $fieldEnclosure = '"';

    /** @var string Encoding of the CSV file to be read */
    protected $encoding = EncodingHelper::ENCODING_UTF8;

    /** @var string Character marking the end of a line */
    protected $endOfLineCharacter = "\n";

    /**
     * Sets the field delimiter for the CSV.
     * Needs to be called before opening the reader.
     *
     * @param string $fieldDelimiter Character that delimits fields
     * @return ReaderOptions
     */
    public function setFieldDelimiter($fieldDelimiter)
    {
        $this->fieldDelimiter = $fieldDelimiter;

        return $this;
    }

    /**
     * @return string Character that delimits fields
     */
    public function getFieldDelimiter()
    {
        return $this->fieldDelimiter;
    }

    /**
     * Sets the field enclosure for the CSV.
     * Needs to be called before opening the reader.
     *
     * @param string $fieldEnclosure Character that encloses fields
     * @return ReaderOptions
     */
    public function setFieldEnclosure($fieldEnclosure)
    {
        $this->fieldEnclosure = $fieldEnclosure;

        return $this;
    }

    /**
     * @return string Character that encloses fields
     */
    public function getFieldEnclosure()
    {
        return $this->fieldEnclosure;
    }

    /**
     * Sets the encoding of the CSV file to be read.
     * Needs to be called before opening the reader.
     *
     * @param string $encoding Encoding of the CSV file to be read
     * @return ReaderOptions
     */
    public function setEncoding($encoding)
    {
        $this->encoding = $encoding;

        return $this;
    }

    /**
     * @return string Encoding of the CSV file to be read
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Sets the EOL for the CSV.
     * Needs to be called before opening the reader.
     *
     * @param string $endOfLineCharacter used to properly get lines from the CSV file.
     * @return ReaderOptions
     */
    public function setEndOfLineCharacter($endOfLineCharacter)
    {
        $this->endOfLineCharacter = $endOfLineCharacter;

        return $this;
    }

    /**
     * @return string EOL for the CSV
     */
    public function getEndOfLineCharacter()
    {
        return $this->endOfLineCharacter;
    }
}

View File

@ -14,10 +14,9 @@ use Box\Spout\Common\Helper\EncodingHelper;
class RowIterator implements IteratorInterface
{
/**
* If no value is given to fgetcsv(), it defaults to 8192 (which may be too low).
* Alignement with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
* Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates very long lines).
*/
const MAX_READ_BYTES_PER_LINE = 32768;
const MAX_READ_BYTES_PER_LINE = 0;
/** @var resource Pointer to the CSV file to read */
protected $filePointer;
@ -40,6 +39,12 @@ class RowIterator implements IteratorInterface
/** @var string Encoding of the CSV file to be read */
protected $encoding;
/** @var string End of line delimiter, given by the user as input. */
protected $inputEOLDelimiter;
/** @var bool Whether empty rows should be returned or skipped */
protected $shouldPreserveEmptyRows;
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
@ -49,24 +54,19 @@ class RowIterator implements IteratorInterface
/** @var string End of line delimiter, encoded using the same encoding as the CSV */
protected $encodedEOLDelimiter;
/** @var string End of line delimiter, given by the user as input. */
protected $inputEOLDelimiter;
/**
* @param resource $filePointer Pointer to the CSV file to read
* @param string $fieldDelimiter Character that delimits fields
* @param string $fieldEnclosure Character that enclose fields
* @param string $encoding Encoding of the CSV file to be read
* @param string $endOfLineDelimiter End of line delimiter
* @param \Box\Spout\Reader\CSV\ReaderOptions $options
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
*/
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
public function __construct($filePointer, $options, $globalFunctionsHelper)
{
$this->filePointer = $filePointer;
$this->fieldDelimiter = $fieldDelimiter;
$this->fieldEnclosure = $fieldEnclosure;
$this->encoding = $encoding;
$this->inputEOLDelimiter = $endOfLineDelimiter;
$this->fieldDelimiter = $options->getFieldDelimiter();
$this->fieldEnclosure = $options->getFieldEnclosure();
$this->encoding = $options->getEncoding();
$this->inputEOLDelimiter = $options->getEndOfLineCharacter();
$this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();
$this->globalFunctionsHelper = $globalFunctionsHelper;
$this->encodingHelper = new EncodingHelper($globalFunctionsHelper);
@ -106,7 +106,7 @@ class RowIterator implements IteratorInterface
* Checks if current position is valid
* @link http://php.net/manual/en/iterator.valid.php
*
* @return boolean
* @return bool
*/
public function valid()
{
@ -114,7 +114,7 @@ class RowIterator implements IteratorInterface
}
/**
* Move forward to next element. Empty rows are skipped.
* Move forward to next element. Reads data for the next unprocessed row.
* @link http://php.net/manual/en/iterator.next.php
*
* @return void
@ -124,25 +124,48 @@ class RowIterator implements IteratorInterface
{
$this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
if ($this->hasReachedEndOfFile) {
return;
if (!$this->hasReachedEndOfFile) {
$this->readDataForNextRow();
}
}
/**
* @return void
* @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
*/
protected function readDataForNextRow()
{
do {
$rowData = $this->getNextUTF8EncodedRow();
$hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
} while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData));
} while ($this->shouldReadNextRow($rowData));
if ($rowData !== false) {
$this->rowDataBuffer = $rowData;
// str_replace will replace NULL values by empty strings
$this->rowDataBuffer = str_replace(null, null, $rowData);
$this->numReadRows++;
} else {
// If we reach this point, it means end of file was reached.
// This happens when the last lines are empty lines.
$this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
$this->hasReachedEndOfFile = true;
}
}
/**
 * Decides whether the reading loop has to fetch another row instead of returning the current one.
 * Another row is needed when:
 *  - nothing could be fetched and the end of the file has not been reached yet
 *  OR
 *  - the fetched line is empty and empty rows are not preserved
 *
 * @param array|bool $currentRowData Data fetched for the current row, FALSE if nothing could be fetched
 * @return bool Whether the data for the current row can be returned or if we need to keep reading
 */
protected function shouldReadNextRow($currentRowData)
{
    // Both checks are performed up front, regardless of the outcome of the first condition
    $endOfFileReached = $this->globalFunctionsHelper->feof($this->filePointer);
    $lineIsEmpty = $this->isEmptyLine($currentRowData);

    if ($currentRowData === false && !$endOfFileReached) {
        return true;
    }

    return (!$this->shouldPreserveEmptyRows && $lineIsEmpty);
}
/**
* Returns the next row, converted if necessary to UTF-8.
* As fgetcsv() does not manage correctly encoding for non UTF-8 data,
@ -154,7 +177,7 @@ class RowIterator implements IteratorInterface
protected function getNextUTF8EncodedRow()
{
$encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
if (false === $encodedRowData) {
if ($encodedRowData === false) {
return false;
}
@ -195,7 +218,7 @@ class RowIterator implements IteratorInterface
}
/**
* @param array $lineData Array containing the cells value for the line
* @param array|bool $lineData Array containing the cells value for the line
* @return bool Whether the given line is empty
*/
protected function isEmptyLine($lineData)

View File

@ -16,14 +16,12 @@ class Sheet implements SheetInterface
/**
* @param resource $filePointer Pointer to the CSV file to read
* @param string $fieldDelimiter Character that delimits fields
* @param string $fieldEnclosure Character that enclose fields
* @param string $encoding Encoding of the CSV file to be read
* @param \Box\Spout\Reader\CSV\ReaderOptions $options
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
*/
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper)
public function __construct($filePointer, $options, $globalFunctionsHelper)
{
$this->rowIterator = new RowIterator($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper);
$this->rowIterator = new RowIterator($filePointer, $options, $globalFunctionsHelper);
}
/**
@ -34,4 +32,31 @@ class Sheet implements SheetInterface
{
return $this->rowIterator;
}
/**
* @api
* @return int Index of the sheet
*/
public function getIndex()
{
return 0;
}
/**
* @api
* @return string Name of the sheet - empty string since CSV does not support that
*/
public function getName()
{
return '';
}
/**
* @api
* @return bool Always TRUE as there is only one sheet
*/
public function isActive()
{
return true;
}
}

View File

@ -20,14 +20,12 @@ class SheetIterator implements IteratorInterface
/**
* @param resource $filePointer
* @param string $fieldDelimiter Character that delimits fields
* @param string $fieldEnclosure Character that enclose fields
* @param string $encoding Encoding of the CSV file to be read
* @param \Box\Spout\Reader\CSV\ReaderOptions $options
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
*/
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper)
public function __construct($filePointer, $options, $globalFunctionsHelper)
{
$this->sheet = new Sheet($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineCharacter, $globalFunctionsHelper);
$this->sheet = new Sheet($filePointer, $options, $globalFunctionsHelper);
}
/**
@ -45,7 +43,7 @@ class SheetIterator implements IteratorInterface
* Checks if current position is valid
* @link http://php.net/manual/en/iterator.valid.php
*
* @return boolean
* @return bool
*/
public function valid()
{

View File

@ -0,0 +1,58 @@
<?php
namespace Box\Spout\Reader\Common;
/**
 * Class ReaderOptions
 * Options shared by all readers: date formatting and handling of empty rows.
 * Both options are disabled by default; setters return the object itself to allow chaining.
 *
 * @package Box\Spout\Reader\Common
 */
class ReaderOptions
{
    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
    protected $shouldFormatDates = false;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows = false;

    /**
     * Sets whether date/time values should be returned as PHP objects or be formatted as strings.
     *
     * @param bool $shouldFormatDates
     * @return ReaderOptions
     */
    public function setShouldFormatDates($shouldFormatDates)
    {
        $this->shouldFormatDates = $shouldFormatDates;

        return $this;
    }

    /**
     * @return bool Whether date/time values should be returned as PHP objects or be formatted as strings.
     */
    public function shouldFormatDates()
    {
        return $this->shouldFormatDates;
    }

    /**
     * Sets whether empty rows should be returned or skipped.
     *
     * @param bool $shouldPreserveEmptyRows
     * @return ReaderOptions
     */
    public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
    {
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;

        return $this;
    }

    /**
     * @return bool Whether empty rows should be returned or skipped.
     */
    public function shouldPreserveEmptyRows()
    {
        return $this->shouldPreserveEmptyRows;
    }
}

View File

@ -0,0 +1,152 @@
<?php
namespace Box\Spout\Reader\Common;
use Box\Spout\Reader\Wrapper\XMLReader;
/**
 * Class XMLProcessor
 * Drives the reading of an XML document: callbacks are registered for a given node name and node type,
 * and are invoked while the document is read, until one of them asks the processor to stop.
 *
 * @package Box\Spout\Reader\Common
 */
class XMLProcessor
{
    /* Node types */
    const NODE_TYPE_START = XMLReader::ELEMENT;
    const NODE_TYPE_END = XMLReader::END_ELEMENT;

    /* Keys associated to reflection attributes to invoke a callback */
    const CALLBACK_REFLECTION_METHOD = 'reflectionMethod';
    const CALLBACK_REFLECTION_OBJECT = 'reflectionObject';

    /* Values returned by the callbacks to indicate what the processor should do next */
    const PROCESSING_CONTINUE = 1;
    const PROCESSING_STOP = 2;

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var array Registered callbacks, indexed by the key built from the node name and node type */
    private $callbacks = [];

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
     */
    public function __construct($xmlReader)
    {
        $this->xmlReader = $xmlReader;
    }

    /**
     * Associates a callback to a node name and node type.
     * Registering again for the same name and type replaces the previous callback.
     *
     * @param string $nodeName A callback may be triggered when a node with this name is read
     * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END]
     * @param callable $callback Callback to execute when the read node has the given name and type
     * @return XMLProcessor
     */
    public function registerCallback($nodeName, $nodeType, $callback)
    {
        $key = $this->getCallbackKey($nodeName, $nodeType);
        $this->callbacks[$key] = $this->getInvokableCallbackData($callback);

        return $this;
    }

    /**
     * @param string $nodeName Name of the node
     * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END]
     * @return string Key used to store the associated callback
     */
    private function getCallbackKey($nodeName, $nodeType)
    {
        return $nodeName . $nodeType;
    }

    /**
     * The callback may target a "protected" function, which call_user_func() could not invoke from here.
     * Reflection is used instead, as it allows the invocation of "protected" functions.
     * As callbacks can be invoked many times, the reflection data is prepared once, at registration time.
     *
     * @param callable $callback Array reference to a callback: [OBJECT, METHOD_NAME]
     * @return array Associative array containing the elements needed to invoke the callback using Reflection
     */
    private function getInvokableCallbackData($callback)
    {
        $targetObject = $callback[0];
        $targetMethodName = $callback[1];

        $method = new \ReflectionMethod(get_class($targetObject), $targetMethodName);
        $method->setAccessible(true);

        return [
            self::CALLBACK_REFLECTION_METHOD => $method,
            self::CALLBACK_REFLECTION_OBJECT => $targetObject,
        ];
    }

    /**
     * Resumes the reading of the XML file where it was left off.
     * Stops whenever a callback indicates that reading should stop or at the end of the file.
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException
     */
    public function readUntilStopped()
    {
        while ($this->xmlReader->read()) {
            $currentNodeType = $this->xmlReader->nodeType;
            $callbackData = $this->getRegisteredCallbackData(
                $this->xmlReader->name,
                $this->xmlReader->localName,
                $currentNodeType
            );

            if ($callbackData === null) {
                // nothing registered for this node, move on to the next one
                continue;
            }

            $response = $this->invokeCallback($callbackData, [$this->xmlReader]);
            if ($response === self::PROCESSING_STOP) {
                return;
            }
        }
    }

    /**
     * Looks up the callback registered for the given node. With prefixed nodes, a callback registered
     * with the prefixed name (e.g. "x:worksheet") takes precedence over a callback registered
     * with the un-prefixed name (e.g. "worksheet").
     *
     * @param string $nodeNamePossiblyWithPrefix Name of the node, possibly prefixed
     * @param string $nodeNameWithoutPrefix Name of the same node, un-prefixed
     * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END]
     * @return array|null Callback data to be used for execution when a node of the given name/type is read or NULL if none found
     */
    private function getRegisteredCallbackData($nodeNamePossiblyWithPrefix, $nodeNameWithoutPrefix, $nodeType)
    {
        // isset is used for all lookups because it is way faster than array_key_exists
        if ($nodeNamePossiblyWithPrefix !== $nodeNameWithoutPrefix) {
            $prefixedKey = $this->getCallbackKey($nodeNamePossiblyWithPrefix, $nodeType);
            if (isset($this->callbacks[$prefixedKey])) {
                return $this->callbacks[$prefixedKey];
            }
        }

        $unPrefixedKey = $this->getCallbackKey($nodeNameWithoutPrefix, $nodeType);
        if (isset($this->callbacks[$unPrefixedKey])) {
            return $this->callbacks[$unPrefixedKey];
        }

        return null;
    }

    /**
     * @param array $callbackData Associative array containing data to invoke the callback using Reflection
     * @param array $args Arguments to pass to the callback
     * @return int Callback response
     */
    private function invokeCallback($callbackData, $args)
    {
        $method = $callbackData[self::CALLBACK_REFLECTION_METHOD];
        $target = $callbackData[self::CALLBACK_REFLECTION_OBJECT];

        return $method->invokeArgs($target, $args);
    }
}

View File

@ -26,7 +26,7 @@ class CellValueFormatter
const XML_NODE_A = 'text:a';
const XML_NODE_SPAN = 'text:span';
/** Definition of XML attribute used to parse data */
/** Definition of XML attributes used to parse data */
const XML_ATTRIBUTE_TYPE = 'office:value-type';
const XML_ATTRIBUTE_VALUE = 'office:value';
const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value';

View File

@ -0,0 +1,51 @@
<?php
namespace Box\Spout\Reader\ODS\Helper;
use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\Wrapper\XMLReader;
/**
 * Class SettingsHelper
 * Extracts data from the "settings.xml" file found inside the file to be read.
 *
 * @package Box\Spout\Reader\ODS\Helper
 */
class SettingsHelper
{
    const SETTINGS_XML_FILE_PATH = 'settings.xml';

    /** Definition of XML nodes name and attribute used to parse settings data */
    const XML_NODE_CONFIG_ITEM = 'config:config-item';
    const XML_ATTRIBUTE_CONFIG_NAME = 'config:name';
    const XML_ATTRIBUTE_VALUE_ACTIVE_TABLE = 'ActiveTable';

    /**
     * Goes through the "config:config-item" nodes of the settings file and returns the content
     * of the first one whose "config:name" attribute is "ActiveTable".
     *
     * @param string $filePath Path of the file to be read
     * @return string|null Name of the sheet that was defined as active or NULL if none found
     */
    public function getActiveSheetName($filePath)
    {
        $settingsReader = new XMLReader();
        $isOpened = $settingsReader->openFileInZip($filePath, self::SETTINGS_XML_FILE_PATH);
        if ($isOpened === false) {
            return null;
        }

        $sheetName = null;

        try {
            while ($settingsReader->readUntilNodeFound(self::XML_NODE_CONFIG_ITEM)) {
                $configName = $settingsReader->getAttribute(self::XML_ATTRIBUTE_CONFIG_NAME);
                if ($configName !== self::XML_ATTRIBUTE_VALUE_ACTIVE_TABLE) {
                    continue;
                }

                $sheetName = $settingsReader->readString();
                break;
            }
        } catch (XMLProcessingException $exception) {
            // do nothing: processing errors are deliberately ignored, NULL (or what was found) is returned
        }

        $settingsReader->close();

        return $sheetName;
    }
}

View File

@ -19,6 +19,19 @@ class Reader extends AbstractReader
/** @var SheetIterator To iterate over the ODS sheets */
protected $sheetIterator;
/**
 * Gives access to the options object of this reader.
 * The object is created the first time it is requested and reused afterwards.
 *
 * @return ReaderOptions
 */
protected function getOptions()
{
    if (isset($this->options)) {
        return $this->options;
    }

    $this->options = new ReaderOptions();

    return $this->options;
}
/**
* Returns whether stream wrappers are supported
*
@ -42,7 +55,7 @@ class Reader extends AbstractReader
$this->zip = new \ZipArchive();
if ($this->zip->open($filePath) === true) {
$this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates);
$this->sheetIterator = new SheetIterator($filePath, $this->getOptions());
} else {
throw new IOException("Could not open $filePath for reading.");
}
@ -53,7 +66,7 @@ class Reader extends AbstractReader
*
* @return SheetIterator To iterate over sheets
*/
public function getConcreteSheetIterator()
protected function getConcreteSheetIterator()
{
return $this->sheetIterator;
}

View File

@ -0,0 +1,14 @@
<?php
namespace Box\Spout\Reader\ODS;
/**
 * Class ReaderOptions
 * This class is used to customize the ODS reader's behavior.
 * It only inherits the common reader options and does not declare any option of its own.
 *
 * @package Box\Spout\Reader\ODS
 */
class ReaderOptions extends \Box\Spout\Reader\Common\ReaderOptions
{
    // No extra options: everything is inherited from the parent class
}

View File

@ -8,6 +8,7 @@ use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\IteratorInterface;
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\Common\XMLProcessor;
/**
* Class RowIterator
@ -23,19 +24,26 @@ class RowIterator implements IteratorInterface
const MAX_COLUMNS_EXCEL = 16384;
/** Definition of XML attribute used to parse data */
const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';
/** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
/** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
protected $xmlProcessor;
/** @var bool Whether empty rows should be returned or skipped */
protected $shouldPreserveEmptyRows;
/** @var Helper\CellValueFormatter Helper to format cell values */
protected $cellValueFormatter;
/** @var bool Whether the iterator has already been rewound once */
protected $hasAlreadyBeenRewound = false;
/** @var int Number of read rows */
protected $numReadRows = 0;
/** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */
protected $currentlyProcessedRowData = [];
/** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
protected $rowDataBuffer = null;
@ -43,14 +51,41 @@ class RowIterator implements IteratorInterface
/** @var bool Indicates whether all rows have been read */
protected $hasReachedEndOfFile = false;
/** @var int Last row index processed (one-based) */
protected $lastRowIndexProcessed = 0;
/** @var int Row index to be processed next (one-based) */
protected $nextRowIndexToBeProcessed = 1;
/** @var mixed|null Value of the last processed cell (because when reading cell at column N+1, cell N is processed) */
protected $lastProcessedCellValue = null;
/** @var int Number of times the last processed row should be repeated */
protected $numRowsRepeated = 1;
/** @var int Number of times the last cell value should be copied to the cells on its right */
protected $numColumnsRepeated = 1;
/** @var bool Whether at least one cell has been read for the row currently being processed */
protected $hasAlreadyReadOneCellInCurrentRow = false;
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Reader\ODS\ReaderOptions $options Reader's current options
*/
public function __construct($xmlReader, $shouldFormatDates)
public function __construct($xmlReader, $options)
{
$this->xmlReader = $xmlReader;
$this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
// Both settings are read once from the options object: whether empty rows are kept,
// and whether dates are formatted (forwarded to the cell value formatter)
$this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();
$this->cellValueFormatter = new CellValueFormatter($options->shouldFormatDates());
// Register all callbacks to process different nodes when reading the XML file
// (row start, cell start, row end and table end each get a dedicated handler method of this class)
$this->xmlProcessor = new XMLProcessor($this->xmlReader);
$this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
$this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
$this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
$this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
}
/**
@ -71,7 +106,8 @@ class RowIterator implements IteratorInterface
}
$this->hasAlreadyBeenRewound = true;
$this->numReadRows = 0;
$this->lastRowIndexProcessed = 0;
$this->nextRowIndexToBeProcessed = 1;
$this->rowDataBuffer = null;
$this->hasReachedEndOfFile = false;
@ -82,7 +118,7 @@ class RowIterator implements IteratorInterface
* Checks if current position is valid
* @link http://php.net/manual/en/iterator.valid.php
*
* @return boolean
* @return bool
*/
public function valid()
{
@ -99,77 +135,156 @@ class RowIterator implements IteratorInterface
*/
public function next()
{
$rowData = [];
$cellValue = null;
$numColumnsRepeated = 1;
$numCellsRead = 0;
$hasAlreadyReadOneCell = false;
// Only read more XML when the next row is not covered by the row read last:
// while a repeated row is being served, the row data buffer is left untouched.
if ($this->doesNeedDataForNextRowToBeProcessed()) {
$this->readDataForNextRow();
}
// One-based index of the row now exposed by the iterator; incremented on every call
$this->lastRowIndexProcessed++;
}
/**
 * Tells whether more XML has to be read before the next row can be served.
 *
 * Reading is required in two situations:
 *   - no row has been processed so far (the index of the last processed row is still 0)
 *   - the last processed row is the one located right before the next row to be processed,
 *     meaning that all the repetitions of the previously read row have been consumed
 *
 * @return bool TRUE if data must be read for the next row, FALSE if the data already read still applies
 */
protected function doesNeedDataForNextRowToBeProcessed()
{
    // Nothing has been processed yet: the first row always has to be read
    if ($this->lastRowIndexProcessed === 0) {
        return true;
    }

    $indexOfRowBeforeNextOne = $this->nextRowIndexToBeProcessed - 1;

    return ($this->lastRowIndexProcessed === $indexOfRowBeforeNextOne);
}
/**
* @return void
* @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
* @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
*/
protected function readDataForNextRow()
{
// Start from an empty row; the cell and row callbacks append the cell values to this array
$this->currentlyProcessedRowData = [];
try {
while ($this->xmlReader->read()) {
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
// Start of a cell description
$currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();
$node = $this->xmlReader->expand();
$currentCellValue = $this->getCellValue($node);
// process cell N only after having read cell N+1 (see below why)
if ($hasAlreadyReadOneCell) {
for ($i = 0; $i < $numColumnsRepeated; $i++) {
$rowData[] = $cellValue;
}
}
$cellValue = $currentCellValue;
$numColumnsRepeated = $currentNumColumnsRepeated;
$numCellsRead++;
$hasAlreadyReadOneCell = true;
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
// End of the row description
$isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue));
if ($isEmptyRow) {
// skip empty rows
$this->next();
return;
}
// Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
// The current count of read columns is determined by counting the values in $rowData.
// This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
// with a number-columns-repeated value equals to the number of (supported columns - used columns).
// In Excel, the number of supported columns is 16384, but we don't want to return rows with
// always 16384 cells.
if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
for ($i = 0; $i < $numColumnsRepeated; $i++) {
$rowData[] = $cellValue;
}
$this->numReadRows++;
}
break;
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
// The closing "</table:table>" marks the end of the file
$this->hasReachedEndOfFile = true;
break;
}
}
// Hands the reading over to the XML processor and its registered callbacks.
// Presumably returns once a callback answers XMLProcessor::PROCESSING_STOP
// (end of a non-skipped row, or end of the table) - confirm against XMLProcessor.
$this->xmlProcessor->readUntilStopped();
} catch (XMLProcessingException $exception) {
throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
}
// Publish the collected cells through the row data buffer
$this->rowDataBuffer = $rowData;
$this->rowDataBuffer = $this->currentlyProcessedRowData;
}
/**
 * Callback invoked when a "<table:table-row>" starting node is encountered.
 * Stores how many times the row is repeated and clears the per-row cell tracking state.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
 * @return int A return code that indicates what action should the processor take next
 */
protected function processRowStartingNode($xmlReader)
{
    $rowRepeatCount = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

    // A new row begins: forget everything that was tracked for the cells of the previous one
    $this->numColumnsRepeated = 1;
    $this->lastProcessedCellValue = null;
    $this->hasAlreadyReadOneCellInCurrentRow = false;

    $this->numRowsRepeated = $rowRepeatCount;

    return XMLProcessor::PROCESSING_CONTINUE;
}
/**
 * Callback invoked when a "<table:table-cell>" starting node is encountered.
 * The cell that was read previously gets added to the row data, while the cell
 * the reader is positioned on is only remembered, to be handled later on.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
 * @return int A return code that indicates what action should the processor take next
 */
protected function processCellStartingNode($xmlReader)
{
    $repeatCountForThisCell = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

    // NOTE: expand() will automatically decode all XML entities of the child nodes
    $valueOfThisCell = $this->getCellValue($xmlReader->expand());

    // Cell N is only added once cell N+1 has been reached: the last cell of a row
    // needs a special treatment, which happens when the end of the row is processed.
    if ($this->hasAlreadyReadOneCellInCurrentRow) {
        $remainingCopies = $this->numColumnsRepeated;
        while ($remainingCopies-- > 0) {
            $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue;
        }
    }

    // The current cell becomes the pending one
    $this->numColumnsRepeated = $repeatCountForThisCell;
    $this->lastProcessedCellValue = $valueOfThisCell;
    $this->hasAlreadyReadOneCellInCurrentRow = true;

    return XMLProcessor::PROCESSING_CONTINUE;
}
/**
 * Callback invoked when a "<table:table-row>" ending node is encountered.
 * Finalizes the data of the row: handles the pending last cell, accounts for repeated rows
 * and decides whether the row must be skipped or handed over.
 *
 * @return int A return code that indicates what action should the processor take next
 */
protected function processRowEndingNode()
{
    $rowIsEmpty = $this->isEmptyRow($this->currentlyProcessedRowData, $this->lastProcessedCellValue);

    // Empty rows are dropped, unless they were asked to be preserved
    if ($rowIsEmpty && !$this->shouldPreserveEmptyRows) {
        return XMLProcessor::PROCESSING_CONTINUE;
    }

    // An empty row must not result in more than one cell
    if ($rowIsEmpty) {
        $numCopiesOfLastCell = 1;
    } else {
        $numCopiesOfLastCell = $this->numColumnsRepeated;
    }

    // Excel terminates its rows with an empty "<table:table-cell>" whose number-columns-repeated
    // value is equal to (supported columns - used columns), the number of supported columns being 16384.
    // When adding the last cell would bring the row to exactly that number of cells, the last cell
    // is such a trailing repeater: it is ignored, so that rows don't always end up with 16384 cells.
    // The number of columns read so far is the number of values in "$this->currentlyProcessedRowData".
    $numCellsWithLastCell = count($this->currentlyProcessedRowData) + $numCopiesOfLastCell;
    $isTrailingCellRepeater = ($numCellsWithLastCell === self::MAX_COLUMNS_EXCEL);

    if (!$isTrailingCellRepeater) {
        while ($numCopiesOfLastCell-- > 0) {
            $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue;
        }
    }

    // Row N being repeated M times, the next row to be processed is row (N+M)
    $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

    // All the data needed for the row has been gathered: the buffer can now be populated
    return XMLProcessor::PROCESSING_STOP;
}
/**
 * Callback invoked when the "<table:table>" ending node is encountered.
 * Flags the end of the file and stops the processing.
 *
 * @return int A return code that indicates what action should the processor take next
 */
protected function processTableEndingNode()
{
    // Reaching "</table:table>" means that there is nothing left to read
    $this->hasReachedEndOfFile = true;

    return XMLProcessor::PROCESSING_STOP;
}
/**
 * Reads how many times the row described by the current node is repeated.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
 * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
 */
protected function getNumRowsRepeatedForCurrentNode($xmlReader)
{
    $attributeValue = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

    // Without the attribute, the row is present only once
    if ($attributeValue === null) {
        return 1;
    }

    return intval($attributeValue);
}
/**
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
* @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
*/
protected function getNumColumnsRepeatedForCurrentNode()
protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
{
$numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
$numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
// A null value means the attribute is missing: the cell then counts once.
// Otherwise the attribute value is converted to an integer.
return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1;
}
@ -185,14 +300,21 @@ class RowIterator implements IteratorInterface
}
/**
* empty() replacement that honours 0 as a valid value
* After finishing processing each cell, a row is considered empty if the row data contains
* no cells and the value of the last read cell is unset or a blank (whitespace-only) string.
* After finishing processing each cell, the last read cell is not part of the
* row data yet (as we still need to apply the "num-columns-repeated" attribute).
*
* @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value
* @return bool
* @param array $rowData
* @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell
* @return bool Whether the row is empty
*/
protected function isEmptyCellValue($value)
protected function isEmptyRow($rowData, $lastReadCellValue)
{
return (!isset($value) || trim($value) === '');
return (
// No cell has been added to the row data yet...
count($rowData) === 0 &&
// ... and the pending last cell holds nothing but whitespace (or nothing at all).
// NOTE(review): trim() receives the value as is; the documented type allows \DateTime and
// \DateInterval objects, which trim() cannot convert to a string - confirm this case is handled.
(!isset($lastReadCellValue) || trim($lastReadCellValue) === '')
);
}
/**
@ -214,7 +336,7 @@ class RowIterator implements IteratorInterface
*/
public function key()
{
return $this->numReadRows;
// One-based index of the last processed row; it is incremented on every call to next()
return $this->lastRowIndexProcessed;
}

View File

@ -25,17 +25,22 @@ class Sheet implements SheetInterface
/** @var string Name of the sheet */
protected $name;
/** @var bool Whether the sheet was the active one */
protected $isActive;
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
* @param bool $isSheetActive Whether the sheet was defined as active
* @param \Box\Spout\Reader\ODS\ReaderOptions $options Reader's current options
*/
public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName)
public function __construct($xmlReader, $sheetIndex, $sheetName, $isSheetActive, $options)
{
$this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates);
// The row iterator receives the whole options object, not a single flag
$this->rowIterator = new RowIterator($xmlReader, $options);
$this->index = $sheetIndex;
$this->name = $sheetName;
// Stored as given; exposed through isActive()
$this->isActive = $isSheetActive;
}
/**
@ -64,4 +69,13 @@ class Sheet implements SheetInterface
{
return $this->name;
}
/**
 * Tells whether this sheet is the active one, based on the value given at construction time.
 *
 * @api
 * @return bool Whether the sheet was defined as active
 */
public function isActive()
{
    $wasDefinedAsActive = $this->isActive;

    return $wasDefinedAsActive;
}
}

View File

@ -5,6 +5,7 @@ namespace Box\Spout\Reader\ODS;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\IteratorInterface;
use Box\Spout\Reader\ODS\Helper\SettingsHelper;
use Box\Spout\Reader\Wrapper\XMLReader;
/**
@ -24,8 +25,8 @@ class SheetIterator implements IteratorInterface
/** @var string $filePath Path of the file to be read */
protected $filePath;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var \Box\Spout\Reader\ODS\ReaderOptions Reader's current options */
protected $options;
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
@ -39,19 +40,25 @@ class SheetIterator implements IteratorInterface
/** @var int The index of the sheet being read (zero-based) */
protected $currentSheetIndex;
/** @var string The name of the sheet that was defined as active */
protected $activeSheetName;
/**
* @param string $filePath Path of the file to be read
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param \Box\Spout\Reader\ODS\ReaderOptions $options Reader's current options
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
*/
public function __construct($filePath, $shouldFormatDates)
public function __construct($filePath, $options)
{
$this->filePath = $filePath;
$this->shouldFormatDates = $shouldFormatDates;
$this->options = $options;
$this->xmlReader = new XMLReader();
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->escaper = \Box\Spout\Common\Escaper\ODS::getInstance();
// The name of the active sheet is looked up once, at construction time,
// and later compared against the name of each sheet that gets created
$settingsHelper = new SettingsHelper();
$this->activeSheetName = $settingsHelper->getActiveSheetName($filePath);
}
/**
@ -83,7 +90,7 @@ class SheetIterator implements IteratorInterface
* Checks if current position is valid
* @link http://php.net/manual/en/iterator.valid.php
*
* @return boolean
* @return bool
*/
public function valid()
{
@ -115,8 +122,27 @@ class SheetIterator implements IteratorInterface
{
$escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME);
$sheetName = $this->escaper->unescape($escapedSheetName);
$isActiveSheet = $this->isActiveSheet($sheetName, $this->currentSheetIndex, $this->activeSheetName);
return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex);
return new Sheet($this->xmlReader, $this->currentSheetIndex, $sheetName, $isActiveSheet, $this->options);
}
/**
 * Tells whether the given sheet is the one that was defined as active.
 *
 * @param string $sheetName Name of the current sheet
 * @param int $sheetIndex Index of the current sheet
 * @param string|null $activeSheetName Name of the sheet that was defined as active or NULL if none defined
 * @return bool Whether the current sheet was defined as the active one
 */
private function isActiveSheet($sheetName, $sheetIndex, $activeSheetName)
{
    // The sheet is active when it bears the name of the defined active sheet
    if ($activeSheetName === $sheetName) {
        return true;
    }

    // Without any information about the active sheet, the first sheet is the active one
    $hasNoActiveSheetInfo = ($activeSheetName === null);

    return ($hasNoActiveSheetInfo && $sheetIndex === 0);
}
/**

View File

@ -1,175 +0,0 @@
<?php
namespace Box\Spout\Reader\Wrapper;
use Box\Spout\Reader\Exception\XMLProcessingException;
/**
 * Class SimpleXMLElement
 * Wrapper around the built-in SimpleXMLElement. This class does not extend \SimpleXMLElement
 * because its constructor is final... Instead, it is used as a passthrough.
 * @see \SimpleXMLElement
 *
 * @package Box\Spout\Reader\Wrapper
 */
class SimpleXMLElement
{
    use XMLInternalErrorsHelper;

    /** @var \SimpleXMLElement Instance of the wrapped SimpleXMLElement object */
    protected $simpleXMLElement;

    /**
     * Creates a new SimpleXMLElement object
     * @see \SimpleXMLElement::__construct
     *
     * @param string $xmlData A well-formed XML string
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException If the XML string is not well-formed
     */
    public function __construct($xmlData)
    {
        $this->useXMLInternalErrors();

        try {
            $this->simpleXMLElement = new \SimpleXMLElement($xmlData);
        } catch (\Exception $exception) {
            // if the data is invalid, the constructor will throw an Exception
            $this->resetXMLInternalErrorsSetting();
            throw new XMLProcessingException($this->getLastXMLErrorMessage());
        }

        $this->resetXMLInternalErrorsSetting();
    }

    /**
     * Returns the attribute for the given name.
     *
     * @param string $name Attribute name
     * @param string|null|void $namespace An optional namespace for the retrieved attributes
     * @return string|null The attribute value or NULL if attribute not found
     */
    public function getAttribute($name, $namespace = null)
    {
        // When a namespace is given, it is passed on as a prefix (second argument of attributes())
        $isPrefix = ($namespace !== null);
        $attributes = $this->simpleXMLElement->attributes($namespace, $isPrefix);
        $attributeValue = $attributes->{$name};

        return ($attributeValue !== null) ? (string) $attributeValue : null;
    }

    /**
     * Creates a prefix/ns context for the next XPath query
     * @see \SimpleXMLElement::registerXPathNamespace
     *
     * @param string $prefix The namespace prefix to use in the XPath query for the namespace given in "namespace".
     * @param string $namespace The namespace to use for the XPath query. This must match a namespace in
     *                          use by the XML document or the XPath query using "prefix" will not return any results.
     * @return bool TRUE on success or FALSE on failure.
     */
    public function registerXPathNamespace($prefix, $namespace)
    {
        return $this->simpleXMLElement->registerXPathNamespace($prefix, $namespace);
    }

    /**
     * Runs XPath query on XML data
     * @see \SimpleXMLElement::xpath
     *
     * @param string $path An XPath path
     * @return SimpleXMLElement[]|bool an array of SimpleXMLElement objects or FALSE in case of an error.
     */
    public function xpath($path)
    {
        $elements = $this->simpleXMLElement->xpath($path);

        if ($elements !== false) {
            $wrappedElements = [];
            foreach ($elements as $element) {
                // Wrapping serializes and re-parses the element: do it only once per element
                $wrappedElement = $this->wrapSimpleXMLElement($element);

                // Elements that could not be wrapped are left out
                if ($wrappedElement !== null) {
                    $wrappedElements[] = $wrappedElement;
                }
            }

            $elements = $wrappedElements;
        }

        return $elements;
    }

    /**
     * Wraps the given element into an instance of the wrapper
     *
     * @param \SimpleXMLElement $element Element to be wrapped
     * @return SimpleXMLElement|null The wrapped element or NULL if the given element is invalid
     */
    protected function wrapSimpleXMLElement(\SimpleXMLElement $element)
    {
        $wrappedElement = null;
        $elementAsXML = $element->asXML();

        // The wrapper is built from the XML string of the element
        if ($elementAsXML !== false) {
            $wrappedElement = new SimpleXMLElement($elementAsXML);
        }

        return $wrappedElement;
    }

    /**
     * Remove all nodes matching the given XPath query.
     * It does not map to any \SimpleXMLElement function.
     * Nothing is removed if the XPath query fails.
     *
     * @param string $path An XPath path
     * @return void
     */
    public function removeNodesMatchingXPath($path)
    {
        $nodesToRemove = $this->simpleXMLElement->xpath($path);

        // xpath() does not return an array in case of an error: there is nothing to iterate over
        if (!is_array($nodesToRemove)) {
            return;
        }

        foreach ($nodesToRemove as $nodeToRemove) {
            // Unsetting index 0 of the node removes the node itself from the document
            unset($nodeToRemove[0]);
        }
    }

    /**
     * Returns the first child matching the given tag name
     *
     * @param string $tagName
     * @return SimpleXMLElement|null The first child matching the tag name or NULL if none found
     */
    public function getFirstChildByTagName($tagName)
    {
        $doesElementExist = isset($this->simpleXMLElement->{$tagName});

        /** @var \SimpleXMLElement $realElement */
        $realElement = $this->simpleXMLElement->{$tagName};

        return $doesElementExist ? $this->wrapSimpleXMLElement($realElement) : null;
    }

    /**
     * Returns the immediate children.
     *
     * @return array The children
     */
    public function children()
    {
        $children = [];

        foreach ($this->simpleXMLElement->children() as $child) {
            $children[] = $this->wrapSimpleXMLElement($child);
        }

        return $children;
    }

    /**
     * @return string
     */
    public function __toString()
    {
        return $this->simpleXMLElement->__toString();
    }
}

View File

@ -1,6 +1,7 @@
<?php
namespace Box\Spout\Reader\Wrapper;
use DOMNode;
/**
@ -28,13 +29,10 @@ class XMLReader extends \XMLReader
$wasOpenSuccessful = false;
$realPathURI = $this->getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath);
// HHVM does not check if file exists within zip file
// @link https://github.com/facebook/hhvm/issues/5779
if ($this->isRunningHHVM()) {
if ($this->fileExistsWithinZip($realPathURI)) {
$wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET);
}
} else {
// We need to check first that the file we are trying to read really exist because:
// - PHP emits a warning when trying to open a file that does not exist.
// - HHVM does not check if file exists within zip file (@link https://github.com/facebook/hhvm/issues/5779)
if ($this->fileExistsWithinZip($realPathURI)) {
$wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET);
}
@ -54,16 +52,6 @@ class XMLReader extends \XMLReader
return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPath);
}
/**
 * Detects HHVM by checking whether the "HHVM_VERSION" constant is defined.
 *
 * @return bool TRUE if running on HHVM, FALSE otherwise
 */
protected function isRunningHHVM()
{
    $isHHVMVersionDefined = defined('HHVM_VERSION');

    return $isHHVMVersionDefined;
}
/**
* Returns whether the file at the given location exists
*
@ -176,4 +164,12 @@ class XMLReader extends \XMLReader
return ($this->nodeType === $nodeType && $currentNodeName === $nodeName);
}
/**
 * Gives access to the local name of the node the reader is currently positioned on.
 *
 * @return string The name of the current node, un-prefixed
 */
public function getCurrentNodeName()
{
    $unprefixedNodeName = $this->localName;

    return $unprefixedNodeName;
}
}

View File

@ -31,6 +31,9 @@ class CellHelper
*/
public static function fillMissingArrayIndexes($dataArray, $fillValue = '')
{
if (empty($dataArray)) {
return [];
}
$existingIndexes = array_keys($dataArray);
$newIndexes = array_fill_keys(range(0, max($existingIndexes)), $fillValue);

View File

@ -228,8 +228,8 @@ class CellValueFormatter
$dateObj->setTime($hours, $minutes, $seconds);
if ($this->shouldFormatDates) {
$styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
$styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
return $dateObj->format($phpDateFormat);
} else {
return $dateObj;
@ -257,8 +257,8 @@ class CellValueFormatter
$dateObj->modify('+' . $secondsRemainder . 'seconds');
if ($this->shouldFormatDates) {
$styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
$styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
return $dateObj->format($phpDateFormat);
} else {
return $dateObj;

View File

@ -62,7 +62,9 @@ class DateFormatHelper
public static function toPHPDateFormat($excelDateFormat)
{
// Remove brackets potentially present at the beginning of the format string
$dateFormat = preg_replace('/^(\[\$[^\]]+?\])/i', '', $excelDateFormat);
// and text portion of the format at the end of it (starting with ";")
// See §18.8.31 of ECMA-376 for more detail.
$dateFormat = preg_replace('/^(?:\[\$[^\]]+?\])?([^;]*).*/', '$1', $excelDateFormat);
// Double quotes are used to escape characters that must not be interpreted.
// For instance, ["Day " dd] should result in "Day 13" and we should not try to interpret "D", "a", "y"

View File

@ -4,7 +4,6 @@ namespace Box\Spout\Reader\XLSX\Helper;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\Wrapper\SimpleXMLElement;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyFactory;
use Box\Spout\Reader\XLSX\Helper\SharedStringsCaching\CachingStrategyInterface;
@ -23,6 +22,18 @@ class SharedStringsHelper
/** Main namespace for the sharedStrings.xml file */
const MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
/** Definition of XML nodes names used to parse data */
const XML_NODE_SST = 'sst';
const XML_NODE_SI = 'si';
const XML_NODE_R = 'r';
const XML_NODE_T = 't';
/** Definition of XML attributes used to parse data */
const XML_ATTRIBUTE_COUNT = 'count';
const XML_ATTRIBUTE_UNIQUE_COUNT = 'uniqueCount';
const XML_ATTRIBUTE_XML_SPACE = 'xml:space';
const XML_ATTRIBUTE_VALUE_PRESERVE = 'preserve';
/** @var string Path of the XLSX file being read */
protected $filePath;
@ -34,7 +45,7 @@ class SharedStringsHelper
/**
* @param string $filePath Path of the XLSX file being read
* @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored
* @param string|null|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored
*/
public function __construct($filePath, $tempFolder = null)
{
@ -69,8 +80,6 @@ class SharedStringsHelper
*
* The XML file can be really big with sheets containing a lot of data. That is why
* we need to use a XML reader that provides streaming like the XMLReader library.
* Please note that SimpleXML does not provide such a functionality but since it is faster
* and more handy to parse few XML nodes, it is used in combination with XMLReader for that purpose.
*
* @return void
* @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml can't be read
@ -79,11 +88,8 @@ class SharedStringsHelper
{
$xmlReader = new XMLReader();
$sharedStringIndex = 0;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
$sharedStringsFilePath = $this->getSharedStringsFilePath();
if ($xmlReader->open($sharedStringsFilePath) === false) {
if ($xmlReader->openFileInZip($this->filePath, self::SHARED_STRINGS_XML_FILE_PATH) === false) {
throw new IOException('Could not open "' . self::SHARED_STRINGS_XML_FILE_PATH . '".');
}
@ -91,58 +97,25 @@ class SharedStringsHelper
$sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
$this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);
$xmlReader->readUntilNodeFound('si');
while ($xmlReader->name === 'si') {
$node = $this->getSimpleXmlElementNodeFromXMLReader($xmlReader);
$node->registerXPathNamespace('ns', self::MAIN_NAMESPACE_FOR_SHARED_STRINGS_XML);
// removes nodes that should not be read, like the pronunciation of the Kanji characters
$cleanNode = $this->removeSuperfluousTextNodes($node);
// find all text nodes "t"; there can be multiple if the cell contains formatting
$textNodes = $cleanNode->xpath('//ns:t');
$textValue = '';
foreach ($textNodes as $nodeIndex => $textNode) {
if ($nodeIndex !== 0) {
// add a space between each "t" node
$textValue .= ' ';
}
if ($this->shouldPreserveWhitespace($textNode)) {
$textValue .= $textNode->__toString();
} else {
$textValue .= trim($textNode->__toString());
}
}
$unescapedTextValue = $escaper->unescape($textValue);
$this->cachingStrategy->addStringForIndex($unescapedTextValue, $sharedStringIndex);
$xmlReader->readUntilNodeFound(self::XML_NODE_SI);
while ($xmlReader->getCurrentNodeName() === self::XML_NODE_SI) {
$this->processSharedStringsItem($xmlReader, $sharedStringIndex);
$sharedStringIndex++;
// jump to the next 'si' tag
$xmlReader->next('si');
// jump to the next '<si>' tag
$xmlReader->next(self::XML_NODE_SI);
}
$this->cachingStrategy->closeCache();
} catch (XMLProcessingException $exception) {
throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]");
}
$this->cachingStrategy->closeCache();
$xmlReader->close();
}
/**
 * Builds the "zip://" URI pointing to the shared strings XML file inside the XLSX file.
 *
 * @return string The path to the shared strings XML file
 */
protected function getSharedStringsFilePath()
{
    // Format: zip://<path of the XLSX file>#<path of the file within the archive>
    return sprintf('zip://%s#%s', $this->filePath, self::SHARED_STRINGS_XML_FILE_PATH);
}
/**
* Returns the shared strings unique count, as specified in <sst> tag.
*
@ -152,19 +125,19 @@ class SharedStringsHelper
*/
protected function getSharedStringsUniqueCount($xmlReader)
{
$xmlReader->next('sst');
$xmlReader->next(self::XML_NODE_SST);
// Iterate over the "sst" elements to get the actual "sst ELEMENT" (skips any DOCTYPE)
while ($xmlReader->name === 'sst' && $xmlReader->nodeType !== XMLReader::ELEMENT) {
while ($xmlReader->getCurrentNodeName() === self::XML_NODE_SST && $xmlReader->nodeType !== XMLReader::ELEMENT) {
$xmlReader->read();
}
$uniqueCount = $xmlReader->getAttribute('uniqueCount');
$uniqueCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_UNIQUE_COUNT);
// some software do not add the "uniqueCount" attribute but only use the "count" one
// @see https://github.com/box/spout/issues/254
if ($uniqueCount === null) {
$uniqueCount = $xmlReader->getAttribute('count');
$uniqueCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_COUNT);
}
return ($uniqueCount !== null) ? intval($uniqueCount) : null;
@ -183,58 +156,56 @@ class SharedStringsHelper
}
/**
* Returns a SimpleXMLElement node from the current node in the given XMLReader instance.
* This is to simplify the parsing of the subtree.
* Processes the shared strings item XML node which the given XML reader is positioned on.
*
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement
* @throws \Box\Spout\Common\Exception\IOException If the current node cannot be read
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on a "<si>" node
* @param int $sharedStringIndex Index of the processed shared strings item
* @return void
*/
protected function getSimpleXmlElementNodeFromXMLReader($xmlReader)
protected function processSharedStringsItem($xmlReader, $sharedStringIndex)
{
$node = null;
try {
$node = new SimpleXMLElement($xmlReader->readOuterXml());
} catch (XMLProcessingException $exception) {
throw new IOException("The sharedStrings.xml file contains unreadable data [{$exception->getMessage()}].");
$sharedStringValue = '';
// NOTE: expand() will automatically decode all XML entities of the child nodes
$siNode = $xmlReader->expand();
// All "<t>" descendants are fetched, whatever their depth; the ones that
// must not be part of the value are filtered out one by one in the loop below
$textNodes = $siNode->getElementsByTagName(self::XML_NODE_T);
foreach ($textNodes as $textNode) {
if ($this->shouldExtractTextNodeValue($textNode)) {
$textNodeValue = $textNode->nodeValue;
$shouldPreserveWhitespace = $this->shouldPreserveWhitespace($textNode);
// The values of the kept text nodes are concatenated without any separator
$sharedStringValue .= ($shouldPreserveWhitespace) ? $textNodeValue : trim($textNodeValue);
}
}
return $node;
$this->cachingStrategy->addStringForIndex($sharedStringValue, $sharedStringIndex);
}
/**
* Removes nodes that should not be read, like the pronunciation of the Kanji characters.
* By keeping them, their text content would be added to the read string.
* Not all text nodes' values must be extracted.
* Some text nodes are part of a node describing the pronunciation for instance.
* We'll only consider the nodes whose parents are "<si>" or "<r>".
*
* @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $parentNode Parent node that may contain nodes to remove
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement Cleaned parent node
* @param \DOMElement $textNode Text node to check
* @return bool Whether the given text node's value must be extracted
*/
protected function removeSuperfluousTextNodes($parentNode)
protected function shouldExtractTextNodeValue($textNode)
{
$tagsToRemove = [
'rPh', // Pronunciation of the text
'pPr', // Paragraph Properties / Previous Paragraph Properties
'rPr', // Run Properties for the Paragraph Mark / Previous Run Properties for the Paragraph Mark
];
foreach ($tagsToRemove as $tagToRemove) {
$xpath = '//ns:' . $tagToRemove;
$parentNode->removeNodesMatchingXPath($xpath);
}
return $parentNode;
// The decision is based on the direct parent only; "localName" is the parent's tag name without namespace prefix
$parentTagName = $textNode->parentNode->localName;
return ($parentTagName === self::XML_NODE_SI || $parentTagName === self::XML_NODE_R);
}
/**
* If the text node has the attribute 'xml:space="preserve"', then preserve whitespace.
*
* @param \Box\Spout\Reader\Wrapper\SimpleXMLElement $textNode The text node element (<t>) whitespace may be preserved
* @param \DOMElement $textNode The text node element (<t>) whose whitespace may be preserved
* @return bool Whether whitespace should be preserved
*/
protected function shouldPreserveWhitespace($textNode)
{
$spaceValue = $textNode->getAttribute('space', 'xml');
// The attribute is looked up by its prefixed name ("xml:space")
$spaceValue = $textNode->getAttribute(self::XML_ATTRIBUTE_XML_SPACE);
// Any value other than "preserve" means that whitespace is not preserved
return ($spaceValue === self::XML_ATTRIBUTE_VALUE_PRESERVE);
}
/**

View File

@ -17,30 +17,43 @@ class SheetHelper
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
/** Definition of XML node names used to parse data */
const XML_NODE_WORKBOOK_VIEW = 'workbookView';
const XML_NODE_SHEET = 'sheet';
const XML_NODE_SHEETS = 'sheets';
const XML_NODE_RELATIONSHIP = 'Relationship';
/** Definition of XML attributes used to parse data */
const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab';
const XML_ATTRIBUTE_R_ID = 'r:id';
const XML_ATTRIBUTE_NAME = 'name';
const XML_ATTRIBUTE_ID = 'Id';
const XML_ATTRIBUTE_TARGET = 'Target';
/** @var string Path of the XLSX file being read */
protected $filePath;
/** @var \Box\Spout\Reader\XLSX\ReaderOptions Reader's current options */
protected $options;
/** @var \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings */
protected $sharedStringsHelper;
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/**
* @param string $filePath Path of the XLSX file being read
* @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
public function __construct($filePath, $options, $sharedStringsHelper, $globalFunctionsHelper)
{
$this->filePath = $filePath;
$this->options = $options;
$this->sharedStringsHelper = $sharedStringsHelper;
$this->globalFunctionsHelper = $globalFunctionsHelper;
$this->shouldFormatDates = $shouldFormatDates;
}
/**
@ -53,14 +66,20 @@ class SheetHelper
{
$sheets = [];
$sheetIndex = 0;
$activeSheetIndex = 0; // By default, the first sheet is active
$xmlReader = new XMLReader();
if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode('sheet')) {
$sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex);
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_WORKBOOK_VIEW)) {
// The "workbookView" node is located before "sheet" nodes, ensuring that
// the active sheet is known before parsing sheets data.
$activeSheetIndex = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB);
} else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_SHEET)) {
$isSheetActive = ($sheetIndex === $activeSheetIndex);
$sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex, $isSheetActive);
$sheetIndex++;
} else if ($xmlReader->isPositionedOnEndingNode('sheets')) {
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_SHEETS)) {
// stop reading once all sheets have been read
break;
}
@ -79,12 +98,13 @@ class SheetHelper
*
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
* @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based)
* @param bool $isSheetActive Whether this sheet was defined as active
* @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
*/
protected function getSheetFromSheetXMLNode($xmlReaderOnSheetNode, $sheetIndexZeroBased)
protected function getSheetFromSheetXMLNode($xmlReaderOnSheetNode, $sheetIndexZeroBased, $isSheetActive)
{
$sheetId = $xmlReaderOnSheetNode->getAttribute('r:id');
$escapedSheetName = $xmlReaderOnSheetNode->getAttribute('name');
$sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID);
$escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME);
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
@ -92,7 +112,11 @@ class SheetHelper
$sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName);
return new Sheet(
$this->filePath, $sheetDataXMLFilePath,
$sheetIndexZeroBased, $sheetName, $isSheetActive,
$this->options, $this->sharedStringsHelper
);
}
/**
@ -107,13 +131,13 @@ class SheetHelper
$xmlReader = new XMLReader();
if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode('Relationship')) {
$relationshipSheetId = $xmlReader->getAttribute('Id');
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) {
$relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID);
if ($relationshipSheetId === $sheetId) {
// In workbook.xml.rels, it is only "worksheets/sheet1.xml"
// In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
$sheetDataXMLFilePath = $xmlReader->getAttribute('Target');
$sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);
// sometimes, the sheet data file path already contains "/xl/"...
if (strpos($sheetDataXMLFilePath, '/xl/') !== 0) {

View File

@ -29,6 +29,8 @@ class StyleHelper
/** By convention, default style ID is 0 */
const DEFAULT_STYLE_ID = 0;
const NUMBER_FORMAT_GENERAL = 'General';
/**
* @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx
* @var array Mapping between built-in numFmtId and the associated format - for dates only
@ -51,18 +53,48 @@ class StyleHelper
/** @var string Path of the XLSX file being read */
protected $filePath;
/** @var array Array containing the IDs of built-in number formats indicating a date */
protected $builtinNumFmtIdIndicatingDates;
/** @var array Array containing a mapping NUM_FMT_ID => FORMAT_CODE */
protected $customNumberFormats;
/** @var array Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */
protected $stylesAttributes;
/** @var array Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. Used to avoid lots of recalculations */
protected $numFmtIdToIsDateFormatCache = [];
/**
* @param string $filePath Path of the XLSX file being read
*/
public function __construct($filePath)
{
    // Compute the list of built-in number format IDs that denote dates once,
    // so that later membership checks do not have to rebuild it.
    $builtInDateFormatIds = array_keys(self::$builtinNumFmtIdToNumFormatMapping);

    $this->builtinNumFmtIdIndicatingDates = $builtInDateFormatIds;
    $this->filePath = $filePath;
}
/**
* Returns whether the style with the given ID should consider
* numeric values as timestamps and format the cell as a date.
*
* @param int $styleId Zero-based style ID
* @return bool Whether a cell with the given style should display a date instead of a numeric value
*/
public function shouldFormatNumericValueAsDate($styleId)
{
    // The styles are fetched before any check, exactly as callers have come to expect.
    $allStylesAttributes = $this->getStylesAttributes();

    // The default style (0) never turns a numeric value into a date; only custom styles can.
    if ($styleId === self::DEFAULT_STYLE_ID) {
        return false;
    }

    // A style ID that is not known is treated as a plain numeric value.
    // isset() is used for the lookup as it is much faster than array_key_exists().
    if (!isset($allStylesAttributes[$styleId])) {
        return false;
    }

    return $this->doesStyleIndicateDate($allStylesAttributes[$styleId]);
}
/**
@ -125,9 +157,15 @@ class StyleHelper
{
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
$numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
$normalizedNumFmtId = ($numFmtId !== null) ? intval($numFmtId) : null;
$applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT);
$normalizedApplyNumberFormat = ($applyNumberFormat !== null) ? !!$applyNumberFormat : null;
$this->stylesAttributes[] = [
self::XML_ATTRIBUTE_NUM_FMT_ID => intval($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID)),
self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => !!($xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT)),
self::XML_ATTRIBUTE_NUM_FMT_ID => $normalizedNumFmtId,
self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $normalizedApplyNumberFormat,
];
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
// Once done reading "cellXfs" node's children
@ -161,86 +199,92 @@ class StyleHelper
}
/**
* Returns whether the style with the given ID should consider
* numeric values as timestamps and format the cell as a date.
*
* @param int $styleId Zero-based style ID
* @return bool Whether the cell with the given cell should display a date instead of a numeric value
* @param array $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId")
* @return bool Whether the style with the given attributes indicates that the number is a date
*/
public function shouldFormatNumericValueAsDate($styleId)
protected function doesStyleIndicateDate($styleAttributes)
{
$stylesAttributes = $this->getStylesAttributes();
// Default style (0) does not format numeric values as timestamps. Only custom styles do.
// Also if the style ID does not exist in the styles.xml file, format as numeric value.
// Using isset here because it is way faster than array_key_exists...
if ($styleId === self::DEFAULT_STYLE_ID || !isset($stylesAttributes[$styleId])) {
return false;
}
$styleAttributes = $stylesAttributes[$styleId];
$applyNumberFormat = $styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT];
if (!$applyNumberFormat) {
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
// A style may apply a date format if it has:
// - "applyNumberFormat" attribute not set to "false"
// - "numFmtId" attribute set
// This is a preliminary check, as having "numFmtId" set just means the style should apply a specific number format,
// but this is not necessarily a date.
if ($applyNumberFormat === false || $numFmtId === null) {
return false;
}
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
return $this->doesNumFmtIdIndicateDate($numFmtId);
}
/**
* Returns whether the number format ID indicates that the number is a date.
* The result is cached to avoid recomputing the same thing over and over, as
* "numFmtId" attributes can be shared between multiple styles.
*
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a timestamp
* @return bool Whether the number format ID indicates that the number is a date
*/
protected function doesNumFmtIdIndicateDate($numFmtId)
{
return (
!$this->doesNumFmtIdIndicateGeneralFormat($numFmtId) &&
(
if (!isset($this->numFmtIdToIsDateFormatCache[$numFmtId])) {
$formatCode = $this->getFormatCodeForNumFmtId($numFmtId);
$this->numFmtIdToIsDateFormatCache[$numFmtId] = (
$this->isNumFmtIdBuiltInDateFormat($numFmtId) ||
$this->isNumFmtIdCustomDateFormat($numFmtId)
)
);
$this->isFormatCodeCustomDateFormat($formatCode)
);
}
return $this->numFmtIdToIsDateFormatCache[$numFmtId];
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates the "General" format (0 by convention)
* @return string|null The custom number format or NULL if none defined for the given numFmtId
*/
protected function doesNumFmtIdIndicateGeneralFormat($numFmtId)
{
return ($numFmtId === 0);
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a timestamp
*/
protected function isNumFmtIdBuiltInDateFormat($numFmtId)
{
$builtInDateFormatIds = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
return in_array($numFmtId, $builtInDateFormatIds);
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a timestamp
*/
protected function isNumFmtIdCustomDateFormat($numFmtId)
protected function getFormatCodeForNumFmtId($numFmtId)
{
$customNumberFormats = $this->getCustomNumberFormats();
// Using isset here because it is way faster than array_key_exists...
if (!isset($customNumberFormats[$numFmtId])) {
return (isset($customNumberFormats[$numFmtId])) ? $customNumberFormats[$numFmtId] : null;
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a date
*/
protected function isNumFmtIdBuiltInDateFormat($numFmtId)
{
    // Membership test against the pre-computed list of built-in date format IDs.
    // The comparison is deliberately left non-strict (third argument "false").
    $isBuiltInDateFormat = in_array($numFmtId, $this->builtinNumFmtIdIndicatingDates, false);

    return $isBuiltInDateFormat;
}
/**
* @param string|null $formatCode
* @return bool Whether the given format code indicates that the number is a date
*/
protected function isFormatCodeCustomDateFormat($formatCode)
{
// if no associated format code or if using the default "General" format
if ($formatCode === null || strcasecmp($formatCode, self::NUMBER_FORMAT_GENERAL) === 0) {
return false;
}
$customNumberFormat = $customNumberFormats[$numFmtId];
return $this->isFormatCodeMatchingDateFormatPattern($formatCode);
}
/**
* @param string $formatCode
* @return bool Whether the given format code matches a date format pattern
*/
protected function isFormatCodeMatchingDateFormatPattern($formatCode)
{
// Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\")
$pattern = '((?<!\\\)\[.+?(?<!\\\)\])';
$customNumberFormat = preg_replace($pattern, '', $customNumberFormat);
$formatCode = preg_replace($pattern, '', $formatCode);
// custom date formats contain specific characters to represent the date:
// e - yy - m - d - h - s
@ -249,10 +293,10 @@ class StyleHelper
$hasFoundDateFormatCharacter = false;
foreach ($dateFormatCharacters as $dateFormatCharacter) {
// character not preceded by "\"
$pattern = '/(?<!\\\)' . $dateFormatCharacter . '/';
// character not preceded by "\" (case insensitive)
$pattern = '/(?<!\\\)' . $dateFormatCharacter . '/i';
if (preg_match($pattern, $customNumberFormat)) {
if (preg_match($pattern, $formatCode)) {
$hasFoundDateFormatCharacter = true;
break;
}
@ -266,21 +310,21 @@ class StyleHelper
* NOTE: It is assumed that the style DOES have a number format associated to it.
*
* @param int $styleId Zero-based style ID
* @return string The number format associated with the given style
* @return string The number format code associated with the given style
*/
public function getNumberFormat($styleId)
public function getNumberFormatCode($styleId)
{
$stylesAttributes = $this->getStylesAttributes();
$styleAttributes = $stylesAttributes[$styleId];
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) {
$numberFormat = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
$numberFormatCode = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
} else {
$customNumberFormats = $this->getCustomNumberFormats();
$numberFormat = $customNumberFormats[$numFmtId];
$numberFormatCode = $customNumberFormats[$numFmtId];
}
return $numberFormat;
return $numberFormatCode;
}
}

View File

@ -14,9 +14,6 @@ use Box\Spout\Reader\XLSX\Helper\SharedStringsHelper;
*/
class Reader extends AbstractReader
{
/** @var string Temporary folder where the temporary files will be created */
protected $tempFolder;
/** @var \ZipArchive */
protected $zip;
@ -27,13 +24,26 @@ class Reader extends AbstractReader
protected $sheetIterator;
/**
* Returns the reader's current options
*
* @return ReaderOptions
*/
protected function getOptions()
{
    // Options already available: hand them back untouched.
    if (isset($this->options)) {
        return $this->options;
    }

    // First access: lazily create the XLSX-specific options object and keep it.
    $this->options = new ReaderOptions();

    return $this->options;
}
/**
* @param string $tempFolder Temporary folder where the temporary files will be created
* @return Reader
*/
public function setTempFolder($tempFolder)
{
$this->tempFolder = $tempFolder;
$this->getOptions()->setTempFolder($tempFolder);
return $this;
}
@ -62,14 +72,14 @@ class Reader extends AbstractReader
$this->zip = new \ZipArchive();
if ($this->zip->open($filePath) === true) {
$this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->tempFolder);
$this->sharedStringsHelper = new SharedStringsHelper($filePath, $this->getOptions()->getTempFolder());
if ($this->sharedStringsHelper->hasSharedStrings()) {
// Extracts all the strings from the sheets for easy access in the future
$this->sharedStringsHelper->extractSharedStrings();
}
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates);
$this->sheetIterator = new SheetIterator($filePath, $this->getOptions(), $this->sharedStringsHelper, $this->globalFunctionsHelper);
} else {
throw new IOException("Could not open $filePath for reading.");
}
@ -80,7 +90,7 @@ class Reader extends AbstractReader
*
* @return SheetIterator To iterate over sheets
*/
public function getConcreteSheetIterator()
protected function getConcreteSheetIterator()
{
return $this->sheetIterator;
}

View File

@ -0,0 +1,33 @@
<?php
namespace Box\Spout\Reader\XLSX;
/**
* Class ReaderOptions
* This class is used to customize the reader's behavior
*
* @package Box\Spout\Reader\XLSX
*/
class ReaderOptions extends \Box\Spout\Reader\Common\ReaderOptions
{
    /** @var string|null Folder in which the temporary files will be created (null by default) */
    protected $tempFolder = null;

    /**
     * Returns the folder currently configured for temporary files.
     *
     * @return string|null Temporary folder where the temporary files will be created
     */
    public function getTempFolder()
    {
        $configuredFolder = $this->tempFolder;

        return $configuredFolder;
    }

    /**
     * Stores the folder to use for temporary files.
     * Returns the options object itself so that calls can be chained.
     *
     * @param string|null $tempFolder Temporary folder where the temporary files will be created
     * @return ReaderOptions
     */
    public function setTempFolder($tempFolder)
    {
        $this->tempFolder = $tempFolder;

        return $this;
    }
}

View File

@ -9,6 +9,7 @@ use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\XLSX\Helper\CellHelper;
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
use Box\Spout\Reader\XLSX\Helper\StyleHelper;
use Box\Spout\Reader\Common\XMLProcessor;
/**
* Class RowIterator
@ -26,6 +27,7 @@ class RowIterator implements IteratorInterface
/** Definition of XML attributes used to parse data */
const XML_ATTRIBUTE_REF = 'ref';
const XML_ATTRIBUTE_SPANS = 'spans';
const XML_ATTRIBUTE_ROW_INDEX = 'r';
const XML_ATTRIBUTE_CELL_INDEX = 'r';
/** @var string Path of the XLSX file being read */
@ -37,15 +39,24 @@ class RowIterator implements IteratorInterface
/** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
/** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
protected $xmlProcessor;
/** @var Helper\CellValueFormatter Helper to format cell values */
protected $cellValueFormatter;
/** @var Helper\StyleHelper $styleHelper Helper to work with styles */
protected $styleHelper;
/** @var int Number of read rows */
/**
* TODO: This variable can be deleted when row indices get preserved
* @var int Number of read rows
*/
protected $numReadRows = 0;
/** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */
protected $currentlyProcessedRowData = [];
/** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
protected $rowDataBuffer = null;
@ -55,16 +66,25 @@ class RowIterator implements IteratorInterface
/** @var int The number of columns the sheet has (0 meaning undefined) */
protected $numColumns = 0;
/** @var bool Whether empty rows should be returned or skipped */
protected $shouldPreserveEmptyRows;
/** @var int Last row index processed (one-based) */
protected $lastRowIndexProcessed = 0;
/** @var int Row index to be processed next (one-based) */
protected $nextRowIndexToBeProcessed = 0;
/** @var int Last column index processed (zero-based) */
protected $lastColumnIndexProcessed = -1;
/**
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
* @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates)
public function __construct($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper)
{
$this->filePath = $filePath;
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
@ -72,7 +92,17 @@ class RowIterator implements IteratorInterface
$this->xmlReader = new XMLReader();
$this->styleHelper = new StyleHelper($filePath);
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $options->shouldFormatDates());
$this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();
// Register all callbacks to process different nodes when reading the XML file
$this->xmlProcessor = new XMLProcessor($this->xmlReader);
$this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']);
$this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
$this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
$this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
$this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']);
}
/**
@ -98,12 +128,13 @@ class RowIterator implements IteratorInterface
{
$this->xmlReader->close();
$sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
if ($this->xmlReader->open($sheetDataFilePath) === false) {
if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) {
throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
}
$this->numReadRows = 0;
$this->lastRowIndexProcessed = 0;
$this->nextRowIndexToBeProcessed = 0;
$this->rowDataBuffer = null;
$this->hasReachedEndOfFile = false;
$this->numColumns = 0;
@ -115,7 +146,7 @@ class RowIterator implements IteratorInterface
* Checks if current position is valid
* @link http://php.net/manual/en/iterator.valid.php
*
* @return boolean
* @return bool
*/
public function valid()
{
@ -123,7 +154,7 @@ class RowIterator implements IteratorInterface
}
/**
* Move forward to next element. Empty rows will be skipped.
* Move forward to next element. Reads data describing the next unprocessed row.
* @link http://php.net/manual/en/iterator.next.php
*
* @return void
@ -132,69 +163,165 @@ class RowIterator implements IteratorInterface
*/
public function next()
{
$rowData = [];
$this->nextRowIndexToBeProcessed++;
if ($this->doesNeedDataForNextRowToBeProcessed()) {
$this->readDataForNextRow();
}
}
/**
* Returns whether we need data for the next row to be processed.
* We don't need to read data if:
* we have already read at least one row
* AND
* we need to preserve empty rows
* AND
* the last row that was read is not the row that needs to be processed
* (i.e. if we need to return empty rows)
*
* @return bool Whether we need data for the next row to be processed.
*/
protected function doesNeedDataForNextRowToBeProcessed()
{
    // Nothing has been read so far (index still 0): data must be read.
    if ($this->lastRowIndexProcessed === 0) {
        return true;
    }

    // Empty rows are not preserved: every step forward reads new data.
    if (!$this->shouldPreserveEmptyRows) {
        return true;
    }

    // Empty rows are preserved: only read when the last row read lies
    // before the row that has to be processed next.
    return ($this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed);
}
/**
* @return void
* @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
* @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
*/
protected function readDataForNextRow()
{
$this->currentlyProcessedRowData = [];
try {
while ($this->xmlReader->read()) {
if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) {
// Read dimensions of the sheet
$dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
if (preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
$lastCellIndex = $matches[1];
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
}
} else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
// Start of the row description
// Reset index of the last processed column
$this->lastColumnIndexProcessed = -1;
// Read spans info if present
$numberOfColumnsForRow = $this->numColumns;
$spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
if ($spans) {
list(, $numberOfColumnsForRow) = explode(':', $spans);
$numberOfColumnsForRow = intval($numberOfColumnsForRow);
}
$rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
} else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
// Start of a cell description
$currentColumnIndex = $this->getCellIndex($this->xmlReader);
$node = $this->xmlReader->expand();
$rowData[$currentColumnIndex] = $this->getCellValue($node);
$this->lastColumnIndexProcessed = $currentColumnIndex;
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
// End of the row description
// If needed, we fill the empty cells
$rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
$this->numReadRows++;
break;
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) {
// The closing "</worksheet>" marks the end of the file
$this->hasReachedEndOfFile = true;
break;
}
}
$this->xmlProcessor->readUntilStopped();
} catch (XMLProcessingException $exception) {
throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
}
$this->rowDataBuffer = $rowData;
$this->rowDataBuffer = $this->currentlyProcessedRowData;
}
/**
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" tag
* @return int
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
* @return int A return code that indicates what action should the processor take next
*/
protected function processDimensionStartingNode($xmlReader)
{
    // The "ref" attribute describes the sheet dimensions,
    // for instance 'A1:M13' (or only 'A1' for an empty sheet).
    $sheetDimension = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF);

    $rangeParts = [];
    $isCellRange = preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $sheetDimension, $rangeParts);

    if ($isCellRange) {
        // The captured group is the last cell of the range; its column index is
        // zero-based, hence the "+ 1" to get a number of columns.
        $lastCellIndex = $rangeParts[1];
        $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
    }

    return XMLProcessor::PROCESSING_CONTINUE;
}
/**
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
* @return int A return code that indicates what action should the processor take next
*/
protected function processRowStartingNode($xmlReader)
{
    // A new row starts: no column has been processed for it yet
    $this->lastColumnIndexProcessed = -1;

    // Mark the last processed row as the one currently being read
    $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

    // Without "spans" info, the row is as wide as the sheet dimension
    $numberOfColumnsForRow = $this->numColumns;

    // "spans" is a space-separated list of "first:last" column ranges,
    // for instance '1:5' but also '1:3 5:7'.
    $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS);
    if ($spans) {
        // The row must be wide enough for the largest upper bound of all ranges.
        // Entries without a ":" separator are ignored instead of raising a notice.
        $numberOfColumnsForRow = 0;
        foreach (preg_split('/\s+/', trim($spans)) as $span) {
            $spanBounds = explode(':', $span);
            if (isset($spanBounds[1])) {
                $numberOfColumnsForRow = max($numberOfColumnsForRow, intval($spanBounds[1]));
            }
        }
    }

    // Pre-fill the row with empty values so that cells missing from the XML still get a value
    $this->currentlyProcessedRowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];

    return XMLProcessor::PROCESSING_CONTINUE;
}
/**
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
* @return int A return code that indicates what action should the processor take next
*/
protected function processCellStartingNode($xmlReader)
{
    $columnIndex = $this->getColumnIndex($xmlReader);

    // NOTE: expand() will automatically decode all XML entities of the child nodes
    $cellNode = $xmlReader->expand();
    $cellValue = $this->getCellValue($cellNode);

    // Store the value at its column position and remember which column was handled last
    $this->currentlyProcessedRowData[$columnIndex] = $cellValue;
    $this->lastColumnIndexProcessed = $columnIndex;

    return XMLProcessor::PROCESSING_CONTINUE;
}
/**
* @return int A return code that indicates what action should the processor take next
*/
protected function processRowEndingNode()
{
    // An empty row that does not have to be preserved is skipped:
    // the processor keeps reading until the next row is complete.
    $shouldSkipRow = (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData));
    if ($shouldSkipRow) {
        return XMLProcessor::PROCESSING_CONTINUE;
    }

    $this->numReadRows++;

    // When the number of columns is unknown (0), fill the gaps left by missing cells
    if ($this->numColumns === 0) {
        $filledRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData);
        $this->currentlyProcessedRowData = $filledRowData;
    }

    // All the data for the row is now available, the processor can stop
    // so that the buffer can be populated
    return XMLProcessor::PROCESSING_STOP;
}
/**
* @return int A return code that indicates what action should the processor take next
*/
protected function processWorksheetEndingNode()
{
    // Reaching the closing "</worksheet>" node means there is nothing left to read:
    // flag the end of the file and tell the processor to stop.
    $this->hasReachedEndOfFile = true;

    $nextProcessorAction = XMLProcessor::PROCESSING_STOP;

    return $nextProcessorAction;
}
/**
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
* @return int Row index
* @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
*/
protected function getCellIndex($xmlReader)
protected function getRowIndex($xmlReader)
{
// Get "r" attribute if present (from something like <row r="3"...>
$currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);
return ($currentRowIndex !== null) ?
intval($currentRowIndex) :
$this->lastRowIndexProcessed + 1;
}
/**
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
* @return int Column index
* @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
*/
protected function getColumnIndex($xmlReader)
{
// Get "r" attribute if present (from something like <c r="A1"...>
$currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
@ -216,25 +343,53 @@ class RowIterator implements IteratorInterface
}
/**
* Return the current element, from the buffer.
* @param array $rowData
* @return bool Whether the given row is empty
*/
protected function isEmptyRow($rowData)
{
    // An empty row is made of exactly one cell holding the empty string ([''] ).
    // The cell VALUE has to be inspected: key() would return the cell's index,
    // an integer for row data keyed by column index, which is never identical to ''.
    // reset() works on the local copy of the array and returns its first value.
    return (count($rowData) === 1 && reset($rowData) === '');
}
/**
* Return the current element, either an empty row or from the buffer.
* @link http://php.net/manual/en/iterator.current.php
*
* @return array|null
*/
public function current()
{
return $this->rowDataBuffer;
$rowDataForRowToBeProcessed = $this->rowDataBuffer;
if ($this->shouldPreserveEmptyRows) {
// when we need to preserve empty rows, we will either return
// an empty row or the last row read. This depends whether the
// index of last row that was read matches the index of the last
// row whose value should be returned.
if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
// return empty row if mismatch between last processed row
// and the row that needs to be returned
$rowDataForRowToBeProcessed = [''];
}
}
return $rowDataForRowToBeProcessed;
}
/**
* Return the key of the current element
* Return the key of the current element. Here, the row index.
* @link http://php.net/manual/en/iterator.key.php
*
* @return int
*/
public function key()
{
return $this->numReadRows;
// TODO: This should return $this->nextRowIndexToBeProcessed
// but to avoid a breaking change, the return value for
// this function has been kept as the number of rows read.
return $this->shouldPreserveEmptyRows ?
$this->nextRowIndexToBeProcessed :
$this->numReadRows;
}

View File

@ -21,19 +21,24 @@ class Sheet implements SheetInterface
/** @var string Name of the sheet */
protected $name;
/** @var bool Whether the sheet was the active one */
protected $isActive;
/**
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper Helper to work with shared strings
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
* @param bool $isSheetActive Whether the sheet was defined as active
* @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
* @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName)
public function __construct($filePath, $sheetDataXMLFilePath, $sheetIndex, $sheetName, $isSheetActive, $options, $sharedStringsHelper)
{
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates);
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper);
$this->index = $sheetIndex;
$this->name = $sheetName;
$this->isActive = $isSheetActive;
}
/**
@ -62,4 +67,13 @@ class Sheet implements SheetInterface
{
return $this->name;
}
/**
* @api
* @return bool Whether the sheet was defined as active
*/
public function isActive()
{
// Plain accessor: returns the flag stored on this sheet by the constructor
// (its $isSheetActive argument).
return $this->isActive;
}
}

View File

@ -22,15 +22,15 @@ class SheetIterator implements IteratorInterface
/**
* @param string $filePath Path of the file to be read
* @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
*/
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
public function __construct($filePath, $options, $sharedStringsHelper, $globalFunctionsHelper)
{
// Fetch all available sheets
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates);
$sheetHelper = new SheetHelper($filePath, $options, $sharedStringsHelper, $globalFunctionsHelper);
$this->sheets = $sheetHelper->getSheets();
if (count($this->sheets) === 0) {
@ -53,7 +53,7 @@ class SheetIterator implements IteratorInterface
* Checks if current position is valid
* @link http://php.net/manual/en/iterator.valid.php
*
* @return boolean
* @return bool
*/
public function valid()
{

View File

@ -4,6 +4,8 @@ namespace Box\Spout\Writer;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Common\Exception\InvalidArgumentException;
use Box\Spout\Common\Exception\SpoutException;
use Box\Spout\Common\Helper\FileSystemHelper;
use Box\Spout\Writer\Exception\WriterAlreadyOpenedException;
use Box\Spout\Writer\Exception\WriterNotOpenedException;
use Box\Spout\Writer\Style\StyleBuilder;
@ -199,13 +201,23 @@ abstract class AbstractWriter implements WriterInterface
* @return AbstractWriter
* @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If this function is called before opening the writer
* @throws \Box\Spout\Common\Exception\IOException If unable to write data
* @throws \Box\Spout\Common\Exception\SpoutException If anything else goes wrong while writing data
*/
public function addRow(array $dataRow)
{
if ($this->isWriterOpened) {
// empty $dataRow should not add an empty line
if (!empty($dataRow)) {
$this->addRowToWriter($dataRow, $this->rowStyle);
try {
$this->addRowToWriter($dataRow, $this->rowStyle);
} catch (SpoutException $e) {
// if an exception occurs while writing data,
// close the writer and remove all files created so far.
$this->closeAndAttemptToCleanupAllFiles();
// re-throw the exception to alert developers of the error
throw $e;
}
}
} else {
throw new WriterNotOpenedException('The writer needs to be opened before adding row.');
@ -338,6 +350,10 @@ abstract class AbstractWriter implements WriterInterface
*/
public function close()
{
if (!$this->isWriterOpened) {
return;
}
$this->closeWriter();
if (is_resource($this->filePointer)) {
@ -346,5 +362,23 @@ abstract class AbstractWriter implements WriterInterface
$this->isWriterOpened = false;
}
}
/**
* Closes the writer and attempts to cleanup all files that were
* created during the writing process (temp files & final file).
*
* @return void
*/
private function closeAndAttemptToCleanupAllFiles()
{
    // Closing comes first: it is expected to take care of the temporary files.
    $this->close();

    // Nothing else to clean up when the final file never reached the disk.
    $finalFilePath = $this->outputFilePath;
    if (!$this->globalFunctionsHelper->file_exists($finalFilePath)) {
        return;
    }

    // The file system helper is rooted at the folder holding the output file,
    // and is then asked to delete that file.
    $cleanupHelper = new FileSystemHelper(dirname($finalFilePath));
    $cleanupHelper->deleteFile($finalFilePath);
}
}

View File

@ -122,7 +122,7 @@ abstract class AbstractStyleHelper
protected function applyWrapTextIfCellContainsNewLine($style, $dataRow)
{
// if the "wrap text" option is already set, no-op
if ($style->shouldWrapText()) {
if ($style->hasSetWrapText()) {
return $style;
}

View File

@ -46,6 +46,15 @@ class CellHelper
return self::$columnIndexToCellIndexCache[$originalColumnIndex];
}
/**
* @param $value
* @return bool Whether the given value is considered "empty"
*/
public static function isEmpty($value)
{
    // Strict membership test: only NULL and the empty string qualify.
    // Values such as 0, '0', false or [] are NOT considered empty.
    return in_array($value, [null, ''], true);
}
/**
* @param $value
* @return bool Whether the given value is a non empty string

View File

@ -16,6 +16,9 @@ abstract class AbstractWorkbook implements WorkbookInterface
/** @var bool Whether new sheets should be automatically created when the max rows limit per sheet is reached */
protected $shouldCreateNewSheetsAutomatically;
/** @var string Timestamp based unique ID identifying the workbook */
protected $internalId;
/** @var WorksheetInterface[] Array containing the workbook's sheets */
protected $worksheets = [];
@ -30,6 +33,7 @@ abstract class AbstractWorkbook implements WorkbookInterface
public function __construct($shouldCreateNewSheetsAutomatically, $defaultRowStyle)
{
// NOTE(review): $defaultRowStyle is not referenced in this constructor body;
// presumably it is consumed by the concrete workbook classes - confirm.
$this->shouldCreateNewSheetsAutomatically = $shouldCreateNewSheetsAutomatically;
// Time-based identifier for this workbook. The concrete workbooks pass it to the
// sheets they create, so that sheet name uniqueness is tracked per workbook.
$this->internalId = uniqid();
}
/**

View File

@ -7,7 +7,7 @@ use Box\Spout\Writer\Exception\InvalidSheetNameException;
/**
* Class Sheet
* External representation of a worksheet within a ODS file
* External representation of a worksheet
*
* @package Box\Spout\Writer\Common
*/
@ -21,12 +21,15 @@ class Sheet
/** @var array Invalid characters that cannot be contained in the sheet name */
private static $INVALID_CHARACTERS_IN_SHEET_NAME = ['\\', '/', '?', '*', ':', '[', ']'];
/** @var array Associative array [SHEET_INDEX] => [SHEET_NAME] keeping track of sheets' name to enforce uniqueness */
/** @var array Associative array [WORKBOOK_ID] => [[SHEET_INDEX] => [SHEET_NAME]] keeping track of sheets' name to enforce uniqueness per workbook */
protected static $SHEETS_NAME_USED = [];
/** @var int Index of the sheet, based on order in the workbook (zero-based) */
protected $index;
/** @var string ID of the sheet's associated workbook. Used to restrict sheet name uniqueness enforcement to a single workbook */
protected $associatedWorkbookId;
/** @var string Name of the sheet */
protected $name;
@ -35,10 +38,16 @@ class Sheet
/**
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $associatedWorkbookId ID of the sheet's associated workbook
*/
public function __construct($sheetIndex)
public function __construct($sheetIndex, $associatedWorkbookId)
{
$this->index = $sheetIndex;
$this->associatedWorkbookId = $associatedWorkbookId;
if (!isset(self::$SHEETS_NAME_USED[$associatedWorkbookId])) {
self::$SHEETS_NAME_USED[$associatedWorkbookId] = [];
}
$this->stringHelper = new StringHelper();
$this->setName(self::DEFAULT_SHEET_NAME_PREFIX . ($sheetIndex + 1));
}
@ -75,43 +84,58 @@ class Sheet
*/
public function setName($name)
{
if (!$this->isNameValid($name)) {
$errorMessage = "The sheet's name is invalid. It did not meet at least one of these requirements:\n";
$errorMessage .= " - It should not be blank\n";
$errorMessage .= " - It should not exceed 31 characters\n";
$errorMessage .= " - It should not contain these characters: \\ / ? * : [ or ]\n";
$errorMessage .= " - It should be unique";
throw new InvalidSheetNameException($errorMessage);
}
$this->throwIfNameIsInvalid($name);
$this->name = $name;
self::$SHEETS_NAME_USED[$this->index] = $name;
self::$SHEETS_NAME_USED[$this->associatedWorkbookId][$this->index] = $name;
return $this;
}
/**
* Returns whether the given sheet's name is valid.
* Throws an exception if the given sheet's name is not valid.
* @see Sheet::setName for validity rules.
*
* @param string $name
* @return bool TRUE if the name is valid, FALSE otherwise.
* @return void
* @throws \Box\Spout\Writer\Exception\InvalidSheetNameException If the sheet's name is invalid.
*/
protected function isNameValid($name)
protected function throwIfNameIsInvalid($name)
{
if (!is_string($name)) {
return false;
$actualType = gettype($name);
$errorMessage = "The sheet's name is invalid. It must be a string ($actualType given).";
throw new InvalidSheetNameException($errorMessage);
}
$failedRequirements = [];
$nameLength = $this->stringHelper->getStringLength($name);
return (
$nameLength > 0 &&
$nameLength <= self::MAX_LENGTH_SHEET_NAME &&
!$this->doesContainInvalidCharacters($name) &&
$this->isNameUnique($name) &&
!$this->doesStartOrEndWithSingleQuote($name)
);
if (!$this->isNameUnique($name)) {
$failedRequirements[] = 'It should be unique';
} else {
if ($nameLength === 0) {
$failedRequirements[] = 'It should not be blank';
} else {
if ($nameLength > self::MAX_LENGTH_SHEET_NAME) {
$failedRequirements[] = 'It should not exceed 31 characters';
}
if ($this->doesContainInvalidCharacters($name)) {
$failedRequirements[] = 'It should not contain these characters: \\ / ? * : [ or ]';
}
if ($this->doesStartOrEndWithSingleQuote($name)) {
$failedRequirements[] = 'It should not start or end with a single quote';
}
}
}
if (count($failedRequirements) !== 0) {
$errorMessage = "The sheet's name (\"$name\") is invalid. It did not respect these rules:\n - ";
$errorMessage .= implode("\n - ", $failedRequirements);
throw new InvalidSheetNameException($errorMessage);
}
}
/**
@ -148,7 +172,7 @@ class Sheet
*/
protected function isNameUnique($name)
{
foreach (self::$SHEETS_NAME_USED as $sheetIndex => $sheetName) {
foreach (self::$SHEETS_NAME_USED[$this->associatedWorkbookId] as $sheetIndex => $sheetName) {
if ($sheetIndex !== $this->index && $sheetName === $name) {
return false;
}

View File

@ -75,7 +75,7 @@ class FileSystemHelper extends \Box\Spout\Common\Helper\FileSystemHelper
*/
protected function createRootFolder()
{
$this->rootFolder = $this->createFolder($this->baseFolderPath, uniqid('ods'));
$this->rootFolder = $this->createFolder($this->baseFolderRealPath, uniqid('ods'));
return $this;
}

View File

@ -214,64 +214,143 @@ EOD;
*/
protected function getStyleSectionContent($style)
{
$defaultStyle = $this->getDefaultStyle();
$styleIndex = $style->getId() + 1; // 1-based
$content = '<style:style style:data-style-name="N0" style:family="table-cell" style:name="ce' . $styleIndex . '" style:parent-style-name="Default">';
if ($style->shouldApplyFont()) {
$content .= '<style:text-properties';
$fontColor = $style->getFontColor();
if ($fontColor !== $defaultStyle->getFontColor()) {
$content .= ' fo:color="#' . $fontColor . '"';
}
$fontName = $style->getFontName();
if ($fontName !== $defaultStyle->getFontName()) {
$content .= ' style:font-name="' . $fontName . '" style:font-name-asian="' . $fontName . '" style:font-name-complex="' . $fontName . '"';
}
$fontSize = $style->getFontSize();
if ($fontSize !== $defaultStyle->getFontSize()) {
$content .= ' fo:font-size="' . $fontSize . 'pt" style:font-size-asian="' . $fontSize . 'pt" style:font-size-complex="' . $fontSize . 'pt"';
}
if ($style->isFontBold()) {
$content .= ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"';
}
if ($style->isFontItalic()) {
$content .= ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"';
}
if ($style->isFontUnderline()) {
$content .= ' style:text-underline-style="solid" style:text-underline-type="single"';
}
if ($style->isFontStrikethrough()) {
$content .= ' style:text-line-through-style="solid"';
}
$content .= '/>';
}
if ($style->shouldWrapText()) {
$content .= '<style:table-cell-properties fo:wrap-option="wrap" style:vertical-align="automatic"/>';
}
if ($style->shouldApplyBorder()) {
$borderProperty = '<style:table-cell-properties %s />';
$borders = array_map(function (BorderPart $borderPart) {
return BorderHelper::serializeBorderPart($borderPart);
}, $style->getBorder()->getParts());
$content .= sprintf($borderProperty, implode(' ', $borders));
}
if ($style->shouldApplyBackgroundColor()) {
$content .= sprintf('
<style:table-cell-properties fo:background-color="#%s"/>', $style->getBackgroundColor());
}
$content .= $this->getTextPropertiesSectionContent($style);
$content .= $this->getTableCellPropertiesSectionContent($style);
$content .= '</style:style>';
return $content;
}
/**
* Returns the contents of the "<style:text-properties>" section, inside "<style:style>" section
*
* @param \Box\Spout\Writer\Style\Style $style
* @return string
*/
private function getTextPropertiesSectionContent($style)
{
    // The text properties only consist of the font definition: when the style
    // does not customize the font, there is nothing to output.
    return $style->shouldApplyFont()
        ? $this->getFontSectionContent($style)
        : '';
}
/**
* Returns the contents of the "<style:text-properties>" section, inside "<style:style>" section
*
* @param \Box\Spout\Writer\Style\Style $style
* @return string
*/
private function getFontSectionContent($style)
{
    $referenceStyle = $this->getDefaultStyle();

    // The element is assembled piece by piece: opening tag, then one fragment
    // per font property that needs to be declared, then the self-closing marker.
    $fragments = ['<style:text-properties'];

    // Color, name and size are only declared when they differ from the default style.
    $color = $style->getFontColor();
    if ($color !== $referenceStyle->getFontColor()) {
        $fragments[] = ' fo:color="#' . $color . '"';
    }

    $name = $style->getFontName();
    if ($name !== $referenceStyle->getFontName()) {
        $fragments[] = ' style:font-name="' . $name . '" style:font-name-asian="' . $name . '" style:font-name-complex="' . $name . '"';
    }

    $size = $style->getFontSize();
    if ($size !== $referenceStyle->getFontSize()) {
        $fragments[] = ' fo:font-size="' . $size . 'pt" style:font-size-asian="' . $size . 'pt" style:font-size-complex="' . $size . 'pt"';
    }

    // Decorations are simple on/off switches.
    if ($style->isFontBold()) {
        $fragments[] = ' fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"';
    }
    if ($style->isFontItalic()) {
        $fragments[] = ' fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"';
    }
    if ($style->isFontUnderline()) {
        $fragments[] = ' style:text-underline-style="solid" style:text-underline-type="single"';
    }
    if ($style->isFontStrikethrough()) {
        $fragments[] = ' style:text-line-through-style="solid"';
    }

    $fragments[] = '/>';

    return implode('', $fragments);
}
/**
* Returns the contents of the "<style:table-cell-properties>" section, inside "<style:style>" section
*
* @param \Box\Spout\Writer\Style\Style $style
* @return string
*/
private function getTableCellPropertiesSectionContent($style)
{
    // A "<style:style>" element should contain a single "<style:table-cell-properties>"
    // child. Wrapping, borders and background color are therefore gathered as
    // attributes of ONE element instead of being emitted as sibling elements.
    $attributes = [];

    if ($style->shouldWrapText()) {
        $attributes[] = $this->getWrapTextXMLContent();
    }

    if ($style->shouldApplyBorder()) {
        $attributes[] = $this->getBorderXMLContent($style);
    }

    if ($style->shouldApplyBackgroundColor()) {
        $attributes[] = $this->getBackgroundColorXMLContent($style);
    }

    // Ignore empty contributions (e.g. a border that has no part defined)
    $attributes = array_filter($attributes, function ($attribute) {
        return $attribute !== '';
    });

    // No cell property to declare: do not output an empty element
    if (count($attributes) === 0) {
        return '';
    }

    return '<style:table-cell-properties ' . implode(' ', $attributes) . '/>';
}
/**
 * Returns the attributes enabling text wrapping, to be added to the "<style:table-cell-properties>" element
 *
 * @return string
 */
private function getWrapTextXMLContent()
{
    return 'fo:wrap-option="wrap" style:vertical-align="automatic"';
}
/**
 * Returns the attributes defining the borders, to be added to the "<style:table-cell-properties>" element
 *
 * @param \Box\Spout\Writer\Style\Style $style
 * @return string Serialized border parts, separated by a space (empty string if the border has no part)
 */
private function getBorderXMLContent($style)
{
    $borders = array_map(function (BorderPart $borderPart) {
        return BorderHelper::serializeBorderPart($borderPart);
    }, $style->getBorder()->getParts());
    return implode(' ', $borders);
}
/**
 * Returns the attribute defining the background color, to be added to the "<style:table-cell-properties>" element
 *
 * @param \Box\Spout\Writer\Style\Style $style
 * @return string
 */
private function getBackgroundColorXMLContent($style)
{
    return sprintf(
        'fo:background-color="#%s"',
        $style->getBackgroundColor()
    );
}
}

View File

@ -69,7 +69,7 @@ class Workbook extends AbstractWorkbook
public function addNewSheet()
{
$newSheetIndex = count($this->worksheets);
$sheet = new Sheet($newSheetIndex);
$sheet = new Sheet($newSheetIndex, $this->internalId);
$sheetsContentTempFolder = $this->fileSystemHelper->getSheetsContentTempFolder();
$worksheet = new Worksheet($sheet, $sheetsContentTempFolder);

View File

@ -40,7 +40,7 @@ class Worksheet implements WorksheetInterface
/**
* @param \Box\Spout\Writer\Common\Sheet $externalSheet The associated "external" sheet
* @param string $worksheetFilesFolder Temporary folder where the files to create the XLSX will be stored
* @param string $worksheetFilesFolder Temporary folder where the files to create the ODS will be stored
* @throws \Box\Spout\Common\Exception\IOException If the sheet data file cannot be opened for writing
*/
public function __construct($externalSheet, $worksheetFilesFolder)

View File

@ -20,7 +20,7 @@ class Writer extends AbstractMultiSheetsWriter
/** @var string Temporary folder where the files to create the ODS will be stored */
protected $tempFolder;
/** @var Internal\Workbook The workbook for the XLSX file */
/** @var Internal\Workbook The workbook for the ODS file */
protected $book;
/**

View File

@ -106,6 +106,7 @@ class Style
/**
* @param Border $border
* @return Style
*/
public function setBorder(Border $border)
{
@ -115,7 +116,7 @@ class Style
}
/**
* @return boolean
* @return bool
*/
public function shouldApplyBorder()
{
@ -123,7 +124,7 @@ class Style
}
/**
* @return boolean
* @return bool
*/
public function isFontBold()
{
@ -142,7 +143,7 @@ class Style
}
/**
* @return boolean
* @return bool
*/
public function isFontItalic()
{
@ -161,7 +162,7 @@ class Style
}
/**
* @return boolean
* @return bool
*/
public function isFontUnderline()
{
@ -180,7 +181,7 @@ class Style
}
/**
* @return boolean
* @return bool
*/
public function isFontStrikethrough()
{
@ -261,7 +262,7 @@ class Style
}
/**
* @return boolean
* @return bool
*/
public function shouldWrapText()
{
@ -269,15 +270,24 @@ class Style
}
/**
* @param bool $shouldWrap Whether the text should be wrapped (defaults to true)
* @return Style
*/
public function setShouldWrapText()
public function setShouldWrapText($shouldWrap = true)
{
$this->shouldWrapText = true;
$this->shouldWrapText = $shouldWrap;
$this->hasSetWrapText = true;
return $this;
}
/**
* @return bool
*/
public function hasSetWrapText()
{
// Plain accessor for the $hasSetWrapText flag. setShouldWrapText() raises this flag
// whenever it is called, whatever value it is given.
return $this->hasSetWrapText;
}
/**
* @return bool Whether specific font properties should be applied
*/
@ -350,37 +360,67 @@ class Style
{
$mergedStyle = clone $this;
if (!$this->hasSetFontBold && $baseStyle->isFontBold()) {
$mergedStyle->setFontBold();
}
if (!$this->hasSetFontItalic && $baseStyle->isFontItalic()) {
$mergedStyle->setFontItalic();
}
if (!$this->hasSetFontUnderline && $baseStyle->isFontUnderline()) {
$mergedStyle->setFontUnderline();
}
if (!$this->hasSetFontStrikethrough && $baseStyle->isFontStrikethrough()) {
$mergedStyle->setFontStrikethrough();
}
if (!$this->hasSetFontSize && $baseStyle->getFontSize() !== self::DEFAULT_FONT_SIZE) {
$mergedStyle->setFontSize($baseStyle->getFontSize());
}
if (!$this->hasSetFontColor && $baseStyle->getFontColor() !== self::DEFAULT_FONT_COLOR) {
$mergedStyle->setFontColor($baseStyle->getFontColor());
}
if (!$this->hasSetFontName && $baseStyle->getFontName() !== self::DEFAULT_FONT_NAME) {
$mergedStyle->setFontName($baseStyle->getFontName());
}
if (!$this->hasSetWrapText && $baseStyle->shouldWrapText()) {
$mergedStyle->setShouldWrapText();
}
if (!$this->getBorder() && $baseStyle->shouldApplyBorder()) {
$mergedStyle->setBorder($baseStyle->getBorder());
}
if (!$this->hasSetBackgroundColor && $baseStyle->shouldApplyBackgroundColor()) {
$mergedStyle->setBackgroundColor($baseStyle->getBackgroundColor());
}
$this->mergeFontStyles($mergedStyle, $baseStyle);
$this->mergeOtherFontProperties($mergedStyle, $baseStyle);
$this->mergeCellProperties($mergedStyle, $baseStyle);
return $mergedStyle;
}
/**
* @param Style $styleToUpdate (passed as reference)
* @param Style $baseStyle
* @return void
*/
private function mergeFontStyles($styleToUpdate, $baseStyle)
{
    // Each decoration is inherited from the base style only when the current
    // style did not explicitly set it and the base style has it enabled.
    $inheritsBold = !$this->hasSetFontBold && $baseStyle->isFontBold();
    if ($inheritsBold) {
        $styleToUpdate->setFontBold();
    }

    $inheritsItalic = !$this->hasSetFontItalic && $baseStyle->isFontItalic();
    if ($inheritsItalic) {
        $styleToUpdate->setFontItalic();
    }

    $inheritsUnderline = !$this->hasSetFontUnderline && $baseStyle->isFontUnderline();
    if ($inheritsUnderline) {
        $styleToUpdate->setFontUnderline();
    }

    $inheritsStrikethrough = !$this->hasSetFontStrikethrough && $baseStyle->isFontStrikethrough();
    if ($inheritsStrikethrough) {
        $styleToUpdate->setFontStrikethrough();
    }
}
/**
* @param Style $styleToUpdate Style to update (passed as reference)
* @param Style $baseStyle
* @return void
*/
private function mergeOtherFontProperties($styleToUpdate, $baseStyle)
{
    // Size, color and name are taken from the base style only when the current
    // style left them untouched and the base style departs from the defaults.
    if (!$this->hasSetFontSize) {
        $baseFontSize = $baseStyle->getFontSize();
        if ($baseFontSize !== self::DEFAULT_FONT_SIZE) {
            $styleToUpdate->setFontSize($baseFontSize);
        }
    }

    if (!$this->hasSetFontColor) {
        $baseFontColor = $baseStyle->getFontColor();
        if ($baseFontColor !== self::DEFAULT_FONT_COLOR) {
            $styleToUpdate->setFontColor($baseFontColor);
        }
    }

    if (!$this->hasSetFontName) {
        $baseFontName = $baseStyle->getFontName();
        if ($baseFontName !== self::DEFAULT_FONT_NAME) {
            $styleToUpdate->setFontName($baseFontName);
        }
    }
}
/**
* @param Style $styleToUpdate Style to update (passed as reference)
* @param Style $baseStyle
* @return void
*/
private function mergeCellProperties($styleToUpdate, $baseStyle)
{
    // Text wrapping: inherited when not set on the current style and enabled on the base style.
    $inheritsWrapText = !$this->hasSetWrapText && $baseStyle->shouldWrapText();
    if ($inheritsWrapText) {
        $styleToUpdate->setShouldWrapText();
    }

    // Border: inherited when the current style has none and the base style applies one.
    $inheritsBorder = !$this->getBorder() && $baseStyle->shouldApplyBorder();
    if ($inheritsBorder) {
        $styleToUpdate->setBorder($baseStyle->getBorder());
    }

    // Background color: inherited when not set on the current style and applied by the base style.
    $inheritsBackgroundColor = !$this->hasSetBackgroundColor && $baseStyle->shouldApplyBackgroundColor();
    if ($inheritsBackgroundColor) {
        $styleToUpdate->setBackgroundColor($baseStyle->getBackgroundColor());
    }
}
}

View File

@ -109,15 +109,15 @@ class StyleBuilder
}
/**
* Makes the text wrap in the cell if it's too long or
* on multiple lines.
* Makes the text wrap in the cell if requested
*
* @api
* @param bool $shouldWrap Should the text be wrapped
* @return StyleBuilder
*/
public function setShouldWrapText()
public function setShouldWrapText($shouldWrap = true)
{
$this->style->setShouldWrapText();
$this->style->setShouldWrapText($shouldWrap);
return $this;
}

View File

@ -35,7 +35,7 @@ interface WriterInterface
* @param array $dataRow Array containing data to be streamed.
* Example $dataRow = ['data1', 1234, null, '', 'data5'];
* @return WriterInterface
* @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If the writer has not been opened yetthe writer
* @throws \Box\Spout\Writer\Exception\WriterNotOpenedException If the writer has not been opened yet
* @throws \Box\Spout\Common\Exception\IOException If unable to write data
*/
public function addRow(array $dataRow);

View File

@ -94,7 +94,7 @@ class FileSystemHelper extends \Box\Spout\Common\Helper\FileSystemHelper
*/
protected function createRootFolder()
{
$this->rootFolder = $this->createFolder($this->baseFolderPath, uniqid('xlsx', true));
$this->rootFolder = $this->createFolder($this->baseFolderRealPath, uniqid('xlsx', true));
return $this;
}

View File

@ -83,7 +83,7 @@ class Workbook extends AbstractWorkbook
public function addNewSheet()
{
$newSheetIndex = count($this->worksheets);
$sheet = new Sheet($newSheetIndex);
$sheet = new Sheet($newSheetIndex, $this->internalId);
$worksheetFilesFolder = $this->fileSystemHelper->getXlWorksheetsFolder();
$worksheet = new Worksheet($sheet, $worksheetFilesFolder, $this->sharedStringsHelper, $this->styleHelper, $this->shouldUseInlineStrings);

View File

@ -4,6 +4,7 @@ namespace Box\Spout\Writer\XLSX\Internal;
use Box\Spout\Common\Exception\InvalidArgumentException;
use Box\Spout\Common\Exception\IOException;
use Box\Spout\Common\Helper\StringHelper;
use Box\Spout\Writer\Common\Helper\CellHelper;
use Box\Spout\Writer\Common\Internal\WorksheetInterface;
@ -16,6 +17,14 @@ use Box\Spout\Writer\Common\Internal\WorksheetInterface;
*/
class Worksheet implements WorksheetInterface
{
/**
* Maximum number of characters a cell can contain
* @see https://support.office.com/en-us/article/Excel-specifications-and-limits-16c69c74-3d6a-4aaf-ba35-e6eb276e8eaa [Excel 2007]
* @see https://support.office.com/en-us/article/Excel-specifications-and-limits-1672b34d-7043-467e-8e27-269d656771c3 [Excel 2010]
* @see https://support.office.com/en-us/article/Excel-specifications-and-limits-ca36e2dc-1f09-4620-b726-67c00b05040f [Excel 2013/2016]
*/
const MAX_CHARACTERS_PER_CELL = 32767;
const SHEET_XML_FILE_HEADER = <<<EOD
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
@ -39,6 +48,9 @@ EOD;
/** @var \Box\Spout\Common\Escaper\XLSX Strings escaper */
protected $stringsEscaper;
/** @var \Box\Spout\Common\Helper\StringHelper String helper */
protected $stringHelper;
/** @var Resource Pointer to the sheet data file (e.g. xl/worksheets/sheet1.xml) */
protected $sheetFilePointer;
@ -62,6 +74,7 @@ EOD;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->stringsEscaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
$this->stringHelper = new StringHelper();
$this->worksheetFilePath = $worksheetFilesFolder . '/' . strtolower($this->externalSheet->getName()) . '.xml';
$this->startSheet();
@ -131,6 +144,39 @@ EOD;
* @throws \Box\Spout\Common\Exception\InvalidArgumentException If a cell value's type is not supported
*/
public function addRow($dataRow, $style)
{
    // A row considered empty produces no XML at all...
    $hasContentToWrite = !$this->isEmptyRow($dataRow);
    if ($hasContentToWrite) {
        $this->addNonEmptyRow($dataRow, $style);
    }

    // ...but the row index moves forward either way, so that the rows written
    // afterwards keep their position in the sheet.
    $this->lastWrittenRowIndex++;
}
/**
* Returns whether the given row is empty
*
* @param array $dataRow Array containing data to be written. Cannot be empty.
* Example $dataRow = ['data1', 1234, null, '', 'data5'];
* @return bool Whether the given row is empty
*/
protected function isEmptyRow($dataRow)
{
    // Only rows made of exactly one cell can be considered empty.
    if (count($dataRow) !== 1) {
        return false;
    }

    // "reset()" gives the first value whatever its key is, which matters
    // because $dataRow can be an associative array.
    $onlyCellValue = reset($dataRow);

    return CellHelper::isEmpty($onlyCellValue);
}
/**
* Adds non empty row to the worksheet.
*
* @param array $dataRow Array containing data to be written. Cannot be empty.
* Example $dataRow = ['data1', 1234, null, '', 'data5'];
* @param \Box\Spout\Writer\Style\Style $style Style to be applied to the row. NULL means use default style.
* @return void
* @throws \Box\Spout\Common\Exception\IOException If the data cannot be written
* @throws \Box\Spout\Common\Exception\InvalidArgumentException If a cell value's type is not supported
*/
protected function addNonEmptyRow($dataRow, $style)
{
$cellNumber = 0;
$rowIndex = $this->lastWrittenRowIndex + 1;
@ -149,9 +195,6 @@ EOD;
if ($wasWriteSuccessful === false) {
throw new IOException("Unable to write data in {$this->worksheetFilePath}");
}
// only update the count if the write worked
$this->lastWrittenRowIndex++;
}
/**
@ -162,21 +205,16 @@ EOD;
* @param mixed $cellValue
* @param int $styleId
* @return string
* @throws InvalidArgumentException
* @throws InvalidArgumentException If the given value cannot be processed
*/
private function getCellXML($rowIndex, $cellNumber, $cellValue, $styleId)
protected function getCellXML($rowIndex, $cellNumber, $cellValue, $styleId)
{
$columnIndex = CellHelper::getCellIndexFromColumnIndex($cellNumber);
$cellXML = '<c r="' . $columnIndex . $rowIndex . '"';
$cellXML .= ' s="' . $styleId . '"';
if (CellHelper::isNonEmptyString($cellValue)) {
if ($this->shouldUseInlineStrings) {
$cellXML .= ' t="inlineStr"><is><t>' . $this->stringsEscaper->escape($cellValue) . '</t></is></c>';
} else {
$sharedStringId = $this->sharedStringsHelper->writeString($cellValue);
$cellXML .= ' t="s"><v>' . $sharedStringId . '</v></c>';
}
$cellXML .= $this->getCellXMLFragmentForNonEmptyString($cellValue);
} else if (CellHelper::isBoolean($cellValue)) {
$cellXML .= ' t="b"><v>' . intval($cellValue) . '</v></c>';
} else if (CellHelper::isNumeric($cellValue)) {
@ -196,6 +234,29 @@ EOD;
return $cellXML;
}
/**
* Returns the XML fragment for a cell containing a non empty string
*
* @param string $cellValue The cell value
* @return string The XML fragment representing the cell
* @throws InvalidArgumentException If the string exceeds the maximum number of characters allowed per cell
*/
protected function getCellXMLFragmentForNonEmptyString($cellValue)
{
    // Reject values that are longer than what a cell is allowed to contain.
    $numCharacters = $this->stringHelper->getStringLength($cellValue);
    if ($numCharacters > self::MAX_CHARACTERS_PER_CELL) {
        throw new InvalidArgumentException('Trying to add a value that exceeds the maximum number of characters allowed in a cell (32,767)');
    }

    // Shared strings mode: the value is handed to the shared strings helper
    // and the cell only references the returned ID.
    if (!$this->shouldUseInlineStrings) {
        return ' t="s"><v>' . $this->sharedStringsHelper->writeString($cellValue) . '</v></c>';
    }

    // Inline strings mode: the escaped value is embedded directly in the cell.
    return ' t="inlineStr"><is><t>' . $this->stringsEscaper->escape($cellValue) . '</t></is></c>';
}
/**
* Closes the worksheet
*

View File

@ -263,7 +263,7 @@
<location>spout</location>
<name>Spout</name>
<license>Apache</license>
<version>2.6.0</version>
<version>2.7.3</version>
<licenseversion>2.0</licenseversion>
</library>
<library>