MDL-56012 lib: Update spout to version 2.6.0

This commit is contained in:
Adrian Greeve 2016-09-20 15:10:53 +08:00
parent ddd8dc0d1b
commit cd198eafd6
43 changed files with 1578 additions and 302 deletions

View File

@ -2,6 +2,8 @@
namespace Box\Spout\Common\Escaper;
use Box\Spout\Common\Singleton;
/**
* Class ODS
* Provides functions to escape and unescape data for ODS files
@ -10,6 +12,8 @@ namespace Box\Spout\Common\Escaper;
*/
class ODS implements EscaperInterface
{
use Singleton;
/**
* Escapes the given string to make it compatible with XLSX
*

View File

@ -2,6 +2,8 @@
namespace Box\Spout\Common\Escaper;
use Box\Spout\Common\Singleton;
/**
* Class XLSX
* Provides functions to escape and unescape data for XLSX files
@ -10,15 +12,25 @@ namespace Box\Spout\Common\Escaper;
*/
class XLSX implements EscaperInterface
{
/** @var string[] Control characters to be escaped */
use Singleton;
/** @var string Regex pattern to detect control characters that need to be escaped */
protected $escapableControlCharactersPattern;
/** @var string[] Map containing control characters to be escaped (key) and their escaped value (value) */
protected $controlCharactersEscapingMap;
/** @var string[] Map containing control characters to be escaped (value) and their escaped value (key) */
protected $controlCharactersEscapingReverseMap;
/**
*
* Initializes the singleton instance
*/
public function __construct()
protected function init()
{
$this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();
$this->controlCharactersEscapingMap = $this->getControlCharactersEscapingMap();
$this->controlCharactersEscapingReverseMap = array_flip($this->controlCharactersEscapingMap);
}
/**
@ -49,6 +61,20 @@ class XLSX implements EscaperInterface
return $unescapedString;
}
/**
* @return string Regex pattern containing all escapable control characters
*/
protected function getEscapableControlCharactersPattern()
{
// control characters values are from 0 to 1F (hex values) in the ASCII table
// some characters should not be escaped though: "\t", "\r" and "\n".
return '[\x00-\x08' .
// skipping "\t" (0x9) and "\n" (0xA)
'\x0B-\x0C' .
// skipping "\r" (0xD)
'\x0E-\x1F]';
}
/**
* Builds the map containing control characters to be escaped
* mapped to their escaped values.
@ -62,14 +88,14 @@ class XLSX implements EscaperInterface
protected function getControlCharactersEscapingMap()
{
$controlCharactersEscapingMap = [];
$whitelistedControlCharacters = ["\t", "\r", "\n"];
// control characters values are from 0 to 1F (hex values) in the ASCII table
for ($charValue = 0x0; $charValue <= 0x1F; $charValue++) {
if (!in_array(chr($charValue), $whitelistedControlCharacters)) {
for ($charValue = 0x00; $charValue <= 0x1F; $charValue++) {
$character = chr($charValue);
if (preg_match("/{$this->escapableControlCharactersPattern}/", $character)) {
$charHexValue = dechex($charValue);
$escapedChar = '_x' . sprintf('%04s' , strtoupper($charHexValue)) . '_';
$controlCharactersEscapingMap[$escapedChar] = chr($charValue);
$controlCharactersEscapingMap[$escapedChar] = $character;
}
}
@ -92,7 +118,15 @@ class XLSX implements EscaperInterface
protected function escapeControlCharacters($string)
{
$escapedString = $this->escapeEscapeCharacter($string);
return str_replace(array_values($this->controlCharactersEscapingMap), array_keys($this->controlCharactersEscapingMap), $escapedString);
// if no control characters
if (!preg_match("/{$this->escapableControlCharactersPattern}/", $escapedString)) {
return $escapedString;
}
return preg_replace_callback("/({$this->escapableControlCharactersPattern})/", function($matches) {
return $this->controlCharactersEscapingReverseMap[$matches[0]];
}, $escapedString);
}
/**
@ -122,6 +156,7 @@ class XLSX implements EscaperInterface
protected function unescapeControlCharacters($string)
{
$unescapedString = $string;
foreach ($this->controlCharactersEscapingMap as $escapedCharValue => $charValue) {
// only unescape characters that don't contain the escaped escape character for now
$unescapedString = preg_replace("/(?<!_x005F)($escapedCharValue)/", $charValue, $unescapedString);

View File

@ -59,7 +59,7 @@ class EncodingHelper
{
$byteOffsetToSkipBom = 0;
if ($this->hasBom($filePointer, $encoding)) {
if ($this->hasBOM($filePointer, $encoding)) {
$bomUsed = $this->supportedEncodingsWithBom[$encoding];
// we skip the N first bytes

View File

@ -252,6 +252,19 @@ class GlobalFunctionsHelper
header($string);
}
/**
* Wrapper around global function ob_end_clean()
* @see ob_end_clean()
*
* @return void
*/
public function ob_end_clean()
{
if (ob_get_length() > 0) {
ob_end_clean();
}
}
/**
* Wrapper around global function iconv()
* @see iconv()

View File

@ -0,0 +1,41 @@
<?php
namespace Box\Spout\Common;
/**
* Class Singleton
* Defines a class as a singleton.
*
* @package Box\Spout\Common
*/
trait Singleton
{
protected static $instance;
/**
* @return static
*/
final public static function getInstance()
{
return isset(static::$instance)
? static::$instance
: static::$instance = new static;
}
/**
* Singleton constructor.
*/
final private function __construct()
{
$this->init();
}
/**
* Initializes the singleton
* @return void
*/
protected function init() {}
final private function __wakeup() {}
final private function __clone() {}
}

View File

@ -19,6 +19,9 @@ abstract class AbstractReader implements ReaderInterface
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates = false;
/**
* Returns whether stream wrappers are supported
*
@ -49,7 +52,7 @@ abstract class AbstractReader implements ReaderInterface
abstract protected function closeReader();
/**
* @param $globalFunctionsHelper
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @return AbstractReader
*/
public function setGlobalFunctionsHelper($globalFunctionsHelper)
@ -58,6 +61,18 @@ abstract class AbstractReader implements ReaderInterface
return $this;
}
/**
* Sets whether date/time values should be returned as PHP objects or be formatted as strings.
*
* @param bool $shouldFormatDates
* @return AbstractReader
*/
public function setShouldFormatDates($shouldFormatDates)
{
$this->shouldFormatDates = $shouldFormatDates;
return $this;
}
/**
* Prepares the reader to read the given file. It also makes sure
* that the file exists and is readable.

View File

@ -57,6 +57,7 @@ class RowIterator implements IteratorInterface
* @param string $fieldDelimiter Character that delimits fields
* @param string $fieldEnclosure Character that enclose fields
* @param string $encoding Encoding of the CSV file to be read
* @param string $endOfLineDelimiter End of line delimiter
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
*/
public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
@ -152,7 +153,7 @@ class RowIterator implements IteratorInterface
*/
protected function getNextUTF8EncodedRow()
{
$encodedRowData = fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
$encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
if (false === $encodedRowData) {
return false;
}

View File

@ -23,6 +23,8 @@ class CellValueFormatter
/** Definition of XML nodes names used to parse data */
const XML_NODE_P = 'p';
const XML_NODE_S = 'text:s';
const XML_NODE_A = 'text:a';
const XML_NODE_SPAN = 'text:span';
/** Definition of XML attribute used to parse data */
const XML_ATTRIBUTE_TYPE = 'office:value-type';
@ -33,16 +35,21 @@ class CellValueFormatter
const XML_ATTRIBUTE_CURRENCY = 'office:currency';
const XML_ATTRIBUTE_C = 'text:c';
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var \Box\Spout\Common\Escaper\ODS Used to unescape XML data */
protected $escaper;
/**
*
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct()
public function __construct($shouldFormatDates)
{
$this->shouldFormatDates = $shouldFormatDates;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->escaper = new \Box\Spout\Common\Escaper\ODS();
$this->escaper = \Box\Spout\Common\Escaper\ODS::getInstance();
}
/**
@ -98,6 +105,8 @@ class CellValueFormatter
$spaceAttribute = $childNode->getAttribute(self::XML_ATTRIBUTE_C);
$numSpaces = (!empty($spaceAttribute)) ? intval($spaceAttribute) : 1;
$currentPValue .= str_repeat(' ', $numSpaces);
} else if ($childNode->nodeName === self::XML_NODE_A || $childNode->nodeName === self::XML_NODE_SPAN) {
$currentPValue .= $childNode->nodeValue;
}
}
@ -119,6 +128,7 @@ class CellValueFormatter
{
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
$nodeIntValue = intval($nodeValue);
// The "==" is intentionally not a "===" because only the value matters, not the type
$cellValue = ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue);
return $cellValue;
}
@ -141,15 +151,27 @@ class CellValueFormatter
* Returns the cell Date value from the given node.
*
* @param \DOMNode $node
* @return \DateTime|null The value associated with the cell or NULL if invalid date value
* @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
*/
protected function formatDateCellValue($node)
{
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
return new \DateTime($nodeValue);
} catch (\Exception $e) {
return null;
// The XML node looks like this:
// <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
// <text:p>05/19/16 04:39 PM</text:p>
// </table:table-cell>
if ($this->shouldFormatDates) {
// The date is already formatted in the "p" tag
$nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
return $nodeWithValueAlreadyFormatted->nodeValue;
} else {
// otherwise, get it from the "date-value" attribute
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
return new \DateTime($nodeValue);
} catch (\Exception $e) {
return null;
}
}
}
@ -157,15 +179,27 @@ class CellValueFormatter
* Returns the cell Time value from the given node.
*
* @param \DOMNode $node
* @return \DateInterval|null The value associated with the cell or NULL if invalid time value
* @return \DateInterval|string|null The value associated with the cell or NULL if invalid time value
*/
protected function formatTimeCellValue($node)
{
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
return new \DateInterval($nodeValue);
} catch (\Exception $e) {
return null;
// The XML node looks like this:
// <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
// <text:p>01:24:00 PM</text:p>
// </table:table-cell>
if ($this->shouldFormatDates) {
// The date is already formatted in the "p" tag
$nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
return $nodeWithValueAlreadyFormatted->nodeValue;
} else {
// otherwise, get it from the "time-value" attribute
try {
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
return new \DateInterval($nodeValue);
} catch (\Exception $e) {
return null;
}
}
}

View File

@ -42,7 +42,7 @@ class Reader extends AbstractReader
$this->zip = new \ZipArchive();
if ($this->zip->open($filePath) === true) {
$this->sheetIterator = new SheetIterator($filePath);
$this->sheetIterator = new SheetIterator($filePath, $this->shouldFormatDates);
} else {
throw new IOException("Could not open $filePath for reading.");
}

View File

@ -45,11 +45,12 @@ class RowIterator implements IteratorInterface
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($xmlReader)
public function __construct($xmlReader, $shouldFormatDates)
{
$this->xmlReader = $xmlReader;
$this->cellValueFormatter = new CellValueFormatter();
$this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
}
/**
@ -186,7 +187,7 @@ class RowIterator implements IteratorInterface
/**
* empty() replacement that honours 0 as a valid value
*
* @param $value The cell value
* @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value
* @return bool
*/
protected function isEmptyCellValue($value)

View File

@ -27,12 +27,13 @@ class Sheet implements SheetInterface
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
*/
public function __construct($xmlReader, $sheetIndex, $sheetName)
public function __construct($xmlReader, $shouldFormatDates, $sheetIndex, $sheetName)
{
$this->rowIterator = new RowIterator($xmlReader);
$this->rowIterator = new RowIterator($xmlReader, $shouldFormatDates);
$this->index = $sheetIndex;
$this->name = $sheetName;
}

View File

@ -15,6 +15,8 @@ use Box\Spout\Reader\Wrapper\XMLReader;
*/
class SheetIterator implements IteratorInterface
{
const CONTENT_XML_FILE_PATH = 'content.xml';
/** Definition of XML nodes name and attribute used to parse sheet data */
const XML_NODE_TABLE = 'table:table';
const XML_ATTRIBUTE_TABLE_NAME = 'table:name';
@ -22,6 +24,9 @@ class SheetIterator implements IteratorInterface
/** @var string $filePath Path of the file to be read */
protected $filePath;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
@ -36,15 +41,17 @@ class SheetIterator implements IteratorInterface
/**
* @param string $filePath Path of the file to be read
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
*/
public function __construct($filePath)
public function __construct($filePath, $shouldFormatDates)
{
$this->filePath = $filePath;
$this->shouldFormatDates = $shouldFormatDates;
$this->xmlReader = new XMLReader();
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->escaper = new \Box\Spout\Common\Escaper\ODS();
$this->escaper = \Box\Spout\Common\Escaper\ODS::getInstance();
}
/**
@ -58,8 +65,8 @@ class SheetIterator implements IteratorInterface
{
$this->xmlReader->close();
$contentXmlFilePath = $this->filePath . '#content.xml';
if ($this->xmlReader->open('zip://' . $contentXmlFilePath) === false) {
if ($this->xmlReader->openFileInZip($this->filePath, self::CONTENT_XML_FILE_PATH) === false) {
$contentXmlFilePath = $this->filePath . '#' . self::CONTENT_XML_FILE_PATH;
throw new IOException("Could not open \"{$contentXmlFilePath}\".");
}
@ -109,7 +116,7 @@ class SheetIterator implements IteratorInterface
$escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME);
$sheetName = $this->escaper->unescape($escapedSheetName);
return new Sheet($this->xmlReader, $sheetName, $this->currentSheetIndex);
return new Sheet($this->xmlReader, $this->shouldFormatDates, $sheetName, $this->currentSheetIndex);
}
/**

View File

@ -40,8 +40,6 @@ class SimpleXMLElement
}
$this->resetXMLInternalErrorsSetting();
return $this->simpleXMLElement;
}
/**

View File

@ -14,66 +14,44 @@ class XMLReader extends \XMLReader
{
use XMLInternalErrorsHelper;
const ZIP_WRAPPER = 'zip://';
/**
* Set the URI containing the XML to parse
* @see \XMLReader::open
* Opens the XML Reader to read a file located inside a ZIP file.
*
* @param string $URI URI pointing to the document
* @param string|null|void $encoding The document encoding
* @param int $options A bitmask of the LIBXML_* constants
* @param string $zipFilePath Path to the ZIP file
* @param string $fileInsideZipPath Relative or absolute path of the file inside the zip
* @return bool TRUE on success or FALSE on failure
*/
public function open($URI, $encoding = null, $options = 0)
public function openFileInZip($zipFilePath, $fileInsideZipPath)
{
$wasOpenSuccessful = false;
$realPathURI = $this->convertURIToUseRealPath($URI);
$realPathURI = $this->getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath);
// HHVM does not check if file exists within zip file
// @link https://github.com/facebook/hhvm/issues/5779
if ($this->isRunningHHVM() && $this->isZipStream($realPathURI)) {
if ($this->isRunningHHVM()) {
if ($this->fileExistsWithinZip($realPathURI)) {
$wasOpenSuccessful = parent::open($realPathURI, $encoding, $options|LIBXML_NONET);
$wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET);
}
} else {
$wasOpenSuccessful = parent::open($realPathURI, $encoding, $options|LIBXML_NONET);
$wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET);
}
return $wasOpenSuccessful;
}
/**
* Updates the given URI to use a real path.
* This is to avoid issues on some Windows setup.
* Returns the real path for the given path components.
* This is useful to avoid issues on some Windows setup.
*
* @param string $URI URI
* @return string The URI using a real path
* @param string $zipFilePath Path to the ZIP file
* @param string $fileInsideZipPath Relative or absolute path of the file inside the zip
* @return string The real path URI
*/
protected function convertURIToUseRealPath($URI)
public function getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath)
{
$realPathURI = $URI;
if ($this->isZipStream($URI)) {
if (preg_match('/zip:\/\/(.*)#(.*)/', $URI, $matches)) {
$documentPath = $matches[1];
$documentInsideZipPath = $matches[2];
$realPathURI = 'zip://' . realpath($documentPath) . '#' . $documentInsideZipPath;
}
} else {
$realPathURI = realpath($URI);
}
return $realPathURI;
}
/**
* Returns whether the given URI is a zip stream.
*
* @param string $URI URI pointing to a document
* @return bool TRUE if URI is a zip stream, FALSE otherwise
*/
protected function isZipStream($URI)
{
return (strpos($URI, 'zip://') === 0);
return (self::ZIP_WRAPPER . realpath($zipFilePath) . '#' . $fileInsideZipPath);
}
/**
@ -138,9 +116,10 @@ class XMLReader extends \XMLReader
*/
public function readUntilNodeFound($nodeName)
{
while (($wasReadSuccessful = $this->read()) && ($this->nodeType !== \XMLReader::ELEMENT || $this->name !== $nodeName)) {
// do nothing
}
do {
$wasReadSuccessful = $this->read();
$isNotPositionedOnStartingNode = !$this->isPositionedOnStartingNode($nodeName);
} while ($wasReadSuccessful && $isNotPositionedOnStartingNode);
return $wasReadSuccessful;
}
@ -170,7 +149,7 @@ class XMLReader extends \XMLReader
*/
public function isPositionedOnStartingNode($nodeName)
{
return ($this->nodeType === XMLReader::ELEMENT && $this->name === $nodeName);
return $this->isPositionedOnNode($nodeName, XMLReader::ELEMENT);
}
/**
@ -179,6 +158,22 @@ class XMLReader extends \XMLReader
*/
public function isPositionedOnEndingNode($nodeName)
{
return ($this->nodeType === XMLReader::END_ELEMENT && $this->name === $nodeName);
return $this->isPositionedOnNode($nodeName, XMLReader::END_ELEMENT);
}
/**
* @param string $nodeName
* @param int $nodeType
* @return bool Whether the XML Reader is currently positioned on the node with given name and type
*/
private function isPositionedOnNode($nodeName, $nodeType)
{
// In some cases, the node has a prefix (for instance, "<sheet>" can also be "<x:sheet>").
// So if the given node name does not have a prefix, we need to look at the unprefixed name ("localName").
// @see https://github.com/box/spout/issues/233
$hasPrefix = (strpos($nodeName, ':') !== false);
$currentNodeName = ($hasPrefix) ? $this->name : $this->localName;
return ($this->nodeType === $nodeType && $currentNodeName === $nodeName);
}
}

View File

@ -29,6 +29,8 @@ class CellValueFormatter
/** Constants used for date formatting */
const NUM_SECONDS_IN_ONE_DAY = 86400;
const NUM_SECONDS_IN_ONE_HOUR = 3600;
const NUM_SECONDS_IN_ONE_MINUTE = 60;
/**
* February 29th, 1900 is NOT a leap year but Excel thinks it is...
@ -42,20 +44,25 @@ class CellValueFormatter
/** @var StyleHelper Helper to work with styles */
protected $styleHelper;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */
protected $escaper;
/**
* @param SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
* @param StyleHelper $styleHelper Helper to work with styles
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($sharedStringsHelper, $styleHelper)
public function __construct($sharedStringsHelper, $styleHelper, $shouldFormatDates)
{
$this->sharedStringsHelper = $sharedStringsHelper;
$this->styleHelper = $styleHelper;
$this->shouldFormatDates = $shouldFormatDates;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->escaper = new \Box\Spout\Common\Escaper\XLSX();
$this->escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
}
/**
@ -118,8 +125,7 @@ class CellValueFormatter
// inline strings are formatted this way:
// <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
$tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);
$escapedCellValue = trim($tNode->nodeValue);
$cellValue = $this->escaper->unescape($escapedCellValue);
$cellValue = $this->escaper->unescape($tNode->nodeValue);
return $cellValue;
}
@ -167,7 +173,7 @@ class CellValueFormatter
$shouldFormatAsDate = $this->styleHelper->shouldFormatNumericValueAsDate($cellStyleId);
if ($shouldFormatAsDate) {
return $this->formatExcelTimestampValue(floatval($nodeValue));
return $this->formatExcelTimestampValue(floatval($nodeValue), $cellStyleId);
} else {
$nodeIntValue = intval($nodeValue);
return ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue);
@ -177,33 +183,86 @@ class CellValueFormatter
/**
* Returns a cell's PHP Date value, associated to the given timestamp.
* NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
* NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
*
* @param float $nodeValue
* @param int $cellStyleId 0 being the default style
* @return \DateTime|null The value associated with the cell or NULL if invalid date value
*/
protected function formatExcelTimestampValue($nodeValue)
protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
{
// Fix for the erroneous leap year in Excel
if (ceil($nodeValue) > self::ERRONEOUS_EXCEL_LEAP_YEAR_DAY) {
--$nodeValue;
}
// The value 1.0 represents 1900-01-01. Numbers below 1.0 are not valid Excel dates.
if ($nodeValue < 1.0) {
if ($nodeValue >= 1) {
// Values greater than 1 represent "dates". The value 1.0 representing the "base" date: 1900-01-01.
return $this->formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId);
} else if ($nodeValue >= 0) {
// Values between 0 and 1 represent "times".
return $this->formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId);
} else {
// invalid date
return null;
}
}
/**
* Returns a cell's PHP DateTime value, associated to the given timestamp.
* Only the time value matters. The date part is set to Jan 1st, 1900 (base Excel date).
*
* @param float $nodeValue
* @param int $cellStyleId 0 being the default style
* @return \DateTime|string The value associated with the cell
*/
protected function formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId)
{
$time = round($nodeValue * self::NUM_SECONDS_IN_ONE_DAY);
$hours = floor($time / self::NUM_SECONDS_IN_ONE_HOUR);
$minutes = floor($time / self::NUM_SECONDS_IN_ONE_MINUTE) - ($hours * self::NUM_SECONDS_IN_ONE_MINUTE);
$seconds = $time - ($hours * self::NUM_SECONDS_IN_ONE_HOUR) - ($minutes * self::NUM_SECONDS_IN_ONE_MINUTE);
// using the base Excel date (Jan 1st, 1900) - not relevant here
$dateObj = new \DateTime('1900-01-01');
$dateObj->setTime($hours, $minutes, $seconds);
if ($this->shouldFormatDates) {
$styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
return $dateObj->format($phpDateFormat);
} else {
return $dateObj;
}
}
/**
* Returns a cell's PHP Date value, associated to the given timestamp.
* NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
*
* @param float $nodeValue
* @param int $cellStyleId 0 being the default style
* @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
*/
protected function formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId)
{
// Do not use any unix timestamps for calculation to prevent
// issues with numbers exceeding 2^31.
$secondsRemainder = fmod($nodeValue, 1) * self::NUM_SECONDS_IN_ONE_DAY;
$secondsRemainder = round($secondsRemainder, 0);
try {
$cellValue = \DateTime::createFromFormat('|Y-m-d', '1899-12-31');
$cellValue->modify('+' . intval($nodeValue) . 'days');
$cellValue->modify('+' . $secondsRemainder . 'seconds');
$dateObj = \DateTime::createFromFormat('|Y-m-d', '1899-12-31');
$dateObj->modify('+' . intval($nodeValue) . 'days');
$dateObj->modify('+' . $secondsRemainder . 'seconds');
return $cellValue;
if ($this->shouldFormatDates) {
$styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
return $dateObj->format($phpDateFormat);
} else {
return $dateObj;
}
} catch (\Exception $e) {
return null;
}
@ -224,16 +283,16 @@ class CellValueFormatter
/**
* Returns a cell's PHP Date value, associated to the given stored nodeValue.
* @see ECMA-376 Part 1 - §18.17.4
*
* @param string $nodeValue
* @return \DateTime|null The value associated with the cell or NULL if invalid date value
* @param string $nodeValue ISO 8601 Date string
* @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
*/
protected function formatDateCellValue($nodeValue)
{
// Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
try {
$cellValue = new \DateTime($nodeValue);
return $cellValue;
return ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
} catch (\Exception $e) {
return null;
}

View File

@ -0,0 +1,122 @@
<?php
namespace Box\Spout\Reader\XLSX\Helper;
/**
* Class DateFormatHelper
* This class provides helper functions to format Excel dates
*
* @package Box\Spout\Reader\XLSX\Helper
*/
class DateFormatHelper
{
const KEY_GENERAL = 'general';
const KEY_HOUR_12 = '12h';
const KEY_HOUR_24 = '24h';
/**
* This map is used to replace Excel format characters by their PHP equivalent.
* Keys should be ordered from longest to smallest.
*
* @var array Mapping between Excel format characters and PHP format characters
*/
private static $excelDateFormatToPHPDateFormatMapping = [
self::KEY_GENERAL => [
// Time
'am/pm' => 'A', // Uppercase Ante meridiem and Post meridiem
':mm' => ':i', // Minutes with leading zeros - if preceded by a ":" (otherwise month)
'mm:' => 'i:', // Minutes with leading zeros - if followed by a ":" (otherwise month)
'ss' => 's', // Seconds, with leading zeros
'.s' => '', // Ignore (fractional seconds format does not exist in PHP)
// Date
'e' => 'Y', // Full numeric representation of a year, 4 digits
'yyyy' => 'Y', // Full numeric representation of a year, 4 digits
'yy' => 'y', // Two digit representation of a year
'mmmmm' => 'M', // Short textual representation of a month, three letters ("mmmmm" should only contain the 1st letter...)
'mmmm' => 'F', // Full textual representation of a month
'mmm' => 'M', // Short textual representation of a month, three letters
'mm' => 'm', // Numeric representation of a month, with leading zeros
'm' => 'n', // Numeric representation of a month, without leading zeros
'dddd' => 'l', // Full textual representation of the day of the week
'ddd' => 'D', // Textual representation of a day, three letters
'dd' => 'd', // Day of the month, 2 digits with leading zeros
'd' => 'j', // Day of the month without leading zeros
],
self::KEY_HOUR_12 => [
'hh' => 'h', // 12-hour format of an hour without leading zeros
'h' => 'g', // 12-hour format of an hour without leading zeros
],
self::KEY_HOUR_24 => [
'hh' => 'H', // 24-hour hours with leading zero
'h' => 'G', // 24-hour format of an hour without leading zeros
],
];
/**
* Converts the given Excel date format to a format understandable by the PHP date function.
*
* @param string $excelDateFormat Excel date format
* @return string PHP date format (as defined here: http://php.net/manual/en/function.date.php)
*/
public static function toPHPDateFormat($excelDateFormat)
{
// Remove brackets potentially present at the beginning of the format string
$dateFormat = preg_replace('/^(\[\$[^\]]+?\])/i', '', $excelDateFormat);
// Double quotes are used to escape characters that must not be interpreted.
// For instance, ["Day " dd] should result in "Day 13" and we should not try to interpret "D", "a", "y"
// By exploding the format string using double quote as a delimiter, we can get all parts
// that must be transformed (even indexes) and all parts that must not be (odd indexes).
$dateFormatParts = explode('"', $dateFormat);
foreach ($dateFormatParts as $partIndex => $dateFormatPart) {
// do not look at odd indexes
if ($partIndex % 2 === 1) {
continue;
}
// Make sure all characters are lowercase, as the mapping table is using lowercase characters
$transformedPart = strtolower($dateFormatPart);
// Remove escapes related to non-format characters
$transformedPart = str_replace('\\', '', $transformedPart);
// Apply general transformation first...
$transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_GENERAL]);
// ... then apply hour transformation, for 12-hour or 24-hour format
if (self::has12HourFormatMarker($dateFormatPart)) {
$transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_12]);
} else {
$transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_24]);
}
// overwrite the parts array with the new transformed part
$dateFormatParts[$partIndex] = $transformedPart;
}
// Merge all transformed parts back together
$phpDateFormat = implode('"', $dateFormatParts);
// Finally, to have the date format compatible with the DateTime::format() function, we need to escape
// all characters that are inside double quotes (and double quotes must be removed).
// For instance, ["Day " dd] should become [\D\a\y\ dd]
$phpDateFormat = preg_replace_callback('/"(.+?)"/', function($matches) {
$stringToEscape = $matches[1];
$letters = preg_split('//u', $stringToEscape, -1, PREG_SPLIT_NO_EMPTY);
return '\\' . implode('\\', $letters);
}, $phpDateFormat);
return $phpDateFormat;
}
/**
* @param string $excelDateFormat Date format as defined by Excel
* @return bool Whether the given date format has the 12-hour format marker
*/
private static function has12HourFormatMarker($excelDateFormat)
{
return (stripos($excelDateFormat, 'am/pm') !== false);
}
}

View File

@ -78,7 +78,7 @@ class CachingStrategyFactory
* Returns the best caching strategy, given the number of unique shared strings
* and the amount of memory available.
*
* @param int $sharedStringsUniqueCount Number of unique shared strings
* @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
* @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored
* @return CachingStrategyInterface The best caching strategy
*/
@ -95,11 +95,16 @@ class CachingStrategyFactory
* Returns whether it is safe to use in-memory caching, given the number of unique shared strings
* and the amount of memory available.
*
* @param int $sharedStringsUniqueCount Number of unique shared strings
* @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
* @return bool
*/
protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
{
// if the number of shared strings in unknown, do not use "in memory" strategy
if ($sharedStringsUniqueCount === null) {
return false;
}
$memoryAvailable = $this->getMemoryLimitInKB();
if ($memoryAvailable === -1) {

View File

@ -80,7 +80,7 @@ class SharedStringsHelper
$xmlReader = new XMLReader();
$sharedStringIndex = 0;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$escaper = new \Box\Spout\Common\Escaper\XLSX();
$escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
$sharedStringsFilePath = $this->getSharedStringsFilePath();
if ($xmlReader->open($sharedStringsFilePath) === false) {
@ -100,11 +100,16 @@ class SharedStringsHelper
// removes nodes that should not be read, like the pronunciation of the Kanji characters
$cleanNode = $this->removeSuperfluousTextNodes($node);
// find all text nodes 't'; there can be multiple if the cell contains formatting
// find all text nodes "t"; there can be multiple if the cell contains formatting
$textNodes = $cleanNode->xpath('//ns:t');
$textValue = '';
foreach ($textNodes as $textNode) {
foreach ($textNodes as $nodeIndex => $textNode) {
if ($nodeIndex !== 0) {
// add a space between each "t" node
$textValue .= ' ';
}
if ($this->shouldPreserveWhitespace($textNode)) {
$textValue .= $textNode->__toString();
} else {
@ -142,7 +147,7 @@ class SharedStringsHelper
* Returns the shared strings unique count, as specified in <sst> tag.
*
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader instance
* @return int Number of unique shared strings in the sharedStrings.xml file
* @return int|null Number of unique shared strings in the sharedStrings.xml file
* @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read
*/
protected function getSharedStringsUniqueCount($xmlReader)
@ -154,13 +159,21 @@ class SharedStringsHelper
$xmlReader->read();
}
return intval($xmlReader->getAttribute('uniqueCount'));
$uniqueCount = $xmlReader->getAttribute('uniqueCount');
// some software do not add the "uniqueCount" attribute but only use the "count" one
// @see https://github.com/box/spout/issues/254
if ($uniqueCount === null) {
$uniqueCount = $xmlReader->getAttribute('count');
}
return ($uniqueCount !== null) ? intval($uniqueCount) : null;
}
/**
* Returns the best shared strings caching strategy.
*
* @param int $sharedStringsUniqueCount
* @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
* @return CachingStrategyInterface
*/
protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount)
@ -200,6 +213,8 @@ class SharedStringsHelper
{
$tagsToRemove = [
'rPh', // Pronunciation of the text
'pPr', // Paragraph Properties / Previous Paragraph Properties
'rPr', // Run Properties for the Paragraph Mark / Previous Run Properties for the Paragraph Mark
];
foreach ($tagsToRemove as $tagToRemove) {

View File

@ -2,7 +2,7 @@
namespace Box\Spout\Reader\XLSX\Helper;
use Box\Spout\Reader\Wrapper\SimpleXMLElement;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\XLSX\Sheet;
/**
@ -14,18 +14,9 @@ use Box\Spout\Reader\XLSX\Sheet;
class SheetHelper
{
/** Paths of XML files relative to the XLSX file root */
const CONTENT_TYPES_XML_FILE_PATH = '[Content_Types].xml';
const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
/** Namespaces for the XML files */
const MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML = 'http://schemas.openxmlformats.org/package/2006/content-types';
const MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS = 'http://schemas.openxmlformats.org/package/2006/relationships';
const MAIN_NAMESPACE_FOR_WORKBOOK_XML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
/** Value of the Override attribute used in [Content_Types].xml to define sheets */
const OVERRIDE_CONTENT_TYPES_ATTRIBUTE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml';
/** @var string Path of the XLSX file being read */
protected $filePath;
@ -35,22 +26,21 @@ class SheetHelper
/** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml.rels file */
protected $workbookXMLRelsAsXMLElement;
/** @var \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representing the workbook.xml file */
protected $workbookXMLAsXMLElement;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/**
* @param string $filePath Path of the XLSX file being read
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper Helper to work with shared strings
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper)
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
{
$this->filePath = $filePath;
$this->sharedStringsHelper = $sharedStringsHelper;
$this->globalFunctionsHelper = $globalFunctionsHelper;
$this->shouldFormatDates = $shouldFormatDates;
}
/**
@ -62,119 +52,81 @@ class SheetHelper
public function getSheets()
{
$sheets = [];
$sheetIndex = 0;
$contentTypesAsXMLElement = $this->getFileAsXMLElementWithNamespace(
self::CONTENT_TYPES_XML_FILE_PATH,
self::MAIN_NAMESPACE_FOR_CONTENT_TYPES_XML
);
$xmlReader = new XMLReader();
if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode('sheet')) {
$sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex);
$sheetIndex++;
} else if ($xmlReader->isPositionedOnEndingNode('sheets')) {
// stop reading once all sheets have been read
break;
}
}
// find all nodes defining a sheet
$sheetNodes = $contentTypesAsXMLElement->xpath('//ns:Override[@ContentType="' . self::OVERRIDE_CONTENT_TYPES_ATTRIBUTE . '"]');
$numSheetNodes = count($sheetNodes);
for ($i = 0; $i < $numSheetNodes; $i++) {
$sheetNode = $sheetNodes[$i];
$sheetDataXMLFilePath = $sheetNode->getAttribute('PartName');
$sheets[] = $this->getSheetFromXML($sheetDataXMLFilePath);
$xmlReader->close();
}
// make sure the sheets are sorted by index
// (as the sheets are not necessarily in this order in the XML file)
usort($sheets, function ($sheet1, $sheet2) {
return ($sheet1->getIndex() - $sheet2->getIndex());
});
return $sheets;
}
/**
* Returns an instance of a sheet, given the path of its data XML file.
* We first look at "xl/_rels/workbook.xml.rels" to find the relationship ID of the sheet.
* Then we look at "xl/worbook.xml" to find the sheet entry associated to the found ID.
* The entry contains the ID and name of the sheet.
* Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml".
* We can find the XML file path describing the sheet inside "workbook.xml.res", by mapping with the sheet ID
* ("r:id" in "workbook.xml", "Id" in "workbook.xml.res").
*
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
* @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based)
* @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
*/
protected function getSheetFromXML($sheetDataXMLFilePath)
protected function getSheetFromSheetXMLNode($xmlReaderOnSheetNode, $sheetIndexZeroBased)
{
// In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
// In workbook.xml.rels, it is only "worksheets/sheet1.xml"
$sheetDataXMLFilePathInWorkbookXMLRels = ltrim($sheetDataXMLFilePath, '/xl/');
// find the node associated to the given file path
$workbookXMLResElement = $this->getWorkbookXMLRelsAsXMLElement();
$relationshipNodes = $workbookXMLResElement->xpath('//ns:Relationship[@Target="' . $sheetDataXMLFilePathInWorkbookXMLRels . '"]');
$relationshipNode = $relationshipNodes[0];
$relationshipSheetId = $relationshipNode->getAttribute('Id');
$workbookXMLElement = $this->getWorkbookXMLAsXMLElement();
$sheetNodes = $workbookXMLElement->xpath('//ns:sheet[@r:id="' . $relationshipSheetId . '"]');
$sheetNode = $sheetNodes[0];
$escapedSheetName = $sheetNode->getAttribute('name');
$sheetIdOneBased = $sheetNode->getAttribute('sheetId');
$sheetIndexZeroBased = $sheetIdOneBased - 1;
$sheetId = $xmlReaderOnSheetNode->getAttribute('r:id');
$escapedSheetName = $xmlReaderOnSheetNode->getAttribute('name');
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$escaper = new \Box\Spout\Common\Escaper\XLSX();
$escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
$sheetName = $escaper->unescape($escapedSheetName);
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $sheetIndexZeroBased, $sheetName);
$sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);
return new Sheet($this->filePath, $sheetDataXMLFilePath, $this->sharedStringsHelper, $this->shouldFormatDates, $sheetIndexZeroBased, $sheetName);
}
/**
* Returns a representation of the workbook.xml.rels file, ready to be parsed.
* The returned value is cached.
*
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representating the workbook.xml.rels file
* @param string $sheetId The sheet ID, as defined in "workbook.xml"
* @return string The XML file path describing the sheet inside "workbook.xml.res", for the given sheet ID
*/
protected function getWorkbookXMLRelsAsXMLElement()
protected function getSheetDataXMLFilePathForSheetId($sheetId)
{
if (!$this->workbookXMLRelsAsXMLElement) {
$this->workbookXMLRelsAsXMLElement = $this->getFileAsXMLElementWithNamespace(
self::WORKBOOK_XML_RELS_FILE_PATH,
self::MAIN_NAMESPACE_FOR_WORKBOOK_XML_RELS
);
$sheetDataXMLFilePath = '';
// find the file path of the sheet, by looking at the "workbook.xml.res" file
$xmlReader = new XMLReader();
if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode('Relationship')) {
$relationshipSheetId = $xmlReader->getAttribute('Id');
if ($relationshipSheetId === $sheetId) {
// In workbook.xml.rels, it is only "worksheets/sheet1.xml"
// In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
$sheetDataXMLFilePath = $xmlReader->getAttribute('Target');
// sometimes, the sheet data file path already contains "/xl/"...
if (strpos($sheetDataXMLFilePath, '/xl/') !== 0) {
$sheetDataXMLFilePath = '/xl/' . $sheetDataXMLFilePath;
break;
}
}
}
}
$xmlReader->close();
}
return $this->workbookXMLRelsAsXMLElement;
}
/**
* Returns a representation of the workbook.xml file, ready to be parsed.
* The returned value is cached.
*
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement XML element representating the workbook.xml.rels file
*/
protected function getWorkbookXMLAsXMLElement()
{
if (!$this->workbookXMLAsXMLElement) {
$this->workbookXMLAsXMLElement = $this->getFileAsXMLElementWithNamespace(
self::WORKBOOK_XML_FILE_PATH,
self::MAIN_NAMESPACE_FOR_WORKBOOK_XML
);
}
return $this->workbookXMLAsXMLElement;
}
/**
* Loads the contents of the given file in an XML parser and register the given XPath namespace.
*
* @param string $xmlFilePath The path of the XML file inside the XLSX file
* @param string $mainNamespace The main XPath namespace to register
* @return \Box\Spout\Reader\Wrapper\SimpleXMLElement The XML element representing the file
*/
protected function getFileAsXMLElementWithNamespace($xmlFilePath, $mainNamespace)
{
$xmlContents = $this->globalFunctionsHelper->file_get_contents('zip://' . $this->filePath . '#' . $xmlFilePath);
$xmlElement = new SimpleXMLElement($xmlContents);
$xmlElement->registerXPathNamespace('ns', $mainNamespace);
return $xmlElement;
return $sheetDataXMLFilePath;
}
}

View File

@ -2,7 +2,6 @@
namespace Box\Spout\Reader\XLSX\Helper;
use Box\Spout\Reader\Wrapper\SimpleXMLElement;
use Box\Spout\Reader\Wrapper\XMLReader;
/**
@ -30,6 +29,25 @@ class StyleHelper
/** By convention, default style ID is 0 */
const DEFAULT_STYLE_ID = 0;
/**
* @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx
* @var array Mapping between built-in numFmtId and the associated format - for dates only
*/
protected static $builtinNumFmtIdToNumFormatMapping = [
14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy'
15 => 'd-mmm-yy',
16 => 'd-mmm',
17 => 'mmm-yy',
18 => 'h:mm AM/PM',
19 => 'h:mm:ss AM/PM',
20 => 'h:mm',
21 => 'h:mm:ss',
22 => 'm/d/yyyy h:mm', // @NOTE: ECMA spec is 'm/d/yy h:mm',
45 => 'mm:ss',
46 => '[h]:mm:ss',
47 => 'mm:ss.0', // @NOTE: ECMA spec is 'mmss.0',
];
/** @var string Path of the XLSX file being read */
protected $filePath;
@ -57,18 +75,15 @@ class StyleHelper
$this->customNumberFormats = [];
$this->stylesAttributes = [];
$stylesXmlFilePath = $this->filePath .'#' . self::STYLES_XML_FILE_PATH;
$xmlReader = new XMLReader();
if ($xmlReader->open('zip://' . $stylesXmlFilePath)) {
if ($xmlReader->openFileInZip($this->filePath, self::STYLES_XML_FILE_PATH)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
$numFmtsNode = new SimpleXMLElement($xmlReader->readOuterXml());
$this->extractNumberFormats($numFmtsNode);
$this->extractNumberFormats($xmlReader);
} else if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) {
$cellXfsNode = new SimpleXMLElement($xmlReader->readOuterXml());
$this->extractStyleAttributes($cellXfsNode);
$this->extractStyleAttributes($xmlReader);
}
}
@ -81,15 +96,20 @@ class StyleHelper
* For simplicity, the styles attributes are kept in memory. This is possible thanks
* to the reuse of formats. So 1 million cells should not use 1 million formats.
*
* @param SimpleXMLElement $numFmtsNode The "numFmts" node
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "numFmts" node
* @return void
*/
protected function extractNumberFormats($numFmtsNode)
protected function extractNumberFormats($xmlReader)
{
foreach ($numFmtsNode->children() as $numFmtNode) {
$numFmtId = intval($numFmtNode->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID));
$formatCode = $numFmtNode->getAttribute(self::XML_ATTRIBUTE_FORMAT_CODE);
$this->customNumberFormats[$numFmtId] = $formatCode;
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMT)) {
$numFmtId = intval($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID));
$formatCode = $xmlReader->getAttribute(self::XML_ATTRIBUTE_FORMAT_CODE);
$this->customNumberFormats[$numFmtId] = $formatCode;
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_NUM_FMTS)) {
// Once done reading "numFmts" node's children
break;
}
}
}
@ -98,16 +118,21 @@ class StyleHelper
* For simplicity, the styles attributes are kept in memory. This is possible thanks
* to the reuse of styles. So 1 million cells should not use 1 million styles.
*
* @param SimpleXMLElement $cellXfsNode The "cellXfs" node
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
* @return void
*/
protected function extractStyleAttributes($cellXfsNode)
protected function extractStyleAttributes($xmlReader)
{
foreach ($cellXfsNode->children() as $xfNode) {
$this->stylesAttributes[] = [
self::XML_ATTRIBUTE_NUM_FMT_ID => intval($xfNode->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID)),
self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => !!($xfNode->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT)),
];
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
$this->stylesAttributes[] = [
self::XML_ATTRIBUTE_NUM_FMT_ID => intval($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID)),
self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => !!($xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT)),
];
} else if ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
// Once done reading "cellXfs" node's children
break;
}
}
}
@ -171,18 +196,30 @@ class StyleHelper
protected function doesNumFmtIdIndicateDate($numFmtId)
{
return (
$this->isNumFmtIdBuiltInDateFormat($numFmtId) ||
$this->isNumFmtIdCustomDateFormat($numFmtId)
!$this->doesNumFmtIdIndicateGeneralFormat($numFmtId) &&
(
$this->isNumFmtIdBuiltInDateFormat($numFmtId) ||
$this->isNumFmtIdCustomDateFormat($numFmtId)
)
);
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates the "General" format (0 by convention)
*/
protected function doesNumFmtIdIndicateGeneralFormat($numFmtId)
{
return ($numFmtId === 0);
}
/**
* @param int $numFmtId
* @return bool Whether the number format ID indicates that the number is a timestamp
*/
protected function isNumFmtIdBuiltInDateFormat($numFmtId)
{
$builtInDateFormatIds = [14, 15, 16, 17, 18, 19, 20, 21, 22, 45, 46, 47];
$builtInDateFormatIds = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
return in_array($numFmtId, $builtInDateFormatIds);
}
@ -223,4 +260,27 @@ class StyleHelper
return $hasFoundDateFormatCharacter;
}
/**
* Returns the format as defined in "styles.xml" of the given style.
* NOTE: It is assumed that the style DOES have a number format associated to it.
*
* @param int $styleId Zero-based style ID
* @return string The number format associated with the given style
*/
public function getNumberFormat($styleId)
{
$stylesAttributes = $this->getStylesAttributes();
$styleAttributes = $stylesAttributes[$styleId];
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) {
$numberFormat = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
} else {
$customNumberFormats = $this->getCustomNumberFormats();
$numberFormat = $customNumberFormats[$numFmtId];
}
return $numberFormat;
}
}

View File

@ -69,7 +69,7 @@ class Reader extends AbstractReader
$this->sharedStringsHelper->extractSharedStrings();
}
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper);
$this->sheetIterator = new SheetIterator($filePath, $this->sharedStringsHelper, $this->globalFunctionsHelper, $this->shouldFormatDates);
} else {
throw new IOException("Could not open $filePath for reading.");
}

View File

@ -55,12 +55,16 @@ class RowIterator implements IteratorInterface
/** @var int The number of columns the sheet has (0 meaning undefined) */
protected $numColumns = 0;
/** @var int Last column index processed (zero-based) */
protected $lastColumnIndexProcessed = -1;
/**
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper)
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates)
{
$this->filePath = $filePath;
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
@ -68,7 +72,7 @@ class RowIterator implements IteratorInterface
$this->xmlReader = new XMLReader();
$this->styleHelper = new StyleHelper($filePath);
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper);
$this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);
}
/**
@ -143,6 +147,9 @@ class RowIterator implements IteratorInterface
} else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
// Start of the row description
// Reset index of the last processed column
$this->lastColumnIndexProcessed = -1;
// Read spans info if present
$numberOfColumnsForRow = $this->numColumns;
$spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
@ -154,12 +161,13 @@ class RowIterator implements IteratorInterface
} else if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
// Start of a cell description
$currentCellIndex = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
$currentColumnIndex = CellHelper::getColumnIndexFromCellIndex($currentCellIndex);
$currentColumnIndex = $this->getCellIndex($this->xmlReader);
$node = $this->xmlReader->expand();
$rowData[$currentColumnIndex] = $this->getCellValue($node);
$this->lastColumnIndexProcessed = $currentColumnIndex;
} else if ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
// End of the row description
// If needed, we fill the empty cells
@ -181,6 +189,21 @@ class RowIterator implements IteratorInterface
$this->rowDataBuffer = $rowData;
}
/**
* @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" tag
* @return int
* @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
*/
protected function getCellIndex($xmlReader)
{
// Get "r" attribute if present (from something like <c r="A1"...>
$currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
return ($currentCellIndex !== null) ?
CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
$this->lastColumnIndexProcessed + 1;
}
/**
* Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
*

View File

@ -25,12 +25,13 @@ class Sheet implements SheetInterface
* @param string $filePath Path of the XLSX file being read
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
* @param Helper\SharedStringsHelper Helper to work with shared strings
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
* @param string $sheetName Name of the sheet
*/
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $sheetIndex, $sheetName)
public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $sheetIndex, $sheetName)
{
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper);
$this->rowIterator = new RowIterator($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates);
$this->index = $sheetIndex;
$this->name = $sheetName;
}

View File

@ -24,12 +24,13 @@ class SheetIterator implements IteratorInterface
* @param string $filePath Path of the file to be read
* @param \Box\Spout\Reader\XLSX\Helper\SharedStringsHelper $sharedStringsHelper
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
*/
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper)
public function __construct($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates)
{
// Fetch all available sheets
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper);
$sheetHelper = new SheetHelper($filePath, $sharedStringsHelper, $globalFunctionsHelper, $shouldFormatDates);
$this->sheets = $sheetHelper->getSheets();
if (count($this->sheets) === 0) {

View File

@ -71,6 +71,21 @@ abstract class AbstractWriter implements WriterInterface
$this->resetRowStyleToDefault();
}
/**
* Sets the default styles for all rows added with "addRow".
* Overriding the default style instead of using "addRowWithStyle" improves performance by 20%.
* @see https://github.com/box/spout/issues/272
*
* @param Style\Style $defaultStyle
* @return AbstractWriter
*/
public function setDefaultRowStyle($defaultStyle)
{
$this->defaultRowStyle = $defaultStyle;
$this->resetRowStyleToDefault();
return $this;
}
/**
* @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
* @return AbstractWriter
@ -121,6 +136,10 @@ abstract class AbstractWriter implements WriterInterface
$this->filePointer = $this->globalFunctionsHelper->fopen('php://output', 'w');
$this->throwIfFilePointerIsNotAvailable();
// Clear any previous output (otherwise the generated file will be corrupted)
// @see https://github.com/box/spout/issues/241
$this->globalFunctionsHelper->ob_end_clean();
// Set headers
$this->globalFunctionsHelper->header('Content-Type: ' . static::$headerContentType);
$this->globalFunctionsHelper->header('Content-Disposition: attachment; filename="' . $this->outputFilePath . '"');
@ -238,7 +257,8 @@ abstract class AbstractWriter implements WriterInterface
public function addRows(array $dataRows)
{
if (!empty($dataRows)) {
if (!is_array($dataRows[0])) {
$firstRow = reset($dataRows);
if (!is_array($firstRow)) {
throw new InvalidArgumentException('The input should be an array of arrays');
}

View File

@ -29,6 +29,9 @@ class Writer extends AbstractWriter
/** @var int */
protected $lastWrittenRowIndex = 0;
/** @var bool */
protected $shouldAddBOM = true;
/**
* Sets the field delimiter for the CSV
*
@ -55,6 +58,18 @@ class Writer extends AbstractWriter
return $this;
}
/**
* Set if a BOM has to be added to the file
*
* @param bool $shouldAddBOM
* @return Writer
*/
public function setShouldAddBOM($shouldAddBOM)
{
$this->shouldAddBOM = (bool) $shouldAddBOM;
return $this;
}
/**
* Opens the CSV streamer and makes it ready to accept data.
*
@ -62,8 +77,10 @@ class Writer extends AbstractWriter
*/
protected function openWriter()
{
// Adds UTF-8 BOM for Unicode compatibility
$this->globalFunctionsHelper->fputs($this->filePointer, EncodingHelper::BOM_UTF8);
if ($this->shouldAddBOM) {
// Adds UTF-8 BOM for Unicode compatibility
$this->globalFunctionsHelper->fputs($this->filePointer, EncodingHelper::BOM_UTF8);
}
}
/**

View File

@ -0,0 +1,16 @@
<?php
namespace Box\Spout\Writer\Exception\Border;
use Box\Spout\Writer\Exception\WriterException;
use Box\Spout\Writer\Style\BorderPart;
class InvalidNameException extends WriterException
{
public function __construct($name)
{
$msg = '%s is not a valid name identifier for a border. Valid identifiers are: %s.';
parent::__construct(sprintf($msg, $name, implode(',', BorderPart::getAllowedNames())));
}
}

View File

@ -0,0 +1,16 @@
<?php
namespace Box\Spout\Writer\Exception\Border;
use Box\Spout\Writer\Exception\WriterException;
use Box\Spout\Writer\Style\BorderPart;
class InvalidStyleException extends WriterException
{
public function __construct($name)
{
$msg = '%s is not a valid style identifier for a border. Valid identifiers are: %s.';
parent::__construct(sprintf($msg, $name, implode(',', BorderPart::getAllowedStyles())));
}
}

View File

@ -0,0 +1,16 @@
<?php
namespace Box\Spout\Writer\Exception\Border;
use Box\Spout\Writer\Exception\WriterException;
use Box\Spout\Writer\Style\BorderPart;
class InvalidWidthException extends WriterException
{
public function __construct($name)
{
$msg = '%s is not a valid width identifier for a border. Valid identifiers are: %s.';
parent::__construct(sprintf($msg, $name, implode(',', BorderPart::getAllowedWidths())));
}
}

View File

@ -0,0 +1,68 @@
<?php
namespace Box\Spout\Writer\ODS\Helper;
use Box\Spout\Writer\Style\BorderPart;
use Box\Spout\Writer\Style\Border;
/**
* Class BorderHelper
*
* The fo:border, fo:border-top, fo:border-bottom, fo:border-left and fo:border-right attributes
* specify border properties
* http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1419780_253892949
*
* Example table-cell-properties
*
* <style:table-cell-properties
* fo:border-bottom="0.74pt solid #ffc000" style:diagonal-bl-tr="none"
* style:diagonal-tl-br="none" fo:border-left="none" fo:border-right="none"
* style:rotation-align="none" fo:border-top="none"/>
*/
class BorderHelper
{
/**
* Width mappings
*
* @var array
*/
protected static $widthMap = [
Border::WIDTH_THIN => '0.75pt',
Border::WIDTH_MEDIUM => '1.75pt',
Border::WIDTH_THICK => '2.5pt',
];
/**
* Style mapping
*
* @var array
*/
protected static $styleMap = [
Border::STYLE_SOLID => 'solid',
Border::STYLE_DASHED => 'dashed',
Border::STYLE_DOTTED => 'dotted',
Border::STYLE_DOUBLE => 'double',
];
/**
* @param BorderPart $borderPart
* @return string
*/
public static function serializeBorderPart(BorderPart $borderPart)
{
$definition = 'fo:border-%s="%s"';
if ($borderPart->getStyle() === Border::STYLE_NONE) {
$borderPartDefinition = sprintf($definition, $borderPart->getName(), 'none');
} else {
$attributes = [
self::$widthMap[$borderPart->getWidth()],
self::$styleMap[$borderPart->getStyle()],
'#' . $borderPart->getColor(),
];
$borderPartDefinition = sprintf($definition, $borderPart->getName(), implode(' ', $attributes));
}
return $borderPartDefinition;
}
}

View File

@ -3,6 +3,7 @@
namespace Box\Spout\Writer\ODS\Helper;
use Box\Spout\Writer\Common\Helper\AbstractStyleHelper;
use Box\Spout\Writer\Style\BorderPart;
/**
* Class StyleHelper
@ -256,9 +257,21 @@ EOD;
$content .= '<style:table-cell-properties fo:wrap-option="wrap" style:vertical-align="automatic"/>';
}
if ($style->shouldApplyBorder()) {
$borderProperty = '<style:table-cell-properties %s />';
$borders = array_map(function (BorderPart $borderPart) {
return BorderHelper::serializeBorderPart($borderPart);
}, $style->getBorder()->getParts());
$content .= sprintf($borderProperty, implode(' ', $borders));
}
if ($style->shouldApplyBackgroundColor()) {
$content .= sprintf('
<style:table-cell-properties fo:background-color="#%s"/>', $style->getBackgroundColor());
}
$content .= '</style:style>';
return $content;
}
}

View File

@ -47,7 +47,7 @@ class Worksheet implements WorksheetInterface
{
$this->externalSheet = $externalSheet;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->stringsEscaper = new \Box\Spout\Common\Escaper\ODS();
$this->stringsEscaper = \Box\Spout\Common\Escaper\ODS::getInstance();
$this->worksheetFilePath = $worksheetFilesFolder . '/sheet' . $externalSheet->getIndex() . '.xml';
$this->stringHelper = new StringHelper();
@ -134,6 +134,10 @@ class Worksheet implements WorksheetInterface
*/
public function addRow($dataRow, $style)
{
// $dataRow can be an associative array. We need to transform
// it into a regular array, as we'll use the numeric indexes.
$dataRowWithNumericIndexes = array_values($dataRow);
$styleIndex = ($style->getId() + 1); // 1-based
$cellsCount = count($dataRow);
$this->maxNumColumns = max($this->maxNumColumns, $cellsCount);
@ -144,12 +148,14 @@ class Worksheet implements WorksheetInterface
$nextCellIndex = 1;
for ($i = 0; $i < $cellsCount; $i++) {
$currentCellValue = $dataRow[$currentCellIndex];
$currentCellValue = $dataRowWithNumericIndexes[$currentCellIndex];
// Using isset here because it is way faster than array_key_exists...
if (!isset($dataRow[$nextCellIndex]) || $currentCellValue !== $dataRow[$nextCellIndex]) {
if (!isset($dataRowWithNumericIndexes[$nextCellIndex]) ||
$currentCellValue !== $dataRowWithNumericIndexes[$nextCellIndex]) {
$numTimesValueRepeated = ($nextCellIndex - $currentCellIndex);
$data .= $this->getCellContent($currentCellValue, $styleIndex, $numTimesValueRepeated);
$data .= $this->getCellXML($currentCellValue, $styleIndex, $numTimesValueRepeated);
$currentCellIndex = $nextCellIndex;
}
@ -177,7 +183,7 @@ class Worksheet implements WorksheetInterface
* @return string The cell XML content
* @throws \Box\Spout\Common\Exception\InvalidArgumentException If a cell value's type is not supported
*/
protected function getCellContent($cellValue, $styleIndex, $numTimesValueRepeated)
protected function getCellXML($cellValue, $styleIndex, $numTimesValueRepeated)
{
$data = '<table:table-cell table:style-name="ce' . $styleIndex . '"';
@ -218,6 +224,10 @@ class Worksheet implements WorksheetInterface
*/
public function close()
{
if (!is_resource($this->sheetFilePointer)) {
return;
}
fclose($this->sheetFilePointer);
}
}

View File

@ -0,0 +1,85 @@
<?php
namespace Box\Spout\Writer\Style;
/**
* Class Border
*/
class Border
{
const LEFT = 'left';
const RIGHT = 'right';
const TOP = 'top';
const BOTTOM = 'bottom';
const STYLE_NONE = 'none';
const STYLE_SOLID = 'solid';
const STYLE_DASHED = 'dashed';
const STYLE_DOTTED = 'dotted';
const STYLE_DOUBLE = 'double';
const WIDTH_THIN = 'thin';
const WIDTH_MEDIUM = 'medium';
const WIDTH_THICK = 'thick';
/**
* @var array A list of BorderPart objects for this border.
*/
protected $parts = [];
/**
* @param array|void $borderParts
*/
public function __construct(array $borderParts = [])
{
$this->setParts($borderParts);
}
/**
* @param $name The name of the border part
* @return null|BorderPart
*/
public function getPart($name)
{
return $this->hasPart($name) ? $this->parts[$name] : null;
}
/**
* @param $name The name of the border part
* @return bool
*/
public function hasPart($name)
{
return isset($this->parts[$name]);
}
/**
* @return array
*/
public function getParts()
{
return $this->parts;
}
/**
* Set BorderParts
* @param array $parts
*/
public function setParts($parts)
{
unset($this->parts);
foreach ($parts as $part) {
$this->addPart($part);
}
}
/**
* @param BorderPart $borderPart
* @return self
*/
public function addPart(BorderPart $borderPart)
{
$this->parts[$borderPart->getName()] = $borderPart;
return $this;
}
}

View File

@ -0,0 +1,75 @@
<?php
namespace Box\Spout\Writer\Style;
/**
* Class BorderBuilder
*/
class BorderBuilder
{
/**
* @var Border
*/
protected $border;
public function __construct()
{
$this->border = new Border();
}
/**
* @param string|void $color Border A RGB color code
* @param string|void $width Border width @see BorderPart::allowedWidths
* @param string|void $style Border style @see BorderPart::allowedStyles
* @return BorderBuilder
*/
public function setBorderTop($color = Color::BLACK, $width = Border::WIDTH_MEDIUM, $style = Border::STYLE_SOLID)
{
$this->border->addPart(new BorderPart(Border::TOP, $color, $width, $style));
return $this;
}
/**
* @param string|void $color Border A RGB color code
* @param string|void $width Border width @see BorderPart::allowedWidths
* @param string|void $style Border style @see BorderPart::allowedStyles
* @return BorderBuilder
*/
public function setBorderRight($color = Color::BLACK, $width = Border::WIDTH_MEDIUM, $style = Border::STYLE_SOLID)
{
$this->border->addPart(new BorderPart(Border::RIGHT, $color, $width, $style));
return $this;
}
/**
* @param string|void $color Border A RGB color code
* @param string|void $width Border width @see BorderPart::allowedWidths
* @param string|void $style Border style @see BorderPart::allowedStyles
* @return BorderBuilder
*/
public function setBorderBottom($color = Color::BLACK, $width = Border::WIDTH_MEDIUM, $style = Border::STYLE_SOLID)
{
$this->border->addPart(new BorderPart(Border::BOTTOM, $color, $width, $style));
return $this;
}
/**
* @param string|void $color Border A RGB color code
* @param string|void $width Border width @see BorderPart::allowedWidths
* @param string|void $style Border style @see BorderPart::allowedStyles
* @return BorderBuilder
*/
public function setBorderLeft($color = Color::BLACK, $width = Border::WIDTH_MEDIUM, $style = Border::STYLE_SOLID)
{
$this->border->addPart(new BorderPart(Border::LEFT, $color, $width, $style));
return $this;
}
/**
* @return Border
*/
public function build()
{
return $this->border;
}
}

View File

@ -0,0 +1,184 @@
<?php
namespace Box\Spout\Writer\Style;
use Box\Spout\Writer\Exception\Border\InvalidNameException;
use Box\Spout\Writer\Exception\Border\InvalidStyleException;
use Box\Spout\Writer\Exception\Border\InvalidWidthException;
/**
* Class BorderPart
*/
class BorderPart
{
/**
* @var string The style of this border part.
*/
protected $style;
/**
* @var string The name of this border part.
*/
protected $name;
/**
* @var string The color of this border part.
*/
protected $color;
/**
* @var string The width of this border part.
*/
protected $width;
/**
* @var array Allowed style constants for parts.
*/
protected static $allowedStyles = [
'none',
'solid',
'dashed',
'dotted',
'double'
];
/**
* @var array Allowed names constants for border parts.
*/
protected static $allowedNames = [
'left',
'right',
'top',
'bottom',
];
/**
* @var array Allowed width constants for border parts.
*/
protected static $allowedWidths = [
'thin',
'medium',
'thick',
];
/**
* @param string $name @see BorderPart::$allowedNames
* @param string $color A RGB color code
* @param string $width @see BorderPart::$allowedWidths
* @param string $style @see BorderPart::$allowedStyles
* @throws InvalidNameException
* @throws InvalidStyleException
* @throws InvalidWidthException
*/
public function __construct($name, $color = Color::BLACK, $width = Border::WIDTH_MEDIUM, $style = Border::STYLE_SOLID)
{
$this->setName($name);
$this->setColor($color);
$this->setWidth($width);
$this->setStyle($style);
}
/**
* @return string
*/
public function getName()
{
return $this->name;
}
/**
* @param string $name The name of the border part @see BorderPart::$allowedNames
* @throws InvalidNameException
* @return void
*/
public function setName($name)
{
if (!in_array($name, self::$allowedNames)) {
throw new InvalidNameException($name);
}
$this->name = $name;
}
/**
* @return string
*/
public function getStyle()
{
return $this->style;
}
/**
* @param string $style The style of the border part @see BorderPart::$allowedStyles
* @throws InvalidStyleException
* @return void
*/
public function setStyle($style)
{
if (!in_array($style, self::$allowedStyles)) {
throw new InvalidStyleException($style);
}
$this->style = $style;
}
/**
* @return string
*/
public function getColor()
{
return $this->color;
}
/**
* @param string $color The color of the border part @see Color::rgb()
* @return void
*/
public function setColor($color)
{
$this->color = $color;
}
/**
* @return string
*/
public function getWidth()
{
return $this->width;
}
/**
* @param string $width The width of the border part @see BorderPart::$allowedWidths
* @throws InvalidWidthException
* @return void
*/
public function setWidth($width)
{
if (!in_array($width, self::$allowedWidths)) {
throw new InvalidWidthException($width);
}
$this->width = $width;
}
/**
* @return array
*/
public static function getAllowedStyles()
{
return self::$allowedStyles;
}
/**
* @return array
*/
public static function getAllowedNames()
{
return self::$allowedNames;
}
/**
* @return array
*/
public static function getAllowedWidths()
{
return self::$allowedWidths;
}
}

View File

@ -61,6 +61,23 @@ class Style
/** @var bool Whether the wrap text property was set */
protected $hasSetWrapText = false;
/**
* @var Border
*/
protected $border = null;
/**
* @var bool Whether border properties should be applied
*/
protected $shouldApplyBorder = false;
/** @var string Background color */
protected $backgroundColor = null;
/** @var bool */
protected $hasSetBackgroundColor = false;
/**
* @return int|null
*/
@ -79,6 +96,32 @@ class Style
return $this;
}
/**
* @return Border
*/
public function getBorder()
{
return $this->border;
}
/**
* @param Border $border
*/
public function setBorder(Border $border)
{
$this->shouldApplyBorder = true;
$this->border = $border;
return $this;
}
/**
* @return boolean
*/
public function shouldApplyBorder()
{
return $this->shouldApplyBorder;
}
/**
* @return boolean
*/
@ -243,6 +286,35 @@ class Style
return $this->shouldApplyFont;
}
/**
* Sets the background color
* @param string $color ARGB color (@see Color)
* @return Style
*/
public function setBackgroundColor($color)
{
$this->hasSetBackgroundColor = true;
$this->backgroundColor = $color;
return $this;
}
/**
* @return string
*/
public function getBackgroundColor()
{
return $this->backgroundColor;
}
/**
*
* @return bool Whether the background color should be applied
*/
public function shouldApplyBackgroundColor()
{
return $this->hasSetBackgroundColor;
}
/**
* Serializes the style for future comparison with other styles.
* The ID is excluded from the comparison, as we only care about
@ -302,6 +374,12 @@ class Style
if (!$this->hasSetWrapText && $baseStyle->shouldWrapText()) {
$mergedStyle->setShouldWrapText();
}
if (!$this->getBorder() && $baseStyle->shouldApplyBorder()) {
$mergedStyle->setBorder($baseStyle->getBorder());
}
if (!$this->hasSetBackgroundColor && $baseStyle->shouldApplyBackgroundColor()) {
$mergedStyle->setBackgroundColor($baseStyle->getBackgroundColor());
}
return $mergedStyle;
}

View File

@ -121,6 +121,31 @@ class StyleBuilder
return $this;
}
/**
* Set a border
*
* @param Border $border
* @return $this
*/
public function setBorder(Border $border)
{
$this->style->setBorder($border);
return $this;
}
/**
* Sets a background color
*
* @api
* @param string $color ARGB color (@see Color)
* @return StyleBuilder
*/
public function setBackgroundColor($color)
{
$this->style->setBackgroundColor($color);
return $this;
}
/**
* Returns the configured style. The style is cached and can be reused.
*

View File

@ -0,0 +1,68 @@
<?php
namespace Box\Spout\Writer\XLSX\Helper;
use Box\Spout\Writer\Style\Border;
use Box\Spout\Writer\Style\BorderPart;
class BorderHelper
{
public static $xlsxStyleMap = [
Border::STYLE_SOLID => [
Border::WIDTH_THIN => 'thin',
Border::WIDTH_MEDIUM => 'medium',
Border::WIDTH_THICK => 'thick'
],
Border::STYLE_DOTTED => [
Border::WIDTH_THIN => 'dotted',
Border::WIDTH_MEDIUM => 'dotted',
Border::WIDTH_THICK => 'dotted',
],
Border::STYLE_DASHED => [
Border::WIDTH_THIN => 'dashed',
Border::WIDTH_MEDIUM => 'mediumDashed',
Border::WIDTH_THICK => 'mediumDashed',
],
Border::STYLE_DOUBLE => [
Border::WIDTH_THIN => 'double',
Border::WIDTH_MEDIUM => 'double',
Border::WIDTH_THICK => 'double',
],
Border::STYLE_NONE => [
Border::WIDTH_THIN => 'none',
Border::WIDTH_MEDIUM => 'none',
Border::WIDTH_THICK => 'none',
],
];
/**
* @param BorderPart $borderPart
* @return string
*/
public static function serializeBorderPart(BorderPart $borderPart)
{
$borderStyle = self::getBorderStyle($borderPart);
$colorEl = $borderPart->getColor() ? sprintf('<color rgb="%s"/>', $borderPart->getColor()) : '';
$partEl = sprintf(
'<%s style="%s">%s</%s>',
$borderPart->getName(),
$borderStyle,
$colorEl,
$borderPart->getName()
);
return $partEl . PHP_EOL;
}
/**
* Get the style definition from the style map
*
* @param BorderPart $borderPart
* @return string
*/
protected static function getBorderStyle(BorderPart $borderPart)
{
return self::$xlsxStyleMap[$borderPart->getStyle()][$borderPart->getWidth()];
}
}

View File

@ -284,7 +284,7 @@ EOD;
EOD;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$escaper = new \Box\Spout\Common\Escaper\XLSX();
$escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
/** @var Worksheet $worksheet */
foreach ($worksheets as $worksheet) {

View File

@ -49,7 +49,7 @@ EOD;
fwrite($this->sharedStringsFilePointer, $header);
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->stringsEscaper = new \Box\Spout\Common\Escaper\XLSX();
$this->stringsEscaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
}
/**
@ -88,6 +88,10 @@ EOD;
*/
public function close()
{
if (!is_resource($this->sharedStringsFilePointer)) {
return;
}
fwrite($this->sharedStringsFilePointer, '</sst>');
// Replace the default strings count with the actual number of shared strings in the file header

View File

@ -4,6 +4,7 @@ namespace Box\Spout\Writer\XLSX\Helper;
use Box\Spout\Writer\Common\Helper\AbstractStyleHelper;
use Box\Spout\Writer\Style\Color;
use Box\Spout\Writer\Style\Style;
/**
* Class StyleHelper
@ -13,6 +14,131 @@ use Box\Spout\Writer\Style\Color;
*/
class StyleHelper extends AbstractStyleHelper
{
/**
* @var array
*/
protected $registeredFills = [];
/**
* @var array [STYLE_ID] => [FILL_ID] maps a style to a fill declaration
*/
protected $styleIdToFillMappingTable = [];
/**
* Excel preserves two default fills with index 0 and 1
* Since Excel is the dominant vendor - we play along here
*
* @var int The fill index counter for custom fills.
*/
protected $fillIndex = 2;
/**
* @var array
*/
protected $registeredBorders = [];
/**
* @var array [STYLE_ID] => [BORDER_ID] maps a style to a border declaration
*/
protected $styleIdToBorderMappingTable = [];
/**
* XLSX specific operations on the registered styles
*
* @param \Box\Spout\Writer\Style\Style $style
* @return \Box\Spout\Writer\Style\Style
*/
public function registerStyle($style)
{
$registeredStyle = parent::registerStyle($style);
$this->registerFill($registeredStyle);
$this->registerBorder($registeredStyle);
return $registeredStyle;
}
/**
* Register a fill definition
*
* @param \Box\Spout\Writer\Style\Style $style
*/
protected function registerFill($style)
{
$styleId = $style->getId();
// Currently - only solid backgrounds are supported
// so $backgroundColor is a scalar value (RGB Color)
$backgroundColor = $style->getBackgroundColor();
if ($backgroundColor) {
$isBackgroundColorRegistered = isset($this->registeredFills[$backgroundColor]);
// We need to track the already registered background definitions
if ($isBackgroundColorRegistered) {
$registeredStyleId = $this->registeredFills[$backgroundColor];
$registeredFillId = $this->styleIdToFillMappingTable[$registeredStyleId];
$this->styleIdToFillMappingTable[$styleId] = $registeredFillId;
} else {
$this->registeredFills[$backgroundColor] = $styleId;
$this->styleIdToFillMappingTable[$styleId] = $this->fillIndex++;
}
} else {
// The fillId maps a style to a fill declaration
// When there is no background color definition - we default to 0
$this->styleIdToFillMappingTable[$styleId] = 0;
}
}
/**
* Register a border definition
*
* @param \Box\Spout\Writer\Style\Style $style
*/
protected function registerBorder($style)
{
$styleId = $style->getId();
if ($style->shouldApplyBorder()) {
$border = $style->getBorder();
$serializedBorder = serialize($border);
$isBorderAlreadyRegistered = isset($this->registeredBorders[$serializedBorder]);
if ($isBorderAlreadyRegistered) {
$registeredStyleId = $this->registeredBorders[$serializedBorder];
$registeredBorderId = $this->styleIdToBorderMappingTable[$registeredStyleId];
$this->styleIdToBorderMappingTable[$styleId] = $registeredBorderId;
} else {
$this->registeredBorders[$serializedBorder] = $styleId;
$this->styleIdToBorderMappingTable[$styleId] = count($this->registeredBorders);
}
} else {
// If no border should be applied - the mapping is the default border: 0
$this->styleIdToBorderMappingTable[$styleId] = 0;
}
}
/**
* For empty cells, we can specify a style or not. If no style are specified,
* then the software default will be applied. But sometimes, it may be useful
* to override this default style, for instance if the cell should have a
* background color different than the default one or some borders
* (fonts property don't really matter here).
*
* @param int $styleId
* @return bool Whether the cell should define a custom style
*/
public function shouldApplyStyleOnEmptyCell($styleId)
{
$hasStyleCustomFill = (isset($this->styleIdToFillMappingTable[$styleId]) && $this->styleIdToFillMappingTable[$styleId] !== 0);
$hasStyleCustomBorders = (isset($this->styleIdToBorderMappingTable[$styleId]) && $this->styleIdToBorderMappingTable[$styleId] !== 0);
return ($hasStyleCustomFill || $hasStyleCustomBorders);
}
/**
* Returns the content of the "styles.xml" file, given a list of styles.
*
@ -84,13 +210,28 @@ EOD;
*/
protected function getFillsSectionContent()
{
return <<<EOD
<fills count="1">
<fill>
<patternFill patternType="none"/>
</fill>
</fills>
EOD;
// Excel reserves two default fills
$fillsCount = count($this->registeredFills) + 2;
$content = sprintf('<fills count="%d">', $fillsCount);
$content .= '<fill><patternFill patternType="none"/></fill>';
$content .= '<fill><patternFill patternType="gray125"/></fill>';
// The other fills are actually registered by setting a background color
foreach ($this->registeredFills as $styleId) {
/** @var Style $style */
$style = $this->styleIdToStyleMappingTable[$styleId];
$backgroundColor = $style->getBackgroundColor();
$content .= sprintf(
'<fill><patternFill patternType="solid"><fgColor rgb="%s"/></patternFill></fill>',
$backgroundColor
);
}
$content .= '</fills>';
return $content;
}
/**
@ -100,17 +241,39 @@ EOD;
*/
protected function getBordersSectionContent()
{
return <<<EOD
<borders count="1">
<border>
<left/>
<right/>
<top/>
<bottom/>
<diagonal/>
</border>
</borders>
EOD;
// There is one default border with index 0
$borderCount = count($this->registeredBorders) + 1;
$content = '<borders count="' . $borderCount . '">';
// Default border starting at index 0
$content .= '<border><left/><right/><top/><bottom/></border>';
foreach ($this->registeredBorders as $styleId) {
/** @var \Box\Spout\Writer\Style\Style $style */
$style = $this->styleIdToStyleMappingTable[$styleId];
$border = $style->getBorder();
$content .= '<border>';
// @link https://github.com/box/spout/issues/271
$sortOrder = ['left', 'right', 'top', 'bottom'];
foreach ($sortOrder as $partName) {
if ($border->hasPart($partName)) {
/** @var $part \Box\Spout\Writer\Style\BorderPart */
$part = $border->getPart($partName);
$content .= BorderHelper::serializeBorderPart($part);
}
}
$content .= '</border>';
}
$content .= '</borders>';
return $content;
}
/**
@ -139,12 +302,18 @@ EOD;
$content = '<cellXfs count="' . count($registeredStyles) . '">';
foreach ($registeredStyles as $style) {
$content .= '<xf numFmtId="0" fontId="' . $style->getId() . '" fillId="0" borderId="0" xfId="0"';
$styleId = $style->getId();
$fillId = $this->styleIdToFillMappingTable[$styleId];
$borderId = $this->styleIdToBorderMappingTable[$styleId];
$content .= '<xf numFmtId="0" fontId="' . $styleId . '" fillId="' . $fillId . '" borderId="' . $borderId . '" xfId="0"';
if ($style->shouldApplyFont()) {
$content .= ' applyFont="1"';
}
$content .= sprintf(' applyBorder="%d"', $style->shouldApplyBorder() ? 1 : 0);
if ($style->shouldWrapText()) {
$content .= ' applyAlignment="1">';
$content .= '<alignment wrapText="1"/>';

View File

@ -86,7 +86,7 @@ class Workbook extends AbstractWorkbook
$sheet = new Sheet($newSheetIndex);
$worksheetFilesFolder = $this->fileSystemHelper->getXlWorksheetsFolder();
$worksheet = new Worksheet($sheet, $worksheetFilesFolder, $this->sharedStringsHelper, $this->shouldUseInlineStrings);
$worksheet = new Worksheet($sheet, $worksheetFilesFolder, $this->sharedStringsHelper, $this->styleHelper, $this->shouldUseInlineStrings);
$this->worksheets[] = $worksheet;
return $worksheet;

View File

@ -30,6 +30,9 @@ EOD;
/** @var \Box\Spout\Writer\XLSX\Helper\SharedStringsHelper Helper to write shared strings */
protected $sharedStringsHelper;
/** @var \Box\Spout\Writer\XLSX\Helper\StyleHelper Helper to work with styles */
protected $styleHelper;
/** @var bool Whether inline or shared strings should be used */
protected $shouldUseInlineStrings;
@ -46,17 +49,19 @@ EOD;
* @param \Box\Spout\Writer\Common\Sheet $externalSheet The associated "external" sheet
* @param string $worksheetFilesFolder Temporary folder where the files to create the XLSX will be stored
* @param \Box\Spout\Writer\XLSX\Helper\SharedStringsHelper $sharedStringsHelper Helper for shared strings
* @param \Box\Spout\Writer\XLSX\Helper\StyleHelper Helper to work with styles
* @param bool $shouldUseInlineStrings Whether inline or shared strings should be used
* @throws \Box\Spout\Common\Exception\IOException If the sheet data file cannot be opened for writing
*/
public function __construct($externalSheet, $worksheetFilesFolder, $sharedStringsHelper, $shouldUseInlineStrings)
public function __construct($externalSheet, $worksheetFilesFolder, $sharedStringsHelper, $styleHelper, $shouldUseInlineStrings)
{
$this->externalSheet = $externalSheet;
$this->sharedStringsHelper = $sharedStringsHelper;
$this->styleHelper = $styleHelper;
$this->shouldUseInlineStrings = $shouldUseInlineStrings;
/** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
$this->stringsEscaper = new \Box\Spout\Common\Escaper\XLSX();
$this->stringsEscaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
$this->worksheetFilePath = $worksheetFilesFolder . '/' . strtolower($this->externalSheet->getName()) . '.xml';
$this->startSheet();
@ -134,29 +139,7 @@ EOD;
$rowXML = '<row r="' . $rowIndex . '" spans="1:' . $numCells . '">';
foreach($dataRow as $cellValue) {
$columnIndex = CellHelper::getCellIndexFromColumnIndex($cellNumber);
$cellXML = '<c r="' . $columnIndex . $rowIndex . '"';
$cellXML .= ' s="' . $style->getId() . '"';
if (CellHelper::isNonEmptyString($cellValue)) {
if ($this->shouldUseInlineStrings) {
$cellXML .= ' t="inlineStr"><is><t>' . $this->stringsEscaper->escape($cellValue) . '</t></is></c>';
} else {
$sharedStringId = $this->sharedStringsHelper->writeString($cellValue);
$cellXML .= ' t="s"><v>' . $sharedStringId . '</v></c>';
}
} else if (CellHelper::isBoolean($cellValue)) {
$cellXML .= ' t="b"><v>' . intval($cellValue) . '</v></c>';
} else if (CellHelper::isNumeric($cellValue)) {
$cellXML .= '><v>' . $cellValue . '</v></c>';
} else if (empty($cellValue)) {
// don't write empty cells (not appending to $cellXML is the right behavior!)
$cellXML = '';
} else {
throw new InvalidArgumentException('Trying to add a value with an unsupported type: ' . gettype($cellValue));
}
$rowXML .= $cellXML;
$rowXML .= $this->getCellXML($rowIndex, $cellNumber, $cellValue, $style->getId());
$cellNumber++;
}
@ -171,6 +154,48 @@ EOD;
$this->lastWrittenRowIndex++;
}
/**
* Build and return xml for a single cell.
*
* @param int $rowIndex
* @param int $cellNumber
* @param mixed $cellValue
* @param int $styleId
* @return string
* @throws InvalidArgumentException
*/
private function getCellXML($rowIndex, $cellNumber, $cellValue, $styleId)
{
$columnIndex = CellHelper::getCellIndexFromColumnIndex($cellNumber);
$cellXML = '<c r="' . $columnIndex . $rowIndex . '"';
$cellXML .= ' s="' . $styleId . '"';
if (CellHelper::isNonEmptyString($cellValue)) {
if ($this->shouldUseInlineStrings) {
$cellXML .= ' t="inlineStr"><is><t>' . $this->stringsEscaper->escape($cellValue) . '</t></is></c>';
} else {
$sharedStringId = $this->sharedStringsHelper->writeString($cellValue);
$cellXML .= ' t="s"><v>' . $sharedStringId . '</v></c>';
}
} else if (CellHelper::isBoolean($cellValue)) {
$cellXML .= ' t="b"><v>' . intval($cellValue) . '</v></c>';
} else if (CellHelper::isNumeric($cellValue)) {
$cellXML .= '><v>' . $cellValue . '</v></c>';
} else if (empty($cellValue)) {
if ($this->styleHelper->shouldApplyStyleOnEmptyCell($styleId)) {
$cellXML .= '/>';
} else {
// don't write empty cells that do no need styling
// NOTE: not appending to $cellXML is the right behavior!!
$cellXML = '';
}
} else {
throw new InvalidArgumentException('Trying to add a value with an unsupported type: ' . gettype($cellValue));
}
return $cellXML;
}
/**
* Closes the worksheet
*
@ -178,6 +203,10 @@ EOD;
*/
public function close()
{
if (!is_resource($this->sheetFilePointer)) {
return;
}
fwrite($this->sheetFilePointer, '</sheetData>');
fwrite($this->sheetFilePointer, '</worksheet>');
fclose($this->sheetFilePointer);