diff --git a/TODO b/TODO index b25cf9fc..dfe865fe 100644 --- a/TODO +++ b/TODO @@ -17,6 +17,8 @@ afraid to cast your vote for the next feature to be implemented! - Maintain old attribute data in tokens (configurable?) - Allow URIFilters to run early? - Lazy update of token when validating attributes? +- Investigate how early internal structures can be accessed; this would + prevent structures from being parsed and serialized multiple times. FUTURE VERSIONS --------------- diff --git a/library/HTMLPurifier/AttrDef/CSS/Number.php b/library/HTMLPurifier/AttrDef/CSS/Number.php index 3d4028ee..15153e4b 100644 --- a/library/HTMLPurifier/AttrDef/CSS/Number.php +++ b/library/HTMLPurifier/AttrDef/CSS/Number.php @@ -18,6 +18,10 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef $this->non_negative = $non_negative; } + /** + * @warning Some contexts do not pass $config, $context. These + * variables should not be used without checking HTMLPurifier_Length + */ public function validate($number, $config, $context) { $number = $this->parseCDATA($number); diff --git a/library/HTMLPurifier/Length.php b/library/HTMLPurifier/Length.php index 815d9f02..d3028f56 100644 --- a/library/HTMLPurifier/Length.php +++ b/library/HTMLPurifier/Length.php @@ -2,7 +2,7 @@ /** * Represents a measurable length, with a string numeric magnitude - * and a unit. + * and a unit. This object is immutable. */ class HTMLPurifier_Length { @@ -10,12 +10,17 @@ class HTMLPurifier_Length /** * String numeric magnitude. */ - public $n; + protected $n; /** * String unit. False is permitted if $n = 0. */ - public $unit; + protected $unit; + + /** + * Whether or not this length is valid. Null if not calculated yet. + */ + protected $isValid; /** * Lookup array of units recognized by CSS 2.1 @@ -30,8 +35,8 @@ class HTMLPurifier_Length * @param string $u Unit */ public function __construct($n = '0', $u = false) { - $this->n = $n; - $this->unit = $u; + $this->n = (string) $n; + $this->unit = $u !== false ? (string) $u : false; } /** @@ -51,13 +56,15 @@ class HTMLPurifier_Length * @param bool $non_negative Whether or not to disable negative values. * @note Maybe should be put in another class. */ - public function validate($non_negative = false, $config, $context) { + protected function validate($non_negative = false) { // Special case: + if ($this->n === '+0' || $this->n === '-0') $this->n = '0'; if ($this->n === '0' && $this->unit === false) return true; if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit); if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) return false; + // Hack: $def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); - $result = $def->validate($this->n, $config, $context); + $result = $def->validate($this->n, false, false); if ($result === false) return false; $this->n = $result; return true; @@ -67,7 +74,26 @@ class HTMLPurifier_Length * Returns string representation of number. */ public function toString() { + if (!$this->isValid()) return false; return $this->n . $this->unit; } + /** + * Retrieves string numeric magnitude. + */ + public function getN() {return $this->n;} + + /** + * Retrieves string unit. + */ + public function getUnit() {return $this->unit;} + + /** + * Returns true if this length unit is valid. + */ + public function isValid($non_negative = false) { + if ($this->isValid === null) $this->isValid = $this->validate($non_negative); + return $this->isValid; + } + } diff --git a/library/HTMLPurifier/UnitConverter.php b/library/HTMLPurifier/UnitConverter.php index f4d29037..3482021c 100644 --- a/library/HTMLPurifier/UnitConverter.php +++ b/library/HTMLPurifier/UnitConverter.php @@ -50,6 +50,11 @@ class HTMLPurifier_UnitConverter /** * Converts a length object of one unit into another unit. + * @param HTMLPurifier_Length $length + * Instance of HTMLPurifier_Length to convert. You must validate() + * it before passing it here! + * @param string $to_unit + * Unit to convert to. * @note * About precision: This conversion function pays very special * attention to the incoming precision of values and attempts @@ -60,39 +65,35 @@ class HTMLPurifier_UnitConverter * - If a number contains less than four sigfigs ($outputPrecision) * and this causes some decimals to be excluded, those * decimals will be added on. - * - Significant digits will be ignored for quantities greater - * than one. This is a limitation of BCMath and I don't - * feel like coding around it. */ public function convert($length, $to_unit) { - if ($length->n === '0' || $length->unit === false) { - return new HTMLPurifier_Length('0', $unit); + + if (!$length->isValid()) return false; + + $n = $length->getN(); + $unit = $length->getUnit(); + + if ($n === '0' || $unit === false) { + return new HTMLPurifier_Length('0', false); } - $state = $dest = false; + $state = $dest_state = false; foreach (self::$units as $k => $x) { - if (isset($x[$length->unit])) $state = $k; + if (isset($x[$unit])) $state = $k; if (isset($x[$to_unit])) $dest_state = $k; } if (!$state || !$dest_state) return false; - $n = $length->n; - $unit = $length->unit; - // Some calculations about the initial precision of the number; // this will be useful when we need to do final rounding. - $log = (int) floor(log($n, 10)); - if (strpos($n, '.') === false) { - $sigfigs = strlen(trim($n, '0+-')); - } else { - $sigfigs = strlen(ltrim($n, '0+-')) - 1; // eliminate extra decimal character - } + $sigfigs = $this->getSigFigs($n); if ($sigfigs < $this->outputPrecision) $sigfigs = $this->outputPrecision; // BCMath's internal precision deals only with decimals. Use // our default if the initial number has no decimals, or increase // it by how ever many decimals, thus, the number of guard digits // will always be greater than or equal to internalPrecision. + $log = (int) floor(log(abs($n), 10)); $cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision for ($i = 0; $i < 2; $i++) { @@ -113,7 +114,7 @@ class HTMLPurifier_UnitConverter $unit = $dest_unit; } - // Output was zero, so bail out early + // Output was zero, so bail out early. Shouldn't ever happen. if ($n === '') { $n = '0'; $unit = $to_unit; @@ -148,17 +149,21 @@ class HTMLPurifier_UnitConverter // Calculate how many decimals we need ($rp) // Calculations will always be carried to the decimal; this is // a limitation with BC (we can't set the scale to be negative) - $new_log = (int) floor(log($n, 10)); + $new_log = (int) floor(log(abs($n), 10)); $rp = $sigfigs - $new_log - 1; - //echo "----\n"; - //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n"; + $neg = $n < 0 ? '-' : ''; + + // Useful for debugging: + //echo "
n"; + //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n\n"; + if ($rp >= 0) { - $n = bcadd($n, '0.' . str_repeat('0', $rp) . '5', $rp + 1); + $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1); $n = bcdiv($n, '1', $rp); } else { if ($new_log + 1 >= $sigfigs) { - $n = bcadd($n, '5' . str_repeat('0', $new_log - $sigfigs)); - $n = substr($n, 0, $sigfigs) . str_repeat('0', $new_log + 1 - $sigfigs); + $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs)); + $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log + 1 - $sigfigs); } } if (strpos($n, '.') !== false) $n = rtrim($n, '0'); @@ -167,4 +172,21 @@ class HTMLPurifier_UnitConverter return new HTMLPurifier_Length($n, $unit); } + /** + * Returns the number of significant figures in a string number. + * @param string $n Decimal number + * @return int number of sigfigs + */ + public function getSigFigs($n) { + $n = ltrim($n, '0+-'); + $dp = strpos($n, '.'); // decimal position + if ($dp === false) { + $sigfigs = strlen(rtrim($n, '0')); + } else { + $sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character + if ($dp !== 0) $sigfigs--; + } + return $sigfigs; + } + } diff --git a/tests/HTMLPurifier/LengthTest.php b/tests/HTMLPurifier/LengthTest.php index eaa993cd..3a07a14b 100644 --- a/tests/HTMLPurifier/LengthTest.php +++ b/tests/HTMLPurifier/LengthTest.php @@ -5,14 +5,14 @@ class HTMLPurifier_LengthTest extends HTMLPurifier_Harness function testConstruct() { $l = new HTMLPurifier_Length('23', 'in'); - $this->assertIdentical($l->n, '23'); - $this->assertIdentical($l->unit, 'in'); + $this->assertIdentical($l->getN(), '23'); + $this->assertIdentical($l->getUnit(), 'in'); } function testMake() { $l = HTMLPurifier_Length::make('+23.4in'); - $this->assertIdentical($l->n, '+23.4'); - $this->assertIdentical($l->unit, 'in'); + $this->assertIdentical($l->getN(), '+23.4'); + $this->assertIdentical($l->getUnit(), 'in'); } function testToString() { @@ -23,13 +23,15 @@ class HTMLPurifier_LengthTest extends HTMLPurifier_Harness protected function assertValidate($string, $expect = true, $disable_negative = false) { if ($expect === true) $expect = $string; $l = HTMLPurifier_Length::make($string); - $result = $l->validate($disable_negative, $this->config, $this->context); + $result = $l->isValid($disable_negative); if ($result === false) $this->assertIdentical($expect, false); else $this->assertIdentical($l->toString(), $expect); } function testValidate() { $this->assertValidate('0'); + $this->assertValidate('+0', '0'); + $this->assertValidate('-0', '0'); $this->assertValidate('0px'); $this->assertValidate('4.5px'); $this->assertValidate('-4.5px'); diff --git a/tests/HTMLPurifier/UnitConverterTest.php b/tests/HTMLPurifier/UnitConverterTest.php index 80ba70dc..1a8b8700 100644 --- a/tests/HTMLPurifier/UnitConverterTest.php +++ b/tests/HTMLPurifier/UnitConverterTest.php @@ -3,12 +3,35 @@ class HTMLPurifier_UnitConverterTest extends HTMLPurifier_Harness { - protected function assertConversion($input, $expect) { - $input = HTMLPurifier_Length::make($input); - $expect = HTMLPurifier_Length::make($expect); + protected function assertConversion($input, $expect, $unit = null, $test_negative = true) { + $length = HTMLPurifier_Length::make($input); + if ($expect !== false) $expectl = HTMLPurifier_Length::make($expect); + else $expectl = false; $converter = new HTMLPurifier_UnitConverter(); - $result = $converter->convert($input, $expect->unit); - $this->assertIdentical($result, $expect); + $result = $converter->convert($length, $unit !== null ? $unit : $expectl->getUnit()); + $this->assertIdentical($result, $expectl); + if ($test_negative) { + $this->assertConversion( + "-$input", + $expect === false ? false : "-$expect", + $unit, + false + ); + } + } + + function testFail() { + $this->assertConversion('1in', false, 'foo'); + $this->assertConversion('1foo', false, 'in'); + } + + function testZero() { + $this->assertConversion('0', '0', 'in', false); + $this->assertConversion('-0', '0', 'in', false); + $this->assertConversion('0in', '0', 'in', false); + $this->assertConversion('-0in', '0', 'in', false); + $this->assertConversion('0in', '0', 'pt', false); + $this->assertConversion('-0in', '0', 'pt', false); } function testEnglish() { @@ -39,7 +62,8 @@ class HTMLPurifier_UnitConverterTest extends HTMLPurifier_Harness $this->assertConversion('0.3937in', '1cm'); } - function testRounding() { + function testRoundingMinPrecision() { + // One sig-fig, modified to be four, conversion rounds up $this->assertConversion('100pt', '1.389in'); $this->assertConversion('1000pt', '13.89in'); $this->assertConversion('10000pt', '138.9in'); @@ -47,4 +71,43 @@ class HTMLPurifier_UnitConverterTest extends HTMLPurifier_Harness $this->assertConversion('1000000pt', '13890in'); } + function testRoundingUserPrecision() { + // Five sig-figs, conversion rounds down + $this->assertConversion('11112000pt', '154330in'); + $this->assertConversion('1111200pt', '15433in'); + $this->assertConversion('111120pt', '1543.3in'); + $this->assertConversion('11112pt', '154.33in'); + $this->assertConversion('1111.2pt', '15.433in'); + $this->assertConversion('111.12pt', '1.5433in'); + $this->assertConversion('11.112pt', '0.15433in'); + } + + protected function assertSigFig($n, $sigfigs) { + $converter = new HTMLPurifier_UnitConverter(); + $result = $converter->getSigFigs($n); + $this->assertIdentical($result, $sigfigs); + } + + function test_getSigFigs() { + $this->assertSigFig('0', 0); + $this->assertSigFig('1', 1); + $this->assertSigFig('-1', 1); + $this->assertSigFig('+1', 1); + $this->assertSigFig('01', 1); + $this->assertSigFig('001', 1); + $this->assertSigFig('12', 2); + $this->assertSigFig('012', 2); + $this->assertSigFig('10', 1); + $this->assertSigFig('10.', 2); + $this->assertSigFig('100.', 3); + $this->assertSigFig('103', 3); + $this->assertSigFig('130', 2); + $this->assertSigFig('.1', 1); + $this->assertSigFig('0.1', 1); + $this->assertSigFig('00.1', 1); + $this->assertSigFig('0.01', 1); + $this->assertSigFig('0.010', 2); + $this->assertSigFig('0.012', 2); + } + }