From 245b5bdb27245cf92d43f2b62c5b2c4429cb7913 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Wed, 11 Jun 2008 01:21:36 +0000 Subject: [PATCH] Merged r1746: Length and UnitConverter implementation. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1783 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 4 +- library/HTMLPurifier/Length.php | 69 ++++++++++ library/HTMLPurifier/UnitConverter.php | 168 +++++++++++++++++++++++ tests/HTMLPurifier/LengthTest.php | 51 +++++++ tests/HTMLPurifier/UnitConverterTest.php | 44 ++++++ tests/test_files.php | 2 + 6 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 library/HTMLPurifier/Length.php create mode 100644 library/HTMLPurifier/UnitConverter.php create mode 100644 tests/HTMLPurifier/LengthTest.php create mode 100644 tests/HTMLPurifier/UnitConverterTest.php diff --git a/NEWS b/NEWS index e9d6a2df..846d37f1 100644 --- a/NEWS +++ b/NEWS @@ -15,7 +15,9 @@ ERRATA HTML Purifier. 2.1.5, unknown release date - +. Added HTMLPurifier_UnitConverter and HTMLPurifier_Length for convenient + handling of CSS-style lengths. HTMLPurifier_AttrDef_CSS_Length now uses + this class. 2.1.4, released 2008-05-18 ! DefinitionCacheFactory now can register new implementations diff --git a/library/HTMLPurifier/Length.php b/library/HTMLPurifier/Length.php new file mode 100644 index 00000000..65d77cd8 --- /dev/null +++ b/library/HTMLPurifier/Length.php @@ -0,0 +1,69 @@ +n = $n; + $this->unit = $u; + } + + /** + * @param string $s Unit string, like '2em' or '3.4in' + * @warning Does not perform validation. + */ + function make($s) { + $n_length = strspn($s, '1234567890.+-'); + $n = substr($s, 0, $n_length); + $unit = substr($s, $n_length); + if ($unit === '') $unit = false; + return new HTMLPurifier_Length($n, $unit); + } + + /** + * Validates the number and unit. + * @param bool $non_negative Whether or not to disable negative values. + * @note Maybe should be put in another class. 
+ */ + function validate($non_negative = false, $config, $context) { + // Special case: + static $allowedUnits = array( + 'em' => true, 'ex' => true, 'px' => true, 'in' => true, + 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true + ); + if ($this->n === '0' && $this->unit === false) return true; + if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit); + if (!isset($allowedUnits[$this->unit])) return false; + $def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); + $result = $def->validate($this->n, $config, $context); + if ($result === false) return false; + $this->n = $result; + return true; + } + + /** + * Returns string representation of number. + */ + function toString() { + return $this->n . $this->unit; + } + +} diff --git a/library/HTMLPurifier/UnitConverter.php b/library/HTMLPurifier/UnitConverter.php new file mode 100644 index 00000000..d506c3f0 --- /dev/null +++ b/library/HTMLPurifier/UnitConverter.php @@ -0,0 +1,168 @@ +outputPrecision = $output_precision; + $this->internalPrecision = $internal_precision; + } + + /** + * Converts a length object of one unit into another unit. + * @note + * About precision: This conversion function pays very special + * attention to the incoming precision of values and attempts + * to maintain a number of significant figure. Results are + * fairly accurate up to nine digits. Some caveats: + * - If a number is zero-padded as a result of this significant + * figure tracking, the zeroes will be eliminated. + * - If a number contains less than four sigfigs ($outputPrecision) + * and this causes some decimals to be excluded, those + * decimals will be added on. + * - Significant digits will be ignored for quantities greater + * than one. This is a limitation of BCMath and I don't + * feel like coding around it. + */ + function convert($length, $to_unit) { + + /** + * Units information array. 
Units are grouped into measuring systems + * (English, Metric), and are assigned an integer representing + * the conversion factor between that unit and the smallest unit in + * the system. Numeric indexes are actually magical constants that + * encode conversion data from one system to the next, with an O(n^2) + * constraint on memory (this is generally not a problem, since + * the number of measuring systems is small.) + */ + static $units = array( + 1 => array( + 'pt' => 1, + 'pc' => 12, + 'in' => 72, + 2 => array('pt', '0.352777778', 'mm'), + ), + 2 => array( + 'mm' => 1, + 'cm' => 10, + 1 => array('mm', '2.83464567', 'pt'), + ), + ); + + if ($length->n === '0' || $length->unit === false) { + return new HTMLPurifier_Length('0', false); // unitless zero; $unit is not assigned until further down, so it must not be used here + } + + $state = $dest_state = false; // both start as false so the "unit not found" check below never reads an undefined variable + foreach ($units as $k => $x) { + if (isset($x[$length->unit])) $state = $k; + if (isset($x[$to_unit])) $dest_state = $k; + } + if (!$state || !$dest_state) return false; + + $n = $length->n; + $unit = $length->unit; + + // Some calculations about the initial precision of the number; + // this will be useful when we need to do final rounding. + $log = (int) floor(log($n, 10)); + if (strpos($n, '.') === false) { + $sigfigs = strlen(trim($n, '0+-')); + } else { + $sigfigs = strlen(ltrim($n, '0+-')) - 1; // eliminate extra decimal character + } + if ($sigfigs < $this->outputPrecision) $sigfigs = $this->outputPrecision; + + // BCMath's internal precision deals only with decimals. Use + // our default if the initial number has no decimals, or increase + // it by how ever many decimals, thus, the number of guard digits + // will always be greater than or equal to internalPrecision. + $cp = ($log < 0) ?
$this->internalPrecision - $log : $this->internalPrecision; // internal precision + + for ($i = 0; $i < 2; $i++) { + + // Determine what unit IN THIS SYSTEM we need to convert to + if ($dest_state === $state) { + // Simple conversion + $dest_unit = $to_unit; + } else { + // Convert to the smallest unit, pending a system shift + $dest_unit = $units[$state][$dest_state][0]; + } + + // Do the conversion if necessary + if ($dest_unit !== $unit) { + $factor = bcdiv($units[$state][$unit], $units[$state][$dest_unit], $cp); + $n = bcmul($n, $factor, $cp); + $unit = $dest_unit; + } + + // Output was zero, so bail out early + if ($n === '') { + $n = '0'; + $unit = $to_unit; + break; + } + + // It was a simple conversion, so bail out + if ($dest_state === $state) { + break; + } + + if ($i !== 0) { + // Conversion failed! Apparently, the system we forwarded + // to didn't have this unit. This should never happen! + return false; + } + + // Pre-condition: $i == 0 + + // Perform conversion to next system of units + $n = bcmul($n, $units[$state][$dest_state][1], $cp); + $unit = $units[$state][$dest_state][2]; + $state = $dest_state; + + // One more loop around to convert the unit in the new system. + + } + + // Post-condition: $unit == $to_unit + if ($unit !== $to_unit) return false; + + // Calculate how many decimals we need ($rp) + // Calculations will always be carried to the decimal; this is + // a limitation with BC (we can't set the scale to be negative) + $new_log = (int) floor(log($n, 10)); + + $rp = $sigfigs - $new_log - $log - 1; + if ($rp < 0) $rp = 0; + + $n = bcadd($n, '0.' . str_repeat('0', $rp) . 
'5', $rp + 1); + $n = bcdiv($n, '1', $rp); + if (strpos($n, '.') !== false) $n = rtrim($n, '0'); + $n = rtrim($n, '.'); + + return new HTMLPurifier_Length($n, $unit); + } + +} diff --git a/tests/HTMLPurifier/LengthTest.php b/tests/HTMLPurifier/LengthTest.php new file mode 100644 index 00000000..55ae8b8e --- /dev/null +++ b/tests/HTMLPurifier/LengthTest.php @@ -0,0 +1,51 @@ +assertIdentical($l->n, '23'); + $this->assertIdentical($l->unit, 'in'); + } + + function testMake() { + $l = HTMLPurifier_Length::make('+23.4in'); + $this->assertIdentical($l->n, '+23.4'); + $this->assertIdentical($l->unit, 'in'); + } + + function testToString() { + $l = new HTMLPurifier_Length('23', 'in'); + $this->assertIdentical($l->toString(), '23in'); + } + + function assertValidate($string, $expect = true, $disable_negative = false) { + if ($expect === true) $expect = $string; + $l = HTMLPurifier_Length::make($string); + $result = $l->validate($disable_negative, $this->config, $this->context); + if ($result === false) $this->assertIdentical($expect, false); + else $this->assertIdentical($l->toString(), $expect); + } + + function testValidate() { + $this->assertValidate('0'); + $this->assertValidate('0px'); + $this->assertValidate('4.5px'); + $this->assertValidate('-4.5px'); + $this->assertValidate('3ex'); + $this->assertValidate('3em'); + $this->assertValidate('3in'); + $this->assertValidate('3cm'); + $this->assertValidate('3mm'); + $this->assertValidate('3pt'); + $this->assertValidate('3pc'); + $this->assertValidate('3PX', '3px'); + $this->assertValidate('3', false); + $this->assertValidate('3miles', false); + $this->assertValidate('-3mm', false, true); // no-negatives + } + +} diff --git a/tests/HTMLPurifier/UnitConverterTest.php b/tests/HTMLPurifier/UnitConverterTest.php new file mode 100644 index 00000000..1abe58bd --- /dev/null +++ b/tests/HTMLPurifier/UnitConverterTest.php @@ -0,0 +1,44 @@ +convert($input, $expect->unit); + $this->assertIdentical($result, $expect); + } + + function 
testEnglish() { + $this->assertConversion('1in', '6pc'); + $this->assertConversion('6pc', '1in'); + + $this->assertConversion('1in', '72pt'); + $this->assertConversion('72pt', '1in'); + + $this->assertConversion('1pc', '12pt'); + $this->assertConversion('12pt', '1pc'); + + $this->assertConversion('1pt', '0.01389in'); + $this->assertConversion('1.000pt', '0.01389in'); + $this->assertConversion('100000pt', '1389in'); + } + + function testMetric() { + $this->assertConversion('1cm', '10mm'); + $this->assertConversion('10mm', '1cm'); + $this->assertConversion('1mm', '0.1cm'); + $this->assertConversion('100mm', '10cm'); + } + + function testEnglishMetric() { + $this->assertConversion('2.835pt', '1mm'); + $this->assertConversion('1mm', '2.835pt'); + $this->assertConversion('0.3937in', '1cm'); + } + +} diff --git a/tests/test_files.php b/tests/test_files.php index adb0df4c..1f976745 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -89,6 +89,7 @@ $test_files[] = 'HTMLPurifier/Injector/LinkifyTest.php'; $test_files[] = 'HTMLPurifier/Injector/PurifierLinkifyTest.php'; $test_files[] = 'HTMLPurifier/LanguageFactoryTest.php'; $test_files[] = 'HTMLPurifier/LanguageTest.php'; +$test_files[] = 'HTMLPurifier/LengthTest.php'; $test_files[] = 'HTMLPurifier/Lexer/DirectLexTest.php'; $test_files[] = 'HTMLPurifier/Lexer/DirectLex_ErrorsTest.php'; $test_files[] = 'HTMLPurifier/LexerTest.php'; @@ -108,6 +109,7 @@ $test_files[] = 'HTMLPurifier/Strategy/ValidateAttributes_IDTest.php'; $test_files[] = 'HTMLPurifier/Strategy/ValidateAttributes_TidyTest.php'; $test_files[] = 'HTMLPurifier/TagTransformTest.php'; $test_files[] = 'HTMLPurifier/TokenTest.php'; +$test_files[] = 'HTMLPurifier/UnitConverterTest.php'; $test_files[] = 'HTMLPurifier/URIDefinitionTest.php'; $test_files[] = 'HTMLPurifier/URIFilter/DisableExternalTest.php'; $test_files[] = 'HTMLPurifier/URIFilter/DisableExternalResourcesTest.php';