diff --git a/phpunit.xml.dist b/phpunit.xml.dist index f432b02e32..ddbe4bd080 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -28,6 +28,7 @@ ms-files ms-required external-http + html-api-html5lib-tests diff --git a/tests/phpunit/data/html5lib-tests/.gitattributes b/tests/phpunit/data/html5lib-tests/.gitattributes new file mode 100644 index 0000000000..b23807b2c3 --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/.gitattributes @@ -0,0 +1 @@ +*.dat -text diff diff --git a/tests/phpunit/data/html5lib-tests/AUTHORS.rst b/tests/phpunit/data/html5lib-tests/AUTHORS.rst new file mode 100644 index 0000000000..4a7de17ad4 --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/AUTHORS.rst @@ -0,0 +1,34 @@ +Credits +======= + +The ``html5lib`` test data is maintained by: + +- James Graham +- Geoffrey Sneddon + + +Contributors +------------ + +- Adam Barth +- Andi Sidwell +- Anne van Kesteren +- David Flanagan +- Edward Z. Yang +- Geoffrey Sneddon +- Henri Sivonen +- Ian Hickson +- Jacques Distler +- James Graham +- Lachlan Hunt +- lantis63 +- Mark Pilgrim +- Mats Palmgren +- Ms2ger +- Nolan Waite +- Philip Taylor +- Rafael Weinstein +- Ryan King +- Sam Ruby +- Simon Pieters +- Thomas Broyer diff --git a/tests/phpunit/data/html5lib-tests/LICENSE b/tests/phpunit/data/html5lib-tests/LICENSE new file mode 100644 index 0000000000..8812371b41 --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and +other contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this 
permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/tests/phpunit/data/html5lib-tests/README.md b/tests/phpunit/data/html5lib-tests/README.md new file mode 100644 index 0000000000..be775c8b49 --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/README.md @@ -0,0 +1,25 @@ +# html5lib-tests + +This directory contains a third-party test suite used for testing the WordPress HTML API. + +`html5lib-tests` can be found on GitHub at [html5lib/html5lib-tests](https://github.com/html5lib/html5lib-tests). + +The necessary files have been copied to this directory: + +- `AUTHORS.rst` +- `LICENSE` +- `README.md` +- `tree-construction/README.md` +- `tree-construction/*.dat` + +The version of these files was taken from the git commit with +SHA [`a9f44960a9fedf265093d22b2aa3c7ca123727b9`](https://github.com/html5lib/html5lib-tests/commit/a9f44960a9fedf265093d22b2aa3c7ca123727b9). + +## Updating + +If there have been changes to the html5lib-tests repository, this test suite can be updated. In +order to update: + +1. Check out the latest version of the git repository mentioned above. +1. Copy the files listed above into this directory. +1. Update the SHA mentioned in this README file with the new html5lib-tests SHA. 
diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/README.md b/tests/phpunit/data/html5lib-tests/tree-construction/README.md new file mode 100644 index 0000000000..4737a3a867 --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/tree-construction/README.md @@ -0,0 +1,108 @@ +Tree Construction Tests +======================= + +Each file containing tree construction tests consists of any number of +tests separated by two newlines (LF) and a single newline before the end +of the file. For instance: + + [TEST]LF + LF + [TEST]LF + LF + [TEST]LF + +Where [TEST] is the following format: + +Each test must begin with a string "\#data" followed by a newline (LF). +All subsequent lines until a line that says "\#errors" are the test data +and must be passed to the system being tested unchanged, except with the +final newline (on the last line) removed. + +Then there must be a line that says "\#errors". It must be followed by +one line per parse error that a conformant checker would return. It +doesn't matter what those lines are, although they can't be +"\#new-errors", "\#document-fragment", "\#document", "\#script-off", +"\#script-on", or empty, the only thing that matters is that there be +the right number of parse errors. + +Then there \*may\* be a line that says "\#new-errors", which works like +the "\#errors" section adding more errors to the expected number of +errors. + +Then there \*may\* be a line that says "\#document-fragment", which must +be followed by a newline (LF), followed by a string of characters that +indicates the context element, followed by a newline (LF). If the string +of characters starts with "svg ", the context element is in the SVG +namespace and the substring after "svg " is the local name. If the +string of characters starts with "math ", the context element is in the +MathML namespace and the substring after "math " is the local name. +Otherwise, the context element is in the HTML namespace and the string +is the local name. 
If this line is present the "\#data" must be parsed +using the HTML fragment parsing algorithm with the context element as +context. + +Then there \*may\* be a line that says "\#script-off" or +"\#script-on". If a line that says "\#script-off" is present, the +parser must set the scripting flag to disabled. If a line that says +"\#script-on" is present, it must set it to enabled. Otherwise, the +test should be run in both modes. + +Then there must be a line that says "\#document", which must be followed +by a dump of the tree of the parsed DOM. Each node must be represented +by a single line. Each line must start with "| ", followed by two spaces +per parent node that the node has before the root document node. + +- Element nodes must be represented by a "`<`" then the *tag name + string* "`>`", and all the attributes must be given, sorted + lexicographically by UTF-16 code unit according to their *attribute + name string*, on subsequent lines, as if they were children of the + element node. +- Attribute nodes must have the *attribute name string*, then an "=" + sign, then the attribute value in double quotes ("). +- Text nodes must be the string, in double quotes. Newlines aren't + escaped. +- Comments must be "`<`" then "`!-- `" then the data then "` -->`". +- DOCTYPEs must be "`<!DOCTYPE `" then the name then if either of the + system id or public id is non-empty a space, public id in + double-quotes, another space and the system id in double-quotes, and + then in any case "`>`". +- Processing instructions must be "`<?`", then the target, then a + space, then the data and then "`>`". (The HTML parser cannot emit + processing instructions, but scripts can, and the WebVTT to DOM + rules can emit them.) +- Template contents are represented by the string "content" with the + children below it. + +The *tag name string* is the local name prefixed by a namespace +designator. For the HTML namespace, the namespace designator is the +empty string, i.e. there's no prefix. For the SVG namespace, the +namespace designator is "svg ". For the MathML namespace, the namespace +designator is "math ". + +The *attribute name string* is the local name prefixed by a namespace +designator. 
For no namespace, the namespace designator is the empty +string, i.e. there's no prefix. For the XLink namespace, the namespace +designator is "xlink ". For the XML namespace, the namespace designator +is "xml ". For the XMLNS namespace, the namespace designator is "xmlns +". Note the difference between "xlink:href" which is an attribute in no +namespace with the local name "xlink:href" and "xlink href" which is an +attribute in the xlink namespace with the local name "href". + +If there is also a "\#document-fragment" the bit following "\#document" +must be a representation of the HTML fragment serialization for the +context element given by "\#document-fragment". + +For example: + + #data + <p>One<p>Two + #errors + 3: Missing document type declaration + #document + | <html> + | <head> + | <body> + | <p> + | "One" + | <p> + | "Two" diff --git a/tests/phpunit/data/html5lib-tests/tree-construction/adoption01.dat b/tests/phpunit/data/html5lib-tests/tree-construction/adoption01.dat new file mode 100644 index 0000000000..38f98efded --- /dev/null +++ b/tests/phpunit/data/html5lib-tests/tree-construction/adoption01.dat @@ -0,0 +1,354 @@ +#data +

+#errors +(1,3): expected-doctype-but-got-start-tag +(1,10): adoption-agency-1.3 +#document +| +| +| +| +|

+| + +#data +1

23

+#errors +(1,3): expected-doctype-but-got-start-tag +(1,12): adoption-agency-1.3 +#document +| +| +| +| +| "1" +|

+| +| "2" +| "3" + +#data +1 +#errors +(1,3): expected-doctype-but-got-start-tag +(1,17): adoption-agency-1.3 +#document +| +| +| +| +| "1" +|