<?php
// NOTE(review): the opening PHP tag and the licence boilerplate that originally preceded the
// namespace declaration were lost (only a stray "." survived). Restore the project's standard
// file header here.

namespace core;

/**
 * Tests our html2text hacks
 *
 * Note: includes original tests from testweblib.php
 *
 * NOTE(review): the HTML markup inside the fixtures of this file had been stripped, which left
 * most assertions unsatisfiable. The markup used below is reconstructed from the expected output
 * of each assertion - confirm it against a pristine copy of this test.
 *
 * @package    core
 * @category   test
 * @copyright  2012 Petr Skoda {@link http://skodak.org}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class html2text_test extends \basic_testcase {

    /**
     * ALT as image replacements.
     */
    public function test_images() {
        $this->assertSame('[edit]', html_to_text('<img src="edit.png" alt="edit" />'));

        $text = 'xx<img src="gif.gif" alt="some gif" />xx';
        $result = html_to_text($text, null, false, false);
        $this->assertSame('xx[some gif]xx', $result);
    }

    /**
     * No magic quotes messing.
     */
    public function test_no_strip_slashes() {
        $this->assertSame('[\edit]', html_to_text('<img src="edit.png" alt="\edit" />'));

        // Backslashes in plain text must come back untouched.
        $text = '\\magic\\quotes\\are\\\\horrible';
        $result = html_to_text($text, null, false, false);
        $this->assertSame($text, $result);
    }

    /**
     * core_text integration.
     */
    public function test_core_text() {
        // The emphasised text is expected in upper case, including the accented characters.
        $text = '<strong>Žluťoučký koníček</strong>';
        $result = html_to_text($text, null, false, false);
        $this->assertSame('ŽLUŤOUČKÝ KONÍČEK', $result);
    }

    /**
     * Protect 0.
     */
    public function test_zero() {
        // The string '0' is falsy in PHP; it must not be treated as empty input.
        $text = '0';
        $result = html_to_text($text, null, false, false);
        $this->assertSame($text, $result);

        $this->assertSame('0', html_to_text('0'));
    }

    /**
     * Test the links list enumeration.
     */
    public function test_build_link_list() {

        // Note the whitespace left intentionally at the start of the first link's text.
        $text = 'Total of <a href="http://tr.mdl.org/sh.jspa?r=1&j=p+%3D+%22I+d%22+%3D">' .
            ' <strong>27 issues</strong></a> and <a href="http://another.url/?f=a&b=2">some</a> other ' .
            'have been fixed <strong><a href="http://third.url/view.php">last week</a></strong>';

        // Do not collect links.
        $result = html_to_text($text, 5000, false);
        $this->assertSame('Total of 27 ISSUES and some other have been fixed LAST WEEK', $result);

        // Collect and enumerate links.
        $result = html_to_text($text, 5000, true);
        $this->assertSame(0, strpos($result, 'Total of 27 ISSUES [1] and some [2] other have been fixed LAST WEEK [3]'));
        // Numbering starts at 1 and stops at the number of distinct links.
        $this->assertSame(false, strpos($result, '[0]'));
        $this->assertSame(1, preg_match(
            '|^' . preg_quote('[1] http://tr.mdl.org/sh.jspa?r=1&j=p+%3D+%22I+d%22+%3D', '|') . '$|m', $result));
        $this->assertSame(1, preg_match('|^' . preg_quote('[2] http://another.url/?f=a&b=2', '|') . '$|m', $result));
        $this->assertSame(1, preg_match('|^' . preg_quote('[3] http://third.url/view.php', '|') . '$|m', $result));
        $this->assertSame(false, strpos($result, '[4]'));

        // Test multiple occurrences of the same URL.
        $text = '<p>See <a href="http://moodle.org">moodle.org</a>, ' .
            '<a href="http://www.google.fr">google</a>, <a href="http://www.univ-lemans.fr">univ-lemans</a> ' .
            'and <a href="http://www.google.fr">google</a>. ' .
            'Also try <a href="https://www.google.fr">google via HTTPS</a>.</p>';
        $result = html_to_text($text, 5000, true);
        // The repeated URL re-uses index [2]; the HTTPS variant is a different URL and gets its own index.
        $this->assertSame(0, strpos($result,
            'See moodle.org [1], google [2], univ-lemans [3] and google [2]. Also try google via HTTPS [4].'));
        $this->assertSame(false, strpos($result, '[0]'));
        $this->assertSame(1, preg_match('|^' . preg_quote('[1] http://moodle.org', '|') . '$|m', $result));
        $this->assertSame(1, preg_match('|^' . preg_quote('[2] http://www.google.fr', '|') . '$|m', $result));
        $this->assertSame(1, preg_match('|^' . preg_quote('[3] http://www.univ-lemans.fr', '|') . '$|m', $result));
        $this->assertSame(1, preg_match('|^' . preg_quote('[4] https://www.google.fr', '|') . '$|m', $result));
        $this->assertSame(false, strpos($result, '[5]'));
    }

    /**
     * Various invalid HTML typed by users that ignore html strict.
     */
    public function test_invalid_html() {
        // Bare '&', '>' and '<' characters must survive the conversion unchanged.
        $text = 'Gin & Tonic';
        $result = html_to_text($text, null, false, false);
        $this->assertSame($text, $result);

        $text = 'Gin > Tonic';
        $result = html_to_text($text, null, false, false);
        $this->assertSame($text, $result);

        $text = 'Gin < Tonic';
        $result = html_to_text($text, null, false, false);
        $this->assertSame($text, $result);
    }

    /**
     * Basic text formatting.
     */
    public function test_simple() {
        $this->assertSame("_Hello_ WORLD!\n", html_to_text('<p><i>Hello</i> <b>world</b>!</p>'));
        $this->assertSame("All the WORLD’S a stage.\n\n-- William Shakespeare\n",
                html_to_text('<p>All the <strong>world’s</strong> a stage.</p><p>-- William Shakespeare</p>'));
        $this->assertSame("HELLO WORLD!\n\n", html_to_text('<h1>Hello world!</h1>'));
        $this->assertSame("Hello\nworld!", html_to_text('Hello<br />world!'));
    }

    /**
     * Test line wrapping.
     */
    public function test_text_nowrap() {
        $long = "Here is a long string, more than 75 characters long, since by default html_to_text wraps text at 75 chars.";
        $wrapped = "Here is a long string, more than 75 characters long, since by default\nhtml_to_text wraps text at 75 chars.";
        // A width of 0 disables wrapping; omitting the width wraps at the default.
        $this->assertSame($long, html_to_text($long, 0));
        $this->assertSame($wrapped, html_to_text($long));
    }

    /**
     * Whitespace removal.
     */
    public function test_trailing_whitespace() {
        $this->assertSame('With trailing whitespace and some more text',
                html_to_text("With trailing whitespace \nand some more text", 0));
    }

    /**
     * PRE parsing.
     */
    public function test_html_to_text_pre_parsing_problem() {
        $strorig = 'Consider the following function:<br /><pre>void FillMeUp(char* in_string) {' .
            '<br />  int i = 0;<br />  while (in_string[i] != \'\0\') {<br />    in_string[i] = \'X\';<br />    i++;<br />  }<br />' .
            '}</pre>What would happen if a non-terminated string were input to this function?<br /><br />';

        // Note, the spaces in the <pre> section are expected back as Unicode NBSPs. They are generated
        // explicitly here so that they cannot be lost or mistaken for plain spaces in an editor.
        $nbsp = "\xC2\xA0";
        $precode = implode("\n", [
            'void FillMeUp(char* in_string) {',
            '  int i = 0;',
            '  while (in_string[i] != \'\0\') {',
            '    in_string[i] = \'X\';',
            '    i++;',
            '  }',
            '}',
        ]);
        $strconv = "Consider the following function:\n\n" .
            str_replace(' ', $nbsp, $precode) . "\n" .
            "What would happen if a non-terminated string were input to this function?\n\n";

        $this->assertSame($strconv, html_to_text($strorig));
    }

    /**
     * Scripts should be stripped.
     */
    public function test_strip_scripts() {
        $this->assertSame('Interesting text',
                html_to_text('Interesting <script type="text/javascript">var what_a_mess = "Yeah!";</script> text', 0));
    }
}