1
0
mirror of https://github.com/pirate/ArchiveBox.git synced 2025-08-22 14:13:01 +02:00

tests: Add tests for several different ways to extract the title

This commit is contained in:
Cristian
2020-10-30 08:04:26 -05:00
parent aede134ab3
commit e7e33ea7a5
4 changed files with 761 additions and 4 deletions

View File

@@ -1,3 +1,6 @@
import os
import sqlite3
from .fixtures import *
def test_title_is_htmlencoded_in_index_html(tmp_path, process, disable_extractors_dict):
@@ -6,8 +9,50 @@ def test_title_is_htmlencoded_in_index_html(tmp_path, process, disable_extractor
Unencoded content should not be rendered as it facilitates xss injections
and breaks the layout.
"""
subprocess.run(['archivebox', 'add', 'http://localhost:8080/static/title_with_html.com.html'],
subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/title_with_html.com.html'],
capture_output=True, env=disable_extractors_dict)
list_process = subprocess.run(["archivebox", "list", "--html"], capture_output=True)
assert "<textarea>" not in list_process.stdout.decode("utf-8")
assert "<textarea>" not in list_process.stdout.decode("utf-8")
def test_title_in_meta_title(tmp_path, process, disable_extractors_dict):
    """Check the title stored for the ``title_with_html.com.html`` page.

    Runs ``archivebox add`` on the static page, then reads the first
    ``title`` value from the ``core_snapshot`` table of ``index.sqlite3``
    inside ``tmp_path`` and compares it with the expected text.

    NOTE(review): judging by the test name, this presumably covers a title
    taken from the page's ``<title>`` tag -- confirm against the fixture page.
    """
    subprocess.run(
        ["archivebox", "add", "http://127.0.0.1:8080/static/title_with_html.com.html"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    os.chdir(tmp_path)
    conn = sqlite3.connect("index.sqlite3")
    try:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        c.execute("SELECT title from core_snapshot")
        snapshot = c.fetchone()
    finally:
        # Release the database handle even when the query raises (for
        # example when the table is missing), so a failing test does not
        # leave the index file open.
        conn.close()

    # fetchone() returns None on an empty table; fail with a readable
    # message instead of a TypeError from indexing None.
    assert snapshot is not None, "no row found in core_snapshot"
    assert snapshot[0] == "It All Starts with a Humble <textarea> ◆ 24 ways"
def test_title_in_meta_og(tmp_path, process, disable_extractors_dict):
    """Check the title stored for the ``title_og_with_html.com.html`` page.

    Runs ``archivebox add`` on the static page, then reads the first
    ``title`` value from the ``core_snapshot`` table of ``index.sqlite3``
    inside ``tmp_path`` and compares it with the expected text.

    NOTE(review): judging by the test name, this presumably covers a title
    taken from an Open Graph meta tag -- confirm against the fixture page.
    """
    subprocess.run(
        ["archivebox", "add", "http://127.0.0.1:8080/static/title_og_with_html.com.html"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    os.chdir(tmp_path)
    conn = sqlite3.connect("index.sqlite3")
    try:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        c.execute("SELECT title from core_snapshot")
        snapshot = c.fetchone()
    finally:
        # Release the database handle even when the query raises (for
        # example when the table is missing), so a failing test does not
        # leave the index file open.
        conn.close()

    # fetchone() returns None on an empty table; fail with a readable
    # message instead of a TypeError from indexing None.
    assert snapshot is not None, "no row found in core_snapshot"
    assert snapshot[0] == "It All Starts with a Humble <textarea>"
def test_title_malformed(tmp_path, process, disable_extractors_dict):
    """Check the title stored for the ``malformed.html`` page.

    Runs ``archivebox add`` on the static page, then reads the first
    ``title`` value from the ``core_snapshot`` table of ``index.sqlite3``
    inside ``tmp_path`` and expects it to be ``"malformed document"``.

    NOTE(review): judging by the test and file names, the fixture page is
    presumably not well-formed HTML -- confirm against the fixture page.
    """
    subprocess.run(
        ["archivebox", "add", "http://127.0.0.1:8080/static/malformed.html"],
        capture_output=True,
        env=disable_extractors_dict,
    )

    os.chdir(tmp_path)
    conn = sqlite3.connect("index.sqlite3")
    try:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        c.execute("SELECT title from core_snapshot")
        snapshot = c.fetchone()
    finally:
        # Release the database handle even when the query raises (for
        # example when the table is missing), so a failing test does not
        # leave the index file open.
        conn.close()

    # fetchone() returns None on an empty table; fail with a readable
    # message instead of a TypeError from indexing None.
    assert snapshot is not None, "no row found in core_snapshot"
    assert snapshot[0] == "malformed document"