Mirror of https://github.com/pirate/ArchiveBox.git (synced 2025-08-21 05:41:54 +02:00)
Add htmltotext extractor
Saves HTML text nodes and selected element attributes in `htmltotext.txt` for each Snapshot. Primarily intended to be used for search indexing.
This commit is contained in:
@@ -17,6 +17,7 @@ def disable_extractors_dict():
|
||||
"USE_SINGLEFILE": "false",
|
||||
"USE_READABILITY": "false",
|
||||
"USE_MERCURY": "false",
|
||||
"SAVE_HTMLTOTEXT": "false",
|
||||
"SAVE_PDF": "false",
|
||||
"SAVE_SCREENSHOT": "false",
|
||||
"SAVE_DOM": "false",
|
||||
|
Reference in New Issue
Block a user