mirror of
https://github.com/minimaxir/big-list-of-naughty-strings.git
synced 2025-09-25 13:19:15 +02:00
Merge pull request #125 from zackw/add-c0-and-c1-controls
Add control characters and whitespace
This commit is contained in:
1126
blns.base64.json
1126
blns.base64.json
File diff suppressed because it is too large
Load Diff
1186
blns.base64.txt
1186
blns.base64.txt
File diff suppressed because it is too large
Load Diff
@@ -91,6 +91,12 @@
|
||||
",./;'[]\\-=",
|
||||
"<>?:\"{}|_+",
|
||||
"!@#$%^&*()`~",
|
||||
"\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f",
|
||||
"",
|
||||
"\t\u000b\f
",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"Ω≈ç√∫˜µ≤≥÷",
|
||||
"åß∂ƒ©˙∆˚¬…æ",
|
||||
"œ∑´®†¥¨ˆøπ“‘",
|
||||
@@ -157,7 +163,7 @@
|
||||
"הָיְתָהtestالصفحات التّحول",
|
||||
"﷽",
|
||||
"ﷺ",
|
||||
"مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ، ",
|
||||
"مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ، ",
|
||||
"",
|
||||
" ",
|
||||
"",
|
||||
|
49
blns.txt
49
blns.txt
@@ -98,12 +98,46 @@ INF
|
||||
|
||||
# Special Characters
|
||||
#
|
||||
# Strings which contain common special ASCII characters (may need to be escaped)
|
||||
# ASCII punctuation. All of these characters may need to be escaped in some
|
||||
# contexts. Divided into three groups based on (US-layout) keyboard position.
|
||||
|
||||
,./;'[]\-=
|
||||
<>?:"{}|_+
|
||||
!@#$%^&*()`~
|
||||
|
||||
# Non-whitespace C0 controls: U+0001 through U+0008, U+000E through U+001F,
|
||||
# and U+007F (DEL)
|
||||
# Often forbidden to appear in various text-based file formats (e.g. XML),
|
||||
# or reused for internal delimiters on the theory that they should never
|
||||
# appear in input.
|
||||
# The next line may appear to be blank or mojibake in some viewers.
|
||||
|
||||
|
||||
# Non-whitespace C1 controls: U+0080 through U+0084 and U+0086 through U+009F.
|
||||
# Commonly misinterpreted as additional graphic characters.
|
||||
# The next line may appear to be blank, mojibake, or dingbats in some viewers.
|
||||
|
||||
|
||||
# Whitespace: all of the characters with category Zs, Zl, or Zp (in Unicode
|
||||
# version 8.0.0), plus U+0009 (HT), U+000B (VT), U+000C (FF), U+0085 (NEL),
|
||||
# and U+200B (ZERO WIDTH SPACE), which are in the C categories but are often
|
||||
# treated as whitespace in some contexts.
|
||||
# This file unfortunately cannot express strings containing
|
||||
# U+0000, U+000A, or U+000D (NUL, LF, CR).
|
||||
# The next line may appear to be blank or mojibake in some viewers.
|
||||
# The next line may be flagged for "trailing whitespace" in some viewers.
|
||||
|
||||
|
||||
# Unicode additional control characters: all of the characters with
|
||||
# general category Cf (in Unicode 8.0.0).
|
||||
# The next line may appear to be blank or mojibake in some viewers.
|
||||
|
||||
|
||||
# "Byte order marks", U+FEFF and U+FFFE, each on its own line.
|
||||
# The next two lines may appear to be blank or mojibake in some viewers.
|
||||
|
||||
|
||||
|
||||
# Unicode Symbols
|
||||
#
|
||||
# Strings which contain common unicode symbols (e.g. smart quotes)
|
||||
@@ -223,19 +257,6 @@ __ロ(,_,*)
|
||||
ﷺ
|
||||
مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ،
|
||||
|
||||
# Unicode Spaces
|
||||
#
|
||||
# Strings which contain unicode space characters with special properties (c.f. https://www.cs.tut.fi/~jkorpela/chars/spaces.html)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
␣
|
||||
␢
|
||||
␡
|
||||
|
||||
# Trick Unicode
|
||||
#
|
||||
# Strings which contain unicode with unusual properties (e.g. Right-to-left override) (c.f. http://www.unicode.org/charts/PDF/U2000.pdf)
|
||||
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user