1
0
mirror of https://github.com/minimaxir/big-list-of-naughty-strings.git synced 2025-09-29 07:09:01 +02:00

Merge pull request #125 from zackw/add-c0-and-c1-controls

Add control characters and whitespace
This commit is contained in:
Max Woolf
2017-01-19 22:45:09 -08:00
committed by GitHub
5 changed files with 2717 additions and 2053 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -91,6 +91,12 @@
",./;'[]\\-=", ",./;'[]\\-=",
"<>?:\"{}|_+", "<>?:\"{}|_+",
"!@#$%^&*()`~", "!@#$%^&*()`~",
"\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f",
"€‚ƒ„†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ",
"\t\u000b\f …  ",
"­؀؁؂؃؄؅؜۝܏᠎​‌‍‎‏‪‫‬‭‮⁠⁡⁢⁣⁤⁦⁧⁨⁩𑂽𛲠𛲡𛲢𛲣𝅳𝅴𝅵𝅶𝅷𝅸𝅹𝅺󠀁󠀠󠀡󠀢󠀣󠀤󠀥󠀦󠀧󠀨󠀩󠀪󠀫󠀬󠀭󠀮󠀯󠀰󠀱󠀲󠀳󠀴󠀵󠀶󠀷󠀸󠀹󠀺󠀻󠀼󠀽󠀾󠀿󠁀󠁁󠁂󠁃󠁄󠁅󠁆󠁇󠁈󠁉󠁊󠁋󠁌󠁍󠁎󠁏󠁐󠁑󠁒󠁓󠁔󠁕󠁖󠁗󠁘󠁙󠁚󠁛󠁜󠁝󠁞󠁟󠁠󠁡󠁢󠁣󠁤󠁥󠁦󠁧󠁨󠁩󠁪󠁫󠁬󠁭󠁮󠁯󠁰󠁱󠁲󠁳󠁴󠁵󠁶󠁷󠁸󠁹󠁺󠁻󠁼󠁽󠁾󠁿",
"",
"￾",
"Ω≈ç√∫˜µ≤≥÷", "Ω≈ç√∫˜µ≤≥÷",
"åß∂ƒ©˙∆˚¬…æ", "åß∂ƒ©˙∆˚¬…æ",
"œ∑´®†¥¨ˆøπ“‘", "œ∑´®†¥¨ˆøπ“‘",
@@ -157,7 +163,7 @@
"הָיְתָהtestالصفحات التّحول", "הָיְתָהtestالصفحات التّحول",
"﷽", "﷽",
"ﷺ", "ﷺ",
"مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ، ", "مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ، ",
"", "",
"", "",
"", "",

View File

@@ -98,12 +98,46 @@ INF
# Special Characters # Special Characters
# #
# Strings which contain common special ASCII characters (may need to be escaped) # ASCII punctuation. All of these characters may need to be escaped in some
# contexts. Divided into three groups based on (US-layout) keyboard position.
,./;'[]\-= ,./;'[]\-=
<>?:"{}|_+ <>?:"{}|_+
!@#$%^&*()`~ !@#$%^&*()`~
# Non-whitespace C0 controls: U+0001 through U+0008, U+000E through U+001F,
# and U+007F (DEL)
# Often forbidden to appear in various text-based file formats (e.g. XML),
# or reused for internal delimiters on the theory that they should never
# appear in input.
# The next line may appear to be blank or mojibake in some viewers.

# Non-whitespace C1 controls: U+0080 through U+0084 and U+0086 through U+009F.
# Commonly misinterpreted as additional graphic characters.
# The next line may appear to be blank, mojibake, or dingbats in some viewers.
€‚ƒ„†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ
# Whitespace: all of the characters with category Zs, Zl, or Zp (in Unicode
# version 8.0.0), plus U+0009 (HT), U+000B (VT), U+000C (FF), U+0085 (NEL),
# and U+200B (ZERO WIDTH SPACE), which are in the C categories but are often
# treated as whitespace in some contexts.
# This file unfortunately cannot express strings containing
# U+0000, U+000A, or U+000D (NUL, LF, CR).
# The next line may appear to be blank or mojibake in some viewers.
# The next line may be flagged for "trailing whitespace" in some viewers.
…   
# Unicode additional control characters: all of the characters with
# general category Cf (in Unicode 8.0.0).
# The next line may appear to be blank or mojibake in some viewers.
­؀؁؂؃؄؅؜۝܏᠎​‌‍‎‏‪‫‬‭‮⁠⁡⁢⁣⁤⁦⁧⁨⁩𑂽𛲠𛲡𛲢𛲣𝅳𝅴𝅵𝅶𝅷𝅸𝅹𝅺󠀁󠀠󠀡󠀢󠀣󠀤󠀥󠀦󠀧󠀨󠀩󠀪󠀫󠀬󠀭󠀮󠀯󠀰󠀱󠀲󠀳󠀴󠀵󠀶󠀷󠀸󠀹󠀺󠀻󠀼󠀽󠀾󠀿󠁀󠁁󠁂󠁃󠁄󠁅󠁆󠁇󠁈󠁉󠁊󠁋󠁌󠁍󠁎󠁏󠁐󠁑󠁒󠁓󠁔󠁕󠁖󠁗󠁘󠁙󠁚󠁛󠁜󠁝󠁞󠁟󠁠󠁡󠁢󠁣󠁤󠁥󠁦󠁧󠁨󠁩󠁪󠁫󠁬󠁭󠁮󠁯󠁰󠁱󠁲󠁳󠁴󠁵󠁶󠁷󠁸󠁹󠁺󠁻󠁼󠁽󠁾󠁿
# "Byte order marks", U+FEFF and U+FFFE, each on its own line.
# The next two lines may appear to be blank or mojibake in some viewers.

# Unicode Symbols # Unicode Symbols
# #
# Strings which contain common unicode symbols (e.g. smart quotes) # Strings which contain common unicode symbols (e.g. smart quotes)
@@ -223,19 +257,6 @@ __ロ(,_,*)
مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ، مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ،
# Unicode Spaces
#
# Strings which contain unicode space characters with special properties (c.f. https://www.cs.tut.fi/~jkorpela/chars/spaces.html)
 

# Trick Unicode # Trick Unicode
# #
# Strings which contain unicode with unusual properties (e.g. Right-to-left override) (c.f. http://www.unicode.org/charts/PDF/U2000.pdf) # Strings which contain unicode with unusual properties (e.g. Right-to-left override) (c.f. http://www.unicode.org/charts/PDF/U2000.pdf)

File diff suppressed because it is too large Load Diff