Add benchmark for transliteration

This commit is contained in:
Bjørn Erik Pedersen
2024-02-06 09:18:06 +01:00
parent bd0200da6e
commit a949a34673
2 changed files with 90 additions and 0 deletions

View File

@@ -23,6 +23,64 @@ import (
"golang.org/x/text/unicode/norm" "golang.org/x/text/unicode/norm"
) )
// transliteratePool hands out reusable transformer chains for
// TransliterateString. Each chain runs four steps in order:
//
//  1. norm.NFD normalization,
//  2. removal of every rune in unicode.Mn,
//  3. a switch-based rune mapping for a fixed set of lowercase letters,
//  4. norm.NFC normalization.
//
// NOTE(review): letters that have a canonical decomposition (for example
// 'ą') are presumably already reduced to their base letter by steps 1-2
// before the mapper sees them, which would leave only runes without a
// decomposition (such as 'ł' and 'ø') depending on the switch — confirm.
var transliteratePool = &sync.Pool{
	New: func() any {
		// Maps a single rune to its ASCII replacement; runes not listed
		// are passed through unchanged.
		toASCII := func(r rune) rune {
			switch r {
			case 'ą':
				return 'a'
			case 'ć':
				return 'c'
			case 'ę':
				return 'e'
			case 'ł':
				return 'l'
			case 'ń':
				return 'n'
			case 'ó', 'ø':
				return 'o'
			case 'ś':
				return 's'
			case 'ż', 'ź':
				return 'z'
			default:
				return r
			}
		}
		return transform.Chain(
			norm.NFD,
			runes.Remove(runes.In(unicode.Mn)),
			runes.Map(toASCII),
			norm.NFC,
		)
	},
}
// transliterateMap is the lookup table used by the map-based transliteration
// variant: each key is a lowercase letter carrying a diacritic or stroke, and
// the value is the plain ASCII letter that replaces it. Entries are grouped by
// replacement letter.
var transliterateMap = map[rune]rune{
	// -> a
	'ą': 'a',
	// -> c
	'ć': 'c',
	// -> e
	'ę': 'e',
	// -> l
	'ł': 'l',
	// -> n
	'ń': 'n',
	// -> o
	'ó': 'o',
	'ø': 'o',
	// -> s
	'ś': 's',
	// -> z
	'ż': 'z',
	'ź': 'z',
}
var transliteratePoolMap = &sync.Pool{
New: func() any {
return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)),
runes.Map(func(r rune) rune {
if rr, ok := transliterateMap[r]; ok {
return rr
}
return r
}),
norm.NFC)
},
}
var accentTransformerPool = &sync.Pool{ var accentTransformerPool = &sync.Pool{
New: func() any { New: func() any {
return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
@@ -47,6 +105,22 @@ func RemoveAccentsString(s string) string {
return s return s
} }
// TransliterateString returns s after passing it through a transformer taken
// from transliteratePool (the switch-based transliteration chain). The
// transformer is reset and handed back to the pool before returning.
//
// The byte count and error reported by transform.String are discarded, so
// whatever string transform.String produced is returned as is.
func TransliterateString(s string) string {
	tr := transliteratePool.Get().(transform.Transformer)
	out, _, _ := transform.String(tr, s)

	// Clear any state before the transformer becomes available to other callers.
	tr.Reset()
	transliteratePool.Put(tr)

	return out
}
// TransliterateStringMap returns s after passing it through a transformer
// taken from transliteratePoolMap (the map-based transliteration chain). The
// transformer is reset and handed back to the pool before returning.
//
// The byte count and error reported by transform.String are discarded, so
// whatever string transform.String produced is returned as is.
func TransliterateStringMap(s string) string {
	tr := transliteratePoolMap.Get().(transform.Transformer)
	out, _, _ := transform.String(tr, s)

	// Clear any state before the transformer becomes available to other callers.
	tr.Reset()
	transliteratePoolMap.Put(tr)

	return out
}
// Chomp removes trailing newline characters from s. // Chomp removes trailing newline characters from s.
func Chomp(s string) string { func Chomp(s string) string {
return strings.TrimRightFunc(s, func(r rune) bool { return strings.TrimRightFunc(s, func(r rune) bool {

View File

@@ -70,3 +70,19 @@ func BenchmarkVisitLinesAfter(b *testing.B) {
}) })
} }
} }
// BenchmarkTransliterate measures the two transliteration variants against
// the same fixed input string: sub-benchmark "switch" calls
// TransliterateString and sub-benchmark "map" calls TransliterateStringMap.
func BenchmarkTransliterate(b *testing.B) {
	input := "ƀ Ɓ Ƃ ƃ Ƅ ƅ Ɔ Ƈ ƈ Ɖ Ɗ Ƌ ƌ ƍ Ǝ Ə Ɛ Ƒ ƒ Ɠ Ɣ ƕ Ɩ Ɨ Ƙ ƙ ƚ ƛ Ɯ Ɲ ƞ Ɵ Ơ ơ Ƣ ƣ Ƥ ƥ Ʀ Ƨ ƨ Ʃ ƪ ƫ Ƭ ƭ Ʈ Ư ư Ʊ Ʋ Ƴ ƴ Ƶ ƶ Ʒ Ƹ ƹ ƺ ƻ Ƽ ƽ ƾ ƿ ǀ ǁ ǂ ǃ DŽ Dž dž LJ Lj lj NJ Nj nj Ǎ ǎ Ǐ ǐ Ǒ ǒ Ǔ ǔ Ǖ ǖ Ǘ ǘ Ǚ ǚ Ǜ ǜ ǝ Ǟ ǟ Ǡ ǡ Ǣ ǣ Ǥ ǥ Ǧ ǧ Ǩ ǩ Ǫ ǫ Ǭ ǭ Ǯ ǯ ǰ DZ Dz dz Ǵ ǵ Ǻ ǻ Ǽ ǽ Ǿ ǿ Ȁ ȁ Ȃ ȃ"

	variants := []struct {
		name string
		fn   func(string) string
	}{
		{name: "switch", fn: TransliterateString},
		{name: "map", fn: TransliterateStringMap},
	}

	for _, variant := range variants {
		// b.Run executes the closure before returning, so capturing the
		// loop variable here is safe.
		b.Run(variant.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				_ = variant.fn(input)
			}
		})
	}
}