mirror of
https://github.com/gohugoio/hugo.git
synced 2025-08-28 22:19:59 +02:00
WordCount and Summary support CJK Language
* Add a global `hasCJKLanguage` flag; if true, auto-detection of CJK languages is turned on. * Add an `isCJKLanguage` front matter variable to explicitly specify whether the content is in a CJK language. * For .Summary: if isCJKLanguage is true, use runes as the basis for truncation; otherwise keep the current behaviour. * For WordCount: if isCJKLanguage is true, use runes as the basis for the count; otherwise keep the current behaviour. * Unexport RuneCount. Fixes #1377
This commit is contained in:
committed by
Bjørn Erik Pedersen
parent
2c045ac449
commit
823334875d
@@ -19,9 +19,9 @@ package helpers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode/utf8"
|
||||
"html/template"
|
||||
"os/exec"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/miekg/mmark"
|
||||
"github.com/russross/blackfriday"
|
||||
@@ -178,7 +178,6 @@ func GetHTMLRenderer(defaultFlags int, ctx *RenderingContext) blackfriday.Render
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func getMarkdownExtensions(ctx *RenderingContext) int {
|
||||
flags := 0 | blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
|
||||
blackfriday.EXTENSION_TABLES | blackfriday.EXTENSION_FENCED_CODE |
|
||||
@@ -385,61 +384,51 @@ func TruncateWords(s string, max int) string {
|
||||
return strings.Join(words[:max], " ")
|
||||
}
|
||||
|
||||
// TruncateWordsByRune truncates words to at most max counted units and
// returns the result joined by single spaces, together with a flag that
// reports whether anything was cut off.
//
// A word consisting only of single-byte characters counts as one unit.
// A word containing at least one multi-byte character (e.g. CJK text)
// counts one unit per rune and may be cut in the middle, always on a rune
// boundary.
//
// The words slice passed in is never modified.
func TruncateWordsByRune(words []string, max int) (string, bool) {
	count := 0
	for index, word := range words {
		if count >= max {
			return strings.Join(words[:index], " "), true
		}

		runeCount := utf8.RuneCountInString(word)
		switch {
		case len(word) == runeCount:
			// Pure single-byte word: counts as a single unit.
			count++
		case count+runeCount < max:
			// Multi-byte word that fits entirely below the limit.
			count += runeCount
		default:
			// Multi-byte word that reaches or crosses the limit: walk it
			// rune by rune and cut at the first rune past the limit.
			for ri := range word {
				if count >= max {
					// The full slice expression caps the capacity at index,
					// forcing append to allocate a new array instead of
					// overwriting words[index] in the caller's slice.
					truncatedWords := append(words[:index:index], word[:ri])
					return strings.Join(truncatedWords, " "), true
				}
				count++
			}
		}
	}

	return strings.Join(words, " "), false
}
|
||||
|
||||
// TruncateWordsToWholeSentence takes a list of words and a word limit and
// returns whole sentences joined by single spaces, plus a flag reporting
// whether any words were left out.
//
// If words holds no more than max entries, everything is returned and the
// flag is false. Otherwise the result extends from the start up to and
// including the first word at index max or later that ends a sentence
// (suffix ".", "?", "!" or `."`). If no such word exists, the result is
// exactly the first max words.
//
// A negative max is treated as 0.
func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
	// Guard against a negative limit, which would otherwise panic when
	// slicing words[max:] below.
	if max < 0 {
		max = 0
	}

	if max >= len(words) {
		return strings.Join(words, " "), false
	}

	for counter, word := range words[max:] {
		if strings.HasSuffix(word, ".") ||
			strings.HasSuffix(word, "?") ||
			strings.HasSuffix(word, ".\"") ||
			strings.HasSuffix(word, "!") {
			// counter is relative to words[max:], so shift it back and add
			// one to include the sentence-ending word itself.
			upper := max + counter + 1
			return strings.Join(words[:upper], " "), (upper < len(words))
		}
	}

	return strings.Join(words[:max], " "), true
}
|
||||
|
||||
// GetAsciidocContent calls asciidoctor or asciidoc as an external helper
|
||||
|
@@ -1,10 +1,11 @@
|
||||
package helpers
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"html/template"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
const tstHTMLContent = "<!DOCTYPE html><html><head><script src=\"http://two/foobar.js\"></script></head><body><nav><ul><li hugo-nav=\"section_0\"></li><li hugo-nav=\"section_1\"></li></ul></nav><article>content <a href=\"http://two/foobar\">foobar</a>. Follow up</article><p>This is some text.<br>And some more.</p></body></html>"
|
||||
@@ -54,8 +55,6 @@ func TestTruncateWordsToWholeSentence(t *testing.T) {
|
||||
{"a b c", "a b c", 12, false},
|
||||
{"a b c", "a b c", 3, false},
|
||||
{"a", "a", 1, false},
|
||||
{"Hello 中国", "Hello 中", 2, true},
|
||||
{"Hello 中国", "Hello 中国", 3, false},
|
||||
{"This is a sentence.", "This is a sentence.", 5, false},
|
||||
{"This is also a sentence!", "This is also a sentence!", 1, false},
|
||||
{"To be. Or not to be. That's the question.", "To be.", 1, true},
|
||||
@@ -72,3 +71,36 @@ func TestTruncateWordsToWholeSentence(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateWordsByRune(t *testing.T) {
|
||||
type test struct {
|
||||
input, expected string
|
||||
max int
|
||||
truncated bool
|
||||
}
|
||||
data := []test{
|
||||
{"", "", 1, false},
|
||||
{"a b c", "a b c", 12, false},
|
||||
{"a b c", "a b c", 3, false},
|
||||
{"a", "a", 1, false},
|
||||
{"Hello 中国", "", 0, true},
|
||||
{"这是中文,全中文。", "这是中文,", 5, true},
|
||||
{"Hello 中国", "Hello 中", 2, true},
|
||||
{"Hello 中国", "Hello 中国", 3, false},
|
||||
{"Hello中国 Good 好的", "Hello中国 Good 好", 9, true},
|
||||
{"This is a sentence.", "This is", 2, true},
|
||||
{"This is also a sentence!", "This", 1, true},
|
||||
{"To be. Or not to be. That's the question.", "To be. Or not", 4, true},
|
||||
{" \nThis is not a sentence\n ", "This is not", 3, true},
|
||||
}
|
||||
for i, d := range data {
|
||||
output, truncated := TruncateWordsByRune(strings.Fields(d.input), d.max)
|
||||
if d.expected != output {
|
||||
t.Errorf("Test %d failed. Expected %q got %q", i, d.expected, output)
|
||||
}
|
||||
|
||||
if d.truncated != truncated {
|
||||
t.Errorf("Test %d failed. Expected truncated=%t got %t", i, d.truncated, truncated)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user