WordCount and Summary support CJK Language

* Add a global `hasCJKLanguage` flag; if true, auto-detection of CJK languages is turned on
 * Add an `isCJKLanguage` front matter setting to explicitly specify whether the content is in a CJK language
 * For .Summary: If isCJKLanguage is true, use the runes as basis for truncation, else keep as today.
 * For WordCount: If isCJKLanguage is true, use the runes as basis for calculation, else keep as today.
 * Unexport RuneCount

Fixes #1377
This commit is contained in:
coderzh
2015-09-03 18:22:20 +08:00
committed by Bjørn Erik Pedersen
parent 2c045ac449
commit 823334875d
5 changed files with 247 additions and 93 deletions

View File

@@ -19,9 +19,9 @@ package helpers
import (
"bytes"
"unicode/utf8"
"html/template"
"os/exec"
"unicode/utf8"
"github.com/miekg/mmark"
"github.com/russross/blackfriday"
@@ -178,7 +178,6 @@ func GetHTMLRenderer(defaultFlags int, ctx *RenderingContext) blackfriday.Render
}
}
func getMarkdownExtensions(ctx *RenderingContext) int {
flags := 0 | blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
blackfriday.EXTENSION_TABLES | blackfriday.EXTENSION_FENCED_CODE |
@@ -385,61 +384,51 @@ func TruncateWords(s string, max int) string {
return strings.Join(words[:max], " ")
}
// TruncateWordsByRune joins words with single spaces, stopping once max
// units have been counted, and reports whether anything was cut off.
//
// A word made up only of single-byte characters counts as one unit. A word
// containing at least one multi-byte rune counts one unit per rune, and may
// be cut in the middle (always on a rune boundary) when the budget runs out
// inside it.
//
// The words slice passed in is never modified.
func TruncateWordsByRune(words []string, max int) (string, bool) {
	count := 0
	for index, word := range words {
		if count >= max {
			return strings.Join(words[:index], " "), true
		}
		runeCount := utf8.RuneCountInString(word)
		switch {
		case len(word) == runeCount:
			// Byte length equals rune count: no multi-byte runes, count as one word.
			count++
		case count+runeCount < max:
			// The whole multi-byte word fits with room to spare.
			count += runeCount
		default:
			// The budget is reached inside (or exactly at the end of) this
			// word; walk it rune by rune to find the cut position.
			for ri := range word {
				if count >= max {
					// Cap the prefix's capacity so append allocates a new
					// backing array instead of overwriting words[index] in
					// the caller's slice.
					truncatedWords := append(words[:index:index], word[:ri])
					return strings.Join(truncatedWords, " "), true
				}
				count++
			}
		}
	}
	return strings.Join(words, " "), false
}
// TruncateWordsToWholeSentence takes a slice of words and a limit, max.
// It returns the words joined by single spaces and a bool reporting whether
// the result was truncated. When the limit falls before the end of the
// input, the result is extended up to and including the next word that ends
// in ".", "?", "!" or `."`, so that it finishes on a whole sentence where one
// can be found.
//
// NOTE(review): the body below appears to interleave the removed and added
// lines of a diff hunk (two consecutive final returns, unbalanced braces) —
// confirm against the repository before relying on it.
func TruncateWordsToWholeSentence(words []string, max int) (string, bool) {
count := 0
index, word := 0, ""
truncated := false
for index, word = range words {
runeCount := utf8.RuneCountInString(word)
if len(word) == runeCount {
count++;
} else {
if count + runeCount <= max {
count += runeCount
} else {
offset := 0
for count < max {
_, width := utf8.DecodeRuneInString(word[offset:])
offset += width
count++
}
words[index] = word[:offset]
truncated = true
}
}
if count >= max {
if index < len(words) - 1 {
truncated = true
}
break
if max >= len(words) {
return strings.Join(words, " "), false
}
for counter, word := range words[max:] {
if strings.HasSuffix(word, ".") ||
strings.HasSuffix(word, "?") ||
strings.HasSuffix(word, ".\"") ||
strings.HasSuffix(word, "!") {
upper := max + counter + 1
return strings.Join(words[:upper], " "), (upper < len(words))
}
}
index += 1
if index < len(words) {
for counter, word := range words[index:] {
if len(word) != utf8.RuneCountInString(word) {
break
}
if strings.HasSuffix(word, ".") ||
strings.HasSuffix(word, "?") ||
strings.HasSuffix(word, ".\"") ||
strings.HasSuffix(word, "!") {
upper := index + counter + 1
return strings.Join(words[:upper], " "), (upper < len(words))
}
}
} else if index > len(words) {
return strings.Join(words, " "), truncated
}
return strings.Join(words[:index], " "), truncated
return strings.Join(words[:max], " "), true
}
// GetAsciidocContent calls asciidoctor or asciidoc as an external helper