WordCount and Summary support CJK Language

* Add a global `hasCJKLanguage` flag; if true, turn on auto-detection of CJK languages
 * Add an `isCJKLanguage` front matter setting to explicitly specify whether the content is in a CJK language
 * For .Summary: if isCJKLanguage is true, use runes as the basis for truncation; otherwise keep the current behavior.
 * For WordCount: if isCJKLanguage is true, use runes as the basis for the calculation; otherwise keep the current behavior.
 * Unexport RuneCount

Fixes #1377
This commit is contained in:
coderzh
2015-09-03 18:22:20 +08:00
committed by Bjørn Erik Pedersen
parent 2c045ac449
commit 823334875d
5 changed files with 247 additions and 93 deletions

View File

@@ -28,6 +28,7 @@ import (
"net/url"
"path"
"path/filepath"
"regexp"
"strings"
"sync"
"time"
@@ -42,6 +43,10 @@ import (
"github.com/spf13/viper"
)
var (
// cjk matches a single character belonging to the Han, Hangul, Hiragana
// or Katakana Unicode scripts. It is compiled once at package scope.
cjk = regexp.MustCompile(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}`)
)
type Page struct {
Params map[string]interface{}
Content template.HTML
@@ -67,7 +72,6 @@ type Page struct {
contentShortCodes map[string]string
plain string // TODO should be []byte
plainWords []string
plainRuneCount int
plainInit sync.Once
plainSecondaryInit sync.Once
renderingConfig *helpers.Blackfriday
@@ -78,6 +82,7 @@ type Page struct {
Node
pageMenus PageMenus
pageMenusInit sync.Once
isCJKLanguage bool
}
type Source struct {
@@ -111,12 +116,6 @@ func (p *Page) PlainWords() []string {
return p.plainWords
}
// RuneCount returns the rune count, excluding any whitespace, of the plain content.
// The count is computed by initPlainSecondary before the stored value is read.
func (p *Page) RuneCount() int {
p.initPlainSecondary()
return p.plainRuneCount
}
func (p *Page) initPlain() {
p.plainInit.Do(func() {
p.plain = helpers.StripHTML(string(p.Content))
@@ -125,20 +124,6 @@ func (p *Page) initPlain() {
})
}
// initPlainSecondary stores in plainRuneCount the number of runes in the
// plain content for which helpers.IsWhitespace reports false. The work is
// wrapped in plainSecondaryInit.Do, so it runs at most once per Page.
func (p *Page) initPlainSecondary() {
p.plainSecondaryInit.Do(func() {
// Make sure p.plain has been populated before counting.
p.initPlain()
runeCount := 0
// Ranging over a string yields runes, not bytes.
for _, r := range p.plain {
if !helpers.IsWhitespace(r) {
runeCount++
}
}
p.plainRuneCount = runeCount
return
})
}
// IsNode always returns false for a Page.
func (p *Page) IsNode() bool {
return false
}
@@ -218,7 +203,13 @@ func (p *Page) setSummary() {
} else {
// If hugo defines split:
// render, strip html, then split
summary, truncated := helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength)
var summary string
var truncated bool
if p.isCJKLanguage {
summary, truncated = helpers.TruncateWordsByRune(p.PlainWords(), helpers.SummaryLength)
} else {
summary, truncated = helpers.TruncateWordsToWholeSentence(p.PlainWords(), helpers.SummaryLength)
}
p.Summary = template.HTML(summary)
p.Truncated = truncated
@@ -363,18 +354,27 @@ func (p *Page) ReadFrom(buf io.Reader) (int64, error) {
}
func (p *Page) analyzePage() {
p.WordCount = 0
for _, word := range p.PlainWords() {
runeCount := utf8.RuneCountInString(word)
if len(word) == runeCount {
p.WordCount++
} else {
p.WordCount += runeCount
if p.isCJKLanguage {
p.WordCount = 0
for _, word := range p.PlainWords() {
runeCount := utf8.RuneCountInString(word)
if len(word) == runeCount {
p.WordCount++
} else {
p.WordCount += runeCount
}
}
} else {
p.WordCount = len(p.PlainWords())
}
p.FuzzyWordCount = int((p.WordCount+100)/100) * 100
p.ReadingTime = int((p.WordCount + 212) / 213)
if p.isCJKLanguage {
p.ReadingTime = int((p.WordCount + 500) / 501)
} else {
p.ReadingTime = int((p.WordCount + 212) / 213)
}
}
func (p *Page) permalink() (*url.URL, error) {
@@ -481,7 +481,7 @@ func (p *Page) update(f interface{}) error {
}
m := f.(map[string]interface{})
var err error
var draft, published *bool
var draft, published, isCJKLanguage *bool
for k, v := range m {
loki := strings.ToLower(k)
switch loki {
@@ -542,6 +542,9 @@ func (p *Page) update(f interface{}) error {
p.Status = cast.ToString(v)
case "sitemap":
p.Sitemap = parseSitemap(cast.ToStringMap(v))
case "iscjklanguage":
isCJKLanguage = new(bool)
*isCJKLanguage = cast.ToBool(v)
default:
// If not one of the explicit values, store in Params
switch vv := v.(type) {
@@ -596,6 +599,16 @@ func (p *Page) update(f interface{}) error {
p.Lastmod = p.Date
}
if isCJKLanguage != nil {
p.isCJKLanguage = *isCJKLanguage
} else if viper.GetBool("HasCJKLanguage") {
if cjk.Match(p.rawContent) {
p.isCJKLanguage = true
} else {
p.isCJKLanguage = false
}
}
return nil
}
@@ -766,6 +779,8 @@ func (p *Page) parse(reader io.Reader) error {
p.renderable = psr.IsRenderable()
p.frontmatter = psr.FrontMatter()
p.rawContent = psr.Content()
meta, err := psr.Metadata()
if meta != nil {
if err != nil {
@@ -778,8 +793,6 @@ func (p *Page) parse(reader io.Reader) error {
}
}
p.rawContent = psr.Content()
return nil
}