Add Page.Contents with scope support

Note that this also adds a new `.ContentWithoutSummary` method, and to do that we had to unify the different summary types:

Both `auto` and `manual` now return HTML. Before this commit, `auto` would return plain text. This could be considered a slightly breaking change, but for the better: now you can treat `.Summary` the same without thinking about where it comes from, and if you want plain text, pipe it into `{{ .Summary | plainify }}`.

Fixes #8680
Fixes #12761
Fixes #12778
Fixes #716
This commit is contained in:
Bjørn Erik Pedersen
2024-08-13 15:49:56 +02:00
parent 2b5c335e93
commit 37609262dc
22 changed files with 1614 additions and 858 deletions

View File

@@ -74,10 +74,17 @@ type ChildCareProvider interface {
Resources() resource.Resources
}
// MarkupProvider is implemented by types that can hand out a Markup.
type MarkupProvider interface {
// Markup returns a Markup for the given options.
// NOTE(review): the templates in this change pass a scope name as the
// first option (e.g. .Markup "home"); confirm the full option contract
// against the implementation.
Markup(opts ...any) Markup
}
// ContentProvider provides the content related values for a Page.
type ContentProvider interface {
Content(context.Context) (any, error)
// ContentWithoutSummary returns the Page Content stripped of the summary.
ContentWithoutSummary(ctx context.Context) (template.HTML, error)
// Plain returns the Page Content stripped of HTML markup.
Plain(context.Context) string
@@ -169,6 +176,7 @@ type PageProvider interface {
// Page is the core interface in Hugo and what you get as the top level data context in your templates.
type Page interface {
MarkupProvider
ContentProvider
TableOfContentsProvider
PageWithoutContent
@@ -260,7 +268,7 @@ type PageMetaInternalProvider interface {
type PageRenderProvider interface {
// Render renders the given layout with this Page as context.
Render(ctx context.Context, layout ...string) (template.HTML, error)
// RenderString renders the first value in args with the content renderer defined
// for this Page.
// It takes an optional map as a second argument:
//

View File

@@ -35,6 +35,7 @@ type OutputFormatContentProvider interface {
// OutputFormatPageContentProvider holds the exported methods from Page that are "outputFormat aware".
type OutputFormatPageContentProvider interface {
MarkupProvider
ContentProvider
TableOfContentsProvider
PageRenderProvider
@@ -74,6 +75,11 @@ func (lcp *LazyContentProvider) Reset() {
lcp.init.Reset()
}
// Markup runs the provider's init step and then delegates to the wrapped
// content provider's Markup with the same options.
func (lcp *LazyContentProvider) Markup(opts ...any) Markup {
	// Markup receives no context from its caller, so the init step is run
	// with a background context.
	ctx := context.Background()
	lcp.init.Do(ctx)

	markup := lcp.cp.Markup(opts...)
	return markup
}
func (lcp *LazyContentProvider) TableOfContents(ctx context.Context) template.HTML {
lcp.init.Do(ctx)
return lcp.cp.TableOfContents(ctx)
@@ -89,6 +95,11 @@ func (lcp *LazyContentProvider) Content(ctx context.Context) (any, error) {
return lcp.cp.Content(ctx)
}
// ContentWithoutSummary runs the provider's init step with ctx and then
// delegates to the wrapped content provider's ContentWithoutSummary.
func (lcp *LazyContentProvider) ContentWithoutSummary(ctx context.Context) (template.HTML, error) {
	lcp.init.Do(ctx)

	html, err := lcp.cp.ContentWithoutSummary(ctx)
	return html, err
}
func (lcp *LazyContentProvider) Plain(ctx context.Context) string {
lcp.init.Do(ctx)
return lcp.cp.Plain(ctx)

View File

@@ -0,0 +1,344 @@
// Copyright 2024 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package page
import (
"context"
"html/template"
"regexp"
"strings"
"unicode"
"unicode/utf8"
"github.com/gohugoio/hugo/common/types"
"github.com/gohugoio/hugo/markup/tableofcontents"
"github.com/gohugoio/hugo/media"
"github.com/gohugoio/hugo/tpl"
)
// Content is the result of rendering a Markup (see Markup.Render). It gives
// access to the rendered HTML and to values derived from it.
type Content interface {
// Content returns the rendered content as HTML.
Content(context.Context) (template.HTML, error)
// ContentWithoutSummary returns the rendered content with the summary removed.
ContentWithoutSummary(context.Context) (template.HTML, error)
// Summary returns the summary of the content.
Summary(context.Context) (Summary, error)
// Plain returns the content as plain text.
Plain(context.Context) string
// PlainWords returns the plain text content as a slice of words.
PlainWords(context.Context) []string
// WordCount returns the number of words in the content.
WordCount(context.Context) int
// FuzzyWordCount returns an approximate word count.
// NOTE(review): the rounding rule is not visible here; confirm against the implementation.
FuzzyWordCount(context.Context) int
// ReadingTime returns an estimated reading time.
// NOTE(review): the unit is not visible here; confirm against the implementation.
ReadingTime(context.Context) int
// Len returns the length of the content.
// NOTE(review): presumably the length of the rendered HTML; confirm against the implementation.
Len(context.Context) int
}
// Markup is what a MarkupProvider hands out. It exposes the different ways
// of rendering a page's markup.
type Markup interface {
// Render renders the markup and returns the resulting Content.
Render(context.Context) (Content, error)
// RenderString renders the given args and returns the result as HTML.
RenderString(ctx context.Context, args ...any) (template.HTML, error)
// RenderShortcodes returns the markup with its shortcodes rendered.
// NOTE(review): whether the remaining markup is left unrendered is not visible here; confirm.
RenderShortcodes(context.Context) (template.HTML, error)
// Fragments returns the fragments (e.g. heading identifiers) found in the markup.
Fragments(context.Context) *tableofcontents.Fragments
}
// Compile-time check that a Summary value satisfies types.PrintableValueProvider.
var _ types.PrintableValueProvider = Summary{}
// The values used for Summary.Type.
const (
SummaryTypeAuto = "auto"
SummaryTypeManual = "manual"
SummaryTypeFrontMatter = "frontmatter"
)
// Summary is a page summary together with information about how it was
// produced.
type Summary struct {
	// Text is the summary as HTML.
	Text template.HTML
	// Type is one of "auto", "manual" or "frontmatter".
	Type string
	// Truncated is the truncation flag recorded for this summary.
	Truncated bool
}

// IsZero reports whether the summary has no text. Type and Truncated are not
// considered.
func (s Summary) IsZero() bool {
	return len(s.Text) == 0
}

// PrintableValue returns the summary text, which is the value used when a
// Summary is printed.
func (s Summary) PrintableValue() any {
	var printable any = s.Text
	return printable
}
// Compile-time check that a *Summary satisfies types.PrintableValueProvider.
var _ types.PrintableValueProvider = (*Summary)(nil)
// HtmlSummary describes where the summary is located inside a piece of
// rendered HTML. All ranges are byte offsets into source.
type HtmlSummary struct {
// source is the HTML the ranges below refer to.
source string
// SummaryLowHigh is the range of the summary.
SummaryLowHigh types.LowHigh[string]
// SummaryEndTag is the range of closing markup that is appended to the
// summary; it is only set when a divider shares its paragraph with
// preceding text.
SummaryEndTag types.LowHigh[string]
// WrapperStart and WrapperEnd are the ranges of the opening and closing
// markup of a document wrapper element, if any.
WrapperStart types.LowHigh[string]
WrapperEnd types.LowHigh[string]
// Divider is the range of the manual summary divider, if any.
Divider types.LowHigh[string]
}
// wrap puts the wrapper's opening and closing markup around ss. When no
// wrapper start is set, ss is returned as is.
func (s HtmlSummary) wrap(ss string) string {
	if s.WrapperStart.IsZero() {
		return ss
	}
	opening := s.source[s.WrapperStart.Low:s.WrapperStart.High]
	closing := s.source[s.WrapperEnd.Low:s.WrapperEnd.High]
	return opening + ss + closing
}
// wrapLeft puts the wrapper's opening markup in front of ss. When no wrapper
// start is set, ss is returned as is.
func (s HtmlSummary) wrapLeft(ss string) string {
	if s.WrapperStart.IsZero() {
		return ss
	}
	opening := s.source[s.WrapperStart.Low:s.WrapperStart.High]
	return opening + ss
}
// Value returns the part of the source that l delimits.
func (s HtmlSummary) Value(l types.LowHigh[string]) string {
	from, to := l.Low, l.High
	return s.source[from:to]
}
// trimSpace returns ss without leading and trailing white space.
func (s HtmlSummary) trimSpace(ss string) string {
	trimmed := strings.TrimSpace(ss)
	return trimmed
}
// Content returns the source with the divider cut out and surrounding white
// space trimmed. Without a divider the source is returned unchanged.
func (s HtmlSummary) Content() string {
	if s.Divider.IsZero() {
		return s.source
	}
	beforeDivider := s.source[:s.Divider.Low]
	afterDivider := s.source[s.Divider.High:]
	return s.trimSpace(beforeDivider + afterDivider)
}
// Summary returns the summary as HTML, wrapped in the document wrapper (if
// any) and with surrounding white space trimmed.
func (s HtmlSummary) Summary() string {
	if s.Divider.IsZero() {
		return s.trimSpace(s.wrap(s.Value(s.SummaryLowHigh)))
	}

	// Everything from the start of the summary up to the divider.
	text := s.source[s.SummaryLowHigh.Low:s.Divider.Low]

	// Any part of the summary range that lies beyond the divider.
	if summaryEnd := s.SummaryLowHigh.High; summaryEnd > s.Divider.High {
		text += s.source[s.Divider.High:summaryEnd]
	}

	// Closing markup preserved for a divider placed inside a paragraph.
	if endTag := s.SummaryEndTag; !endTag.IsZero() {
		text += s.Value(endTag)
	}

	return s.trimSpace(s.wrap(text))
}
// ContentWithoutSummary returns what follows the summary in the source,
// prefixed with the wrapper's opening markup (if any) and with surrounding
// white space trimmed.
func (s HtmlSummary) ContentWithoutSummary() string {
	var restStart int

	switch {
	case s.Divider.IsZero():
		// The summary covers everything between the wrapper's start and end,
		// so nothing is left.
		if s.SummaryLowHigh.Low == s.WrapperStart.High && s.SummaryLowHigh.High == s.WrapperEnd.Low {
			return ""
		}
		restStart = s.SummaryLowHigh.High
	case s.SummaryEndTag.IsZero():
		restStart = s.Divider.High
	default:
		restStart = s.SummaryEndTag.High
	}

	return s.trimSpace(s.wrapLeft(s.source[restStart:]))
}
// Truncated reports whether the summary range ends before the end of the
// source.
func (s HtmlSummary) Truncated() bool {
	summaryEnd := s.SummaryLowHigh.High
	return len(s.source) > summaryEnd
}
// resolveParagraphTagAndSetWrapper returns the tag matcher for the element
// that delimits paragraphs for the given media type: startEndDiv for AsciiDoc,
// startEndP for everything else.
//
// For reStructuredText it also records the <div class="document"> wrapper in
// WrapperStart and WrapperEnd, provided both the opening marker and a closing
// </div> are found in the source.
func (s *HtmlSummary) resolveParagraphTagAndSetWrapper(mt media.Type) tagReStartEnd {
	subType := mt.SubType

	if subType == media.DefaultContentTypes.AsciiDoc.SubType {
		return startEndDiv
	}

	if subType == media.DefaultContentTypes.ReStructuredText.SubType {
		const (
			openMarker  = "<div class=\"document\">"
			closeMarker = "</div>"
		)
		openAt := strings.Index(s.source, openMarker)
		closeAt := strings.LastIndex(s.source, closeMarker)
		if openAt >= 0 && closeAt >= 0 {
			// The wrapper start spans from the beginning of the source to the
			// end of the opening marker; the wrapper end spans from the last
			// closing marker to the end of the source.
			s.WrapperStart = types.LowHigh[string]{Low: 0, High: openAt + len(openMarker)}
			s.WrapperEnd = types.LowHigh[string]{Low: closeAt, High: len(s.source)}
		}
	}

	return startEndP
}
// ExtractSummaryFromHTML extracts a summary from the given HTML content.
//
// The input is walked one paragraph element at a time (the element depends on
// mt, see resolveParagraphTagAndSetWrapper) while counting the white space
// separated chunks in each paragraph. As soon as numWords is reached, the
// summary is set to end right after the closing tag of the current paragraph.
// If numWords is never reached, the summary covers everything up to the start
// of the wrapper's closing markup, or to the end of the input when there is
// no wrapper.
//
// With isCJK set, HTML is stripped from each chunk, and a chunk that contains
// multi-byte characters counts as its number of runes instead of one.
//
// When numWords is zero or negative, no summary range is set.
func ExtractSummaryFromHTML(mt media.Type, input string, numWords int, isCJK bool) (result HtmlSummary) {
	result.source = input
	ptag := result.resolveParagraphTagAndSetWrapper(mt)

	if numWords <= 0 {
		return result
	}

	// weigh returns how much a single chunk adds to the word count.
	weigh := func(chunk string) int {
		if !isCJK {
			return 1
		}
		chunk = tpl.StripHTML(chunk)
		if runes := utf8.RuneCountInString(chunk); runes != len(chunk) {
			return runes
		}
		return 1
	}

	summaryStart := result.WrapperStart.High

	limit := len(input)
	if result.WrapperEnd.Low > 0 {
		limit = result.WrapperEnd.Low
	}

	total := 0
	for pos := summaryStart; pos < limit; {
		rest := input[pos:]
		closeAt := strings.Index(rest, "</"+ptag.tagName)
		if closeAt == -1 {
			break
		}
		paragraph := rest[:closeAt]

		// Count the chunks in the current paragraph. A chunk ends at a white
		// space rune or at the last rune of the paragraph.
		chunkStart := 0
		for i, r := range paragraph {
			atBoundary := unicode.IsSpace(r) || i+utf8.RuneLen(r) == len(paragraph)
			if !atBoundary {
				continue
			}
			total += weigh(paragraph[chunkStart:i])
			chunkStart = i
			if total >= numWords {
				break
			}
		}

		if total >= numWords {
			// End the summary just past the "</tag>" that closes this paragraph.
			result.SummaryLowHigh = types.LowHigh[string]{
				Low:  summaryStart,
				High: pos + closeAt + len(ptag.tagName) + 3,
			}
			return result
		}

		pos += closeAt + len(ptag.tagName) + 2
	}

	result.SummaryLowHigh = types.LowHigh[string]{
		Low:  summaryStart,
		High: limit,
	}

	return result
}
// ExtractSummaryFromHTMLWithDivider extracts a summary from the given HTML content with
// a manual summary divider.
//
// The summary is everything between the wrapper start (if any) and the first
// occurrence of divider. For media types other than HTML the divider range is
// first expanded with expandSummaryDivider. When divider does not occur in
// input, no summary range is set.
func ExtractSummaryFromHTMLWithDivider(mt media.Type, input, divider string) (result HtmlSummary) {
	result.source = input

	dividerAt := strings.Index(input, divider)
	result.Divider = types.LowHigh[string]{
		Low:  dividerAt,
		High: dividerAt + len(divider),
	}
	if dividerAt == -1 {
		// No summary.
		return result
	}

	ptag := result.resolveParagraphTagAndSetWrapper(mt)

	if !mt.IsHTML() {
		result.Divider, result.SummaryEndTag = expandSummaryDivider(result.source, ptag, result.Divider)
	}

	result.SummaryLowHigh = types.LowHigh[string]{
		Low:  result.WrapperStart.High,
		High: result.Divider.Low,
	}

	return result
}
var (
// pOrDiv is used by expandSummaryDivider to step over an opening tag
// while scanning backwards from the divider.
// NOTE(review): as written, the `$` anchor only applies to the <div>
// alternative and `[^>]?` allows at most one character between the tag
// name and '>'; confirm that this is intended.
pOrDiv = regexp.MustCompile(`<p[^>]?>|<div[^>]?>$`)
// startEndDiv matches an opening or closing <div> at the end of a string.
startEndDiv = tagReStartEnd{
startEndOfString: regexp.MustCompile(`<div[^>]*?>$`),
endEndOfString: regexp.MustCompile(`</div>$`),
tagName: "div",
}
// startEndP matches an opening or closing <p> at the end of a string.
startEndP = tagReStartEnd{
startEndOfString: regexp.MustCompile(`<p[^>]*?>$`),
endEndOfString: regexp.MustCompile(`</p>$`),
tagName: "p",
}
)
// tagReStartEnd bundles what is needed to locate one kind of element.
type tagReStartEnd struct {
// startEndOfString matches the element's opening tag at the end of a string.
startEndOfString *regexp.Regexp
// endEndOfString matches the element's closing tag at the end of a string.
endEndOfString *regexp.Regexp
// tagName is the element name without angle brackets.
tagName string
}
// expandSummaryDivider takes the range of the raw divider in s and decides
// how much of the surrounding markup belongs to it.
//
// If only white space and opening tags separate the divider from the opening
// tag matched by re, the returned divider range is widened to cover the whole
// element, from its opening tag to its closing tag, plus one trailing newline
// if present. The second return value is then the zero range.
//
// If other text precedes the divider inside the element, the divider range is
// left as is (apart from a possible trailing newline), and the second return
// value is the range from the end of the divider to the end of the closing
// tag, so that the caller can keep that closing markup.
//
// A zero divider is returned unchanged.
func expandSummaryDivider(s string, re tagReStartEnd, divider types.LowHigh[string]) (types.LowHigh[string], types.LowHigh[string]) {
var endMarkup types.LowHigh[string]
if divider.IsZero() {
return divider, endMarkup
}
lo, hi := divider.Low, divider.High
// Set when something other than white space and opening tags precedes the divider.
var preserveEndMarkup bool
// Find the start of the paragraph.
for i := lo - 1; i >= 0; i-- {
if s[i] == '>' {
// The text up to and including this '>' ends with the opening tag
// we are looking for: the divider starts at that tag.
if match := re.startEndOfString.FindString(s[:i+1]); match != "" {
lo = i - len(match) + 1
break
}
// Some other opening tag: move back by the length of the match and
// keep scanning.
if match := pOrDiv.FindString(s[:i+1]); match != "" {
i -= len(match) - 1
continue
}
}
// Anything that is not white space means that the divider shares the
// element with other content.
r, _ := utf8.DecodeRuneInString(s[i:])
if !unicode.IsSpace(r) {
preserveEndMarkup = true
break
}
}
divider.Low = lo
// Now walk forward to the end of the paragraph.
for ; hi < len(s); hi++ {
if s[hi] != '>' {
continue
}
// Stop just past the first closing tag that ends at this '>'.
if match := re.endEndOfString.FindString(s[:hi+1]); match != "" {
hi++
break
}
}
if preserveEndMarkup {
// Keep the divider as is and report the closing markup separately.
endMarkup.Low = divider.High
endMarkup.High = hi
} else {
divider.High = hi
}
// Consume trailing newline if any.
if divider.High < len(s) && s[divider.High] == '\n' {
divider.High++
}
return divider, endMarkup
}

View File

@@ -0,0 +1,337 @@
// Copyright 2024 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package page_test
import (
"testing"
"github.com/gohugoio/hugo/hugolib"
"github.com/gohugoio/hugo/markup/asciidocext"
"github.com/gohugoio/hugo/markup/rst"
)
// TestPageMarkupMethods renders one page in two markup scopes, "main" and
// "foo", and checks the values returned by the Content methods and by
// Fragments in each of them. The foo shortcode only emits its headings and
// long paragraphs when the markup scope is "foo", so the two scopes produce
// different content from the same source.
func TestPageMarkupMethods(t *testing.T) {
t.Parallel()
files := `
-- hugo.toml --
summaryLength=2
-- content/p1.md --
---
title: "Post 1"
date: "2020-01-01"
---
{{% foo %}}
-- layouts/shortcodes/foo.html --
Two *words*.
{{/* Test that markup scope is set in all relevant constructs. */}}
{{ if eq hugo.Context.MarkupScope "foo" }}
## Heading 1
Sint ad mollit qui Lorem ut occaecat culpa officia. Et consectetur aute voluptate non sit ullamco adipisicing occaecat. Sunt deserunt amet sit ad. Deserunt enim voluptate proident ipsum dolore dolor ut sit velit esse est mollit irure esse. Mollit incididunt veniam laboris magna et excepteur sit duis. Magna adipisicing reprehenderit tempor irure.
### Heading 2
Exercitation quis est consectetur occaecat nostrud. Ullamco aute mollit aliqua est amet. Exercitation ullamco consectetur dolor labore et non irure eu cillum Lorem.
{{ end }}
-- layouts/index.html --
Home.
{{ .Content }}
-- layouts/_default/single.html --
Single.
Page.ContentWithoutSummmary: {{ .ContentWithoutSummary }}|
{{ template "render-scope" (dict "page" . "scope" "main") }}
{{ template "render-scope" (dict "page" . "scope" "foo") }}
{{ define "render-scope" }}
{{ $c := .page.Markup .scope }}
{{ with $c.Render }}
{{ $.scope }}: Content: {{ .Content }}|
{{ $.scope }}: ContentWithoutSummary: {{ .ContentWithoutSummary }}|
{{ $.scope }}: Plain: {{ .Plain }}|
{{ $.scope }}: PlainWords: {{ .PlainWords }}|
{{ $.scope }}: WordCount: {{ .WordCount }}|
{{ $.scope }}: FuzzyWordCount: {{ .FuzzyWordCount }}|
{{ $.scope }}: ReadingTime: {{ .ReadingTime }}|
{{ $.scope }}: Len: {{ .Len }}|
{{ $.scope }}: Summary: {{ with .Summary }}{{ . }}{{ else }}nil{{ end }}|
{{ end }}
{{ $.scope }}: Fragments: {{ $c.Fragments.Identifiers }}|
{{ end }}
`
b := hugolib.Test(t, files)
// Main scope.
b.AssertFileContent("public/p1/index.html",
"Page.ContentWithoutSummmary: |",
"main: Content: <p>Two <em>words</em>.</p>\n|",
"main: ContentWithoutSummary: |",
"main: Plain: Two words.\n|",
"PlainWords: [Two words.]|\nmain: WordCount: 2|\nmain: FuzzyWordCount: 100|\nmain: ReadingTime: 1|",
"main: Summary: <p>Two <em>words</em>.</p>|\n\nmain: Fragments: []|",
"main: Len: 27|",
)
// Foo scope (has more content).
b.AssertFileContent("public/p1/index.html",
"foo: Content: <p>Two <em>words</em>.</p>\n<h2",
"foo: ContentWithoutSummary: <h2",
"Plain: Two words.\nHeading 1",
"PlainWords: [Two words. Heading 1",
"foo: WordCount: 81|\nfoo: FuzzyWordCount: 100|\nfoo: ReadingTime: 1|\nfoo: Len: 622|",
"foo: Summary: <p>Two <em>words</em>.</p>|",
"foo: Fragments: [heading-1 heading-2]|",
)
}
// TestPageMarkupScope checks the value of hugo.Context.MarkupScope as seen by
// render hooks and shortcodes. The single page template renders p1 with no
// scope given, and the home page renders it with the scope "home". The page
// also pulls in other pages via shortcodes that call RenderShortcodes and
// Render.Content, and the assertions expect the "home" scope there as well.
func TestPageMarkupScope(t *testing.T) {
t.Parallel()
files := `
-- hugo.toml --
disableKinds = ["taxonomy", "term", "rss", "section"]
-- content/p1.md --
---
title: "Post 1"
date: "2020-01-01"
---
# P1
{{< foo >}}
Begin:{{% includerendershortcodes "p2" %}}:End
Begin:{{< includecontent "p3" >}}:End
-- content/p2.md --
---
title: "Post 2"
date: "2020-01-02"
---
# P2
-- content/p3.md --
---
title: "Post 3"
date: "2020-01-03"
---
# P3
{{< foo >}}
-- layouts/index.html --
Home.
{{ with site.GetPage "p1" }}
{{ with .Markup "home" }}
{{ .Render.Content }}
{{ end }}
{{ end }}
-- layouts/_default/single.html --
Single.
{{ with .Markup }}
{{ with .Render }}
{{ .Content }}
{{ end }}
{{ end }}
-- layouts/_default/_markup/render-heading.html --
Render heading: title: {{ .Text}} scope: {{ hugo.Context.MarkupScope }}|
-- layouts/shortcodes/foo.html --
Foo scope: {{ hugo.Context.MarkupScope }}|
-- layouts/shortcodes/includerendershortcodes.html --
{{ $p := site.GetPage (.Get 0) }}
includerendershortcodes: {{ hugo.Context.MarkupScope }}|{{ $p.Markup.RenderShortcodes }}|
-- layouts/shortcodes/includecontent.html --
{{ $p := site.GetPage (.Get 0) }}
includecontent: {{ hugo.Context.MarkupScope }}|{{ $p.Markup.Render.Content }}|
`
b := hugolib.Test(t, files)
// Rendered without a scope: the scope is empty.
b.AssertFileContent("public/p1/index.html", "Render heading: title: P1 scope: |", "Foo scope: |")
// Rendered from the home page with the "home" scope.
b.AssertFileContent("public/index.html",
"Render heading: title: P1 scope: home|",
"Foo scope: home|",
"Begin:\nincluderendershortcodes: home|</p>\nRender heading: title: P2 scope: home|<p>|:End",
"Begin:\nincludecontent: home|Render heading: title: P3 scope: home|Foo scope: home|\n|\n:End",
)
}
// TestPageMarkupWithoutSummary checks Content, ContentWithoutSummary, the
// word counts and the Summary (text, type and truncated flag) for two
// Markdown pages: p1 with a manual <!--more--> divider and p2 with an
// automatic summary (summaryLength=5).
func TestPageMarkupWithoutSummary(t *testing.T) {
t.Parallel()
files := `
-- hugo.toml --
summaryLength=5
-- content/p1.md --
---
title: "Post 1"
date: "2020-01-01"
---
This is summary.
<!--more-->
This is content.
-- content/p2.md --
---
title: "Post 2"
date: "2020-01-01"
---
This is some content about a summary and more.
Another paragraph.
Third paragraph.
-- layouts/_default/single.html --
Single.
Page.Summary: {{ .Summary }}|
{{ with .Markup.Render }}
Content: {{ .Content }}|
ContentWithoutSummary: {{ .ContentWithoutSummary }}|
WordCount: {{ .WordCount }}|
FuzzyWordCount: {{ .FuzzyWordCount }}|
{{ with .Summary }}
Summary: {{ . }}|
Summary Type: {{ .Type }}|
Summary Truncated: {{ .Truncated }}|
{{ end }}
{{ end }}
`
b := hugolib.Test(t, files)
// Manual summary.
b.AssertFileContentExact("public/p1/index.html",
"Content: <p>This is summary.</p>\n<p>This is content.</p>",
"ContentWithoutSummary: <p>This is content.</p>|",
"WordCount: 6|",
"FuzzyWordCount: 100|",
"Summary: <p>This is summary.</p>|",
"Summary Type: manual|",
"Summary Truncated: true|",
)
// Auto summary.
b.AssertFileContent("public/p2/index.html",
"Summary: <p>This is some content about a summary and more.</p>|",
"WordCount: 13|",
"FuzzyWordCount: 100|",
"Summary Type: auto",
"Summary Truncated: true",
)
}
// TestPageMarkupWithoutSummaryRST checks Content, ContentWithoutSummary and
// Summary for reStructuredText pages, where the rendered HTML is wrapped in
// <div class="document">. The expected summary and remaining content both
// carry that wrapper's opening tag. The test is skipped when RST is not
// supported.
func TestPageMarkupWithoutSummaryRST(t *testing.T) {
t.Parallel()
if !rst.Supports() {
t.Skip("Skip RST test as not supported")
}
files := `
-- hugo.toml --
summaryLength=5
[security.exec]
allow = ["rst", "python"]
-- content/p1.rst --
This is a story about a summary and more.
Another paragraph.
-- content/p2.rst --
This is summary.
<!--more-->
This is content.
-- layouts/_default/single.html --
Single.
Page.Summary: {{ .Summary }}|
{{ with .Markup.Render }}
Content: {{ .Content }}|
ContentWithoutSummary: {{ .ContentWithoutSummary }}|
{{ with .Summary }}
Summary: {{ . }}|
Summary Type: {{ .Type }}|
Summary Truncated: {{ .Truncated }}|
{{ end }}
{{ end }}
`
b := hugolib.Test(t, files)
// Auto summary.
b.AssertFileContentExact("public/p1/index.html",
"Content: <div class=\"document\">\n\n\n<p>This is a story about a summary and more.</p>\n<p>Another paragraph.</p>\n</div>|",
"Summary: <div class=\"document\">\n\n\n<p>This is a story about a summary and more.</p></div>|\nSummary Type: auto|\nSummary Truncated: true|",
"ContentWithoutSummary: <div class=\"document\">\n<p>Another paragraph.</p>\n</div>|",
)
// Manual summary.
b.AssertFileContentExact("public/p2/index.html",
"Content: <div class=\"document\">\n\n\n<p>This is summary.</p>\n<p>This is content.</p>\n</div>|",
"ContentWithoutSummary: <div class=\"document\"><p>This is content.</p>\n</div>|",
"Summary: <div class=\"document\">\n\n\n<p>This is summary.</p>\n</div>|\nSummary Type: manual|\nSummary Truncated: true|",
)
}
// TestPageMarkupWithoutSummaryAsciidoc checks Content, ContentWithoutSummary
// and Summary for AsciiDoc pages, where every paragraph is rendered inside its
// own <div class="paragraph">. It covers both an automatic summary (p1) and a
// manual <!--more--> divider (p2). The test is skipped when AsciiDoc is not
// supported.
func TestPageMarkupWithoutSummaryAsciidoc(t *testing.T) {
	t.Parallel()
	if !asciidocext.Supports() {
		t.Skip("Skip asciidoc test as not supported")
	}

	files := `
-- hugo.toml --
summaryLength=5
[security.exec]
allow = ["asciidoc", "python"]
-- content/p1.ad --
This is a story about a summary and more.
Another paragraph.
-- content/p2.ad --
This is summary.
<!--more-->
This is content.
-- layouts/_default/single.html --
Single.
Page.Summary: {{ .Summary }}|
{{ with .Markup.Render }}
Content: {{ .Content }}|
ContentWithoutSummary: {{ .ContentWithoutSummary }}|
{{ with .Summary }}
Summary: {{ . }}|
Summary Type: {{ .Type }}|
Summary Truncated: {{ .Truncated }}|
{{ end }}
{{ end }}
`
	b := hugolib.Test(t, files)

	// Auto summary.
	b.AssertFileContentExact("public/p1/index.html",
		"Content: <div class=\"paragraph\">\n<p>This is a story about a summary and more.</p>\n</div>\n<div class=\"paragraph\">\n<p>Another paragraph.</p>\n</div>\n|",
		"Summary: <div class=\"paragraph\">\n<p>This is a story about a summary and more.</p>\n</div>|",
		"Summary Type: auto|\nSummary Truncated: true|",
		"ContentWithoutSummary: <div class=\"paragraph\">\n<p>Another paragraph.</p>\n</div>|",
	)

	// Manual summary.
	b.AssertFileContentExact("public/p2/index.html",
		"Content: <div class=\"paragraph\">\n<p>This is summary.</p>\n</div>\n<div class=\"paragraph\">\n<p>This is content.</p>\n</div>|",
		"ContentWithoutSummary: <div class=\"paragraph\">\n<p>This is content.</p>\n</div>|",
		"Summary: <div class=\"paragraph\">\n<p>This is summary.</p>\n</div>|\nSummary Type: manual|\nSummary Truncated: true|",
	)
}

View File

@@ -0,0 +1,151 @@
// Copyright 2024 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package page
import (
"strings"
"testing"
qt "github.com/frankban/quicktest"
"github.com/gohugoio/hugo/common/types"
"github.com/gohugoio/hugo/media"
)
// TestExtractSummaryFromHTML runs ExtractSummaryFromHTML over a table of
// inputs for the different media types and compares both the summary and the
// content that remains after it.
func TestExtractSummaryFromHTML(t *testing.T) {
	c := qt.New(t)

	type testCase struct {
		mt                          media.Type
		input                       string
		isCJK                       bool
		numWords                    int
		expectSummary               string
		expectContentWithoutSummary string
	}

	cases := []testCase{
		{media.Builtin.ReStructuredTextType, "<div class=\"document\">\n\n\n<p>Simple Page</p>\n</div>", false, 70, "<div class=\"document\">\n\n\n<p>Simple Page</p>\n</div>", ""},
		{media.Builtin.ReStructuredTextType, "<div class=\"document\"><p>First paragraph</p><p>Second paragraph</p></div>", false, 2, `<div class="document"><p>First paragraph</p></div>`, "<div class=\"document\"><p>Second paragraph</p></div>"},
		{media.Builtin.MarkdownType, "<p>First paragraph</p>", false, 10, "<p>First paragraph</p>", ""},
		{media.Builtin.MarkdownType, "<p>First paragraph</p><p>Second paragraph</p>", false, 2, "<p>First paragraph</p>", "<p>Second paragraph</p>"},
		{media.Builtin.MarkdownType, "<p>First paragraph</p><p>Second paragraph</p><p>Third paragraph</p>", false, 3, "<p>First paragraph</p><p>Second paragraph</p>", "<p>Third paragraph</p>"},
		{media.Builtin.AsciiDocType, "<div><p>First paragraph</p></div><div><p>Second paragraph</p></div>", false, 2, "<div><p>First paragraph</p></div>", "<div><p>Second paragraph</p></div>"},
		{media.Builtin.MarkdownType, "<p>这是中文,全中文</p><p>a这是中文全中文</p>", true, 5, "<p>这是中文,全中文</p>", "<p>a这是中文全中文</p>"},
	}

	for idx, tc := range cases {
		got := ExtractSummaryFromHTML(tc.mt, tc.input, tc.numWords, tc.isCJK)
		c.Assert(got.Summary(), qt.Equals, tc.expectSummary, qt.Commentf("Summary %d", idx))
		c.Assert(got.ContentWithoutSummary(), qt.Equals, tc.expectContentWithoutSummary, qt.Commentf("ContentWithoutSummary %d", idx))
	}
}
// TestExtractSummaryFromHTMLWithDivider runs ExtractSummaryFromHTMLWithDivider
// over a table of inputs that contain the divider "FOOO" in various positions
// (in a paragraph of its own, at the end of a paragraph, first in the
// document) and compares the summary, the content after the summary and the
// full content with the divider removed.
func TestExtractSummaryFromHTMLWithDivider(t *testing.T) {
	c := qt.New(t)

	const divider = "FOOO"

	tests := []struct {
		mt                          media.Type
		input                       string
		expectSummary               string
		expectContentWithoutSummary string
		expectContent               string
	}{
		{media.Builtin.MarkdownType, "<p>First paragraph</p><p>FOOO</p><p>Second paragraph</p>", "<p>First paragraph</p>", "<p>Second paragraph</p>", "<p>First paragraph</p><p>Second paragraph</p>"},
		{media.Builtin.MarkdownType, "<p>First paragraph</p>\n<p>FOOO</p>\n<p>Second paragraph</p>", "<p>First paragraph</p>", "<p>Second paragraph</p>", "<p>First paragraph</p>\n<p>Second paragraph</p>"},
		{media.Builtin.MarkdownType, "<p>FOOO</p>\n<p>First paragraph</p>", "", "<p>First paragraph</p>", "<p>First paragraph</p>"},
		{media.Builtin.MarkdownType, "<p>First paragraph</p><p>Second paragraphFOOO</p><p>Third paragraph</p>", "<p>First paragraph</p><p>Second paragraph</p>", "<p>Third paragraph</p>", "<p>First paragraph</p><p>Second paragraph</p><p>Third paragraph</p>"},
		// The input's first paragraph must contain the same comma as the
		// expected summary and content; nothing in the extraction adds one.
		{media.Builtin.MarkdownType, "<p>这是中文,全中文FOOO</p><p>a这是中文全中文</p>", "<p>这是中文,全中文</p>", "<p>a这是中文全中文</p>", "<p>这是中文,全中文</p><p>a这是中文全中文</p>"},
		{media.Builtin.MarkdownType, `<p>a <strong>b</strong>` + "\v" + ` c</p>` + "\n<p>FOOO</p>", "<p>a <strong>b</strong>\v c</p>", "", "<p>a <strong>b</strong>\v c</p>"},
		{media.Builtin.HTMLType, "<p>First paragraph</p>FOOO<p>Second paragraph</p>", "<p>First paragraph</p>", "<p>Second paragraph</p>", "<p>First paragraph</p><p>Second paragraph</p>"},
		{media.Builtin.ReStructuredTextType, "<div class=\"document\">\n\n\n<p>This is summary.</p>\n<p>FOOO</p>\n<p>This is content.</p>\n</div>", "<div class=\"document\">\n\n\n<p>This is summary.</p>\n</div>", "<div class=\"document\"><p>This is content.</p>\n</div>", "<div class=\"document\">\n\n\n<p>This is summary.</p>\n<p>This is content.</p>\n</div>"},
		{media.Builtin.ReStructuredTextType, "<div class=\"document\"><p>First paragraphFOOO</p><p>Second paragraph</p></div>", "<div class=\"document\"><p>First paragraph</p></div>", "<div class=\"document\"><p>Second paragraph</p></div>", `<div class="document"><p>First paragraph</p><p>Second paragraph</p></div>`},
		{media.Builtin.AsciiDocType, "<div class=\"paragraph\"><p>Summary Next Line</p></div><div class=\"paragraph\"><p>FOOO</p></div><div class=\"paragraph\"><p>Some more text</p></div>", "<div class=\"paragraph\"><p>Summary Next Line</p></div>", "<div class=\"paragraph\"><p>Some more text</p></div>", "<div class=\"paragraph\"><p>Summary Next Line</p></div><div class=\"paragraph\"><p>Some more text</p></div>"},
		{media.Builtin.AsciiDocType, "<div class=\"paragraph\">\n<p>Summary Next Line</p>\n</div>\n<div class=\"paragraph\">\n<p>FOOO</p>\n</div>\n<div class=\"paragraph\">\n<p>Some more text</p>\n</div>\n", "<div class=\"paragraph\">\n<p>Summary Next Line</p>\n</div>", "<div class=\"paragraph\">\n<p>Some more text</p>\n</div>", "<div class=\"paragraph\">\n<p>Summary Next Line</p>\n</div>\n<div class=\"paragraph\">\n<p>Some more text</p>\n</div>"},
		{media.Builtin.AsciiDocType, "<div><p>FOOO</p></div><div><p>First paragraph</p></div>", "", "<div><p>First paragraph</p></div>", "<div><p>First paragraph</p></div>"},
		{media.Builtin.AsciiDocType, "<div><p>First paragraphFOOO</p></div><div><p>Second paragraph</p></div>", "<div><p>First paragraph</p></div>", "<div><p>Second paragraph</p></div>", "<div><p>First paragraph</p></div><div><p>Second paragraph</p></div>"},
	}

	for i, test := range tests {
		summary := ExtractSummaryFromHTMLWithDivider(test.mt, test.input, divider)
		c.Assert(summary.Summary(), qt.Equals, test.expectSummary, qt.Commentf("Summary %d", i))
		c.Assert(summary.ContentWithoutSummary(), qt.Equals, test.expectContentWithoutSummary, qt.Commentf("ContentWithoutSummary %d", i))
		c.Assert(summary.Content(), qt.Equals, test.expectContent, qt.Commentf("Content %d", i))
	}
}
// TestExpandDivider checks, for a table of inputs, which part of the input
// expandSummaryDivider reports as the divider and which part it reports as
// closing markup to preserve.
func TestExpandDivider(t *testing.T) {
	c := qt.New(t)

	type testCase struct {
		input           string
		divider         string
		ptag            tagReStartEnd
		expect          string
		expectEndMarkup string
	}

	cases := []testCase{
		{"<p>First paragraph</p>\n<p>FOOO</p>\n<p>Second paragraph</p>", "FOOO", startEndP, "<p>FOOO</p>\n", ""},
		{"<div class=\"paragraph\">\n<p>FOOO</p>\n</div>", "FOOO", startEndDiv, "<div class=\"paragraph\">\n<p>FOOO</p>\n</div>", ""},
		{"<div><p>FOOO</p></div><div><p>Second paragraph</p></div>", "FOOO", startEndDiv, "<div><p>FOOO</p></div>", ""},
		{"<div><p>First paragraphFOOO</p></div><div><p>Second paragraph</p></div>", "FOOO", startEndDiv, "FOOO", "</p></div>"},
		{"   <p> abc FOOO  </p>  ", "FOOO", startEndP, "FOOO", "  </p>"},
		{"   <p>  FOOO  </p>  ", "FOOO", startEndP, "<p>  FOOO  </p>", ""},
		{"   <p>\n  \nFOOO  </p>  ", "FOOO", startEndP, "<p>\n  \nFOOO  </p>", ""},
		{"   <div>  FOOO  </div>  ", "FOOO", startEndDiv, "<div>  FOOO  </div>", ""},
	}

	for idx, tc := range cases {
		// The raw divider range, before any expansion.
		raw := types.LowHigh[string]{
			Low:  strings.Index(tc.input, tc.divider),
			High: strings.Index(tc.input, tc.divider) + len(tc.divider),
		}

		gotDivider, gotEndMarkup := expandSummaryDivider(tc.input, tc.ptag, raw)

		c.Assert(tc.input[gotDivider.Low:gotDivider.High], qt.Equals, tc.expect, qt.Commentf("[%d] Test.expect %q", idx, tc.input))
		c.Assert(tc.input[gotEndMarkup.Low:gotEndMarkup.High], qt.Equals, tc.expectEndMarkup, qt.Commentf("[%d] Test.expectEndMarkup %q", idx, tc.input))
	}
}
// BenchmarkSummaryFromHTML measures extracting an automatic summary from a
// two paragraph document and reading back the content, the content without
// the summary and the summary itself. Each result is verified so that a wrong
// extraction fails the benchmark.
func BenchmarkSummaryFromHTML(b *testing.B) {
	b.StopTimer()
	input := "<p>First paragraph</p><p>Second paragraph</p>"
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		summary := ExtractSummaryFromHTML(media.Builtin.MarkdownType, input, 2, false)

		content := summary.Content()
		if content != input {
			b.Fatalf("unexpected content: %q", content)
		}

		rest := summary.ContentWithoutSummary()
		if rest != "<p>Second paragraph</p>" {
			b.Fatalf("unexpected content without summary: %q", rest)
		}

		text := summary.Summary()
		if text != "<p>First paragraph</p>" {
			b.Fatalf("unexpected summary: %q", text)
		}
	}
}
// BenchmarkSummaryFromHTMLWithDivider measures extracting a manual summary
// from a document whose divider "FOOO" sits in a paragraph of its own, and
// reading back the content, the content without the summary and the summary
// itself. Each result is verified so that a wrong extraction fails the
// benchmark.
func BenchmarkSummaryFromHTMLWithDivider(b *testing.B) {
	b.StopTimer()
	input := "<p>First paragraph</p><p>FOOO</p><p>Second paragraph</p>"
	b.StartTimer()

	for n := 0; n < b.N; n++ {
		summary := ExtractSummaryFromHTMLWithDivider(media.Builtin.MarkdownType, input, "FOOO")

		content := summary.Content()
		if content != "<p>First paragraph</p><p>Second paragraph</p>" {
			b.Fatalf("unexpected content: %q", content)
		}

		rest := summary.ContentWithoutSummary()
		if rest != "<p>Second paragraph</p>" {
			b.Fatalf("unexpected content without summary: %q", rest)
		}

		text := summary.Summary()
		if text != "<p>First paragraph</p>" {
			b.Fatalf("unexpected summary: %q", text)
		}
	}
}

View File

@@ -44,6 +44,8 @@ import (
var (
NopPage Page = new(nopPage)
NopContentRenderer ContentRenderer = new(nopContentRenderer)
NopMarkup Markup = new(nopMarkup)
NopContent Content = new(nopContent)
NopCPageContentRenderer = struct {
OutputFormatPageContentProvider
ContentRenderer
@@ -109,10 +111,18 @@ func (p *nopPage) BundleType() string {
return ""
}
// Markup returns NopMarkup, whatever options are given.
func (p *nopPage) Markup(...any) Markup {
return NopMarkup
}
// Content returns an empty string and no error.
func (p *nopPage) Content(context.Context) (any, error) {
return "", nil
}
// ContentWithoutSummary returns empty HTML and no error.
func (p *nopPage) ContentWithoutSummary(ctx context.Context) (template.HTML, error) {
return "", nil
}
// ContentBaseName returns an empty string.
func (p *nopPage) ContentBaseName() string {
return ""
}
@@ -547,3 +557,69 @@ func (r *nopContentRenderer) ParseContent(ctx context.Context, content []byte) (
// RenderContent does nothing: it returns a nil result, false and no error.
func (r *nopContentRenderer) RenderContent(ctx context.Context, content []byte, doc any) (converter.ResultRender, bool, error) {
return nil, false, nil
}
// nopMarkup and nopContent are no-op implementations of Markup and Content.
// All their methods return zero values and nil errors.
type (
nopMarkup int
nopContent int
)
// Compile-time checks that the no-op types satisfy their interfaces.
var (
_ Markup = (*nopMarkup)(nil)
_ Content = (*nopContent)(nil)
)
// Render returns NopContent and no error.
func (c *nopMarkup) Render(context.Context) (Content, error) {
return NopContent, nil
}
// RenderString returns empty HTML and no error.
func (c *nopMarkup) RenderString(ctx context.Context, args ...any) (template.HTML, error) {
return "", nil
}
// RenderShortcodes returns empty HTML and no error.
func (c *nopMarkup) RenderShortcodes(context.Context) (template.HTML, error) {
return "", nil
}
// Plain returns an empty string.
func (c *nopContent) Plain(context.Context) string {
return ""
}
// PlainWords returns nil.
func (c *nopContent) PlainWords(context.Context) []string {
return nil
}
// WordCount returns 0.
func (c *nopContent) WordCount(context.Context) int {
return 0
}
// FuzzyWordCount returns 0.
func (c *nopContent) FuzzyWordCount(context.Context) int {
return 0
}
// ReadingTime returns 0.
func (c *nopContent) ReadingTime(context.Context) int {
return 0
}
// Len returns 0.
func (c *nopContent) Len(context.Context) int {
return 0
}
// Content returns empty HTML and no error.
func (c *nopContent) Content(context.Context) (template.HTML, error) {
return "", nil
}
// ContentWithoutSummary returns empty HTML and no error.
func (c *nopContent) ContentWithoutSummary(context.Context) (template.HTML, error) {
return "", nil
}
// Fragments returns nil, so callers must not dereference the result.
func (c *nopMarkup) Fragments(context.Context) *tableofcontents.Fragments {
return nil
}
// FragmentsHTML returns empty HTML.
func (c *nopMarkup) FragmentsHTML(context.Context) template.HTML {
return ""
}
// Summary returns the zero Summary and no error.
func (c *nopContent) Summary(context.Context) (Summary, error) {
return Summary{}, nil
}

View File

@@ -149,6 +149,10 @@ func (p *testPage) Content(context.Context) (any, error) {
panic("testpage: not implemented")
}
// Markup is not implemented for testPage and panics when called.
func (p *testPage) Markup(...any) Markup {
panic("testpage: not implemented")
}
// ContentBaseName is not implemented for testPage and panics when called.
func (p *testPage) ContentBaseName() string {
panic("testpage: not implemented")
}
@@ -177,6 +181,10 @@ func (p *testPage) Description() string {
return ""
}
// ContentWithoutSummary returns empty HTML and no error.
func (p *testPage) ContentWithoutSummary(ctx context.Context) (template.HTML, error) {
return "", nil
}
// Dir is not implemented for testPage and panics when called.
func (p *testPage) Dir() string {
panic("testpage: not implemented")
}