mirror of
https://github.com/gohugoio/hugo.git
synced 2025-08-30 22:39:58 +02:00
hugolib: Implement "related content"
This closes #98, even if this commit does not do full content text search. We may revisit that problem in the future, but that deserves its own issue. Fixes #98
This commit is contained in:
276
related/inverted_index_test.go
Normal file
276
related/inverted_index_test.go
Normal file
@@ -0,0 +1,276 @@
|
||||
// Copyright 2017-present The Hugo Authors. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package related
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type testDoc struct {
|
||||
keywords map[string][]Keyword
|
||||
date time.Time
|
||||
}
|
||||
|
||||
func (k *testDoc) String() string {
|
||||
s := "\n"
|
||||
for k, v := range k.keywords {
|
||||
s += k + ":\t\t"
|
||||
for _, vv := range v {
|
||||
s += " " + vv.String()
|
||||
}
|
||||
s += "\n"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func newTestDoc(name string, keywords ...string) *testDoc {
|
||||
km := make(map[string][]Keyword)
|
||||
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
kw := &testDoc{keywords: km, date: time.Now()}
|
||||
|
||||
kw.addKeywords(name, keywords...)
|
||||
return kw
|
||||
}
|
||||
|
||||
func (t *testDoc) addKeywords(name string, keywords ...string) *testDoc {
|
||||
keywordm := createTestKeywords(name, keywords...)
|
||||
|
||||
for k, v := range keywordm {
|
||||
keywords := make([]Keyword, len(v))
|
||||
for i := 0; i < len(v); i++ {
|
||||
keywords[i] = StringKeyword(v[i])
|
||||
}
|
||||
t.keywords[k] = keywords
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
func createTestKeywords(name string, keywords ...string) map[string][]string {
|
||||
return map[string][]string{
|
||||
name: keywords,
|
||||
}
|
||||
}
|
||||
|
||||
func (k *testDoc) SearchKeywords(cfg IndexConfig) ([]Keyword, error) {
|
||||
return k.keywords[cfg.Name], nil
|
||||
}
|
||||
|
||||
func (k *testDoc) PubDate() time.Time {
|
||||
return k.date
|
||||
}
|
||||
|
||||
func TestSearch(t *testing.T) {
|
||||
|
||||
config := Config{
|
||||
Threshold: 90,
|
||||
IncludeNewer: false,
|
||||
Indices: IndexConfigs{
|
||||
IndexConfig{Name: "tags", Weight: 50},
|
||||
IndexConfig{Name: "keywords", Weight: 65},
|
||||
},
|
||||
}
|
||||
|
||||
idx := NewInvertedIndex(config)
|
||||
//idx.debug = true
|
||||
|
||||
docs := []Document{
|
||||
newTestDoc("tags", "a", "b", "c", "d"),
|
||||
newTestDoc("tags", "b", "d", "g"),
|
||||
newTestDoc("tags", "b", "h").addKeywords("keywords", "a"),
|
||||
newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"),
|
||||
}
|
||||
|
||||
idx.Add(docs...)
|
||||
|
||||
t.Run("count", func(t *testing.T) {
|
||||
assert := require.New(t)
|
||||
assert.Len(idx.index, 2)
|
||||
set1, found := idx.index["tags"]
|
||||
assert.True(found)
|
||||
// 6 tags
|
||||
assert.Len(set1, 6)
|
||||
|
||||
set2, found := idx.index["keywords"]
|
||||
assert.True(found)
|
||||
assert.Len(set2, 2)
|
||||
|
||||
})
|
||||
|
||||
t.Run("search-tags", func(t *testing.T) {
|
||||
assert := require.New(t)
|
||||
m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...))
|
||||
assert.NoError(err)
|
||||
assert.Len(m, 2)
|
||||
assert.Equal(docs[0], m[0])
|
||||
assert.Equal(docs[1], m[1])
|
||||
})
|
||||
|
||||
t.Run("search-tags-and-keywords", func(t *testing.T) {
|
||||
assert := require.New(t)
|
||||
m, err := idx.search(
|
||||
newQueryElement("tags", StringsToKeywords("a", "b", "z")...),
|
||||
newQueryElement("keywords", StringsToKeywords("a", "b")...))
|
||||
assert.NoError(err)
|
||||
assert.Len(m, 3)
|
||||
assert.Equal(docs[3], m[0])
|
||||
assert.Equal(docs[2], m[1])
|
||||
assert.Equal(docs[0], m[2])
|
||||
})
|
||||
|
||||
t.Run("searchdoc-all", func(t *testing.T) {
|
||||
assert := require.New(t)
|
||||
doc := newTestDoc("tags", "a").addKeywords("keywords", "a")
|
||||
m, err := idx.SearchDoc(doc)
|
||||
assert.NoError(err)
|
||||
assert.Len(m, 2)
|
||||
assert.Equal(docs[3], m[0])
|
||||
assert.Equal(docs[2], m[1])
|
||||
})
|
||||
|
||||
t.Run("searchdoc-tags", func(t *testing.T) {
|
||||
assert := require.New(t)
|
||||
doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
|
||||
m, err := idx.SearchDoc(doc, "tags")
|
||||
assert.NoError(err)
|
||||
assert.Len(m, 2)
|
||||
assert.Equal(docs[0], m[0])
|
||||
assert.Equal(docs[1], m[1])
|
||||
})
|
||||
|
||||
t.Run("searchdoc-keywords-date", func(t *testing.T) {
|
||||
assert := require.New(t)
|
||||
doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
|
||||
// This will get a date newer than the others.
|
||||
newDoc := newTestDoc("keywords", "a", "b")
|
||||
idx.Add(newDoc)
|
||||
|
||||
m, err := idx.SearchDoc(doc, "keywords")
|
||||
assert.NoError(err)
|
||||
assert.Len(m, 2)
|
||||
assert.Equal(docs[3], m[0])
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkRelatedNewIndex(b *testing.B) {
|
||||
|
||||
pages := make([]*testDoc, 100)
|
||||
numkeywords := 30
|
||||
allKeywords := make([]string, numkeywords)
|
||||
for i := 0; i < numkeywords; i++ {
|
||||
allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
|
||||
}
|
||||
|
||||
for i := 0; i < len(pages); i++ {
|
||||
start := rand.Intn(len(allKeywords))
|
||||
end := start + 3
|
||||
if end >= len(allKeywords) {
|
||||
end = start + 1
|
||||
}
|
||||
|
||||
kw := newTestDoc("tags", allKeywords[start:end]...)
|
||||
if i%5 == 0 {
|
||||
start := rand.Intn(len(allKeywords))
|
||||
end := start + 3
|
||||
if end >= len(allKeywords) {
|
||||
end = start + 1
|
||||
}
|
||||
kw.addKeywords("keywords", allKeywords[start:end]...)
|
||||
}
|
||||
|
||||
pages[i] = kw
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
Threshold: 50,
|
||||
Indices: IndexConfigs{
|
||||
IndexConfig{Name: "tags", Weight: 100},
|
||||
IndexConfig{Name: "keywords", Weight: 200},
|
||||
},
|
||||
}
|
||||
|
||||
b.Run("singles", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
idx := NewInvertedIndex(cfg)
|
||||
for _, doc := range pages {
|
||||
idx.Add(doc)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("all", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
idx := NewInvertedIndex(cfg)
|
||||
docs := make([]Document, len(pages))
|
||||
for i := 0; i < len(pages); i++ {
|
||||
docs[i] = pages[i]
|
||||
}
|
||||
idx.Add(docs...)
|
||||
}
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func BenchmarkRelatedMatchesIn(b *testing.B) {
|
||||
|
||||
q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...)
|
||||
q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...)
|
||||
|
||||
docs := make([]*testDoc, 1000)
|
||||
numkeywords := 20
|
||||
allKeywords := make([]string, numkeywords)
|
||||
for i := 0; i < numkeywords; i++ {
|
||||
allKeywords[i] = fmt.Sprintf("keyword%d", i+1)
|
||||
}
|
||||
|
||||
cfg := Config{
|
||||
Threshold: 20,
|
||||
Indices: IndexConfigs{
|
||||
IndexConfig{Name: "tags", Weight: 100},
|
||||
IndexConfig{Name: "keywords", Weight: 200},
|
||||
},
|
||||
}
|
||||
|
||||
idx := NewInvertedIndex(cfg)
|
||||
|
||||
for i := 0; i < len(docs); i++ {
|
||||
start := rand.Intn(len(allKeywords))
|
||||
end := start + 3
|
||||
if end >= len(allKeywords) {
|
||||
end = start + 1
|
||||
}
|
||||
|
||||
index := "tags"
|
||||
if i%5 == 0 {
|
||||
index = "keywords"
|
||||
}
|
||||
|
||||
idx.Add(newTestDoc(index, allKeywords[start:end]...))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if i%10 == 0 {
|
||||
idx.search(q2)
|
||||
} else {
|
||||
idx.search(q1)
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user