1
0
mirror of https://github.com/RyanGreenup/cadmus.git synced 2025-08-02 12:21:18 +02:00
Files
cadmus/main.go

217 lines
4.9 KiB
Go
Executable File

package main
// TODO improve the logic for opening the index, see line 52 of this exemplar
// TODO see line 117 for example of walking directory
// https://github.com/blevesearch/beer-search/blob/master/main.go
// TODO Improve index time with batch?
// https://github.com/blevesearch/bleve/issues/831
// TODO command line argument to re-index? that would be quicker
// TODO just return the name of matches
// TODO Evaluate how long tantivy takes to index. Ideally I want to index on the fly, so I need to see whether that would
// still be an option if I had 100 times as many notes as I do now; the time taken would need to be 2 ms where this currently takes 10.
// I doubt Rust will bring that sort of performance gain, but I should check nonetheless.
// NOTE Tantivy said it took 2000 docs per sec from the JSON, currently this does 70 docs/sec
// this should help
// https://tantivy-search.github.io/examples/basic_search.html
// TODO Can I map this over multiple cores? Indexing would be much faster in that case, although it looks like it is already working over many cores.
import (
"bufio"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"time"
"github.com/blevesearch/bleve/v2"
"github.com/schollz/progressbar/v3"
)
// text_structure is the document handed to the index for a single file.
type text_structure struct {
// Path is the file's path as returned by the directory walk; the same
// value is used as the document ID when the file is indexed.
Path string
// Content is the whole file, read into a string.
Content string
}
// batchSize is the -batchSize command-line flag (default 200). Make_index
// compares its pending-document counter against this value to decide when
// to write the current batch to the index. The command-line value is only
// picked up once flag.Parse has been called; until then it holds the default.
var batchSize = flag.Int("batchSize", 200, "batch size for indexing")
func main() {
dir := ""
if len(os.Args) < 2 {
dir = "./"
} else {
dir = os.Args[1]
}
files := listFiles(dir)
index_path := "example.bleve"
// delete_index(index_path)
index := Make_index(index_path, files)
do_search(index)
}
// do_search reads one search term from standard input, runs it as a match
// query against index and prints the complete result set to standard output.
// If the search itself fails, the failure is printed and nothing else happens.
func do_search(index bleve.Index) {
	// TODO Make this an interactive query
	request := bleve.NewSearchRequest(bleve.NewMatchQuery(getInput()))

	results, err := index.Search(request)
	if err != nil {
		fmt.Println("No Search Results :( ")
		fmt.Println(err)
		return
	}

	// TODO Only print out the File Path
	// TODO, give this file path to fuzzy-finder-go --preview
	// Printing the result value dumps everything; results.Hits[0].ID would
	// be the ID of the first hit only.
	fmt.Println(results)
}
// getInput prompts on standard output and returns the next line typed on
// standard input, without its line terminator ("\n" or "\r\n").
//
// A final line that is not newline-terminated is still returned. When
// nothing can be read, the empty string is returned; a genuine read error
// (anything other than end of input) is reported on standard error.
func getInput() string {
	fmt.Print("Enter a Search Term:\n")

	// Scanner strips the line terminator and treats end of input as a
	// normal condition, so only real I/O failures surface through Err.
	scanner := bufio.NewScanner(os.Stdin)
	if scanner.Scan() {
		return scanner.Text()
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading search term:", err)
	}
	return ""
}
// Make_index opens the bleve index at index_path, or creates a new one with
// the default mapping when it cannot be opened, then indexes every file in
// files and returns the index.
//
// Each file is read in full and stored as a text_structure whose document ID
// is the file path. Documents are written in batches: once *batchSize
// documents are pending the batch is flushed, and any remainder is flushed
// after the loop. A progress bar is advanced once per file and a timing
// summary is logged at the end.
//
// The process exits with status 1 if the index can be neither opened nor
// created, and panics if writing a batch fails.
func Make_index(index_path string, files []string) bleve.Index {
	// TODO This should have more error checking, what if there is just a write permission?
	// what if it's the wrong file? then it would be overwritten...
	index, err := bleve.Open(index_path)
	if err != nil {
		fmt.Println(err)
		fmt.Println("Creating a New index")
		mapping := bleve.NewIndexMapping()
		index, err = bleve.New(index_path, mapping)
		if err != nil {
			fmt.Print(err)
			fmt.Print("Unable to Create new index")
			os.Exit(1)
		}
	} else {
		fmt.Println("Appending to Old Index")
	}

	bar := progressbar.Default(int64(len(files)))
	startTime := time.Now()

	// indexed counts documents accepted overall; pending counts documents
	// sitting in the current, not yet written, batch. Both start at zero so
	// the reported total and the batch size are exact.
	indexed := 0
	pending := 0
	batch := index.NewBatch()

	// flush writes the pending batch to the index and starts a fresh one.
	flush := func() {
		if err := index.Batch(batch); err != nil {
			panic(err)
		}
		batch = index.NewBatch()
		pending = 0
	}

	for _, file := range files {
		// Only the current document is held here; contents are not
		// accumulated across iterations.
		doc := text_structure{
			Path:    file,
			Content: getFile(file),
		}
		if err := batch.Index(doc.Path, doc); err != nil {
			// A document that cannot be queued is reported and skipped
			// rather than silently lost.
			log.Printf("skipping %s: %v", file, err)
		} else {
			indexed++
			pending++
		}
		if pending > 0 && pending >= *batchSize {
			flush()
		}
		// The progress bar is cosmetic; a rendering error must not abort indexing.
		_ = bar.Add(1)
	}

	// Index the last incomplete batch.
	if pending > 0 {
		flush()
	}

	indexDuration := time.Since(startTime)
	// Guard the average against division by zero when nothing was indexed.
	msPerDoc := 0.0
	if indexed > 0 {
		msPerDoc = float64(indexDuration) / float64(time.Millisecond) / float64(indexed)
	}
	log.Printf("Indexed %d documents, in %.2fs (average %.2fms/doc)", indexed, indexDuration.Seconds(), msPerDoc)
	return index
}
// getFile returns the entire contents of the file at path as a string.
//
// If the file cannot be read, the path and the underlying error are written
// to standard error and the process exits with status 1.
func getFile(path string) string {
	buf, err := os.ReadFile(path)
	if err != nil {
		// Name the file and the cause; a bare "Error Reading File" gives the
		// user nothing to act on.
		fmt.Fprintf(os.Stderr, "Error Reading File %s: %v\n", path, err)
		os.Exit(1)
	}
	return string(buf)
}
// delete_index removes path and everything beneath it.
//
// The function has no return value, so a failed removal is logged instead of
// being dropped silently.
func delete_index(path string) {
	if err := os.RemoveAll(path); err != nil {
		log.Printf("removing index %q: %v", path, err)
	}
}
// listFiles walks dir recursively and returns the paths of all regular
// entries whose extension is exactly ".md", in the order filepath.Walk
// visits them.
//
// A directory that is found but cannot be read is logged and skipped. If an
// entry cannot be stat'ed at all (for example, dir itself does not exist),
// the walk stops and listFiles panics with that error.
func listFiles(dir string) []string {
	files := []string{}

	// TODO Why is this different?
	// https://stackoverflow.com/a/42423998
	// https://flaviocopes.com/go-list-files/
	append_files := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// info is nil when the entry could not be stat'ed; touching it
			// would panic with a nil dereference, so surface the real error.
			if info == nil {
				return err
			}
			// Otherwise the directory exists but could not be listed.
			log.Printf("skipping %s: %v", path, err)
			return nil
		}
		if info.IsDir() {
			return nil
		}
		// TODO there should be many formats allowed
		if filepath.Ext(path) != ".md" {
			return nil
		}
		files = append(files, path)
		return nil
	}

	// TODO Why not Symlinks?
	if err := filepath.Walk(dir, append_files); err != nil {
		panic(err)
	}
	return files
}