// cadmus/main.go

package main

// TODO improve the logic for opening the index, see line 52 of this exemplar
// TODO see line 117 for example of walking directory
// https://github.com/blevesearch/beer-search/blob/master/main.go
// TODO Improve index time with batch?
// https://github.com/blevesearch/bleve/issues/831
// TODO command line argument to re-index? that would be quicker
// TODO just return the name of matches
// TODO Evaluate how long tantivy takes to index. Ideally I want to index on the fly, so I need to see whether that's an option,
// i.e. whether it would still be an option if I had 100 times as many notes; the time taken would need to be 2 ms where this is currently 10. I doubt Rust will
// bring that sort of performance gain, but I should check nonetheless.
// NOTE Tantivy reports indexing 2000 docs per sec from JSON; currently this does 70 docs/sec.
// This should help:
// https://tantivy-search.github.io/examples/basic_search.html
// TODO Can I map this over multiple cores? Indexing would be much faster in that case, although it looks like it's already working over many cores.

import (
	"bufio"
	"flag"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"time"

	"github.com/blevesearch/bleve/v2"
	"github.com/schollz/progressbar/v3"
)

// text_structure is the document handed to the indexer for each file:
// Path is the file's path (also used as the document ID by Make_index) and
// Content is the file's full text.
type text_structure struct {
	Path, Content string
}

// batchSize is the number of documents Make_index accumulates in a batch
// before writing it to the index. It is registered as the -batchSize flag
// with a default of 200; the value only differs from the default once
// flag.Parse has been called.
var batchSize = flag.Int("batchSize", 200, "batch size for indexing")

func main() {

	dir := ""
	if len(os.Args) < 2 {
		dir = "./"
	} else {
		dir = os.Args[1]
	}

	files := listFiles(dir)

	index_path := "example.bleve"

	// delete_index(index_path)
	index := Make_index(index_path, files)

	do_search(index)

}

// do_search asks for one search term on standard input, runs it against
// index as a match query and prints the complete result set to standard
// output. If the search itself fails, a notice and the error are printed
// instead.
func do_search(index bleve.Index) {
	// TODO Make this an interactive query
	term := getInput()
	request := bleve.NewSearchRequest(bleve.NewMatchQuery(term))

	results, err := index.Search(request)
	if err == nil {
		// TODO Only print out the File Path
		// TODO, give this file path to fuzzy-finder-go --preview
		fmt.Println(results) // prints everything, not just the matching IDs
		// fmt.Println(results.Hits[0].ID) // would print the ID of the first hit
		return
	}

	fmt.Println("No Search Results :( ")
	fmt.Println(err)
}

// getInput prints a prompt to standard output and returns the next line read
// from standard input. The returned string still contains the terminating
// newline when one was read.
func getInput() string {
	fmt.Print("Enter a Search Term:\n")

	// The read error is deliberately ignored: on EOF or failure the text read
	// so far (possibly empty) is returned as the term.
	line, _ := bufio.NewReader(os.Stdin).ReadString('\n')
	return line
}

// Make_index opens the bleve index stored at index_path, or creates a new
// one with the default mapping when it cannot be opened, and then indexes
// every file in files. Each file becomes one text_structure document whose ID
// is its path. Documents are written in batches of *batchSize.
//
// The opened (or newly created) index is returned; closing it is the
// caller's responsibility.
//
// The process exits with status 1 if a new index cannot be created (and, via
// getFile, if a file cannot be read). Make_index panics if a batch cannot be
// written. A document that cannot be added to a batch is logged and skipped.
func Make_index(index_path string, files []string) bleve.Index {

	// TODO This should have more error checking, what if there is just a write permission?
	// what if it's the wrong file? then it would be overwritten...
	index, err := bleve.Open(index_path)
	if err != nil {
		fmt.Println(err)
		fmt.Println("Creating a New index")

		index, err = bleve.New(index_path, bleve.NewIndexMapping())
		if err != nil {
			fmt.Print(err)
			fmt.Print("Unable to Create new index")
			os.Exit(1)
		}
	} else {
		fmt.Println("Appending to Old Index")
	}

	bar := progressbar.Default(int64(len(files)))
	startTime := time.Now()

	count := 0   // documents successfully queued for indexing
	pending := 0 // documents in the current, not yet written batch
	batch := index.NewBatch()

	for _, file := range files {
		// Only the current document is held; earlier contents are owned by
		// the batch until it is flushed.
		doc := text_structure{
			Path:    file,
			Content: getFile(file),
		}

		if err := batch.Index(doc.Path, doc); err != nil {
			log.Printf("skipping %q: %v", file, err)
			bar.Add(1)
			continue
		}
		count++
		pending++

		// Write the batch once it is full and start a fresh one.
		if pending >= *batchSize {
			if err := index.Batch(batch); err != nil {
				panic(err)
			}
			batch = index.NewBatch()
			pending = 0
		}

		bar.Add(1)
	}

	// Write the last, incomplete batch.
	if pending > 0 {
		if err := index.Batch(batch); err != nil {
			panic(err)
		}
	}

	indexDuration := time.Since(startTime)
	indexDurationSeconds := float64(indexDuration) / float64(time.Second)
	// Guard the average so an empty file list does not divide by zero.
	msPerDoc := 0.0
	if count > 0 {
		msPerDoc = float64(indexDuration) / float64(count) / float64(time.Millisecond)
	}
	log.Printf("Indexed %d documents, in %.2fs (average %.2fms/doc)", count, indexDurationSeconds, msPerDoc)

	return index
}

// getFile returns the entire contents of the file at path as a string.
//
// If the file cannot be read, the error (which names the path) is written to
// standard error and the process exits with status 1.
func getFile(path string) string {
	buf, err := os.ReadFile(path)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error Reading File: %v\n", err)
		os.Exit(1)
	}
	return string(buf)
}

// delete_index removes path and everything beneath it. A path that does not
// exist is not treated as an error. Any failure to remove is logged rather
// than silently dropped; the function itself never aborts the program.
func delete_index(path string) {
	if err := os.RemoveAll(path); err != nil {
		log.Printf("removing index %q: %v", path, err)
	}
}

// listFiles walks the tree rooted at dir and returns the path of every
// non-directory entry whose extension is exactly ".md", in the order
// filepath.Walk visits them. The result is never nil.
//
// A directory whose contents cannot be read is logged and skipped. If an
// entry cannot be stat'ed at all (for example when dir itself does not
// exist), listFiles panics with the underlying error.
func listFiles(dir string) []string {
	files := []string{}

	// TODO Why is this different?
	// https://stackoverflow.com/a/42423998
	// https://flaviocopes.com/go-list-files/
	append_files := func(path string, info os.FileInfo, err error) error {
		// filepath.Walk reports problems through err; info is nil when the
		// entry could not be stat'ed, so it must not be touched before this
		// check.
		if err != nil {
			if info == nil {
				return err
			}
			log.Printf("skipping %q: %v", path, err)
			return nil
		}

		if info.IsDir() {
			return nil
		}

		// TODO there should be many formats allowed
		if filepath.Ext(path) != ".md" {
			return nil
		}

		files = append(files, path)
		return nil
	}

	// TODO Why not Symlinks?
	if err := filepath.Walk(dir, append_files); err != nil {
		panic(err)
	}

	return files
}