// Return 50 words of an index from notes, using lowest IDF values
// TODO Read in all text documents from directory
// TODO Find IDF values
// TODO Find top 50 words corresponding to lowest IDF values
// TODO use fzf to select one of those words
// TODO return the top 20 documents that contain that word most frequently

package main

import (
	"fmt"
	"strings"
)

// main builds a document-term matrix (DTM) from the sample documents and
// prints the vocabulary followed by one row of counts per document.
func main() {
	// Get some documents
	docs := get_documents()

	// Get the unique words out
	all_words := unique_words(docs)

	// Make a DTM
	DTM := make_dtm(docs, all_words)

	// Print the DTM
	fmt.Print(format_dtm(all_words, DTM))
}

// format_dtm renders the vocabulary on the first line, followed by one line
// per DTM row. Every count is followed by a single space and every row is
// terminated by a newline.
//
// Each row is iterated by its own length, so a matrix with a single row (or
// no rows at all) is rendered without indexing past the end of DTM.
func format_dtm(all_words []string, DTM [][]float64) string {
	var b strings.Builder
	fmt.Fprintln(&b, all_words)
	for _, row := range DTM {
		for _, count := range row {
			fmt.Fprint(&b, count, " ")
		}
		b.WriteByte('\n')
	}
	return b.String()
}

// get_documents returns the hard-coded sample documents, each one already
// cleaned and split into words by extract_words.
func get_documents() [][]string {
	// Create some strings
	doc1 := "quick fox, \n *brown*, fox, jumpy fox"
	doc2 := "jumping jacks jacks jacks are fun fun meh"
	doc3 := "the fox fox fox was foo bar blah quick"

	// Store the strings into a slice
	all_docs_str := []string{doc1, doc2, doc3}

	// Fill the matrix with the terms; ranging over the slice keeps the loop
	// correct if documents are added or removed above.
	all_docs := make([][]string, 0, len(all_docs_str))
	for _, doc := range all_docs_str {
		all_docs = append(all_docs, extract_words(doc))
	}
	return all_docs
}

// extract_words cleans a string and returns its words: the text is
// lower-cased, the characters ',', '/' and '*' are removed, and the result is
// split on runs of whitespace.
func extract_words(doc string) []string {
	doc = strings.ToLower(doc)

	// Remove punctuation first, otherwise strings.Fields would leave it
	// attached to the neighbouring word.
	for _, ch := range []string{",", "/", "*"} {
		doc = strings.ReplaceAll(doc, ch, "")
	}

	// strings.Fields already ignores leading and trailing whitespace.
	return strings.Fields(doc)
}

// unique_words returns every distinct word found in docs, in order of first
// appearance. The result is nil when docs contains no words.
func unique_words(docs [][]string) []string {
	var vocab []string
	seen := make(map[string]struct{})
	for _, doc := range docs {
		for _, word := range doc {
			// Only append words that have not been seen before to ensure
			// uniqueness.
			if _, ok := seen[word]; ok {
				continue
			}
			seen[word] = struct{}{}
			vocab = append(vocab, word)
		}
	}
	return vocab
}

// in_arr reports whether word is an element of words.
func in_arr(words []string, word string) bool {
	for _, w := range words {
		if w == word {
			return true
		}
	}
	return false
}

// make_dtm builds the document-term matrix for docs over the vocabulary
// all_words. The result has one row per document and one column per entry of
// all_words; cell [i][k] holds how many times all_words[k] occurs in docs[i].
// Words of a document that are not in all_words are ignored.
func make_dtm(docs [][]string, all_words []string) [][]float64 {
	N := len(docs)
	n := len(all_words)

	// All rows share one backing array. The three-index slice caps each row
	// at its own length so an append on one row cannot overwrite the next.
	DTM := make([][]float64, N)
	rows := make([]float64, n*N)
	for i, doc := range docs {
		DTM[i] = rows[i*n : (i+1)*n : (i+1)*n]

		// Count every word of the document once instead of rescanning the
		// document for each vocabulary entry.
		counts := make(map[string]float64, len(doc))
		for _, word := range doc {
			counts[word]++
		}
		for k, word := range all_words {
			DTM[i][k] = counts[word]
		}
	}
	return DTM
}

// how_many_times returns the number of elements of doc equal to word.
func how_many_times(word string, doc []string) float64 {
	var count float64
	for _, w := range doc {
		if w == word {
			count++
		}
	}
	return count
}