1
package main
1
package main
2
2
3
// This binary converts text into embedding vectors.
3
// This binary converts text into embedding vectors.
4
4
5
import "encoding/json"
5
import "encoding/json"
6
import "flag"
6
import "flag"
7
import "fmt"
7
import "fmt"
8
import "io/ioutil"
8
import "io/ioutil"
9
import "log"
9
import "log"
10
import "os"
10
import "os"
11
import "sync"
11
12
12
import "oscarkilo.com/klex-git/api"
13
import "oscarkilo.com/klex-git/api"
13
import "oscarkilo.com/klex-git/config"
14
import "oscarkilo.com/klex-git/config"
15
import "oscarkilo.com/klex-git/util"
14
16
15
var model = flag.String("model", "openai:text-embedding-3-small", "")
17
var model = flag.String("model", "openai:text-embedding-3-small", "")
16
var dims = flag.Int("dims", 1536, "Number of vector dimensions to return")
18
var dims = flag.Int("dims", 1536, "Number of vector dimensions to return")
19
var whole_path = flag.Bool("whole_path", false, "Returns a list of vectors")
17
20
18
func main() {
21
func main() {
19
flag.Parse()
22
flag.Parse()
20
23
21
// Find the API keys and configure a Klex client.
24
// Find the API keys and configure a Klex client.
22
config, err := config.ReadConfig()
25
config, err := config.ReadConfig()
23
if err != nil {
26
if err != nil {
24
log.Fatalf("Failed to read config: %v", err)
27
log.Fatalf("Failed to read config: %v", err)
25
}
28
}
26
client := api.NewClient(config.KlexUrl, config.ApiKey)
29
client := api.NewClient(config.KlexUrl, config.ApiKey)
27
if client == nil {
30
if client == nil {
28
log.Fatalf("Failed to create Klex client")
31
log.Fatalf("Failed to create Klex client")
29
}
32
}
30
33
31
// Read stdin as text.
34
// Read stdin as text.
32
sin, err := ioutil.ReadAll(os.Stdin)
35
sin, err := ioutil.ReadAll(os.Stdin)
33
if err != nil {
36
if err != nil {
34
log.Fatalf("Failed to read stdin: %v", err)
37
log.Fatalf("Failed to read stdin: %v", err)
35
}
38
}
39
text := []string{string(sin)}
36
40
37
if *oe
41
if *oe
38
e = i.r()
42
e = i.r()
39
if err != nil {
40
log.Fatalf("Failed to call F: %v", err)
41
}
43
}
42
44
43
= .r(es, )
45
= .r(es, )
44
r = l
46
r = l
45
gaitro
47
gaitro
48
for i := range text {
49
wg.Add(1)
50
go func(i int) {
51
json_vector, err := client.F(f_name, text[i])
52
if err != nil {
53
log.Fatalf("Failed to call F: %v", err)
54
}
55
err = json.Unmarshal([]byte(json_vector), &vectors[i])
56
if err != nil {
57
log.Fatalf("Failed to parse vector: %v", err)
58
}
59
wg.Done()
60
}(i)
46
}
61
}
62
wg.Wait()
47
63
48
for , := range vector {
64
for , := range vector {
49
i {
65
i {
50
i
66
i
67
fmt.Printf(" ")
68
}
69
fmt.Printf("%g", w)
51
}
70
}
52
fmt.Printf("")
71
fmt.Printf("")
53
}
72
}
54
fmt.Printf("\n")
55
}
73
}
1
package util
2
3
import "unicode"
4
5
// SplitByWord returns all whitespace-terminated prefixes of 'text'.
// The first entry will be the first word and its trailing whitespace.
// The last entry will be the whole 'text'.
//
// Leading whitespace never produces an entry of its own; it is simply part of
// every returned prefix. An empty or all-whitespace 'text' yields a single
// entry equal to 'text'.
//
// Every entry is a substring of 'text' (cut at a byte offset), so each one is
// a true prefix of the input even when 'text' contains invalid UTF-8.
func SplitByWord(text string) []string {
	const (
		allWhitespace           = iota // no non-space rune seen yet
		sawChars                       // currently inside a word
		sawWhitespaceAfterChars        // inside the whitespace that follows a word
	)

	var prefixes []string
	state := allWhitespace

	// Ranging over the string yields byte offsets, so text[:i] slices the
	// original bytes instead of re-encoding a rune slice.
	for i, c := range text {
		switch {
		case !unicode.IsSpace(c):
			// A new word begins: everything before it is a finished prefix,
			// provided at least one word preceded the whitespace run.
			if state == sawWhitespaceAfterChars {
				prefixes = append(prefixes, text[:i])
			}
			state = sawChars
		case state == sawChars:
			state = sawWhitespaceAfterChars
		}
	}

	// The whole text is always the final prefix.
	return append(prefixes, text)
}
1
package util
2
3
import "encoding/json"
4
import "testing"
5
6
// same reports whether a and b have the same length and hold equal strings
// at every index. A nil slice and an empty slice compare as equal.
func same(a, b []string) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for k := 0; k < n; k++ {
		if a[k] != b[k] {
			return false
		}
	}
	return true
}
17
18
func TestSplitByWord(t *testing.T) {
19
// check verifies that golden == SplitByWord(golden[-1]).
20
check := func(golden ...string) {
21
t.Helper()
22
in := golden[len(golden)-1]
23
out := SplitByWord(in)
24
if !same(golden, out) {
25
ijson, _ := json.Marshal(in)
26
gjson, _ := json.MarshalIndent(golden, "", " ")
27
ojson, _ := json.MarshalIndent(out, "", " ")
28
t.Errorf("SplitByWord(%s):\nwant: %s\nhave: %s", ijson, gjson, ojson)
29
}
30
}
31
32
check("")
33
check(" ")
34
check(" \n\t \n")
35
check(
36
"hello ",
37
"hello world",
38
)
39
check(
40
"Once ",
41
"Once upon\t",
42
"Once upon\ta\n ",
43
"Once upon\ta\n time, ",
44
)
45
check(
46
"Snap, ",
47
"Snap, crackle, ",
48
"Snap, crackle, and ",
49
"Snap, crackle, and pop.",
50
)
51
check(
52
" leading ",
53
" leading whitespace",
54
)
55
}