code.oscarkilo.com/klex-git

Hash:
dbb2708b9bcf00cdf090089fc2e773b957c8d1b9
Author:
Igor Naverniouk <[email protected]>
Date:
Sun May 18 07:18:47 2025 -0700
Message:
vector traces
diff --git a/embed/main.go b/embed/main.go
index 5a98a6c..1448b2f 100644
--- a/embed/main.go
+++ b/embed/main.go
@@ -8,12 +8,15 @@ import "fmt"
import "io/ioutil"
import "log"
import "os"
+import "sync"

import "oscarkilo.com/klex-git/api"
import "oscarkilo.com/klex-git/config"
+import "oscarkilo.com/klex-git/util"

var model = flag.String("model", "openai:text-embedding-3-small", "")
var dims = flag.Int("dims", 1536, "Number of vector dimensions to return")
+var whole_path = flag.Bool("whole_path", false, "Returns a list of vectors")

func main() {
flag.Parse()
@@ -33,23 +36,38 @@ func main() {
if err != nil {
log.Fatalf("Failed to read stdin: %v", err)
}
+ text := []string{string(sin)}

- f_name := fmt.Sprintf("embed-%s@%d", *model, *dims)
- json_vector, err := client.F(f_name, string(sin))
- if err != nil {
- log.Fatalf("Failed to call F: %v", err)
+ if *whole_path {
+ text = util.SplitByWord(text[0])
}
- var vector []float32
- err = json.Unmarshal([]byte(json_vector), &vector)
- if err != nil {
- log.Fatalf("Failed to parse vector: %v", err)
+
+ f_name := fmt.Sprintf("embed-%s@%d", *model, *dims)
+ vectors := make([][]float32, len(text))
+ wg := sync.WaitGroup{}
+ for i := range text {
+ wg.Add(1)
+ go func(i int) {
+ json_vector, err := client.F(f_name, text[i])
+ if err != nil {
+ log.Fatalf("Failed to call F: %v", err)
+ }
+ err = json.Unmarshal([]byte(json_vector), &vectors[i])
+ if err != nil {
+ log.Fatalf("Failed to parse vector: %v", err)
+ }
+ wg.Done()
+ }(i)
}
+ wg.Wait()

- for i, w := range vector {
- if i > 0 {
- fmt.Printf(" ")
+ for _, vector := range vectors {
+ for i, w := range vector {
+ if i > 0 {
+ fmt.Printf(" ")
+ }
+ fmt.Printf("%g", w)
}
- fmt.Printf("%g", w)
+ fmt.Printf("\n")
}
- fmt.Printf("\n")
}
diff --git a/util/prompts.go b/util/prompts.go
new file mode 100644
index 0000000..5611247
--- /dev/null
+++ b/util/prompts.go
@@ -0,0 +1,30 @@
+package util
+
+import "unicode"
+
+// SplitByWord returns all whitespace-terminated prefixes of 'text'.
+// The first entry will be the first word and its trailing whitespace.
+// The last entry will be the whole 'text'.
+func SplitByWord(text string) []string {
+ var prefixes []string
+ runes := []rune(text)
+
+ const ALL_WHITESPACE = 0
+ const SAW_CHARS = 1
+ const SAW_WHITESPACE_AFTER_CHARS = 2
+ state := ALL_WHITESPACE
+
+ for i, c := range runes {
+ if unicode.IsSpace(c) {
+ if state == SAW_CHARS {
+ state = SAW_WHITESPACE_AFTER_CHARS
+ }
+ } else {
+ if state == SAW_WHITESPACE_AFTER_CHARS {
+ prefixes = append(prefixes, string(runes[:i]))
+ }
+ state = SAW_CHARS
+ }
+ }
+ return append(prefixes, text)
+}
diff --git a/util/prompts_test.go b/util/prompts_test.go
new file mode 100644
index 0000000..35a6022
--- /dev/null
+++ b/util/prompts_test.go
@@ -0,0 +1,55 @@
+package util
+
+import "encoding/json"
+import "testing"
+
+func same(a, b []string) bool {
+ if len(a) != len(b) {
+ return false
+ }
+ for i := range a {
+ if a[i] != b[i] {
+ return false
+ }
+ }
+ return true
+}
+
+func TestSplitByWord(t *testing.T) {
+ // check verifies that golden == SplitByWord(golden[-1]).
+ check := func(golden ...string) {
+ t.Helper()
+ in := golden[len(golden)-1]
+ out := SplitByWord(in)
+ if !same(golden, out) {
+ ijson, _ := json.Marshal(in)
+ gjson, _ := json.MarshalIndent(golden, "", " ")
+ ojson, _ := json.MarshalIndent(out, "", " ")
+ t.Errorf("SplitByWord(%s):\nwant: %s\nhave: %s", ijson, gjson, ojson)
+ }
+ }
+
+ check("")
+ check(" ")
+ check(" \n\t \n")
+ check(
+ "hello ",
+ "hello world",
+ )
+ check(
+ "Once ",
+ "Once upon\t",
+ "Once upon\ta\n ",
+ "Once upon\ta\n time, ",
+ )
+ check(
+ "Snap, ",
+ "Snap, crackle, ",
+ "Snap, crackle, and ",
+ "Snap, crackle, and pop.",
+ )
+ check(
+ " leading ",
+ " leading whitespace",
+ )
+}
a/embed/main.go
b/embed/main.go
1
package main
1
package main
2
2
3
// This binary converts text into embedding vectors.
3
// This binary converts text into embedding vectors.
4
4
5
import "encoding/json"
5
import "encoding/json"
6
import "flag"
6
import "flag"
7
import "fmt"
7
import "fmt"
8
import "io/ioutil"
8
import "io/ioutil"
9
import "log"
9
import "log"
10
import "os"
10
import "os"
11
import "sync"
11
12
12
import "oscarkilo.com/klex-git/api"
13
import "oscarkilo.com/klex-git/api"
13
import "oscarkilo.com/klex-git/config"
14
import "oscarkilo.com/klex-git/config"
15
import "oscarkilo.com/klex-git/util"
14
16
15
var model = flag.String("model", "openai:text-embedding-3-small", "")
17
var model = flag.String("model", "openai:text-embedding-3-small", "")
16
var dims = flag.Int("dims", 1536, "Number of vector dimensions to return")
18
var dims = flag.Int("dims", 1536, "Number of vector dimensions to return")
19
var whole_path = flag.Bool("whole_path", false, "Returns a list of vectors")
17
20
18
func main() {
21
func main() {
19
flag.Parse()
22
flag.Parse()
20
23
21
// Find the API keys and configure a Klex client.
24
// Find the API keys and configure a Klex client.
22
config, err := config.ReadConfig()
25
config, err := config.ReadConfig()
23
if err != nil {
26
if err != nil {
24
log.Fatalf("Failed to read config: %v", err)
27
log.Fatalf("Failed to read config: %v", err)
25
}
28
}
26
client := api.NewClient(config.KlexUrl, config.ApiKey)
29
client := api.NewClient(config.KlexUrl, config.ApiKey)
27
if client == nil {
30
if client == nil {
28
log.Fatalf("Failed to create Klex client")
31
log.Fatalf("Failed to create Klex client")
29
}
32
}
30
33
31
// Read stdin as text.
34
// Read stdin as text.
32
sin, err := ioutil.ReadAll(os.Stdin)
35
sin, err := ioutil.ReadAll(os.Stdin)
33
if err != nil {
36
if err != nil {
34
log.Fatalf("Failed to read stdin: %v", err)
37
log.Fatalf("Failed to read stdin: %v", err)
35
}
38
}
39
text := []string{string(sin)}
36
40
37
f_name := fmt.Sprintf("embed-%s@%d", *model, *dims)
41
if *whole_path {
38
json_vector, err := client.F(f_name, string(sin))
42
text = util.SplitByWord(text[0])
39
if err != nil {
40
log.Fatalf("Failed to call F: %v", err)
41
}
43
}
42
var vector []float32
44
43
err = json.Unmarshal([]byte(json_vector), &vector)
45
f_name := fmt.Sprintf("embed-%s@%d", *model, *dims)
44
if err != nil {
46
vectors := make([][]float32, len(text))
45
log.Fatalf("Failed to parse vector: %v", err)
47
wg := sync.WaitGroup{}
48
for i := range text {
49
wg.Add(1)
50
go func(i int) {
51
json_vector, err := client.F(f_name, text[i])
52
if err != nil {
53
log.Fatalf("Failed to call F: %v", err)
54
}
55
err = json.Unmarshal([]byte(json_vector), &vectors[i])
56
if err != nil {
57
log.Fatalf("Failed to parse vector: %v", err)
58
}
59
wg.Done()
60
}(i)
46
}
61
}
62
wg.Wait()
47
63
48
for i, w := range vector {
64
for _, vector := range vectors {
49
if i > 0 {
65
for i, w := range vector {
50
fmt.Printf(" ")
66
if i > 0 {
67
fmt.Printf(" ")
68
}
69
fmt.Printf("%g", w)
51
}
70
}
52
fmt.Printf("%g", w)
71
fmt.Printf("\n")
53
}
72
}
54
fmt.Printf("\n")
55
}
73
}
/dev/null
b/util/prompts.go
1
package util
2
3
import "unicode"
4
5
// SplitByWord returns the prefixes of 'text' that end at word boundaries,
// shortest first, followed by 'text' itself.
//
// A prefix is emitted every time a non-whitespace rune follows a run of
// whitespace that itself follows at least one non-whitespace rune. Each such
// prefix therefore ends in whitespace, and it includes any leading whitespace
// of 'text'. Whitespace is whatever unicode.IsSpace reports.
//
// The last entry is always the whole 'text', so the result is never empty:
// empty or all-whitespace input yields a single-element slice.
//
// Every entry is a byte-exact prefix of 'text'; bytes that are not valid
// UTF-8 are passed through unchanged and are treated as non-whitespace.
func SplitByWord(text string) []string {
	var prefixes []string

	const (
		allWhitespace = iota
		sawChars
		sawWhitespaceAfterChars
	)
	state := allWhitespace

	// Ranging over the string yields the byte offset of each rune, so
	// text[:i] is the exact prefix that precedes the current rune.
	for i, c := range text {
		if unicode.IsSpace(c) {
			if state == sawChars {
				state = sawWhitespaceAfterChars
			}
			continue
		}
		if state == sawWhitespaceAfterChars {
			// First rune of a new word: everything before it is a prefix.
			prefixes = append(prefixes, text[:i])
		}
		state = sawChars
	}
	return append(prefixes, text)
}
/dev/null
b/util/prompts_test.go
1
package util
2
3
import "encoding/json"
4
import "testing"
5
6
// same reports whether a and b have the same length and hold equal strings
// at every index. A nil slice and an empty slice compare as equal.
func same(a, b []string) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for idx := 0; idx < n; idx++ {
		if a[idx] != b[idx] {
			return false
		}
	}
	return true
}
17
18
func TestSplitByWord(t *testing.T) {
19
// check verifies that golden == SplitByWord(golden[-1]).
20
check := func(golden ...string) {
21
t.Helper()
22
in := golden[len(golden)-1]
23
out := SplitByWord(in)
24
if !same(golden, out) {
25
ijson, _ := json.Marshal(in)
26
gjson, _ := json.MarshalIndent(golden, "", " ")
27
ojson, _ := json.MarshalIndent(out, "", " ")
28
t.Errorf("SplitByWord(%s):\nwant: %s\nhave: %s", ijson, gjson, ojson)
29
}
30
}
31
32
check("")
33
check(" ")
34
check(" \n\t \n")
35
check(
36
"hello ",
37
"hello world",
38
)
39
check(
40
"Once ",
41
"Once upon\t",
42
"Once upon\ta\n ",
43
"Once upon\ta\n time, ",
44
)
45
check(
46
"Snap, ",
47
"Snap, crackle, ",
48
"Snap, crackle, and ",
49
"Snap, crackle, and pop.",
50
)
51
check(
52
" leading ",
53
" leading whitespace",
54
)
55
}