code.oscarkilo.com/klex-git

Hash:
90583ee446b7f1e80b7d2bc20d236b9d6e118259
Author:
Igor Naverniouk <[email protected]>
Date:
Sun May 18 19:50:44 2025 -0700
Message:
reimplements `embed` using /embed/do
diff --git a/api/api.go b/api/api.go
index 3a64e9a..f06f49f 100644
--- a/api/api.go
+++ b/api/api.go
@@ -100,6 +100,17 @@ func (c *Client) Messages(req MessagesRequest) (*MessagesResponse, error) {
return &res, nil
}

+// Embed returns semantic embedding vectors for the given text.
+//
+// It POSTs to /embed/do with req.Text as the JSON request body and with
+// req.Model, req.Dims and req.WholePath as the "model", "dims" and "path"
+// query parameters. The response body is decoded as a list of vectors.
+// On any failure the returned slice is nil and the error names the endpoint.
+//
+// NOTE(review): req.Model is interpolated into the query string unescaped,
+// so a model name containing '&', '#', '%' or whitespace would corrupt the
+// URL. Consider net/url.QueryEscape once that package is imported here.
+func (c *Client) Embed(req EmbedRequest) ([][]float32, error) {
+ path := fmt.Sprintf(
+ "/embed/do?model=%s&dims=%d&path=%t",
+ req.Model, req.Dims, req.WholePath,
+ )
+ var vectors [][]float32
+ if err := c.call("POST", path, req.Text, &vectors); err != nil {
+ // Never hand back a partially decoded result alongside an error.
+ return nil, fmt.Errorf("Error POSTing to /embed/do: %v", err)
+ }
+ return vectors, nil
+}
+
// NewDataset creates a new dataset or updates an existing one.
// This is the simplest way, meant for datasets smaller than ~1GB.
func (c *Client) NewDataset(name string, data map[string]string) error {
diff --git a/api/embed.go b/api/embed.go
new file mode 100644
index 0000000..93c7ae7
--- /dev/null
+++ b/api/embed.go
@@ -0,0 +1,19 @@
+package api
+
+type EmbedRequest struct { // EmbedRequest is the argument to Client.Embed.
+ // Text is the input to embed; it should be shorter than ~8000 tokens.
+ Text string `json:"text"`

+ // Model is an embedding model name.
+ // These are hard-coded in //funky/builtins.
+ // A good choice is "openai:text-embedding-3-small".
+ Model string `json:"model"`

+ // Dims is the number of vector dimensions to return.
+ // A good choice is 1536 for openai:text-embedding-3-small.
+ Dims int `json:"dims"`

+ // WholePath returns a sequence of vectors, one per prefix of Text.
+ // Instead of the usual array of numbers, you'll get a 2D array.
+ WholePath bool `json:"whole_path"`
+}
diff --git a/embed/main.go b/embed/main.go
index 1448b2f..4b4dd05 100644
--- a/embed/main.go
+++ b/embed/main.go
@@ -2,17 +2,17 @@ package main

// This binary converts text into embedding vectors.

-import "encoding/json"
+//import "encoding/json"
import "flag"
import "fmt"
import "io/ioutil"
import "log"
import "os"
-import "sync"
+//import "sync"

import "oscarkilo.com/klex-git/api"
import "oscarkilo.com/klex-git/config"
-import "oscarkilo.com/klex-git/util"
+//import "oscarkilo.com/klex-git/util"

var model = flag.String("model", "openai:text-embedding-3-small", "")
var dims = flag.Int("dims", 1536, "Number of vector dimensions to return")
@@ -36,8 +36,19 @@ func main() {
if err != nil {
log.Fatalf("Failed to read stdin: %v", err)
}
- text := []string{string(sin)}
+ //text := []string{string(sin)}

+ vectors, err := client.Embed(api.EmbedRequest{
+ Text: string(sin),
+ Model: *model,
+ Dims: *dims,
+ WholePath: *whole_path,
+ })
+ if err != nil {
+ log.Fatalf("Failed to call Embed: %v", err)
+ }
+
+ /*
if *whole_path {
text = util.SplitByWord(text[0])
}
@@ -60,6 +71,7 @@ func main() {
}(i)
}
wg.Wait()
+ */

for _, vector := range vectors {
for i, w := range vector {
a/api/api.go
b/api/api.go
1
package api
1
package api
2
2
3
// This file is for Golang clients of Klex.
3
// This file is for Golang clients of Klex.
4
4
5
import (
5
import (
6
"bytes"
6
"bytes"
7
"encoding/json"
7
"encoding/json"
8
"fmt"
8
"fmt"
9
"io/ioutil"
9
"io/ioutil"
10
"log"
10
"log"
11
"net/http"
11
"net/http"
12
"sort"
12
"sort"
13
)
13
)
14
14
15
type Client struct {
15
type Client struct {
16
KlexURL string
16
KlexURL string
17
APIKey string
17
APIKey string
18
}
18
}
19
19
20
func NewClient(klexURL, apiKey string) *Client {
20
func NewClient(klexURL, apiKey string) *Client {
21
if klexURL == "" || apiKey == "" {
21
if klexURL == "" || apiKey == "" {
22
log.Printf("NewClient: missing klexURL or apiKey")
22
log.Printf("NewClient: missing klexURL or apiKey")
23
return nil
23
return nil
24
}
24
}
25
return &Client{klexURL, apiKey}
25
return &Client{klexURL, apiKey}
26
}
26
}
27
27
28
func (c *Client) call(method, path string, req, res interface{}) error {
28
func (c *Client) call(method, path string, req, res interface{}) error {
29
reqBody, err := json.Marshal(req)
29
reqBody, err := json.Marshal(req)
30
if err != nil {
30
if err != nil {
31
return fmt.Errorf("Cannot marshal request: %v", err)
31
return fmt.Errorf("Cannot marshal request: %v", err)
32
}
32
}
33
reqBytes := bytes.NewBuffer(reqBody)
33
reqBytes := bytes.NewBuffer(reqBody)
34
r, err := http.NewRequest(method, c.KlexURL + path, reqBytes)
34
r, err := http.NewRequest(method, c.KlexURL + path, reqBytes)
35
if err != nil {
35
if err != nil {
36
return fmt.Errorf("In http.NewRequest: %v", err)
36
return fmt.Errorf("In http.NewRequest: %v", err)
37
}
37
}
38
r.Header.Set("Authorization", "Bearer " + c.APIKey)
38
r.Header.Set("Authorization", "Bearer " + c.APIKey)
39
r.Header.Set("Content-Type", "application/json")
39
r.Header.Set("Content-Type", "application/json")
40
resHttp, err := http.DefaultClient.Do(r)
40
resHttp, err := http.DefaultClient.Do(r)
41
if err != nil {
41
if err != nil {
42
return fmt.Errorf("http.DefaultClient.Do: %v", err)
42
return fmt.Errorf("http.DefaultClient.Do: %v", err)
43
}
43
}
44
defer resHttp.Body.Close()
44
defer resHttp.Body.Close()
45
resBody, err := ioutil.ReadAll(resHttp.Body)
45
resBody, err := ioutil.ReadAll(resHttp.Body)
46
if err != nil {
46
if err != nil {
47
return fmt.Errorf("Response error: %v", err)
47
return fmt.Errorf("Response error: %v", err)
48
}
48
}
49
if resHttp.StatusCode != 200 && resHttp.StatusCode != 204 {
49
if resHttp.StatusCode != 200 && resHttp.StatusCode != 204 {
50
return fmt.Errorf("Status %d; response=%s", resHttp.StatusCode, resBody)
50
return fmt.Errorf("Status %d; response=%s", resHttp.StatusCode, resBody)
51
}
51
}
52
if res != nil {
52
if res != nil {
53
if err := json.Unmarshal(resBody, res); err != nil {
53
if err := json.Unmarshal(resBody, res); err != nil {
54
return fmt.Errorf("Bad response %s\nerror=%v", resBody, err)
54
return fmt.Errorf("Bad response %s\nerror=%v", resBody, err)
55
}
55
}
56
}
56
}
57
return nil
57
return nil
58
}
58
}
59
59
60
// F executes a function on one given input.
60
// F executes a function on one given input.
61
func (c *Client) F(f, in string) (string, error) {
61
func (c *Client) F(f, in string) (string, error) {
62
var res FResponse
62
var res FResponse
63
err := c.call("POST", "/f", FRequest{FName: f, In: in}, &res)
63
err := c.call("POST", "/f", FRequest{FName: f, In: in}, &res)
64
if err != nil {
64
if err != nil {
65
return "", err
65
return "", err
66
}
66
}
67
if res.Err != "" {
67
if res.Err != "" {
68
return "", fmt.Errorf(res.Err)
68
return "", fmt.Errorf(res.Err)
69
}
69
}
70
return res.Out, nil
70
return res.Out, nil
71
}
71
}
72
72
73
// Messages executes an LLM function using the Messages API.
73
// Messages executes an LLM function using the Messages API.
74
// Set req.Model to one of the Klex LLM function names.
74
// Set req.Model to one of the Klex LLM function names.
75
func (c *Client) Messages(req MessagesRequest) (*MessagesResponse, error) {
75
func (c *Client) Messages(req MessagesRequest) (*MessagesResponse, error) {
76
f := req.Model
76
f := req.Model
77
req.Model = ""
77
req.Model = ""
78
if f == "" {
78
if f == "" {
79
return nil, fmt.Errorf("MessagesRequest.Model is empty")
79
return nil, fmt.Errorf("MessagesRequest.Model is empty")
80
}
80
}
81
in, err := json.Marshal(req)
81
in, err := json.Marshal(req)
82
if err != nil {
82
if err != nil {
83
return nil, fmt.Errorf("Cannot marshal request: %v", err)
83
return nil, fmt.Errorf("Cannot marshal request: %v", err)
84
}
84
}
85
out, err := c.F(f, string(in))
85
out, err := c.F(f, string(in))
86
if err != nil {
86
if err != nil {
87
return nil, err
87
return nil, err
88
}
88
}
89
var res MessagesResponse
89
var res MessagesResponse
90
err = json.Unmarshal([]byte(out), &res)
90
err = json.Unmarshal([]byte(out), &res)
91
if err != nil {
91
if err != nil {
92
// Instead of failing, treat the whole output as text, and add an error.
92
// Instead of failing, treat the whole output as text, and add an error.
93
// Let the caller figure this out.
93
// Let the caller figure this out.
94
res.Error = &ErrorResponse{
94
res.Error = &ErrorResponse{
95
Type: "response-json",
95
Type: "response-json",
96
Message: err.Error(),
96
Message: err.Error(),
97
}
97
}
98
res.Content = []ContentBlock{{Type: "text", Text: out}}
98
res.Content = []ContentBlock{{Type: "text", Text: out}}
99
}
99
}
100
return &res, nil
100
return &res, nil
101
}
101
}
102
102
103
// Embed returns semantic embedding vectors for the given text.
104
func (c *Client) Embed(req EmbedRequest) ([][]float32, error) {
105
url := fmt.Sprintf(
106
"/embed/do?model=%s&dims=%d&path=%t",
107
req.Model, req.Dims, req.WholePath,
108
)
109
var resp [][]float32
110
err := c.call("POST", url, req.Text, &resp)
111
return resp, err
112
}
113
103
// NewDataset creates a new dataset or updates an existing one.
114
// NewDataset creates a new dataset or updates an existing one.
104
// This is the simplest way, meant for datasets smaller than ~1GB.
115
// This is the simplest way, meant for datasets smaller than ~1GB.
105
func (c *Client) NewDataset(name string, data map[string]string) error {
116
func (c *Client) NewDataset(name string, data map[string]string) error {
106
// TODO: this loses key names; get rid of this API.
117
// TODO: this loses key names; get rid of this API.
107
req := NewDatasetRequest{Name: name, Data: nil}
118
req := NewDatasetRequest{Name: name, Data: nil}
108
keys := make([]string, 0, len(data))
119
keys := make([]string, 0, len(data))
109
for k := range data {
120
for k := range data {
110
keys = append(keys, k)
121
keys = append(keys, k)
111
}
122
}
112
sort.Strings(keys)
123
sort.Strings(keys)
113
for _, k := range keys {
124
for _, k := range keys {
114
req.Data = append(req.Data, data[k])
125
req.Data = append(req.Data, data[k])
115
}
126
}
116
127
117
var res NewDatasetResponse
128
var res NewDatasetResponse
118
err := c.call("POST", "/datasets/new", req, &res)
129
err := c.call("POST", "/datasets/new", req, &res)
119
if err != nil {
130
if err != nil {
120
return fmt.Errorf("Error POSTing to /datasets/new: %v", err)
131
return fmt.Errorf("Error POSTing to /datasets/new: %v", err)
121
}
132
}
122
if res.Name != name || res.Size != len(data) {
133
if res.Name != name || res.Size != len(data) {
123
pretty, _ := json.MarshalIndent(res, "", " ")
134
pretty, _ := json.MarshalIndent(res, "", " ")
124
return fmt.Errorf("Unexpected response from /datasets/new: %s", pretty)
135
return fmt.Errorf("Unexpected response from /datasets/new: %s", pretty)
125
}
136
}
126
return nil
137
return nil
127
}
138
}
128
139
129
// BeginNewDataset starts a new dataset upload using the v2 API.
140
// BeginNewDataset starts a new dataset upload using the v2 API.
130
// Returns the version key to use in UploadKV() and EndNewDataset().
141
// Returns the version key to use in UploadKV() and EndNewDataset().
131
// Keep the key secret until EndNewDataset() returns successfully.
142
// Keep the key secret until EndNewDataset() returns successfully.
132
func (c *Client) BeginNewDataset(name string) (string, error) {
143
func (c *Client) BeginNewDataset(name string) (string, error) {
133
req := BeginNewDatasetRequest{Name: name}
144
req := BeginNewDatasetRequest{Name: name}
134
var res BeginNewDatasetResponse
145
var res BeginNewDatasetResponse
135
err := c.call("POST", "/datasets/begin_new", req, &res)
146
err := c.call("POST", "/datasets/begin_new", req, &res)
136
if err != nil {
147
if err != nil {
137
return "", fmt.Errorf("Error POSTing to /datasets/begin_new: %v", err)
148
return "", fmt.Errorf("Error POSTing to /datasets/begin_new: %v", err)
138
}
149
}
139
return res.VersionKey, nil
150
return res.VersionKey, nil
140
}
151
}
141
152
142
// UploadKV uploads more key-value pairs of the dataset being created.
153
// UploadKV uploads more key-value pairs of the dataset being created.
143
func (c *Client) UploadKV(versionKey string, records []KV) error {
154
func (c *Client) UploadKV(versionKey string, records []KV) error {
144
req := UploadKVRequest{VersionKey: versionKey, Records: records}
155
req := UploadKVRequest{VersionKey: versionKey, Records: records}
145
err := c.call("POST", "/datasets/upload_kv", req, nil)
156
err := c.call("POST", "/datasets/upload_kv", req, nil)
146
if err != nil {
157
if err != nil {
147
return fmt.Errorf("Error POSTing to /datasets/upload_kv: %v", err)
158
return fmt.Errorf("Error POSTing to /datasets/upload_kv: %v", err)
148
}
159
}
149
return nil
160
return nil
150
}
161
}
151
162
152
// EndNewDataset commits the dataset being created.
163
// EndNewDataset commits the dataset being created.
153
func (c *Client) EndNewDataset(name, version_key string, size int) error {
164
func (c *Client) EndNewDataset(name, version_key string, size int) error {
154
req := EndNewDatasetRequest{Name: name, VersionKey: version_key, Size: size}
165
req := EndNewDatasetRequest{Name: name, VersionKey: version_key, Size: size}
155
err := c.call("POST", "/datasets/end_new", req, nil)
166
err := c.call("POST", "/datasets/end_new", req, nil)
156
if err != nil {
167
if err != nil {
157
return fmt.Errorf("Error POSTing to /datasets/end_new: %v", err)
168
return fmt.Errorf("Error POSTing to /datasets/end_new: %v", err)
158
}
169
}
159
return nil
170
return nil
160
}
171
}
/dev/null
b/api/embed.go
1
package api
2
3
type EmbedRequest struct {
4
// Text should be shorter than ~8000 tokens.
5
Text string `json:"text"`
6
7
// Model is an embedding model name.
8
// These are hard-coded in //funky/builtins.
9
// A good choice is "openai:text-embedding-3-small".
10
Model string `json:"model"`
11
12
// Dims is the number of vector dimensions to return.
13
// A good choice is 1536 for openai:text-embedding-3-small.
14
Dims int `json:"dims"`
15
16
// WholePath returns a sequence of vectors, one per prefix of Text.
17
// Instead of the usual array of numbers, you'll get a 2D array.
18
WholePath bool `json:"whole_path"`
19
}
a/embed/main.go
b/embed/main.go
1
package main
1
package main
2
2
3
// This binary converts text into embedding vectors.
3
// This binary converts text into embedding vectors.
4
4
5
import "encoding/json"
5
//import "encoding/json"
6
import "flag"
6
import "flag"
7
import "fmt"
7
import "fmt"
8
import "io/ioutil"
8
import "io/ioutil"
9
import "log"
9
import "log"
10
import "os"
10
import "os"
11
import "sync"
11
//import "sync"
12
12
13
import "oscarkilo.com/klex-git/api"
13
import "oscarkilo.com/klex-git/api"
14
import "oscarkilo.com/klex-git/config"
14
import "oscarkilo.com/klex-git/config"
15
import "oscarkilo.com/klex-git/util"
15
//import "oscarkilo.com/klex-git/util"
16
16
17
var model = flag.String("model", "openai:text-embedding-3-small", "")
17
var model = flag.String("model", "openai:text-embedding-3-small", "")
18
var dims = flag.Int("dims", 1536, "Number of vector dimensions to return")
18
var dims = flag.Int("dims", 1536, "Number of vector dimensions to return")
19
var whole_path = flag.Bool("whole_path", false, "Returns a list of vectors")
19
var whole_path = flag.Bool("whole_path", false, "Returns a list of vectors")
20
20
21
func main() {
21
func main() {
22
flag.Parse()
22
flag.Parse()
23
23
24
// Find the API keys and configure a Klex client.
24
// Find the API keys and configure a Klex client.
25
config, err := config.ReadConfig()
25
config, err := config.ReadConfig()
26
if err != nil {
26
if err != nil {
27
log.Fatalf("Failed to read config: %v", err)
27
log.Fatalf("Failed to read config: %v", err)
28
}
28
}
29
client := api.NewClient(config.KlexUrl, config.ApiKey)
29
client := api.NewClient(config.KlexUrl, config.ApiKey)
30
if client == nil {
30
if client == nil {
31
log.Fatalf("Failed to create Klex client")
31
log.Fatalf("Failed to create Klex client")
32
}
32
}
33
33
34
// Read stdin as text.
34
// Read stdin as text.
35
sin, err := ioutil.ReadAll(os.Stdin)
35
sin, err := ioutil.ReadAll(os.Stdin)
36
if err != nil {
36
if err != nil {
37
log.Fatalf("Failed to read stdin: %v", err)
37
log.Fatalf("Failed to read stdin: %v", err)
38
}
38
}
39
text := []string{string(sin)}
39
//text := []string{string(sin)}
40
40
41
vectors, err := client.Embed(api.EmbedRequest{
42
Text: string(sin),
43
Model: *model,
44
Dims: *dims,
45
WholePath: *whole_path,
46
})
47
if err != nil {
48
log.Fatalf("Failed to call Embed: %v", err)
49
}
50
51
/*
41
if *whole_path {
52
if *whole_path {
42
text = util.SplitByWord(text[0])
53
text = util.SplitByWord(text[0])
43
}
54
}
44
55
45
f_name := fmt.Sprintf("embed-%s@%d", *model, *dims)
56
f_name := fmt.Sprintf("embed-%s@%d", *model, *dims)
46
vectors := make([][]float32, len(text))
57
vectors := make([][]float32, len(text))
47
wg := sync.WaitGroup{}
58
wg := sync.WaitGroup{}
48
for i := range text {
59
for i := range text {
49
wg.Add(1)
60
wg.Add(1)
50
go func(i int) {
61
go func(i int) {
51
json_vector, err := client.F(f_name, text[i])
62
json_vector, err := client.F(f_name, text[i])
52
if err != nil {
63
if err != nil {
53
log.Fatalf("Failed to call F: %v", err)
64
log.Fatalf("Failed to call F: %v", err)
54
}
65
}
55
err = json.Unmarshal([]byte(json_vector), &vectors[i])
66
err = json.Unmarshal([]byte(json_vector), &vectors[i])
56
if err != nil {
67
if err != nil {
57
log.Fatalf("Failed to parse vector: %v", err)
68
log.Fatalf("Failed to parse vector: %v", err)
58
}
69
}
59
wg.Done()
70
wg.Done()
60
}(i)
71
}(i)
61
}
72
}
62
wg.Wait()
73
wg.Wait()
74
*/
63
75
64
for _, vector := range vectors {
76
for _, vector := range vectors {
65
for i, w := range vector {
77
for i, w := range vector {
66
if i > 0 {
78
if i > 0 {
67
fmt.Printf(" ")
79
fmt.Printf(" ")
68
}
80
}
69
fmt.Printf("%g", w)
81
fmt.Printf("%g", w)
70
}
82
}
71
fmt.Printf("\n")
83
fmt.Printf("\n")
72
}
84
}
73
}
85
}