diff --git a/internal/api/handler.go b/internal/api/handler.go
index 8ae58f4..57db25b 100644
--- a/internal/api/handler.go
+++ b/internal/api/handler.go
@@ -50,6 +50,34 @@ func TranscribeFromFile(c echo.Context, whisperState *WhisperState) error {
return c.JSON(http.StatusOK, response)
}
+// TranscribeBytes loads the audio in buffer, runs it through the whisper context
+// and returns the text with leading spaces trimmed, or "" if any step fails.
+func TranscribeBytes(buffer []byte, whisperState *WhisperState) string {
+	// Hold the state's mutex for the whole transcription.
+	whisperState.mutex.Lock()
+	defer whisperState.mutex.Unlock()
+
+	bufferSpecial, err := whisperState.media.LoadAudioFileData(&buffer, true)
+	if err != nil {
+		// Builtin println prints an error value as raw pointers; use Error().
+		println("Error loading audio file data: ", err.Error())
+		return ""
+	}
+
+	err = whisperState.context.RunStreamed(whisperState.params, bufferSpecial)
+	if err != nil {
+		println("Error processing audio: ", err.Error())
+		return ""
+	}
+
+	result, err := getResult(whisperState.context)
+	if err != nil {
+		println("Error getting result: ", err.Error())
+		return ""
+	}
+	return strings.TrimLeft(result, " ")
+}
+
func Transcribe(c echo.Context, whisperState *WhisperState) error {
// Get the file header
fileHeader, err := c.FormFile("file")
diff --git a/internal/resources/cli_arguments.go b/internal/resources/cli_arguments.go
index 3a29a48..01d311a 100644
--- a/internal/resources/cli_arguments.go
+++ b/internal/resources/cli_arguments.go
@@ -20,6 +20,7 @@ type ParsedArguments struct {
Language int32
ModelPath string
Port int
+ // Buffer []byte
}
type LanguageMap map[string]string
@@ -64,6 +65,8 @@ func ParseFlags() (*ParsedArguments, error) {
flag.StringVar(&args.Language, "language", "", "Language to be processed") // Optional: Redundant to demonstrate
flag.StringVar(&args.ModelPath, "m", "", "Path to the model file (required)")
flag.StringVar(&args.ModelPath, "modelPath", "", "Path to the model file (required)") // Optional: Redundant
+ // bufferArg := flag.String("buffer", "", "Base64-encoded buffer data")
+
flag.IntVar(&args.Port, "p", 3031, "Port to start the server on")
flag.IntVar(&args.Port, "port", 3031, "Port to start the server on") // Optional: Redundant
@@ -76,6 +79,16 @@ func ParseFlags() (*ParsedArguments, error) {
// Parsing flags
flag.Parse()
+ // if *bufferArg != "" {
+ // decodedBuffer, err := base64.StdEncoding.DecodeString(*bufferArg)
+ // if err != nil {
+ // fmt.Println("Error decoding buffer:", err)
+ // return nil, err
+ // }
+ // // Process the decoded buffer (e.g., print its contents)
+ // fmt.Println("Decoded Buffer:", string(decodedBuffer))
+ // }
+
args.Language = strings.ToLower(args.Language)
if args.ModelPath == "" {
@@ -92,5 +105,6 @@ func ParseFlags() (*ParsedArguments, error) {
Language: languageCode,
ModelPath: args.ModelPath,
Port: args.Port,
+ // Buffer: []byte(*bufferArg),
}, nil
}
diff --git a/main.go b/main.go
index 5465df9..b9eee5b 100644
--- a/main.go
+++ b/main.go
@@ -1,21 +1,24 @@
package main
import (
+ "bufio"
+ "encoding/base64"
"fmt"
"os"
"path/filepath"
- "github.com/labstack/echo/v4"
- "github.com/labstack/echo/v4/middleware"
- "github.com/labstack/gommon/log"
"github.com/xzeldon/whisper-api-server/internal/api"
"github.com/xzeldon/whisper-api-server/internal/resources"
)
-func change_working_directory(e *echo.Echo) {
+// Markers that main prints immediately before and after the transcription result.
+const beginDelimiter = "[begin]"
+const endDelimiter = "[end]"
+
+func change_working_directory() {
exePath, errs := os.Executable()
if errs != nil {
- e.Logger.Error(errs)
+		println("Error getting executable path:", errs.Error())
return
}
@@ -24,7 +27,7 @@ func change_working_directory(e *echo.Echo) {
// Change the working directory to the executable directory
errs = os.Chdir(exeDir)
if errs != nil {
- e.Logger.Error(errs)
+		println("Error changing working directory:", errs.Error())
return
}
@@ -34,32 +37,46 @@ func change_working_directory(e *echo.Echo) {
func main() {
- e := echo.New()
- e.HideBanner = true
- change_working_directory(e)
+ change_working_directory()
args, errParsing := resources.ParseFlags()
if errParsing != nil {
- e.Logger.Error("Error parsing flags: ", errParsing)
+		println("Error parsing flags: ", errParsing.Error())
return
}
- e.Use(middleware.CORS())
-
- if l, ok := e.Logger.(*log.Logger); ok {
- l.SetHeader("${time_rfc3339} ${level}")
- }
-
whisperState, err := api.InitializeWhisperState(args.ModelPath, args.Language)
if err != nil {
- e.Logger.Error(err)
+		// Report the cause (println cannot format an error value) and stop.
+		println("Error initializing whisper state: ", err.Error())
+		return
+	}
+	// Scanner buffer limit: 2048*10240 bytes = 20 MiB per input line.
+	const maxCapacity = 2048 * 10240
+
+	scanner := bufio.NewScanner(os.Stdin)
+	buf := make([]byte, maxCapacity)
+	scanner.Buffer(buf, maxCapacity)
+
+	println("waiting_for_input")
+	if scanner.Scan() {
+		decodedBuffer, err := base64.StdEncoding.DecodeString(scanner.Text())
+		if err != nil {
+			fmt.Println("Error decoding buffer:", err)
+			return
+		}
+
+		result := api.TranscribeBytes(decodedBuffer, whisperState)
+		println(beginDelimiter + result + endDelimiter)
+		println("finished")
+
+		// Extra status lines on stdout, e.g. for a Node.js parent process.
+		fmt.Println("Received buffer size:", len(decodedBuffer))
+		fmt.Fprintln(os.Stdout, "Buffer received successfully!")
+	} else if err := scanner.Err(); err != nil {
+		fmt.Println("Error reading from stdin:", err)
}
- e.POST("/v1/audio/transcriptions", func(c echo.Context) error {
-
- return api.Transcribe(c, whisperState)
- })
-
- e.Logger.Fatal(e.Start(fmt.Sprintf("127.0.0.1:%d", args.Port)))
+ // e.Logger.Fatal(e.Start(fmt.Sprintf("127.0.0.1:%d", args.Port)))
}
diff --git a/pkg/whisper/FullParams.go b/pkg/whisper/FullParams.go
index 4bbdd0a..98dfd4d 100644
--- a/pkg/whisper/FullParams.go
+++ b/pkg/whisper/FullParams.go
@@ -93,7 +93,7 @@ func (this *FullParams) SetLanguage(language int32) {
return
}
- this.cStruct.Language = eLanguage(language)
+ this.cStruct.Language = language
}
/*using pfnNewSegment = HRESULT( __cdecl* )( iContext* ctx, uint32_t n_new, void* user_data ) noexcept;*/
@@ -178,7 +178,7 @@ type _FullParams struct {
offset_ms int32
duration_ms int32
Flags eFullParamsFlags
- Language eLanguage
+ Language int32
thold_pt float32
thold_ptsum float32
diff --git a/pkg/whisper/language.go b/pkg/whisper/language.go
deleted file mode 100644
index 4e6a042..0000000
--- a/pkg/whisper/language.go
+++ /dev/null
@@ -1,207 +0,0 @@
-package whisper
-
-// https://github.com/Const-me/Whisper/blob/master/WhisperNet/API/eLanguage.cs
-
-type eLanguage int32
-
-const (
- Auto eLanguage = -1 // "af"
-
- Afrikaans = 0x6661 // "af"
- /// Albanian
- Albanian = 0x7173 // "sq"
- /// Amharic
- Amharic = 0x6D61 // "am"
- /// Arabic
- Arabic = 0x7261 // "ar"
- /// Armenian
- Armenian = 0x7968 // "hy"
- /// Assamese
- Assamese = 0x7361 // "as"
- /// Azerbaijani
- Azerbaijani = 0x7A61 // "az"
- /// Bashkir
- Bashkir = 0x6162 // "ba"
- /// Basque
- Basque = 0x7565 // "eu"
- /// Belarusian
- Belarusian = 0x6562 // "be"
- /// Bengali
- Bengali = 0x6E62 // "bn"
- /// Bosnian
- Bosnian = 0x7362 // "bs"
- /// Breton
- Breton = 0x7262 // "br"
- /// Bulgarian
- Bulgarian = 0x6762 // "bg"
- /// Catalan
- Catalan = 0x6163 // "ca"
- /// Chinese
- Chinese = 0x687A // "zh"
- /// Croatian
- Croatian = 0x7268 // "hr"
- /// Czech
- Czech = 0x7363 // "cs"
- /// Danish
- Danish = 0x6164 // "da"
- /// Dutch
- Dutch = 0x6C6E // "nl"
- /// English
- English = 0x6E65 // "en"
- /// Estonian
- Estonian = 0x7465 // "et"
- /// Faroese
- Faroese = 0x6F66 // "fo"
- /// Finnish
- Finnish = 0x6966 // "fi"
- /// French
- French = 0x7266 // "fr"
- /// Galician
- Galician = 0x6C67 // "gl"
- /// Georgian
- Georgian = 0x616B // "ka"
- /// German
- German = 0x6564 // "de"
- /// Greek
- Greek = 0x6C65 // "el"
- /// Gujarati
- Gujarati = 0x7567 // "gu"
- /// Haitian Creole
- HaitianCreole = 0x7468 // "ht"
- /// Hausa
- Hausa = 0x6168 // "ha"
- /// Hawaiian
- Hawaiian = 0x776168 // "haw"
- /// Hebrew
- Hebrew = 0x7769 // "iw"
- /// Hindi
- Hindi = 0x6968 // "hi"
- /// Hungarian
- Hungarian = 0x7568 // "hu"
- /// Icelandic
- Icelandic = 0x7369 // "is"
- /// Indonesian
- Indonesian = 0x6469 // "id"
- /// Italian
- Italian = 0x7469 // "it"
- /// Japanese
- Japanese = 0x616A // "ja"
- /// Javanese
- Javanese = 0x776A // "jw"
- /// Kannada
- Kannada = 0x6E6B // "kn"
- /// Kazakh
- Kazakh = 0x6B6B // "kk"
- /// Khmer
- Khmer = 0x6D6B // "km"
- /// Korean
- Korean = 0x6F6B // "ko"
- /// Lao
- Lao = 0x6F6C // "lo"
- /// Latin
- Latin = 0x616C // "la"
- /// Latvian
- Latvian = 0x766C // "lv"
- /// Lingala
- Lingala = 0x6E6C // "ln"
- /// Lithuanian
- Lithuanian = 0x746C // "lt"
- /// Luxembourgish
- Luxembourgish = 0x626C // "lb"
- /// Macedonian
- Macedonian = 0x6B6D // "mk"
- /// Malagasy
- Malagasy = 0x676D // "mg"
- /// Malay
- Malay = 0x736D // "ms"
- /// Malayalam
- Malayalam = 0x6C6D // "ml"
- /// Maltese
- Maltese = 0x746D // "mt"
- /// Maori
- Maori = 0x696D // "mi"
- /// Marathi
- Marathi = 0x726D // "mr"
- /// Mongolian
- Mongolian = 0x6E6D // "mn"
- /// Myanmar
- Myanmar = 0x796D // "my"
- /// Nepali
- Nepali = 0x656E // "ne"
- /// Norwegian
- Norwegian = 0x6F6E // "no"
- /// Nynorsk
- Nynorsk = 0x6E6E // "nn"
- /// Occitan
- Occitan = 0x636F // "oc"
- /// Pashto
- Pashto = 0x7370 // "ps"
- /// Persian
- Persian = 0x6166 // "fa"
- /// Polish
- Polish = 0x6C70 // "pl"
- /// Portuguese
- Portuguese = 0x7470 // "pt"
- /// Punjabi
- Punjabi = 0x6170 // "pa"
- /// Romanian
- Romanian = 0x6F72 // "ro"
- /// Russian
- Russian = 0x7572 // "ru"
- /// Sanskrit
- Sanskrit = 0x6173 // "sa"
- /// Serbian
- Serbian = 0x7273 // "sr"
- /// Shona
- Shona = 0x6E73 // "sn"
- /// Sindhi
- Sindhi = 0x6473 // "sd"
- /// Sinhala
- Sinhala = 0x6973 // "si"
- /// Slovak
- Slovak = 0x6B73 // "sk"
- /// Slovenian
- Slovenian = 0x6C73 // "sl"
- /// Somali
- Somali = 0x6F73 // "so"
- /// Spanish
- Spanish = 0x7365 // "es"
- /// Sundanese
- Sundanese = 0x7573 // "su"
- /// Swahili
- Swahili = 0x7773 // "sw"
- /// Swedish
- Swedish = 0x7673 // "sv"
- /// Tagalog
- Tagalog = 0x6C74 // "tl"
- /// Tajik
- Tajik = 0x6774 // "tg"
- /// Tamil
- Tamil = 0x6174 // "ta"
- /// Tatar
- Tatar = 0x7474 // "tt"
- /// Telugu
- Telugu = 0x6574 // "te"
- /// Thai
- Thai = 0x6874 // "th"
- /// Tibetan
- Tibetan = 0x6F62 // "bo"
- /// Turkish
- Turkish = 0x7274 // "tr"
- /// Turkmen
- Turkmen = 0x6B74 // "tk"
- /// Ukrainian
- Ukrainian = 0x6B75 // "uk"
- /// Urdu
- Urdu = 0x7275 // "ur"
- /// Uzbek
- Uzbek = 0x7A75 // "uz"
- /// Vietnamese
- Vietnamese = 0x6976 // "vi"
- /// Welsh
- Welsh = 0x7963 // "cy"
- /// Yiddish
- Yiddish = 0x6979 // "yi"
- /// Yoruba
- Yoruba = 0x6F79 // "yo"
-)