Compare commits

..

7 Commits

Author SHA1 Message Date
Łukasz Kwiecień
6289e7f401 Use stdin instead of a server 2024-04-14 15:59:29 +02:00
Łukasz Kwiecień
2d44f72466
Update release.yml 2024-04-03 23:09:07 +02:00
Lukasz
8275ad584b Fix ms false positive again 2024-04-03 22:34:14 +02:00
Lukasz
1546e3f145 Move language map to json file in order to not trigger microsoft antivirus 2024-03-19 18:35:24 +01:00
Łukasz Kwiecień
fc48cca110
Feat/accept cmd args (#2)
* make working dir as executable dir

* Add support for cli args
2024-03-14 21:06:34 +01:00
Lukasz
c4daf3ec71 Transcribe using file buffer straight from form 2024-03-11 20:55:47 +01:00
Łukasz Kwiecień
dde206facd
Merge pull request #1 from lukaskwkw/chore/update-readme-build
Add pre build step
2024-03-11 11:18:20 +01:00
14 changed files with 219 additions and 625 deletions

View File

@ -1,10 +1,9 @@
name: Build and Release name: goreleaser
on: on:
workflow_dispatch:
push: push:
tags: tags:
- "v[0-9]+.[0-9]+.[0-9]+" - "*"
permissions: permissions:
contents: write contents: write

View File

@ -9,20 +9,15 @@ This API server enables audio transcription using the OpenAI Whisper models.
- Download `.exe` from [Releases](https://github.com/xzeldon/whisper-api-server/releases/latest) - Download `.exe` from [Releases](https://github.com/xzeldon/whisper-api-server/releases/latest)
- Just run it! - Just run it!
# Build from source (Windows) # Build from source
## Prerequisites
- GCC Compiler Installed in your PATH (You can get it from [here](https://github.com/niXman/mingw-builds-binaries))
- Install Go (https://go.dev/doc/install)
Before build make sure that **CGO_ENABLED** env is set to **1** Before build make sure that **CGO_ENABLED** env is set to **1**
``` ```
$env:CGO_ENABLED = "1" set CGO_ENABLED 1
``` ```
you can check this with this command or preferably set it permanently. Then check it via
``` ```
go env go env
@ -42,7 +37,7 @@ go build -ldflags "-s -w" -o server.exe main.go
Make a request to the server using the following command: Make a request to the server using the following command:
```sh ```sh
curl http://localhost:3000/v1/audio/transcriptions \ curl http://localhost:3031/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F file="@/path/to/file/audio.mp3" \ -F file="@/path/to/file/audio.mp3" \
``` ```
@ -51,7 +46,7 @@ Receive a response in JSON format:
```json ```json
{ {
"text": "Imagine the wildest idea that you've ever had, and you're curious about how it might scale to something that's a 100, a 1,000 times bigger. This is a place where you can get to do that." "text": "Imagine the wildest idea that you've ever had, and you're curious about how it might scale to something that's a 100, a 1,000 times bigger. This is a place where you can get to do that."
} }
``` ```
@ -61,7 +56,7 @@ Receive a response in JSON format:
2. Open the plugin's settings. 2. Open the plugin's settings.
3. Set the following values: 3. Set the following values:
- API KEY: `sk-1` - API KEY: `sk-1`
- API URL: `http://localhost:3000/v1/audio/transcriptions` - API URL: `http://localhost:3031/v1/audio/transcriptions`
- Model: `whisper-1` - Model: `whisper-1`
# Roadmap # Roadmap
@ -70,8 +65,9 @@ Receive a response in JSON format:
- [x] Implement automatic `Whisper.dll` downloading from [GitHub releases](https://github.com/Const-me/Whisper/releases) - [x] Implement automatic `Whisper.dll` downloading from [GitHub releases](https://github.com/Const-me/Whisper/releases)
- [x] Provide prebuilt binaries for Windows - [x] Provide prebuilt binaries for Windows
- [ ] Include instructions for running on Linux with Wine (likely possible). - [ ] Include instructions for running on Linux with Wine (likely possible).
- [x] Use flags to override the model path - [ ] Use flags to override the model path
- [x] Use flags to override the port - [ ] Use flags to override the model type (when downloading the model)
- [ ] Use flags to override the port
# Credits # Credits

5
go.mod
View File

@ -10,20 +10,17 @@ require (
require ( require (
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/rivo/uniseg v0.2.0 // indirect github.com/rivo/uniseg v0.2.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/term v0.10.0 // indirect golang.org/x/term v0.10.0 // indirect
golang.org/x/time v0.3.0 // indirect golang.org/x/time v0.3.0 // indirect
) )
require ( require (
github.com/labstack/gommon v0.4.0 // indirect github.com/labstack/gommon v0.4.0
github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect github.com/mattn/go-isatty v0.0.19 // indirect
github.com/spf13/cobra v1.8.1
github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasttemplate v1.2.2 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect
golang.org/x/crypto v0.11.0 // indirect golang.org/x/crypto v0.11.0 // indirect

8
go.sum
View File

@ -1,11 +1,8 @@
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/labstack/echo/v4 v4.11.1 h1:dEpLU2FLg4UVmvCGPuk/APjlH6GDpbEPti61srUUUs4= github.com/labstack/echo/v4 v4.11.1 h1:dEpLU2FLg4UVmvCGPuk/APjlH6GDpbEPti61srUUUs4=
github.com/labstack/echo/v4 v4.11.1/go.mod h1:YuYRTSM3CHs2ybfrL8Px48bO6BAnYIN4l8wSTMP6BDQ= github.com/labstack/echo/v4 v4.11.1/go.mod h1:YuYRTSM3CHs2ybfrL8Px48bO6BAnYIN4l8wSTMP6BDQ=
@ -27,13 +24,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE= github.com/schollz/progressbar/v3 v3.13.1 h1:o8rySDYiQ59Mwzy2FELeHY5ZARXZTVJC7iHD6PEFUiE=
github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ= github.com/schollz/progressbar/v3 v3.13.1/go.mod h1:xvrbki8kfT1fzWzBT/UZd9L6GA+jdL7HAgq2RFnO6fQ=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=

View File

@ -50,6 +50,34 @@ func TranscribeFromFile(c echo.Context, whisperState *WhisperState) error {
return c.JSON(http.StatusOK, response) return c.JSON(http.StatusOK, response)
} }
// TranscribeBytes transcribes an in-memory audio buffer and returns the
// recognized text with leading spaces removed.
//
// The whisperState mutex is held for the whole call. The signature has no
// error result, so a failure in any stage is reported with the builtin
// println and an empty string is returned; later stages are not run on the
// output of a failed one.
func TranscribeBytes(buffer []byte, whisperState *WhisperState) string {
	whisperState.mutex.Lock()
	defer whisperState.mutex.Unlock()

	audio, err := whisperState.media.LoadAudioFileData(&buffer, true)
	if err != nil {
		// Pass err.Error(): the builtin println prints an interface value as
		// an address pair, not as the error message.
		println("Error loading audio file data: ", err.Error())
		return ""
	}

	err = whisperState.context.RunStreamed(whisperState.params, audio)
	if err != nil {
		println("Error processing audio: ", err.Error())
		return ""
	}

	result, err := getResult(whisperState.context)
	if err != nil {
		println("Error getting result: ", err.Error())
		return ""
	}

	return strings.TrimLeft(result, " ")
}
func Transcribe(c echo.Context, whisperState *WhisperState) error { func Transcribe(c echo.Context, whisperState *WhisperState) error {
// Get the file header // Get the file header
fileHeader, err := c.FormFile("file") fileHeader, err := c.FormFile("file")

View File

@ -1,105 +0,0 @@
package resources
import (
_ "embed"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"github.com/spf13/cobra"
)
//go:embed languageMap.json
var languageMapData []byte // Embedded language map file as a byte slice
type (
	// Arguments carries the raw values bound to the command-line flags.
	Arguments struct {
		Language  string
		ModelPath string
		Port      int
	}

	// ParsedArguments is the processed form of Arguments: the language name
	// has been replaced by its numeric code.
	ParsedArguments struct {
		Language  int32
		ModelPath string
		Port      int
	}

	// LanguageMap associates a language name with its hex code, stored as a
	// string.
	LanguageMap map[string]string
)
// processLanguageAndCode looks up language (case-insensitively) in the
// embedded language map and converts the hex string found there into an
// int32 code.
//
// On any failure it returns 0x6E65 together with an error describing which
// step failed.
func processLanguageAndCode(language string) (int32, error) {
	const fallbackCode int32 = 0x6E65

	var table LanguageMap
	if err := json.Unmarshal(languageMapData, &table); err != nil {
		return fallbackCode, fmt.Errorf("error parsing language map: %w", err)
	}

	key := strings.ToLower(language)
	hexText, found := table[key]
	if !found {
		return fallbackCode, fmt.Errorf("unsupported language")
	}
	fmt.Printf("Hex Code Found: %s\n", hexText)

	// Base 0 lets ParseInt honour the "0x" prefix of the stored value.
	parsed, err := strconv.ParseInt(hexText, 0, 32)
	if err != nil {
		return fallbackCode, fmt.Errorf("error converting hex code: %w", err)
	}
	return int32(parsed), nil
}
// ApplyExitOnHelp wraps the command's current help function so that the
// process terminates with exitCode right after the help text has been shown.
func ApplyExitOnHelp(c *cobra.Command, exitCode int) {
	showHelp := c.HelpFunc()
	exitAfterHelp := func(cmd *cobra.Command, args []string) {
		showHelp(cmd, args)
		os.Exit(exitCode)
	}
	c.SetHelpFunc(exitAfterHelp)
}
// ParseFlags builds the "whisper" root command, binds the language,
// modelPath and port flags, executes it and returns the processed arguments.
//
// A language that cannot be resolved is reported on stdout and replaced by
// 0x6E65 (English). An error from executing the command is returned as is.
func ParseFlags() (*ParsedArguments, error) {
	var (
		raw    Arguments
		result *ParsedArguments
	)

	cobra.MousetrapHelpText = ""

	root := &cobra.Command{
		Use:   "whisper",
		Short: "Audio transcription using the OpenAI Whisper models",
		RunE: func(_ *cobra.Command, _ []string) error {
			code, err := processLanguageAndCode(raw.Language)
			if err != nil {
				fmt.Println("Error setting language, defaulting to English")
				code = 0x6E65
			}

			result = &ParsedArguments{
				Language:  code,
				ModelPath: raw.ModelPath,
				Port:      raw.Port,
			}
			return nil
		},
	}

	flags := root.Flags()
	flags.StringVarP(&raw.Language, "language", "l", "", "Language to be processed")
	flags.StringVarP(&raw.ModelPath, "modelPath", "m", "ggml-medium.bin", "Path to the model file (required)")
	flags.IntVarP(&raw.Port, "port", "p", 3000, "Port to start the server on")

	ApplyExitOnHelp(root, 0)

	if err := root.Execute(); err != nil {
		return nil, err
	}
	return result, nil
}

View File

@ -0,0 +1,110 @@
package resources
import (
"encoding/json"
"flag"
"fmt"
"io"
"os"
"strconv"
"strings"
)
type (
	// Arguments holds the raw values read from the command-line flags.
	Arguments struct {
		Language  string
		ModelPath string
		Port      int
	}

	// ParsedArguments is the processed form of Arguments: the language name
	// has been replaced by its numeric code.
	ParsedArguments struct {
		Language  int32
		ModelPath string
		Port      int
		// Buffer []byte
	}

	// LanguageMap associates a language name with its hex code, stored as a
	// string.
	LanguageMap map[string]string
)
// processLanguageAndCode resolves args.Language to its int32 code using the
// "languageMap.json" file, which is opened relative to the current working
// directory.
//
// The lookup is case-insensitive. On any failure (file cannot be opened or
// read, invalid JSON, unknown language, malformed hex value) it returns
// 0x6E65 together with an error describing the failing step.
func processLanguageAndCode(args *Arguments) (int32, error) {
	const fallbackCode int32 = 0x6E65

	mapFile, err := os.Open("languageMap.json")
	if err != nil {
		return fallbackCode, fmt.Errorf("error opening language map: %w", err)
	}
	defer mapFile.Close()

	raw, err := io.ReadAll(mapFile)
	if err != nil {
		return fallbackCode, fmt.Errorf("error reading language map: %w", err)
	}

	var table LanguageMap
	if err := json.Unmarshal(raw, &table); err != nil {
		return fallbackCode, fmt.Errorf("error parsing language map: %w", err)
	}

	hexText, known := table[strings.ToLower(args.Language)]
	if !known {
		return fallbackCode, fmt.Errorf("unsupported language: %s", args.Language)
	}

	// Base 0 lets ParseInt honour the "0x" prefix of the stored value.
	parsed, err := strconv.ParseInt(hexText, 0, 32)
	if err != nil {
		return fallbackCode, fmt.Errorf("error converting hex code: %w", err)
	}
	return int32(parsed), nil
}
// ParseFlags parses the process command line and returns the resolved
// settings as a *ParsedArguments.
//
// Every option has a short and a long spelling bound to the same field:
// -l/-language, -m/-modelPath and -p/-port (default 3031). The model path is
// mandatory; an error is returned when it is empty. When the language cannot
// be resolved the problem is reported on stdout and the code returned by
// processLanguageAndCode alongside the error is used.
//
// The flags are registered on the global flag set, so ParseFlags must be
// called at most once per process.
func ParseFlags() (*ParsedArguments, error) {
	args := &Arguments{}

	flag.StringVar(&args.Language, "l", "", "Language to be processed")
	flag.StringVar(&args.Language, "language", "", "Language to be processed") // Optional: Redundant to demonstrate
	flag.StringVar(&args.ModelPath, "m", "", "Path to the model file (required)")
	flag.StringVar(&args.ModelPath, "modelPath", "", "Path to the model file (required)") // Optional: Redundant
	// bufferArg := flag.String("buffer", "", "Base64-encoded buffer data")
	flag.IntVar(&args.Port, "p", 3031, "Port to start the server on")
	flag.IntVar(&args.Port, "port", 3031, "Port to start the server on") // Optional: Redundant

	flag.Usage = func() {
		// Write the header to the same destination PrintDefaults uses so the
		// whole usage text ends up on one stream, and show the real program
		// name instead of a placeholder.
		out := flag.CommandLine.Output()
		fmt.Fprintf(out, "Usage: %s [OPTIONS]\n", os.Args[0])
		fmt.Fprintln(out, "Options:")
		flag.PrintDefaults() // Print default values for all flags
	}

	// Parsing flags
	flag.Parse()

	// if *bufferArg != "" {
	// decodedBuffer, err := base64.StdEncoding.DecodeString(*bufferArg)
	// if err != nil {
	// fmt.Println("Error decoding buffer:", err)
	// return nil, err
	// }
	// // Process the decoded buffer (e.g., print its contents)
	// fmt.Println("Decoded Buffer:", string(decodedBuffer))
	// }

	args.Language = strings.ToLower(args.Language)
	if args.ModelPath == "" {
		return nil, fmt.Errorf("modelPath argument is required")
	}

	languageCode, err := processLanguageAndCode(args)
	if err != nil {
		// Not fatal: languageCode still holds the code returned with the error.
		fmt.Println("Error setting language, defaulting to English:", err)
	}

	return &ParsedArguments{
		Language:  languageCode,
		ModelPath: args.ModelPath,
		Port:      args.Port,
		// Buffer: []byte(*bufferArg),
	}, nil
}

View File

@ -1,143 +0,0 @@
package resources
import (
"archive/zip"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"github.com/schollz/progressbar/v3"
)
// GetModel makes sure the model file named modelType is present, downloading
// it from the whisper.cpp Hugging Face repository when it is missing.
//
// It prints the absolute location of the model and returns modelType itself
// (not the absolute path) as the path to use.
func GetModel(modelType string) (string, error) {
	if !IsFileExists(modelType) {
		fmt.Println("Model not found.")

		source := fmt.Sprintf("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/%s", modelType)
		if err := DownloadFile(source, modelType); err != nil {
			return "", err
		}
	}

	resolved, err := filepath.Abs(modelType)
	if err != nil {
		return "", err
	}
	fmt.Printf("Model found: %s\n", resolved)

	return modelType, nil
}
// DownloadFile fetches url with an HTTP GET and writes the response body to
// filepath while showing a progress bar.
//
// The request is made before the destination is created, and any response
// other than 200 OK is treated as an error, so an error page is never saved
// as the downloaded file. If writing or closing the destination fails, the
// partial file is removed so that a later existence check does not mistake it
// for a complete download.
func DownloadFile(url string, filepath string) (err error) {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("downloading %s: unexpected status %s", url, resp.Status)
	}

	out, err := os.Create(filepath)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean buffered data never reached the disk.
		if closeErr := out.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort cleanup; the original error is what matters.
			_ = os.Remove(filepath)
		}
	}()

	bar := progressbar.DefaultBytes(
		resp.ContentLength,
		"Downloading",
	)

	_, err = io.Copy(io.MultiWriter(out, bar), resp.Body)
	return err
}
// GetWhisperDll makes sure "Whisper.dll" exists in the current directory.
//
// When it is missing, the Library.zip of the given release version is
// downloaded to a temporary file, "Binary/Whisper.dll" is extracted from it,
// and the temporary archive is deleted again. The function prints the
// absolute location of the library and returns the relative name
// "Whisper.dll".
func GetWhisperDll(version string) (string, error) {
	fileUrl := fmt.Sprintf("https://github.com/Const-me/Whisper/releases/download/%s/Library.zip", version)
	fileToExtract := "Binary/Whisper.dll"

	if !IsFileExists("Whisper.dll") {
		fmt.Println("Whisper DLL not found.")

		archive, err := os.CreateTemp("", "WhisperLibrary-*.zip")
		if err != nil {
			return "", err
		}
		archivePath := archive.Name()
		// Only the reserved unique name is needed; DownloadFile reopens the
		// path itself, so release this handle right away.
		if err := archive.Close(); err != nil {
			_ = os.Remove(archivePath)
			return "", err
		}
		// The archive is useless after extraction; do not leave it behind in
		// the temp directory.
		defer os.Remove(archivePath)

		if err := DownloadFile(fileUrl, archivePath); err != nil {
			return "", err
		}

		if err := extractFile(archivePath, fileToExtract); err != nil {
			return "", err
		}
	}

	absPath, err := filepath.Abs("Whisper.dll")
	if err != nil {
		return "", err
	}

	fmt.Printf("Library found: %s\n", absPath)
	return "Whisper.dll", nil
}
// extractFile copies the entry named fileToExtract out of the zip archive at
// archivePath into the current directory, using only the entry's base name
// as the target file name.
//
// It returns an error when the archive cannot be opened, when no entry has
// exactly that name, or when reading the entry or writing the target fails.
func extractFile(archivePath string, fileToExtract string) error {
	reader, err := zip.OpenReader(archivePath)
	if err != nil {
		return err
	}
	defer reader.Close()

	for _, file := range reader.File {
		if file.Name != fileToExtract {
			continue
		}
		return writeZipEntry(file, filepath.Base(fileToExtract))
	}

	return fmt.Errorf("File not found in the archive")
}

// writeZipEntry writes the contents of one archive entry to targetPath. The
// target is removed again if copying or closing fails, so a truncated file is
// never left behind looking like a successful extraction.
func writeZipEntry(file *zip.File, targetPath string) (err error) {
	// Open the entry first so an unreadable entry does not create the target.
	src, err := file.Open()
	if err != nil {
		return err
	}
	defer src.Close()

	dst, err := os.Create(targetPath)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean buffered data never reached the disk.
		if closeErr := dst.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort cleanup; the original error is what matters.
			_ = os.Remove(targetPath)
		}
	}()

	_, err = io.Copy(dst, src)
	return err
}
// IsFileExists reports whether filename can be considered present. It returns
// false only when os.Stat fails with a "does not exist" error; a successful
// stat and any other stat error both yield true.
func IsFileExists(filename string) bool {
	_, statErr := os.Stat(filename)
	return !os.IsNotExist(statErr)
}

View File

@ -1,76 +0,0 @@
package resources
import (
"bufio"
"fmt"
"os"
"path/filepath"
"strings"
)
// PromptUser prints question followed by " (y/n): " and reads one line from
// standard input. It returns true when the trimmed, lower-cased answer is "y"
// or "yes". A read error (including input that ends without a newline) is
// reported and treated as a refusal.
func PromptUser(question string) bool {
	fmt.Printf("%s (y/n): ", question)

	line, err := bufio.NewReader(os.Stdin).ReadString('\n')
	if err != nil {
		fmt.Println("Error reading input:", err)
		return false
	}

	switch strings.TrimSpace(strings.ToLower(line)) {
	case "y", "yes":
		return true
	default:
		return false
	}
}
// HandleWhisperDll returns "Whisper.dll" when that file is already present in
// the current directory. Otherwise it asks the user whether to download it:
// on agreement the download is delegated to GetWhisperDll, on refusal manual
// installation instructions are printed and an error is returned.
func HandleWhisperDll(version string) (string, error) {
	const dllName = "Whisper.dll"

	if IsFileExists(dllName) {
		resolved, err := filepath.Abs(dllName)
		if err != nil {
			return "", err
		}
		fmt.Printf("Library found: %s\n", resolved)
		return dllName, nil
	}

	fmt.Println("Whisper DLL not found.")

	if !PromptUser("Do you want to download Whisper.dll automatically?") {
		fmt.Println("To use Whisper, download the DLL manually:")
		fmt.Printf("URL: https://github.com/Const-me/Whisper/releases/download/%s/Library.zip\n", version)
		fmt.Println("Extract 'Binary/Whisper.dll' from the archive and place it in the executable's directory.")
		fmt.Println("You can manually specify path to .dll file using cli arguments, use --help to print available cli flags")
		return "", fmt.Errorf("whisper.dll not found and user chose not to download")
	}

	downloaded, err := GetWhisperDll(version)
	if err != nil {
		return "", fmt.Errorf("failed to download Whisper.dll: %w", err)
	}
	return downloaded, nil
}
// HandleDefaultModel returns modelType when a file of that name is already
// present. Otherwise it asks the user whether to download it: on agreement
// the download is delegated to GetModel, on refusal manual installation
// instructions are printed and an error is returned.
//
// The prompt names the model actually requested via modelType rather than a
// fixed file name, so it stays accurate for any model.
func HandleDefaultModel(modelType string) (string, error) {
	if IsFileExists(modelType) {
		absPath, err := filepath.Abs(modelType)
		if err != nil {
			return "", err
		}
		fmt.Printf("Model found: %s\n", absPath)
		return modelType, nil
	}

	fmt.Println("Default model not found.")

	question := fmt.Sprintf("Do you want to download the default model (%s) automatically?", modelType)
	if PromptUser(question) {
		path, err := GetModel(modelType)
		if err != nil {
			return "", fmt.Errorf("failed to download the default model: %w", err)
		}
		return path, nil
	}

	fmt.Println("To use Whisper, download the model manually:")
	fmt.Println("URL: https://huggingface.co/ggerganov/whisper.cpp/tree/main")
	fmt.Println("Place the model file in the executable's directory or specify its path using cli arguments.")
	fmt.Println("You can manually specify path to model file using cli arguments, use --help to print available cli flags")
	return "", fmt.Errorf("default model not found and user chose not to download")
}

87
main.go
View File

@ -1,31 +1,33 @@
package main package main
import ( import (
"bufio"
"encoding/base64"
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"github.com/labstack/echo/v4"
"github.com/labstack/echo/v4/middleware"
"github.com/xzeldon/whisper-api-server/internal/api" "github.com/xzeldon/whisper-api-server/internal/api"
"github.com/xzeldon/whisper-api-server/internal/resources" "github.com/xzeldon/whisper-api-server/internal/resources"
) )
const ( // begin delimiter const
defaultModelType = "ggml-medium.bin" const beginDelimiter = "[begin]"
defaultWhisperVersion = "1.12.0" const endDelimiter = "[end]"
)
func changeWorkingDirectory(e *echo.Echo) { func change_working_directory() {
exePath, err := os.Executable() exePath, errs := os.Executable()
if err != nil { if errs != nil {
e.Logger.Error("Error getting executable path: ", err) println("Error getting executable path")
return return
} }
exeDir := filepath.Dir(exePath) exeDir := filepath.Dir(exePath)
if err := os.Chdir(exeDir); err != nil {
e.Logger.Error("Error changing working directory: ", err) // Change the working directory to the executable directory
errs = os.Chdir(exeDir)
if errs != nil {
println("Error changing working directory")
return return
} }
@ -34,38 +36,47 @@ func changeWorkingDirectory(e *echo.Echo) {
} }
func main() { func main() {
e := echo.New()
e.HideBanner = true
changeWorkingDirectory(e)
args, err := resources.ParseFlags() change_working_directory()
if err != nil {
e.Logger.Error("Error parsing flags: ", err) args, errParsing := resources.ParseFlags()
if errParsing != nil {
println("Error parsing flags: ", errParsing)
return return
} }
if _, err := resources.HandleWhisperDll(defaultWhisperVersion); err != nil {
e.Logger.Error("Error handling Whisper.dll: ", err)
return
}
if _, err := resources.HandleDefaultModel(defaultModelType); err != nil {
e.Logger.Error("Error handling model file: ", err)
return
}
e.Use(middleware.CORS())
whisperState, err := api.InitializeWhisperState(args.ModelPath, args.Language) whisperState, err := api.InitializeWhisperState(args.ModelPath, args.Language)
if err != nil { if err != nil {
e.Logger.Error("Error initializing Whisper state: ", err) println("Error initializing whisper state: ", err)
return }
const maxCapacity = 2048 * 10240
scanner := bufio.NewScanner(os.Stdin)
buf := make([]byte, maxCapacity)
scanner.Buffer(buf, maxCapacity)
println("waiting_for_input")
if scanner.Scan() {
base64Data := scanner.Text()
decodedBuffer, err := base64.StdEncoding.DecodeString(base64Data)
if err != nil {
fmt.Println("Error decoding buffer:", err)
return
}
result := api.TranscribeBytes(decodedBuffer, whisperState)
println(beginDelimiter + result + endDelimiter)
println("finished")
// Process the decodedBuffer (e.g., print its length)
fmt.Println("Received buffer size:", len(decodedBuffer))
// Send a response back to Node.js (optional)
fmt.Fprintln(os.Stdout, "Buffer received successfully!")
} else if err := scanner.Err(); err != nil {
fmt.Println("Error reading from stdin:", err)
} }
e.POST("/v1/audio/transcriptions", func(c echo.Context) error { // e.Logger.Fatal(e.Start(fmt.Sprintf("127.0.0.1:%d", args.Port)))
return api.TranscribeFromFile(c, whisperState)
})
address := fmt.Sprintf("127.0.0.1:%d", args.Port)
e.Logger.Fatal(e.Start(address))
} }

View File

@ -93,7 +93,7 @@ func (this *FullParams) SetLanguage(language int32) {
return return
} }
this.cStruct.Language = eLanguage(language) this.cStruct.Language = language
} }
/*using pfnNewSegment = HRESULT( __cdecl* )( iContext* ctx, uint32_t n_new, void* user_data ) noexcept;*/ /*using pfnNewSegment = HRESULT( __cdecl* )( iContext* ctx, uint32_t n_new, void* user_data ) noexcept;*/
@ -178,7 +178,7 @@ type _FullParams struct {
offset_ms int32 offset_ms int32
duration_ms int32 duration_ms int32
Flags eFullParamsFlags Flags eFullParamsFlags
Language eLanguage Language int32
thold_pt float32 thold_pt float32
thold_ptsum float32 thold_ptsum float32

View File

@ -1,207 +0,0 @@
package whisper
// eLanguage is the int32 language identifier; the values below follow
// https://github.com/Const-me/Whisper/blob/master/WhisperNet/API/eLanguage.cs
type eLanguage int32

// Language codes. Each value is the ASCII text of the language code shown in
// the trailing comment, packed with the first character in the lowest byte
// (for example "en" = 'e' 0x65 | 'n' 0x6E << 8 = 0x6E65).
//
// NOTE(review): only Auto is declared with the eLanguage type. Every other
// entry has its own initializer and no type, so it is an untyped integer
// constant rather than an eLanguage — confirm whether that is intended.
const (
Auto eLanguage = -1 // sentinel value, not a packed language code
Afrikaans = 0x6661 // "af"
/// <summary>Albanian</summary>
Albanian = 0x7173 // "sq"
/// <summary>Amharic</summary>
Amharic = 0x6D61 // "am"
/// <summary>Arabic</summary>
Arabic = 0x7261 // "ar"
/// <summary>Armenian</summary>
Armenian = 0x7968 // "hy"
/// <summary>Assamese</summary>
Assamese = 0x7361 // "as"
/// <summary>Azerbaijani</summary>
Azerbaijani = 0x7A61 // "az"
/// <summary>Bashkir</summary>
Bashkir = 0x6162 // "ba"
/// <summary>Basque</summary>
Basque = 0x7565 // "eu"
/// <summary>Belarusian</summary>
Belarusian = 0x6562 // "be"
/// <summary>Bengali</summary>
Bengali = 0x6E62 // "bn"
/// <summary>Bosnian</summary>
Bosnian = 0x7362 // "bs"
/// <summary>Breton</summary>
Breton = 0x7262 // "br"
/// <summary>Bulgarian</summary>
Bulgarian = 0x6762 // "bg"
/// <summary>Catalan</summary>
Catalan = 0x6163 // "ca"
/// <summary>Chinese</summary>
Chinese = 0x687A // "zh"
/// <summary>Croatian</summary>
Croatian = 0x7268 // "hr"
/// <summary>Czech</summary>
Czech = 0x7363 // "cs"
/// <summary>Danish</summary>
Danish = 0x6164 // "da"
/// <summary>Dutch</summary>
Dutch = 0x6C6E // "nl"
/// <summary>English</summary>
English = 0x6E65 // "en"
/// <summary>Estonian</summary>
Estonian = 0x7465 // "et"
/// <summary>Faroese</summary>
Faroese = 0x6F66 // "fo"
/// <summary>Finnish</summary>
Finnish = 0x6966 // "fi"
/// <summary>French</summary>
French = 0x7266 // "fr"
/// <summary>Galician</summary>
Galician = 0x6C67 // "gl"
/// <summary>Georgian</summary>
Georgian = 0x616B // "ka"
/// <summary>German</summary>
German = 0x6564 // "de"
/// <summary>Greek</summary>
Greek = 0x6C65 // "el"
/// <summary>Gujarati</summary>
Gujarati = 0x7567 // "gu"
/// <summary>Haitian Creole</summary>
HaitianCreole = 0x7468 // "ht"
/// <summary>Hausa</summary>
Hausa = 0x6168 // "ha"
/// <summary>Hawaiian</summary>
Hawaiian = 0x776168 // "haw"
/// <summary>Hebrew</summary>
Hebrew = 0x7769 // "iw"
/// <summary>Hindi</summary>
Hindi = 0x6968 // "hi"
/// <summary>Hungarian</summary>
Hungarian = 0x7568 // "hu"
/// <summary>Icelandic</summary>
Icelandic = 0x7369 // "is"
/// <summary>Indonesian</summary>
Indonesian = 0x6469 // "id"
/// <summary>Italian</summary>
Italian = 0x7469 // "it"
/// <summary>Japanese</summary>
Japanese = 0x616A // "ja"
/// <summary>Javanese</summary>
Javanese = 0x776A // "jw"
/// <summary>Kannada</summary>
Kannada = 0x6E6B // "kn"
/// <summary>Kazakh</summary>
Kazakh = 0x6B6B // "kk"
/// <summary>Khmer</summary>
Khmer = 0x6D6B // "km"
/// <summary>Korean</summary>
Korean = 0x6F6B // "ko"
/// <summary>Lao</summary>
Lao = 0x6F6C // "lo"
/// <summary>Latin</summary>
Latin = 0x616C // "la"
/// <summary>Latvian</summary>
Latvian = 0x766C // "lv"
/// <summary>Lingala</summary>
Lingala = 0x6E6C // "ln"
/// <summary>Lithuanian</summary>
Lithuanian = 0x746C // "lt"
/// <summary>Luxembourgish</summary>
Luxembourgish = 0x626C // "lb"
/// <summary>Macedonian</summary>
Macedonian = 0x6B6D // "mk"
/// <summary>Malagasy</summary>
Malagasy = 0x676D // "mg"
/// <summary>Malay</summary>
Malay = 0x736D // "ms"
/// <summary>Malayalam</summary>
Malayalam = 0x6C6D // "ml"
/// <summary>Maltese</summary>
Maltese = 0x746D // "mt"
/// <summary>Maori</summary>
Maori = 0x696D // "mi"
/// <summary>Marathi</summary>
Marathi = 0x726D // "mr"
/// <summary>Mongolian</summary>
Mongolian = 0x6E6D // "mn"
/// <summary>Myanmar</summary>
Myanmar = 0x796D // "my"
/// <summary>Nepali</summary>
Nepali = 0x656E // "ne"
/// <summary>Norwegian</summary>
Norwegian = 0x6F6E // "no"
/// <summary>Nynorsk</summary>
Nynorsk = 0x6E6E // "nn"
/// <summary>Occitan</summary>
Occitan = 0x636F // "oc"
/// <summary>Pashto</summary>
Pashto = 0x7370 // "ps"
/// <summary>Persian</summary>
Persian = 0x6166 // "fa"
/// <summary>Polish</summary>
Polish = 0x6C70 // "pl"
/// <summary>Portuguese</summary>
Portuguese = 0x7470 // "pt"
/// <summary>Punjabi</summary>
Punjabi = 0x6170 // "pa"
/// <summary>Romanian</summary>
Romanian = 0x6F72 // "ro"
/// <summary>Russian</summary>
Russian = 0x7572 // "ru"
/// <summary>Sanskrit</summary>
Sanskrit = 0x6173 // "sa"
/// <summary>Serbian</summary>
Serbian = 0x7273 // "sr"
/// <summary>Shona</summary>
Shona = 0x6E73 // "sn"
/// <summary>Sindhi</summary>
Sindhi = 0x6473 // "sd"
/// <summary>Sinhala</summary>
Sinhala = 0x6973 // "si"
/// <summary>Slovak</summary>
Slovak = 0x6B73 // "sk"
/// <summary>Slovenian</summary>
Slovenian = 0x6C73 // "sl"
/// <summary>Somali</summary>
Somali = 0x6F73 // "so"
/// <summary>Spanish</summary>
Spanish = 0x7365 // "es"
/// <summary>Sundanese</summary>
Sundanese = 0x7573 // "su"
/// <summary>Swahili</summary>
Swahili = 0x7773 // "sw"
/// <summary>Swedish</summary>
Swedish = 0x7673 // "sv"
/// <summary>Tagalog</summary>
Tagalog = 0x6C74 // "tl"
/// <summary>Tajik</summary>
Tajik = 0x6774 // "tg"
/// <summary>Tamil</summary>
Tamil = 0x6174 // "ta"
/// <summary>Tatar</summary>
Tatar = 0x7474 // "tt"
/// <summary>Telugu</summary>
Telugu = 0x6574 // "te"
/// <summary>Thai</summary>
Thai = 0x6874 // "th"
/// <summary>Tibetan</summary>
Tibetan = 0x6F62 // "bo"
/// <summary>Turkish</summary>
Turkish = 0x7274 // "tr"
/// <summary>Turkmen</summary>
Turkmen = 0x6B74 // "tk"
/// <summary>Ukrainian</summary>
Ukrainian = 0x6B75 // "uk"
/// <summary>Urdu</summary>
Urdu = 0x7275 // "ur"
/// <summary>Uzbek</summary>
Uzbek = 0x7A75 // "uz"
/// <summary>Vietnamese</summary>
Vietnamese = 0x6976 // "vi"
/// <summary>Welsh</summary>
Welsh = 0x7963 // "cy"
/// <summary>Yiddish</summary>
Yiddish = 0x6979 // "yi"
/// <summary>Yoruba</summary>
Yoruba = 0x6F79 // "yo"
)

View File

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD // Use of this source code is governed by a BSD
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// Adapted mainly from github.com/gonutz/w32 // Adapted mainly from github.com/gonutz/w32
//go:build windows //go:build windows
// +build windows // +build windows
@ -9,7 +10,7 @@ package whisper
import ( import (
"errors" "errors"
"fmt" "syscall"
"unsafe" "unsafe"
"golang.org/x/sys/windows" "golang.org/x/sys/windows"
@ -50,46 +51,34 @@ func (fi VS_FIXEDFILEINFO) FileVersion() uint64 {
return uint64(fi.FileVersionMS)<<32 | uint64(fi.FileVersionLS) return uint64(fi.FileVersionMS)<<32 | uint64(fi.FileVersionLS)
} }
func GetFileVersionInfoSize(path string) (uint32, error) { func GetFileVersionInfoSize(path string) uint32 {
pathPtr, err := windows.UTF16PtrFromString(path)
if err != nil {
return 0, err
}
ret, _, _ := getFileVersionInfoSize.Call( ret, _, _ := getFileVersionInfoSize.Call(
uintptr(unsafe.Pointer(pathPtr)), uintptr(unsafe.Pointer(syscall.StringToUTF16Ptr(path))),
0, 0,
) )
return uint32(ret), nil return uint32(ret)
} }
func GetFileVersionInfo(path string, data []byte) (bool, error) { func GetFileVersionInfo(path string, data []byte) bool {
pathPtr, err := windows.UTF16PtrFromString(path)
if err != nil {
return false, err
}
ret, _, _ := getFileVersionInfo.Call( ret, _, _ := getFileVersionInfo.Call(
uintptr(unsafe.Pointer(pathPtr)), uintptr(unsafe.Pointer(syscall.StringToUTF16Ptr(path))),
0, 0,
uintptr(len(data)), uintptr(len(data)),
uintptr(unsafe.Pointer(&data[0])), uintptr(unsafe.Pointer(&data[0])),
) )
return ret != 0, nil return ret != 0
} }
// VerQueryValueRoot calls VerQueryValue // VerQueryValueRoot calls VerQueryValue
// (https://msdn.microsoft.com/en-us/library/windows/desktop/ms647464(v=vs.85).aspx) // (https://msdn.microsoft.com/en-us/library/windows/desktop/ms647464(v=vs.85).aspx)
// with \ (root) to retrieve the VS_FIXEDFILEINFO. // with `\` (root) to retieve the VS_FIXEDFILEINFO.
func VerQueryValueRoot(block []byte) (VS_FIXEDFILEINFO, error) { func VerQueryValueRoot(block []byte) (VS_FIXEDFILEINFO, error) {
var offset uintptr var offset uintptr
var length uint var length uint
blockStart := unsafe.Pointer(&block[0]) blockStart := unsafe.Pointer(&block[0])
rootPathPtr, err := windows.UTF16PtrFromString(`\`)
if err != nil {
return VS_FIXEDFILEINFO{}, err
}
ret, _, _ := verQueryValue.Call( ret, _, _ := verQueryValue.Call(
uintptr(blockStart), uintptr(blockStart),
uintptr(unsafe.Pointer(rootPathPtr)), uintptr(unsafe.Pointer(syscall.StringToUTF16Ptr(`\`))),
uintptr(unsafe.Pointer(&offset)), uintptr(unsafe.Pointer(&offset)),
uintptr(unsafe.Pointer(&length)), uintptr(unsafe.Pointer(&length)),
) )
@ -108,24 +97,27 @@ func VerQueryValueRoot(block []byte) (VS_FIXEDFILEINFO, error) {
func GetFileVersion(path string) (WinVersion, error) { func GetFileVersion(path string) (WinVersion, error) {
var result WinVersion var result WinVersion
size, err := GetFileVersionInfoSize(path) size := GetFileVersionInfoSize(path)
fmt.Println(path) if size <= 0 {
if err != nil || size <= 0 {
return result, errors.New("GetFileVersionInfoSize failed") return result, errors.New("GetFileVersionInfoSize failed")
} }
info := make([]byte, size) info := make([]byte, size)
ok, err := GetFileVersionInfo(path, info) ok := GetFileVersionInfo(path, info)
if err != nil || !ok { if !ok {
return result, errors.New("GetFileVersionInfo failed") return result, errors.New("GetFileVersionInfo failed")
} }
fixed, err := VerQueryValueRoot(info) fixed, err := VerQueryValueRoot(info)
if err != nil { if err != nil {
return result, err return result, err
} }
version := fixed.FileVersion() version := fixed.FileVersion()
result.Major = uint32(version & 0xFFFF000000000000 >> 48) result.Major = uint32(version & 0xFFFF000000000000 >> 48)
result.Minor = uint32(version & 0x0000FFFF00000000 >> 32) result.Minor = uint32(version & 0x0000FFFF00000000 >> 32)
result.Patch = uint32(version & 0x00000000FFFF0000 >> 16) result.Patch = uint32(version & 0x00000000FFFF0000 >> 16)
result.Build = uint32(version & 0x000000000000FFFF) result.Build = uint32(version & 0x000000000000FFFF)
return result, nil return result, nil
} }