mirror of
https://github.com/xzeldon/whisper-api-server.git
synced 2025-04-20 17:05:46 +03:00
Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
6289e7f401 | ||
![]() |
2d44f72466 | ||
![]() |
8275ad584b | ||
![]() |
1546e3f145 | ||
![]() |
fc48cca110 | ||
![]() |
c4daf3ec71 | ||
![]() |
dde206facd |
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@ -29,4 +29,4 @@ jobs:
|
|||||||
version: ${{ env.GITHUB_REF_NAME }}
|
version: ${{ env.GITHUB_REF_NAME }}
|
||||||
args: release --clean
|
args: release --clean
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.PUBLISHER_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,5 +1,6 @@
|
|||||||
Whisper.dll
|
Whisper.dll
|
||||||
ggml-medium.bin
|
ggml-*
|
||||||
|
*.exe
|
||||||
|
|
||||||
whisper-api-server.exe
|
whisper-api-server.exe
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ go build -ldflags "-s -w" -o server.exe main.go
|
|||||||
Make a request to the server using the following command:
|
Make a request to the server using the following command:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
curl http://localhost:3000/v1/audio/transcriptions \
|
curl http://localhost:3031/v1/audio/transcriptions \
|
||||||
-H "Content-Type: multipart/form-data" \
|
-H "Content-Type: multipart/form-data" \
|
||||||
-F file="@/path/to/file/audio.mp3"
|
-F file="@/path/to/file/audio.mp3"
|
||||||
```
|
```
|
||||||
@ -56,7 +56,7 @@ Receive a response in JSON format:
|
|||||||
2. Open the plugin's settings.
|
2. Open the plugin's settings.
|
||||||
3. Set the following values:
|
3. Set the following values:
|
||||||
- API KEY: `sk-1`
|
- API KEY: `sk-1`
|
||||||
- API URL: `http://localhost:3000/v1/audio/transcriptions`
|
- API URL: `http://localhost:3031/v1/audio/transcriptions`
|
||||||
- Model: `whisper-1`
|
- Model: `whisper-1`
|
||||||
|
|
||||||
# Roadmap
|
# Roadmap
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@ -11,7 +12,7 @@ type TranscribeResponse struct {
|
|||||||
Text string `json:"text"`
|
Text string `json:"text"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func Transcribe(c echo.Context, whisperState *WhisperState) error {
|
func TranscribeFromFile(c echo.Context, whisperState *WhisperState) error {
|
||||||
audioPath, err := saveFormFile("file", c)
|
audioPath, err := saveFormFile("file", c)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.Logger().Errorf("Error reading file: %s", err)
|
c.Logger().Errorf("Error reading file: %s", err)
|
||||||
@ -26,6 +27,11 @@ func Transcribe(c echo.Context, whisperState *WhisperState) error {
|
|||||||
|
|
||||||
err = whisperState.context.RunFull(whisperState.params, buffer)
|
err = whisperState.context.RunFull(whisperState.params, buffer)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
c.Logger().Errorf("Error processing audio: %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
result, err := getResult(whisperState.context)
|
result, err := getResult(whisperState.context)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.Logger().Error(err)
|
c.Logger().Error(err)
|
||||||
@ -43,3 +49,87 @@ func Transcribe(c echo.Context, whisperState *WhisperState) error {
|
|||||||
|
|
||||||
return c.JSON(http.StatusOK, response)
|
return c.JSON(http.StatusOK, response)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TranscribeBytes(buffer []byte, whisperState *WhisperState) string {
|
||||||
|
whisperState.mutex.Lock()
|
||||||
|
defer whisperState.mutex.Unlock()
|
||||||
|
|
||||||
|
bufferSpecial, err := whisperState.media.LoadAudioFileData(&buffer, true)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
println("Error loading audio file data: ", err)
|
||||||
|
// return err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = whisperState.context.RunStreamed(whisperState.params, bufferSpecial)
|
||||||
|
if err != nil {
|
||||||
|
println("Error processing audio: ", err)
|
||||||
|
// return err
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := getResult(whisperState.context)
|
||||||
|
if err != nil {
|
||||||
|
println("Error getting result: ", err)
|
||||||
|
// return err
|
||||||
|
}
|
||||||
|
|
||||||
|
trimed := strings.TrimLeft(result, " ")
|
||||||
|
return trimed
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func Transcribe(c echo.Context, whisperState *WhisperState) error {
|
||||||
|
// Get the file header
|
||||||
|
fileHeader, err := c.FormFile("file")
|
||||||
|
if err != nil {
|
||||||
|
c.Logger().Errorf("Error retrieving the file: %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open the file
|
||||||
|
file, err := fileHeader.Open()
|
||||||
|
if err != nil {
|
||||||
|
c.Logger().Errorf("Error opening the file: %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
// Read the file into a buffer
|
||||||
|
buffer, err := io.ReadAll(file)
|
||||||
|
if err != nil {
|
||||||
|
c.Logger().Errorf("Error reading the file into buffer: %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
whisperState.mutex.Lock()
|
||||||
|
defer whisperState.mutex.Unlock()
|
||||||
|
|
||||||
|
bufferSpecial, err := whisperState.media.LoadAudioFileData(&buffer, true)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
c.Logger().Errorf("Error loading audio file data: %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = whisperState.context.RunStreamed(whisperState.params, bufferSpecial)
|
||||||
|
if err != nil {
|
||||||
|
c.Logger().Errorf("Error processing audio: %s", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := getResult(whisperState.context)
|
||||||
|
if err != nil {
|
||||||
|
c.Logger().Error(err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result) == 0 {
|
||||||
|
return c.JSON(http.StatusInternalServerError, map[string]string{"error": "Internal server error"})
|
||||||
|
}
|
||||||
|
|
||||||
|
response := TranscribeResponse{
|
||||||
|
Text: strings.TrimLeft(result, " "),
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.JSON(http.StatusOK, response)
|
||||||
|
}
|
||||||
|
@ -15,7 +15,7 @@ type WhisperState struct {
|
|||||||
mutex sync.Mutex
|
mutex sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func InitializeWhisperState(modelPath string) (*WhisperState, error) {
|
func InitializeWhisperState(modelPath string, lang int32) (*WhisperState, error) {
|
||||||
lib, err := whisper.New(whisper.LlDebug, whisper.LfUseStandardError, nil)
|
lib, err := whisper.New(whisper.LlDebug, whisper.LfUseStandardError, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -41,6 +41,8 @@ func InitializeWhisperState(modelPath string) (*WhisperState, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
params.SetLanguage(lang)
|
||||||
|
|
||||||
fmt.Printf("Params CPU Threads : %d\n", params.CpuThreads())
|
fmt.Printf("Params CPU Threads : %d\n", params.CpuThreads())
|
||||||
|
|
||||||
return &WhisperState{
|
return &WhisperState{
|
||||||
|
110
internal/resources/cli_arguments.go
Normal file
110
internal/resources/cli_arguments.go
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
package resources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Arguments holds the command-line values exactly as the flag package
// delivers them, before the language is converted to a numeric code.
type Arguments struct {
	// Language is the language key given with -l / -language.
	Language string
	// ModelPath is the model file path given with -m / -modelPath.
	ModelPath string
	// Port is the port given with -p / -port.
	Port int
}
|
||||||
|
// ParsedArguments is the validated form of the command-line arguments: the
// language is carried as a numeric code instead of a string.
type ParsedArguments struct {
	// Language is the numeric language code (for example 0x6E65 for "en").
	Language int32
	// ModelPath is the path to the model file.
	ModelPath string
	// Port is the port number taken from the command line.
	Port int
}
|
||||||
|
|
||||||
|
// LanguageMap maps a lower-case language key (such as "en") to the textual
// form of its numeric code (such as "0x6E65"), as stored in languageMap.json.
type LanguageMap map[string]string
|
||||||
|
|
||||||
|
func processLanguageAndCode(args *Arguments) (int32, error) {
|
||||||
|
// Read the language map from JSON file
|
||||||
|
jsonFile, err := os.Open("languageMap.json")
|
||||||
|
if err != nil {
|
||||||
|
return 0x6E65, fmt.Errorf("error opening language map: %w", err) // Wrap error for context
|
||||||
|
}
|
||||||
|
defer jsonFile.Close()
|
||||||
|
|
||||||
|
byteData, err := io.ReadAll(jsonFile)
|
||||||
|
if err != nil {
|
||||||
|
return 0x6E65, fmt.Errorf("error reading language map: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var languageMap LanguageMap
|
||||||
|
err = json.Unmarshal(byteData, &languageMap)
|
||||||
|
if err != nil {
|
||||||
|
return 0x6E65, fmt.Errorf("error parsing language map: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
hexCode, ok := languageMap[strings.ToLower(args.Language)]
|
||||||
|
if !ok {
|
||||||
|
return 0x6E65, fmt.Errorf("unsupported language: %s", args.Language)
|
||||||
|
}
|
||||||
|
|
||||||
|
languageCode, err := strconv.ParseInt(hexCode, 0, 32)
|
||||||
|
if err != nil {
|
||||||
|
return 0x6E65, fmt.Errorf("error converting hex code: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return int32(languageCode), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseFlags parses command line arguments and returns an Arguments struct
|
||||||
|
func ParseFlags() (*ParsedArguments, error) {
|
||||||
|
args := &Arguments{}
|
||||||
|
|
||||||
|
flag.StringVar(&args.Language, "l", "", "Language to be processed")
|
||||||
|
flag.StringVar(&args.Language, "language", "", "Language to be processed") // Optional: Redundant to demonstrate
|
||||||
|
flag.StringVar(&args.ModelPath, "m", "", "Path to the model file (required)")
|
||||||
|
flag.StringVar(&args.ModelPath, "modelPath", "", "Path to the model file (required)") // Optional: Redundant
|
||||||
|
// bufferArg := flag.String("buffer", "", "Base64-encoded buffer data")
|
||||||
|
|
||||||
|
flag.IntVar(&args.Port, "p", 3031, "Port to start the server on")
|
||||||
|
flag.IntVar(&args.Port, "port", 3031, "Port to start the server on") // Optional: Redundant
|
||||||
|
|
||||||
|
flag.Usage = func() {
|
||||||
|
fmt.Println("Usage: your_program [OPTIONS]")
|
||||||
|
fmt.Println("Options:")
|
||||||
|
flag.PrintDefaults() // Print default values for all flags
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parsing flags
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// if *bufferArg != "" {
|
||||||
|
// decodedBuffer, err := base64.StdEncoding.DecodeString(*bufferArg)
|
||||||
|
// if err != nil {
|
||||||
|
// fmt.Println("Error decoding buffer:", err)
|
||||||
|
// return nil, err
|
||||||
|
// }
|
||||||
|
// // Process the decoded buffer (e.g., print its contents)
|
||||||
|
// fmt.Println("Decoded Buffer:", string(decodedBuffer))
|
||||||
|
// }
|
||||||
|
|
||||||
|
args.Language = strings.ToLower(args.Language)
|
||||||
|
|
||||||
|
if args.ModelPath == "" {
|
||||||
|
return nil, fmt.Errorf("modelPath argument is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
languageCode, err := processLanguageAndCode(args)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Error setting language, defaulting to English:", err)
|
||||||
|
// Use default language code directly as the result here
|
||||||
|
}
|
||||||
|
|
||||||
|
return &ParsedArguments{
|
||||||
|
Language: languageCode,
|
||||||
|
ModelPath: args.ModelPath,
|
||||||
|
Port: args.Port,
|
||||||
|
// Buffer: []byte(*bufferArg),
|
||||||
|
}, nil
|
||||||
|
}
|
@ -1,38 +0,0 @@
|
|||||||
package resources
|
|
||||||
|
|
||||||
import (
|
|
||||||
"io"
|
|
||||||
"net/http"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"github.com/schollz/progressbar/v3"
|
|
||||||
)
|
|
||||||
|
|
||||||
func DownloadFile(url string, filepath string) error {
|
|
||||||
out, err := os.Create(filepath)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer out.Close()
|
|
||||||
|
|
||||||
resp, err := http.Get(url)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
fileSize := resp.ContentLength
|
|
||||||
bar := progressbar.DefaultBytes(
|
|
||||||
fileSize,
|
|
||||||
"Downloading",
|
|
||||||
)
|
|
||||||
|
|
||||||
writer := io.MultiWriter(out, bar)
|
|
||||||
|
|
||||||
_, err = io.Copy(writer, resp.Body)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
@ -1,29 +0,0 @@
|
|||||||
package resources
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"path/filepath"
|
|
||||||
)
|
|
||||||
|
|
||||||
func GetModel(modelType string) (string, error) {
|
|
||||||
fileURL := fmt.Sprintf("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/%s", modelType)
|
|
||||||
filePath := modelType
|
|
||||||
|
|
||||||
isModelFileExists := IsFileExists(filePath)
|
|
||||||
|
|
||||||
if !isModelFileExists {
|
|
||||||
fmt.Println("Model not found.")
|
|
||||||
err := DownloadFile(fileURL, filePath)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
absPath, err := filepath.Abs(filePath)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Printf("Model found: %s\n", absPath)
|
|
||||||
return filePath, nil
|
|
||||||
}
|
|
@ -1,13 +0,0 @@
|
|||||||
package resources
|
|
||||||
|
|
||||||
import "os"
|
|
||||||
|
|
||||||
func IsFileExists(filename string) bool {
|
|
||||||
_, err := os.Stat(filename)
|
|
||||||
if err != nil {
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
@ -1,78 +0,0 @@
|
|||||||
package resources
|
|
||||||
|
|
||||||
import (
|
|
||||||
"archive/zip"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
)
|
|
||||||
|
|
||||||
func GetWhisperDll(version string) (string, error) {
|
|
||||||
fileUrl := fmt.Sprintf("https://github.com/Const-me/Whisper/releases/download/%s/Library.zip", version)
|
|
||||||
fileToExtract := "Binary/Whisper.dll"
|
|
||||||
|
|
||||||
isWhisperDllExists := IsFileExists("Whisper.dll")
|
|
||||||
|
|
||||||
if !isWhisperDllExists {
|
|
||||||
fmt.Println("Whisper DLL not found.")
|
|
||||||
archivePath, err := os.CreateTemp("", "WhisperLibrary-*.zip")
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
defer archivePath.Close()
|
|
||||||
|
|
||||||
err = DownloadFile(fileUrl, archivePath.Name())
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
err = extractFile(archivePath.Name(), fileToExtract)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
absPath, err := filepath.Abs("Whisper.dll")
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Printf("Library found: %s\n", absPath)
|
|
||||||
return "Whisper.dll", nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func extractFile(archivePath string, fileToExtract string) error {
|
|
||||||
reader, err := zip.OpenReader(archivePath)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer reader.Close()
|
|
||||||
|
|
||||||
for _, file := range reader.File {
|
|
||||||
if file.Name == fileToExtract {
|
|
||||||
targetPath := filepath.Base(fileToExtract)
|
|
||||||
|
|
||||||
writer, err := os.Create(targetPath)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer writer.Close()
|
|
||||||
|
|
||||||
src, err := file.Open()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer src.Close()
|
|
||||||
|
|
||||||
_, err = io.Copy(writer, src)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt.Errorf("File not found in the archive")
|
|
||||||
}
|
|
84
languageMap.json
Normal file
84
languageMap.json
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
{
|
||||||
|
"af": "0x6661",
|
||||||
|
"sq": "0x7173",
|
||||||
|
"am": "0x6D61",
|
||||||
|
"ar": "0x7261",
|
||||||
|
"hy": "0x7968",
|
||||||
|
"as": "0x7361",
|
||||||
|
"az": "0x7A61",
|
||||||
|
"ba": "0x6162",
|
||||||
|
"eu": "0x7565",
|
||||||
|
"be": "0x6562",
|
||||||
|
"bn": "0x6E62",
|
||||||
|
"bs": "0x7362",
|
||||||
|
"br": "0x7262",
|
||||||
|
"bg": "0x6762",
|
||||||
|
"ca": "0x6163",
|
||||||
|
"zh": "0x687A",
|
||||||
|
"hr": "0x7268",
|
||||||
|
"cs": "0x7363",
|
||||||
|
"da": "0x6164",
|
||||||
|
"nl": "0x6C6E",
|
||||||
|
"en": "0x6E65",
|
||||||
|
"et": "0x7465",
|
||||||
|
"fo": "0x6F66",
|
||||||
|
"fi": "0x6966",
|
||||||
|
"fr": "0x7266",
|
||||||
|
"gl": "0x6C67",
|
||||||
|
"ka": "0x616B",
|
||||||
|
"de": "0x6564",
|
||||||
|
"el": "0x6C65",
|
||||||
|
"gu": "0x7567",
|
||||||
|
"he": "0x7769",
|
||||||
|
"hi": "0x6968",
|
||||||
|
"hu": "0x7568",
|
||||||
|
"is": "0x7369",
|
||||||
|
"id": "0x6469",
|
||||||
|
"it": "0x7469",
|
||||||
|
"ja": "0x616A",
|
||||||
|
"kn": "0x6E6B",
|
||||||
|
"kk": "0x6B6B",
|
||||||
|
"km": "0x6D6B",
|
||||||
|
"ko": "0x6F6B",
|
||||||
|
"ky": "0x796B",
|
||||||
|
"lo": "0x6F6C",
|
||||||
|
"lv": "0x766C",
|
||||||
|
"lt": "0x746C",
|
||||||
|
"mk": "0x6B6D",
|
||||||
|
"ms": "0x736D",
|
||||||
|
"ml": "0x6C6D",
|
||||||
|
"mr": "0x726D",
|
||||||
|
"mn": "0x6E6D",
|
||||||
|
"ne": "0x656E",
|
||||||
|
"no": "0x6F6E",
|
||||||
|
"or": "0x726F",
|
||||||
|
"ps": "0x7370",
|
||||||
|
"fa": "0x6166",
|
||||||
|
"pl": "0x6C70",
|
||||||
|
"pt": "0x7470",
|
||||||
|
"pa": "0x6170",
|
||||||
|
"ro": "0x6F72",
|
||||||
|
"ru": "0x7572",
|
||||||
|
"sa": "0x6173",
|
||||||
|
"sr": "0x7273",
|
||||||
|
"sd": "0x6473",
|
||||||
|
"si": "0x6973",
|
||||||
|
"sk": "0x6B73",
|
||||||
|
"sl": "0x6C73",
|
||||||
|
"es": "0x7365",
|
||||||
|
"sw": "0x7773",
|
||||||
|
"sv": "0x7673",
|
||||||
|
"tg": "0x6774",
|
||||||
|
"ta": "0x6174",
|
||||||
|
"te": "0x6574",
|
||||||
|
"th": "0x6874",
|
||||||
|
"tr": "0x7274",
|
||||||
|
"uk": "0x6B75",
|
||||||
|
"ur": "0x7275",
|
||||||
|
"uz": "0x7A75",
|
||||||
|
"vi": "0x6976",
|
||||||
|
"cy": "0x7963",
|
||||||
|
"xh": "0x6877",
|
||||||
|
"yi": "0x6979",
|
||||||
|
"yo": "0x6F79"
|
||||||
|
}
|
97
main.go
97
main.go
@ -1,41 +1,82 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/labstack/echo/v4"
|
"bufio"
|
||||||
"github.com/labstack/echo/v4/middleware"
|
"encoding/base64"
|
||||||
"github.com/labstack/gommon/log"
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/xzeldon/whisper-api-server/internal/api"
|
"github.com/xzeldon/whisper-api-server/internal/api"
|
||||||
"github.com/xzeldon/whisper-api-server/internal/resources"
|
"github.com/xzeldon/whisper-api-server/internal/resources"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
// begin delimiter const
|
||||||
e := echo.New()
|
const beginDelimiter = "[begin]"
|
||||||
e.HideBanner = true
|
const endDelimiter = "[end]"
|
||||||
|
|
||||||
e.Use(middleware.CORS())
|
func change_working_directory() {
|
||||||
|
exePath, errs := os.Executable()
|
||||||
if l, ok := e.Logger.(*log.Logger); ok {
|
if errs != nil {
|
||||||
l.SetHeader("${time_rfc3339} ${level}")
|
println("Error getting executable path")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err := resources.GetWhisperDll("1.12.0")
|
exeDir := filepath.Dir(exePath)
|
||||||
if err != nil {
|
|
||||||
e.Logger.Error(err)
|
// Change the working directory to the executable directory
|
||||||
|
errs = os.Chdir(exeDir)
|
||||||
|
if errs != nil {
|
||||||
|
println("Error changing working directory")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
model, err := resources.GetModel("ggml-medium.bin")
|
cwd, _ := os.Getwd()
|
||||||
if err != nil {
|
fmt.Println("Current working directory:", cwd)
|
||||||
e.Logger.Error(err)
|
}
|
||||||
}
|
|
||||||
|
func main() {
|
||||||
whisperState, err := api.InitializeWhisperState(model)
|
|
||||||
if err != nil {
|
change_working_directory()
|
||||||
e.Logger.Error(err)
|
|
||||||
}
|
args, errParsing := resources.ParseFlags()
|
||||||
|
if errParsing != nil {
|
||||||
e.POST("/v1/audio/transcriptions", func(c echo.Context) error {
|
println("Error parsing flags: ", errParsing)
|
||||||
return api.Transcribe(c, whisperState)
|
return
|
||||||
})
|
}
|
||||||
|
|
||||||
e.Logger.Fatal(e.Start("127.0.0.1:3000"))
|
whisperState, err := api.InitializeWhisperState(args.ModelPath, args.Language)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
println("Error initializing whisper state: ", err)
|
||||||
|
}
|
||||||
|
const maxCapacity = 2048 * 10240
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(os.Stdin)
|
||||||
|
buf := make([]byte, maxCapacity)
|
||||||
|
scanner.Buffer(buf, maxCapacity)
|
||||||
|
|
||||||
|
println("waiting_for_input")
|
||||||
|
if scanner.Scan() {
|
||||||
|
base64Data := scanner.Text()
|
||||||
|
decodedBuffer, err := base64.StdEncoding.DecodeString(base64Data)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Error decoding buffer:", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
result := api.TranscribeBytes(decodedBuffer, whisperState)
|
||||||
|
println(beginDelimiter + result + endDelimiter)
|
||||||
|
println("finished")
|
||||||
|
|
||||||
|
// Process the decodedBuffer (e.g., print its length)
|
||||||
|
fmt.Println("Received buffer size:", len(decodedBuffer))
|
||||||
|
|
||||||
|
// Send a response back to Node.js (optional)
|
||||||
|
fmt.Fprintln(os.Stdout, "Buffer received successfully!")
|
||||||
|
} else if err := scanner.Err(); err != nil {
|
||||||
|
fmt.Println("Error reading from stdin:", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// e.Logger.Fatal(e.Start(fmt.Sprintf("127.0.0.1:%d", args.Port)))
|
||||||
}
|
}
|
||||||
|
@ -86,7 +86,7 @@ func (this *FullParams) RemoveFlags(newflag eFullParamsFlags) {
|
|||||||
this.cStruct.Flags = this.cStruct.Flags ^ newflag
|
this.cStruct.Flags = this.cStruct.Flags ^ newflag
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *FullParams) SetLanguage(language eLanguage) {
|
func (this *FullParams) SetLanguage(language int32) {
|
||||||
if this == nil {
|
if this == nil {
|
||||||
return
|
return
|
||||||
} else if this.cStruct == nil {
|
} else if this.cStruct == nil {
|
||||||
@ -146,9 +146,9 @@ func (this *FullParams) TestDefaultsOK() bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if this.cStruct.Language != English {
|
// if this.cStruct.Language != English {
|
||||||
return false
|
// return false
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Todo ... why do these not line up as expected.. is our struct out of alignment ?
|
// Todo ... why do these not line up as expected.. is our struct out of alignment ?
|
||||||
/*
|
/*
|
||||||
@ -178,7 +178,7 @@ type _FullParams struct {
|
|||||||
offset_ms int32
|
offset_ms int32
|
||||||
duration_ms int32
|
duration_ms int32
|
||||||
Flags eFullParamsFlags
|
Flags eFullParamsFlags
|
||||||
Language eLanguage
|
Language int32
|
||||||
|
|
||||||
thold_pt float32
|
thold_pt float32
|
||||||
thold_ptsum float32
|
thold_ptsum float32
|
||||||
@ -214,6 +214,7 @@ func NewFullParams(cstruct *_FullParams) *FullParams {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func _newFullParams_cStruct() *_FullParams {
|
func _newFullParams_cStruct() *_FullParams {
|
||||||
|
|
||||||
return &_FullParams{
|
return &_FullParams{
|
||||||
|
|
||||||
strategy: 0,
|
strategy: 0,
|
||||||
|
@ -207,6 +207,7 @@ func (context *IContext) FullDefaultParams(strategy eSamplingStrategy) (*FullPar
|
|||||||
return nil, errors.New("FullDefaultParams did not return params")
|
return nil, errors.New("FullDefaultParams did not return params")
|
||||||
}
|
}
|
||||||
ParamObj := NewFullParams(params)
|
ParamObj := NewFullParams(params)
|
||||||
|
// ParamObj.SetLanguage(Polish)
|
||||||
|
|
||||||
if ParamObj.TestDefaultsOK() {
|
if ParamObj.TestDefaultsOK() {
|
||||||
return ParamObj, nil
|
return ParamObj, nil
|
||||||
|
@ -1,207 +0,0 @@
|
|||||||
package whisper
|
|
||||||
|
|
||||||
// https://github.com/Const-me/Whisper/blob/master/WhisperNet/API/eLanguage.cs
|
|
||||||
|
|
||||||
type eLanguage int32
|
|
||||||
|
|
||||||
const (
|
|
||||||
Auto eLanguage = -1 // "af"
|
|
||||||
|
|
||||||
Afrikaans = 0x6661 // "af"
|
|
||||||
/// <summary>Albanian</summary>
|
|
||||||
Albanian = 0x7173 // "sq"
|
|
||||||
/// <summary>Amharic</summary>
|
|
||||||
Amharic = 0x6D61 // "am"
|
|
||||||
/// <summary>Arabic</summary>
|
|
||||||
Arabic = 0x7261 // "ar"
|
|
||||||
/// <summary>Armenian</summary>
|
|
||||||
Armenian = 0x7968 // "hy"
|
|
||||||
/// <summary>Assamese</summary>
|
|
||||||
Assamese = 0x7361 // "as"
|
|
||||||
/// <summary>Azerbaijani</summary>
|
|
||||||
Azerbaijani = 0x7A61 // "az"
|
|
||||||
/// <summary>Bashkir</summary>
|
|
||||||
Bashkir = 0x6162 // "ba"
|
|
||||||
/// <summary>Basque</summary>
|
|
||||||
Basque = 0x7565 // "eu"
|
|
||||||
/// <summary>Belarusian</summary>
|
|
||||||
Belarusian = 0x6562 // "be"
|
|
||||||
/// <summary>Bengali</summary>
|
|
||||||
Bengali = 0x6E62 // "bn"
|
|
||||||
/// <summary>Bosnian</summary>
|
|
||||||
Bosnian = 0x7362 // "bs"
|
|
||||||
/// <summary>Breton</summary>
|
|
||||||
Breton = 0x7262 // "br"
|
|
||||||
/// <summary>Bulgarian</summary>
|
|
||||||
Bulgarian = 0x6762 // "bg"
|
|
||||||
/// <summary>Catalan</summary>
|
|
||||||
Catalan = 0x6163 // "ca"
|
|
||||||
/// <summary>Chinese</summary>
|
|
||||||
Chinese = 0x687A // "zh"
|
|
||||||
/// <summary>Croatian</summary>
|
|
||||||
Croatian = 0x7268 // "hr"
|
|
||||||
/// <summary>Czech</summary>
|
|
||||||
Czech = 0x7363 // "cs"
|
|
||||||
/// <summary>Danish</summary>
|
|
||||||
Danish = 0x6164 // "da"
|
|
||||||
/// <summary>Dutch</summary>
|
|
||||||
Dutch = 0x6C6E // "nl"
|
|
||||||
/// <summary>English</summary>
|
|
||||||
English = 0x6E65 // "en"
|
|
||||||
/// <summary>Estonian</summary>
|
|
||||||
Estonian = 0x7465 // "et"
|
|
||||||
/// <summary>Faroese</summary>
|
|
||||||
Faroese = 0x6F66 // "fo"
|
|
||||||
/// <summary>Finnish</summary>
|
|
||||||
Finnish = 0x6966 // "fi"
|
|
||||||
/// <summary>French</summary>
|
|
||||||
French = 0x7266 // "fr"
|
|
||||||
/// <summary>Galician</summary>
|
|
||||||
Galician = 0x6C67 // "gl"
|
|
||||||
/// <summary>Georgian</summary>
|
|
||||||
Georgian = 0x616B // "ka"
|
|
||||||
/// <summary>German</summary>
|
|
||||||
German = 0x6564 // "de"
|
|
||||||
/// <summary>Greek</summary>
|
|
||||||
Greek = 0x6C65 // "el"
|
|
||||||
/// <summary>Gujarati</summary>
|
|
||||||
Gujarati = 0x7567 // "gu"
|
|
||||||
/// <summary>Haitian Creole</summary>
|
|
||||||
HaitianCreole = 0x7468 // "ht"
|
|
||||||
/// <summary>Hausa</summary>
|
|
||||||
Hausa = 0x6168 // "ha"
|
|
||||||
/// <summary>Hawaiian</summary>
|
|
||||||
Hawaiian = 0x776168 // "haw"
|
|
||||||
/// <summary>Hebrew</summary>
|
|
||||||
Hebrew = 0x7769 // "iw"
|
|
||||||
/// <summary>Hindi</summary>
|
|
||||||
Hindi = 0x6968 // "hi"
|
|
||||||
/// <summary>Hungarian</summary>
|
|
||||||
Hungarian = 0x7568 // "hu"
|
|
||||||
/// <summary>Icelandic</summary>
|
|
||||||
Icelandic = 0x7369 // "is"
|
|
||||||
/// <summary>Indonesian</summary>
|
|
||||||
Indonesian = 0x6469 // "id"
|
|
||||||
/// <summary>Italian</summary>
|
|
||||||
Italian = 0x7469 // "it"
|
|
||||||
/// <summary>Japanese</summary>
|
|
||||||
Japanese = 0x616A // "ja"
|
|
||||||
/// <summary>Javanese</summary>
|
|
||||||
Javanese = 0x776A // "jw"
|
|
||||||
/// <summary>Kannada</summary>
|
|
||||||
Kannada = 0x6E6B // "kn"
|
|
||||||
/// <summary>Kazakh</summary>
|
|
||||||
Kazakh = 0x6B6B // "kk"
|
|
||||||
/// <summary>Khmer</summary>
|
|
||||||
Khmer = 0x6D6B // "km"
|
|
||||||
/// <summary>Korean</summary>
|
|
||||||
Korean = 0x6F6B // "ko"
|
|
||||||
/// <summary>Lao</summary>
|
|
||||||
Lao = 0x6F6C // "lo"
|
|
||||||
/// <summary>Latin</summary>
|
|
||||||
Latin = 0x616C // "la"
|
|
||||||
/// <summary>Latvian</summary>
|
|
||||||
Latvian = 0x766C // "lv"
|
|
||||||
/// <summary>Lingala</summary>
|
|
||||||
Lingala = 0x6E6C // "ln"
|
|
||||||
/// <summary>Lithuanian</summary>
|
|
||||||
Lithuanian = 0x746C // "lt"
|
|
||||||
/// <summary>Luxembourgish</summary>
|
|
||||||
Luxembourgish = 0x626C // "lb"
|
|
||||||
/// <summary>Macedonian</summary>
|
|
||||||
Macedonian = 0x6B6D // "mk"
|
|
||||||
/// <summary>Malagasy</summary>
|
|
||||||
Malagasy = 0x676D // "mg"
|
|
||||||
/// <summary>Malay</summary>
|
|
||||||
Malay = 0x736D // "ms"
|
|
||||||
/// <summary>Malayalam</summary>
|
|
||||||
Malayalam = 0x6C6D // "ml"
|
|
||||||
/// <summary>Maltese</summary>
|
|
||||||
Maltese = 0x746D // "mt"
|
|
||||||
/// <summary>Maori</summary>
|
|
||||||
Maori = 0x696D // "mi"
|
|
||||||
/// <summary>Marathi</summary>
|
|
||||||
Marathi = 0x726D // "mr"
|
|
||||||
/// <summary>Mongolian</summary>
|
|
||||||
Mongolian = 0x6E6D // "mn"
|
|
||||||
/// <summary>Myanmar</summary>
|
|
||||||
Myanmar = 0x796D // "my"
|
|
||||||
/// <summary>Nepali</summary>
|
|
||||||
Nepali = 0x656E // "ne"
|
|
||||||
/// <summary>Norwegian</summary>
|
|
||||||
Norwegian = 0x6F6E // "no"
|
|
||||||
/// <summary>Nynorsk</summary>
|
|
||||||
Nynorsk = 0x6E6E // "nn"
|
|
||||||
/// <summary>Occitan</summary>
|
|
||||||
Occitan = 0x636F // "oc"
|
|
||||||
/// <summary>Pashto</summary>
|
|
||||||
Pashto = 0x7370 // "ps"
|
|
||||||
/// <summary>Persian</summary>
|
|
||||||
Persian = 0x6166 // "fa"
|
|
||||||
/// <summary>Polish</summary>
|
|
||||||
Polish = 0x6C70 // "pl"
|
|
||||||
/// <summary>Portuguese</summary>
|
|
||||||
Portuguese = 0x7470 // "pt"
|
|
||||||
/// <summary>Punjabi</summary>
|
|
||||||
Punjabi = 0x6170 // "pa"
|
|
||||||
/// <summary>Romanian</summary>
|
|
||||||
Romanian = 0x6F72 // "ro"
|
|
||||||
/// <summary>Russian</summary>
|
|
||||||
Russian = 0x7572 // "ru"
|
|
||||||
/// <summary>Sanskrit</summary>
|
|
||||||
Sanskrit = 0x6173 // "sa"
|
|
||||||
/// <summary>Serbian</summary>
|
|
||||||
Serbian = 0x7273 // "sr"
|
|
||||||
/// <summary>Shona</summary>
|
|
||||||
Shona = 0x6E73 // "sn"
|
|
||||||
/// <summary>Sindhi</summary>
|
|
||||||
Sindhi = 0x6473 // "sd"
|
|
||||||
/// <summary>Sinhala</summary>
|
|
||||||
Sinhala = 0x6973 // "si"
|
|
||||||
/// <summary>Slovak</summary>
|
|
||||||
Slovak = 0x6B73 // "sk"
|
|
||||||
/// <summary>Slovenian</summary>
|
|
||||||
Slovenian = 0x6C73 // "sl"
|
|
||||||
/// <summary>Somali</summary>
|
|
||||||
Somali = 0x6F73 // "so"
|
|
||||||
/// <summary>Spanish</summary>
|
|
||||||
Spanish = 0x7365 // "es"
|
|
||||||
/// <summary>Sundanese</summary>
|
|
||||||
Sundanese = 0x7573 // "su"
|
|
||||||
/// <summary>Swahili</summary>
|
|
||||||
Swahili = 0x7773 // "sw"
|
|
||||||
/// <summary>Swedish</summary>
|
|
||||||
Swedish = 0x7673 // "sv"
|
|
||||||
/// <summary>Tagalog</summary>
|
|
||||||
Tagalog = 0x6C74 // "tl"
|
|
||||||
/// <summary>Tajik</summary>
|
|
||||||
Tajik = 0x6774 // "tg"
|
|
||||||
/// <summary>Tamil</summary>
|
|
||||||
Tamil = 0x6174 // "ta"
|
|
||||||
/// <summary>Tatar</summary>
|
|
||||||
Tatar = 0x7474 // "tt"
|
|
||||||
/// <summary>Telugu</summary>
|
|
||||||
Telugu = 0x6574 // "te"
|
|
||||||
/// <summary>Thai</summary>
|
|
||||||
Thai = 0x6874 // "th"
|
|
||||||
/// <summary>Tibetan</summary>
|
|
||||||
Tibetan = 0x6F62 // "bo"
|
|
||||||
/// <summary>Turkish</summary>
|
|
||||||
Turkish = 0x7274 // "tr"
|
|
||||||
/// <summary>Turkmen</summary>
|
|
||||||
Turkmen = 0x6B74 // "tk"
|
|
||||||
/// <summary>Ukrainian</summary>
|
|
||||||
Ukrainian = 0x6B75 // "uk"
|
|
||||||
/// <summary>Urdu</summary>
|
|
||||||
Urdu = 0x7275 // "ur"
|
|
||||||
/// <summary>Uzbek</summary>
|
|
||||||
Uzbek = 0x7A75 // "uz"
|
|
||||||
/// <summary>Vietnamese</summary>
|
|
||||||
Vietnamese = 0x6976 // "vi"
|
|
||||||
/// <summary>Welsh</summary>
|
|
||||||
Welsh = 0x7963 // "cy"
|
|
||||||
/// <summary>Yiddish</summary>
|
|
||||||
Yiddish = 0x6979 // "yi"
|
|
||||||
/// <summary>Yoruba</summary>
|
|
||||||
Yoruba = 0x6F79 // "yo"
|
|
||||||
)
|
|
Loading…
x
Reference in New Issue
Block a user