5 Commits
0.1.2 ... 0.1.3

Author SHA1 Message Date
2d44f72466 Update release.yml 2024-04-03 23:09:07 +02:00
8275ad584b Fix ms false positive again 2024-04-03 22:34:14 +02:00
1546e3f145 Move language map to json file in order to not trigger microsoft antivirus 2024-03-19 18:35:24 +01:00
fc48cca110 Feat/accept cmd args (#2)
* make working dir as executable dir

* Add support for cli args
2024-03-14 21:06:34 +01:00
c4daf3ec71 Transcribe using file buffer straight from form 2024-03-11 20:55:47 +01:00
14 changed files with 293 additions and 180 deletions

View File

@ -29,4 +29,4 @@ jobs:
version: ${{ env.GITHUB_REF_NAME }}
args: release --clean
env:
GITHUB_TOKEN: ${{ secrets.PUBLISHER_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

3
.gitignore vendored
View File

@ -1,5 +1,6 @@
Whisper.dll
ggml-medium.bin
ggml-*
*.exe
whisper-api-server.exe

View File

@ -37,7 +37,7 @@ go build -ldflags "-s -w" -o server.exe main.go
Make a request to the server using the following command:
```sh
curl http://localhost:3000/v1/audio/transcriptions \
curl http://localhost:3031/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@/path/to/file/audio.mp3" \
```
@ -56,7 +56,7 @@ Receive a response in JSON format:
2. Open the plugin's settings.
3. Set the following values:
- API KEY: `sk-1`
- API URL: `http://localhost:3000/v1/audio/transcriptions`
- API URL: `http://localhost:3031/v1/audio/transcriptions`
- Model: `whisper-1`
# Roadmap

View File

@ -1,6 +1,7 @@
package api
import (
"io"
"net/http"
"strings"
@ -11,7 +12,7 @@ type TranscribeResponse struct {
Text string `json:"text"`
}
func Transcribe(c echo.Context, whisperState *WhisperState) error {
func TranscribeFromFile(c echo.Context, whisperState *WhisperState) error {
audioPath, err := saveFormFile("file", c)
if err != nil {
c.Logger().Errorf("Error reading file: %s", err)
@ -26,6 +27,11 @@ func Transcribe(c echo.Context, whisperState *WhisperState) error {
err = whisperState.context.RunFull(whisperState.params, buffer)
if err != nil {
c.Logger().Errorf("Error processing audio: %s", err)
return err
}
result, err := getResult(whisperState.context)
if err != nil {
c.Logger().Error(err)
@ -43,3 +49,59 @@ func Transcribe(c echo.Context, whisperState *WhisperState) error {
return c.JSON(http.StatusOK, response)
}
// Transcribe serves a transcription request: it reads the uploaded "file"
// form field fully into memory, hands the bytes to the media loader, runs the
// model on the decoded audio, and replies with the recognized text as JSON.
//
// Every failure is logged through the request logger and returned to the
// caller. An empty transcription is answered with an HTTP 500 JSON error body.
func Transcribe(c echo.Context, whisperState *WhisperState) error {
	// Locate the uploaded part in the multipart form.
	header, err := c.FormFile("file")
	if err != nil {
		c.Logger().Errorf("Error retrieving the file: %s", err)
		return err
	}

	src, err := header.Open()
	if err != nil {
		c.Logger().Errorf("Error opening the file: %s", err)
		return err
	}
	defer src.Close()

	// The upload is read before the lock is taken, so waiting on the client
	// does not block other requests.
	raw, err := io.ReadAll(src)
	if err != nil {
		c.Logger().Errorf("Error reading the file into buffer: %s", err)
		return err
	}

	// Everything that touches the shared whisper state runs under its mutex.
	whisperState.mutex.Lock()
	defer whisperState.mutex.Unlock()

	samples, err := whisperState.media.LoadAudioFileData(&raw, true)
	if err != nil {
		c.Logger().Errorf("Error loading audio file data: %s", err)
		return err
	}

	if err := whisperState.context.RunStreamed(whisperState.params, samples); err != nil {
		c.Logger().Errorf("Error processing audio: %s", err)
		return err
	}

	text, err := getResult(whisperState.context)
	if err != nil {
		c.Logger().Error(err)
		return err
	}
	if text == "" {
		return c.JSON(http.StatusInternalServerError, map[string]string{"error": "Internal server error"})
	}

	return c.JSON(http.StatusOK, TranscribeResponse{
		Text: strings.TrimLeft(text, " "),
	})
}

View File

@ -15,7 +15,7 @@ type WhisperState struct {
mutex sync.Mutex
}
func InitializeWhisperState(modelPath string) (*WhisperState, error) {
func InitializeWhisperState(modelPath string, lang int32) (*WhisperState, error) {
lib, err := whisper.New(whisper.LlDebug, whisper.LfUseStandardError, nil)
if err != nil {
return nil, err
@ -41,6 +41,8 @@ func InitializeWhisperState(modelPath string) (*WhisperState, error) {
return nil, err
}
params.SetLanguage(lang)
fmt.Printf("Params CPU Threads : %d\n", params.CpuThreads())
return &WhisperState{

View File

@ -0,0 +1,96 @@
package resources
import (
"encoding/json"
"flag"
"fmt"
"io"
"os"
"strconv"
"strings"
)
// Arguments holds the raw command-line values exactly as the flag package
// parsed them, before the language name is translated to a numeric code.
type Arguments struct {
	Language  string
	ModelPath string
	Port      int
}

// ParsedArguments holds the settings handed to the rest of the program:
// the language as a numeric code, plus the model path and port unchanged.
type ParsedArguments struct {
	Language  int32
	ModelPath string
	Port      int
}

// LanguageMap maps a lowercase language key (for example "en") to its numeric
// code written as a string that strconv.ParseInt accepts with base 0
// (for example "0x6E65").
type LanguageMap map[string]string

// defaultLanguageCode is the code for English ("en"). It is returned whenever
// no language was requested and alongside every error, so callers can fall
// back to it without further checks.
const defaultLanguageCode int32 = 0x6E65

// processLanguageAndCode translates args.Language into its numeric language
// code using languageMap.json from the current working directory.
//
// An empty language means "no preference": the English default is returned
// with a nil error and the map file is not consulted. For any failure (map
// file missing or malformed, unknown language, unparsable code) the English
// default is returned together with a non-nil error describing the problem.
func processLanguageAndCode(args *Arguments) (int32, error) {
	language := strings.ToLower(args.Language)
	if language == "" {
		// Not passing a language is not an error; use the default silently.
		return defaultLanguageCode, nil
	}

	// Read the language map from the JSON file.
	jsonFile, err := os.Open("languageMap.json")
	if err != nil {
		return defaultLanguageCode, fmt.Errorf("error opening language map: %w", err)
	}
	defer jsonFile.Close()

	byteData, err := io.ReadAll(jsonFile)
	if err != nil {
		return defaultLanguageCode, fmt.Errorf("error reading language map: %w", err)
	}

	var languageMap LanguageMap
	if err := json.Unmarshal(byteData, &languageMap); err != nil {
		return defaultLanguageCode, fmt.Errorf("error parsing language map: %w", err)
	}

	hexCode, ok := languageMap[language]
	if !ok {
		return defaultLanguageCode, fmt.Errorf("unsupported language: %s", args.Language)
	}

	// Base 0 lets ParseInt honor the "0x" prefix used in the map file;
	// bitSize 32 rejects values that would not fit the int32 result.
	languageCode, err := strconv.ParseInt(hexCode, 0, 32)
	if err != nil {
		return defaultLanguageCode, fmt.Errorf("error converting hex code: %w", err)
	}

	return int32(languageCode), nil
}
// ParseFlags registers the program's flags on the default flag set, parses the
// command line, and returns the resulting settings as a *ParsedArguments.
//
// Every option has a short and a long spelling bound to the same variable:
// -l/-language, -m/-modelPath (required) and -p/-port (default 3031).
//
// An error is returned when the model path is missing or the port lies outside
// 0-65535. A language that cannot be resolved is not fatal: the problem is
// printed and the English code is used instead.
//
// ParseFlags must be called at most once per process, because registering the
// same flag name twice on the default flag set panics.
func ParseFlags() (*ParsedArguments, error) {
	args := &Arguments{}

	flag.StringVar(&args.Language, "l", "", "Language to be processed")
	flag.StringVar(&args.Language, "language", "", "Language to be processed") // long form of -l
	flag.StringVar(&args.ModelPath, "m", "", "Path to the model file (required)")
	flag.StringVar(&args.ModelPath, "modelPath", "", "Path to the model file (required)") // long form of -m
	flag.IntVar(&args.Port, "p", 3031, "Port to start the server on")
	flag.IntVar(&args.Port, "port", 3031, "Port to start the server on") // long form of -p

	flag.Usage = func() {
		// Write the header to the same destination PrintDefaults uses, so the
		// whole usage text ends up on one stream.
		out := flag.CommandLine.Output()
		fmt.Fprintf(out, "Usage: %s [OPTIONS]\n", os.Args[0])
		fmt.Fprintln(out, "Options:")
		flag.PrintDefaults()
	}

	flag.Parse()

	args.Language = strings.ToLower(args.Language)

	if args.ModelPath == "" {
		return nil, fmt.Errorf("modelPath argument is required")
	}
	if args.Port < 0 || args.Port > 65535 {
		return nil, fmt.Errorf("port %d is out of range (0-65535)", args.Port)
	}

	languageCode, err := processLanguageAndCode(args)
	if err != nil {
		// processLanguageAndCode returns the English code alongside any error,
		// so languageCode is still usable below.
		fmt.Println("Error setting language, defaulting to English:", err)
	}

	return &ParsedArguments{
		Language:  languageCode,
		ModelPath: args.ModelPath,
		Port:      args.Port,
	}, nil
}

View File

@ -1,38 +0,0 @@
package resources
import (
"io"
"net/http"
"os"
"github.com/schollz/progressbar/v3"
)
// DownloadFile fetches url with an HTTP GET and writes the response body to
// filepath, showing a byte-count progress bar while copying.
//
// The request is issued before the destination is created, and a file that
// could not be written completely is removed again, so a failed download does
// not leave an empty or truncated file behind for later existence checks.
//
// NOTE(review): the HTTP status code is not inspected, so an error page
// returned with a non-2xx status is still saved as the file. Rejecting it
// needs an error constructor (fmt or errors), which this file does not import.
func DownloadFile(url string, filepath string) (err error) {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	out, err := os.Create(filepath)
	if err != nil {
		return err
	}
	defer func() {
		// A failing Close can mean buffered data never reached the disk.
		if closeErr := out.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort cleanup; the original error is the one worth reporting.
			_ = os.Remove(filepath)
		}
	}()

	bar := progressbar.DefaultBytes(
		resp.ContentLength,
		"Downloading",
	)

	// Feed the body to the file and the progress bar in a single pass.
	_, err = io.Copy(io.MultiWriter(out, bar), resp.Body)
	return err
}

View File

@ -1,29 +0,0 @@
package resources
import (
"fmt"
"path/filepath"
)
// GetModel makes sure the model file exists at the path modelType, downloading
// it from the whisper.cpp repository on Hugging Face when it is absent.
//
// It prints the file's absolute location and returns the path unchanged
// (not the absolute form). Download and path-resolution errors are returned
// with an empty path.
func GetModel(modelType string) (string, error) {
	localPath := modelType

	if !IsFileExists(localPath) {
		fmt.Println("Model not found.")

		remoteURL := fmt.Sprintf("https://huggingface.co/ggerganov/whisper.cpp/resolve/main/%s", modelType)
		if err := DownloadFile(remoteURL, localPath); err != nil {
			return "", err
		}
	}

	resolved, err := filepath.Abs(localPath)
	if err != nil {
		return "", err
	}
	fmt.Printf("Model found: %s\n", resolved)

	return localPath, nil
}

View File

@ -1,13 +0,0 @@
package resources
import "os"
// IsFileExists reports whether filename can be assumed to exist.
//
// It returns false only when os.Stat fails with a "does not exist" error.
// Any other Stat failure is treated the same as success and yields true.
func IsFileExists(filename string) bool {
	if _, statErr := os.Stat(filename); statErr != nil && os.IsNotExist(statErr) {
		return false
	}
	return true
}

View File

@ -1,78 +0,0 @@
package resources
import (
"archive/zip"
"fmt"
"io"
"os"
"path/filepath"
)
// GetWhisperDll makes sure Whisper.dll exists in the current working
// directory, fetching it from the Const-me/Whisper release named by version
// when it is absent.
//
// It prints the library's absolute location and returns the relative name
// "Whisper.dll". Download, extraction and path-resolution errors are returned
// with an empty path.
func GetWhisperDll(version string) (string, error) {
	const (
		dllName       = "Whisper.dll"
		fileToExtract = "Binary/Whisper.dll"
	)

	if !IsFileExists(dllName) {
		fmt.Println("Whisper DLL not found.")

		if err := fetchWhisperDll(version, fileToExtract); err != nil {
			return "", err
		}
	}

	absPath, err := filepath.Abs(dllName)
	if err != nil {
		return "", err
	}

	fmt.Printf("Library found: %s\n", absPath)
	return dllName, nil
}

// fetchWhisperDll downloads the Library.zip of the given release into a
// temporary file, extracts fileToExtract from it into the current working
// directory, and removes the temporary archive afterwards.
func fetchWhisperDll(version string, fileToExtract string) error {
	fileURL := fmt.Sprintf("https://github.com/Const-me/Whisper/releases/download/%s/Library.zip", version)

	archive, err := os.CreateTemp("", "WhisperLibrary-*.zip")
	if err != nil {
		return err
	}
	archivePath := archive.Name()
	// The archive is only an intermediate artifact; do not leave it in the
	// temp directory whether or not the steps below succeed.
	defer os.Remove(archivePath)

	// Only the reserved name is needed: DownloadFile and extractFile both
	// reopen the archive by path.
	if err := archive.Close(); err != nil {
		return err
	}

	if err := DownloadFile(fileURL, archivePath); err != nil {
		return err
	}

	return extractFile(archivePath, fileToExtract)
}
// extractFile copies the entry named fileToExtract out of the zip archive at
// archivePath into the current working directory, using only the entry's base
// name for the output file.
//
// It returns an error when the archive cannot be opened, when no entry has
// exactly that name, or when reading or writing the entry fails.
func extractFile(archivePath string, fileToExtract string) error {
	reader, err := zip.OpenReader(archivePath)
	if err != nil {
		return err
	}
	defer reader.Close()

	for _, file := range reader.File {
		if file.Name != fileToExtract {
			continue
		}
		return writeZipEntry(file, filepath.Base(fileToExtract))
	}

	return fmt.Errorf("File not found in the archive")
}

// writeZipEntry writes the decompressed contents of entry to targetPath.
//
// The entry is opened before the target is created, and a target that could
// not be written completely is removed, so a failure does not leave an empty
// or truncated file behind.
func writeZipEntry(entry *zip.File, targetPath string) (err error) {
	src, err := entry.Open()
	if err != nil {
		return err
	}
	defer src.Close()

	dst, err := os.Create(targetPath)
	if err != nil {
		return err
	}
	defer func() {
		// A failing Close can mean buffered data never reached the disk.
		if closeErr := dst.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort cleanup; the original error is the one worth reporting.
			_ = os.Remove(targetPath)
		}
	}()

	_, err = io.Copy(dst, src)
	return err
}

84
languageMap.json Normal file
View File

@ -0,0 +1,84 @@
{
"af": "0x6661",
"sq": "0x7173",
"am": "0x6D61",
"ar": "0x7261",
"hy": "0x7968",
"as": "0x7361",
"az": "0x7A61",
"ba": "0x6162",
"eu": "0x7565",
"be": "0x6562",
"bn": "0x6E62",
"bs": "0x7362",
"br": "0x7262",
"bg": "0x6762",
"ca": "0x6163",
"zh": "0x687A",
"hr": "0x7268",
"cs": "0x7363",
"da": "0x6164",
"nl": "0x6C6E",
"en": "0x6E65",
"et": "0x7465",
"fo": "0x6F66",
"fi": "0x6966",
"fr": "0x7266",
"gl": "0x6C67",
"ka": "0x616B",
"de": "0x6564",
"el": "0x6C65",
"gu": "0x7567",
"he": "0x6568",
"hi": "0x6968",
"hu": "0x7568",
"is": "0x7369",
"id": "0x6469",
"it": "0x7469",
"ja": "0x616A",
"kn": "0x6E6B",
"kk": "0x6B6B",
"km": "0x6D6B",
"ko": "0x6F6B",
"ky": "0x796B",
"lo": "0x6F6C",
"lv": "0x766C",
"lt": "0x746C",
"mk": "0x6B6D",
"ms": "0x736D",
"ml": "0x6C6D",
"mr": "0x726D",
"mn": "0x6E6D",
"ne": "0x656E",
"no": "0x6F6E",
"or": "0x726F",
"ps": "0x7370",
"fa": "0x6166",
"pl": "0x6C70",
"pt": "0x7470",
"pa": "0x6170",
"ro": "0x6F72",
"ru": "0x7572",
"sa": "0x6173",
"sr": "0x7273",
"sd": "0x6473",
"si": "0x6973",
"sk": "0x6B73",
"sl": "0x6C73",
"es": "0x7365",
"sw": "0x7773",
"sv": "0x7673",
"tg": "0x6774",
"ta": "0x6174",
"te": "0x6574",
"th": "0x6874",
"tr": "0x7274",
"uk": "0x6B75",
"ur": "0x7275",
"uz": "0x7A75",
"vi": "0x6976",
"cy": "0x7963",
"xh": "0x6878",
"yi": "0x6979",
"yo": "0x6F79"
}

46
main.go
View File

@ -1,6 +1,10 @@
package main
import (
"fmt"
"os"
"path/filepath"
"github.com/labstack/echo/v4"
"github.com/labstack/echo/v4/middleware"
"github.com/labstack/gommon/log"
@ -8,9 +12,37 @@ import (
"github.com/xzeldon/whisper-api-server/internal/resources"
)
// change_working_directory switches the process working directory to the
// directory that contains the running executable and prints the directory
// that is current afterwards.
//
// If the executable path cannot be determined or the directory change fails,
// the error is logged through e.Logger and the function returns without
// printing anything.
func change_working_directory(e *echo.Echo) {
	binary, err := os.Executable()
	if err != nil {
		e.Logger.Error(err)
		return
	}

	// Move into the directory that holds the executable.
	if err := os.Chdir(filepath.Dir(binary)); err != nil {
		e.Logger.Error(err)
		return
	}

	// Informational only: a Getwd failure is not reported.
	dir, _ := os.Getwd()
	fmt.Println("Current working directory:", dir)
}
func main() {
e := echo.New()
e.HideBanner = true
change_working_directory(e)
args, errParsing := resources.ParseFlags()
if errParsing != nil {
e.Logger.Error("Error parsing flags: ", errParsing)
return
}
e.Use(middleware.CORS())
@ -18,24 +50,16 @@ func main() {
l.SetHeader("${time_rfc3339} ${level}")
}
_, err := resources.GetWhisperDll("1.12.0")
if err != nil {
e.Logger.Error(err)
}
whisperState, err := api.InitializeWhisperState(args.ModelPath, args.Language)
model, err := resources.GetModel("ggml-medium.bin")
if err != nil {
e.Logger.Error(err)
}
whisperState, err := api.InitializeWhisperState(model)
if err != nil {
e.Logger.Error(err)
}
e.POST("/v1/audio/transcriptions", func(c echo.Context) error {
return api.Transcribe(c, whisperState)
})
e.Logger.Fatal(e.Start("127.0.0.1:3000"))
e.Logger.Fatal(e.Start(fmt.Sprintf("127.0.0.1:%d", args.Port)))
}

View File

@ -86,14 +86,14 @@ func (this *FullParams) RemoveFlags(newflag eFullParamsFlags) {
this.cStruct.Flags = this.cStruct.Flags ^ newflag
}
func (this *FullParams) SetLanguage(language eLanguage) {
func (this *FullParams) SetLanguage(language int32) {
if this == nil {
return
} else if this.cStruct == nil {
return
}
this.cStruct.Language = language
this.cStruct.Language = eLanguage(language)
}
/*using pfnNewSegment = HRESULT( __cdecl* )( iContext* ctx, uint32_t n_new, void* user_data ) noexcept;*/
@ -146,9 +146,9 @@ func (this *FullParams) TestDefaultsOK() bool {
return false
}
if this.cStruct.Language != English {
return false
}
// if this.cStruct.Language != English {
// return false
// }
// Todo ... why do these not line up as expected.. is our struct out of alignment ?
/*
@ -214,6 +214,7 @@ func NewFullParams(cstruct *_FullParams) *FullParams {
}
func _newFullParams_cStruct() *_FullParams {
return &_FullParams{
strategy: 0,

View File

@ -207,6 +207,7 @@ func (context *IContext) FullDefaultParams(strategy eSamplingStrategy) (*FullPar
return nil, errors.New("FullDefaultParams did not return params")
}
ParamObj := NewFullParams(params)
// ParamObj.SetLanguage(Polish)
if ParamObj.TestDefaultsOK() {
return ParamObj, nil