Reviewed copy of the patch. Line breaks were reconstructed from a
newline-collapsed source, so the position of blank context lines is a best
guess (NOTE(review): regenerate against the real tree before applying).
The new Transcribe handler additionally answers 400 for a missing or empty
upload; the blob hashes on the handler.go "index" line predate that change.

diff --git a/.gitignore b/.gitignore
index 536a6e9..2474ef0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 Whisper.dll
 
-ggml-medium.bin
+ggml-*
+*.exe
 
 whisper-api-server.exe
diff --git a/internal/api/handler.go b/internal/api/handler.go
index 965f31c..8ae58f4 100644
--- a/internal/api/handler.go
+++ b/internal/api/handler.go
@@ -1,6 +1,7 @@
 package api
 
 import (
+	"io"
 	"net/http"
 	"strings"
 
@@ -11,7 +12,7 @@ type TranscribeResponse struct {
 	Text string `json:"text"`
 }
 
-func Transcribe(c echo.Context, whisperState *WhisperState) error {
+func TranscribeFromFile(c echo.Context, whisperState *WhisperState) error {
 	audioPath, err := saveFormFile("file", c)
 	if err != nil {
 		c.Logger().Errorf("Error reading file: %s", err)
@@ -26,6 +27,11 @@ func Transcribe(c echo.Context, whisperState *WhisperState) error {
 
 	err = whisperState.context.RunFull(whisperState.params, buffer)
 
+	if err != nil {
+		c.Logger().Errorf("Error processing audio: %s", err)
+		return err
+	}
+
 	result, err := getResult(whisperState.context)
 	if err != nil {
 		c.Logger().Error(err)
@@ -43,3 +49,71 @@ func Transcribe(c echo.Context, whisperState *WhisperState) error {
 
 	return c.JSON(http.StatusOK, response)
 }
+
+// Transcribe reads the upload in the "file" multipart form field fully into
+// memory, decodes it with whisperState.media and runs it through
+// whisperState.context via RunStreamed. On success it responds 200 with a
+// TranscribeResponse whose Text is the result with leading spaces trimmed.
+//
+// A missing or empty upload is answered with 400 and an empty result with
+// 500; every other failure is logged and returned to the caller.
+func Transcribe(c echo.Context, whisperState *WhisperState) error {
+	fileHeader, err := c.FormFile("file")
+	if err != nil {
+		// A missing or unparsable "file" part is the client's mistake, so
+		// answer 400 instead of letting the error surface as a server fault.
+		c.Logger().Errorf("Error retrieving the file: %s", err)
+		return c.JSON(http.StatusBadRequest, map[string]string{"error": "Missing or invalid file"})
+	}
+
+	file, err := fileHeader.Open()
+	if err != nil {
+		c.Logger().Errorf("Error opening the file: %s", err)
+		return err
+	}
+	defer file.Close()
+
+	// The decoder is handed the whole upload at once, so buffer it in memory.
+	buffer, err := io.ReadAll(file)
+	if err != nil {
+		c.Logger().Errorf("Error reading the file into buffer: %s", err)
+		return err
+	}
+	if len(buffer) == 0 {
+		// Nothing to decode; reject before taking the lock.
+		return c.JSON(http.StatusBadRequest, map[string]string{"error": "Empty file"})
+	}
+
+	// Take the lock only after the upload has been read, keeping the critical
+	// section to the decode and inference steps.
+	whisperState.mutex.Lock()
+	defer whisperState.mutex.Unlock()
+
+	bufferSpecial, err := whisperState.media.LoadAudioFileData(&buffer, true)
+	if err != nil {
+		c.Logger().Errorf("Error loading audio file data: %s", err)
+		return err
+	}
+
+	err = whisperState.context.RunStreamed(whisperState.params, bufferSpecial)
+	if err != nil {
+		c.Logger().Errorf("Error processing audio: %s", err)
+		return err
+	}
+
+	result, err := getResult(whisperState.context)
+	if err != nil {
+		c.Logger().Error(err)
+		return err
+	}
+
+	if len(result) == 0 {
+		return c.JSON(http.StatusInternalServerError, map[string]string{"error": "Internal server error"})
+	}
+
+	response := TranscribeResponse{
+		Text: strings.TrimLeft(result, " "),
+	}
+
+	return c.JSON(http.StatusOK, response)
+}
diff --git a/pkg/whisper/FullParams.go b/pkg/whisper/FullParams.go
index 70cc963..cda84e2 100644
--- a/pkg/whisper/FullParams.go
+++ b/pkg/whisper/FullParams.go
@@ -146,9 +146,9 @@ func (this *FullParams) TestDefaultsOK() bool {
 		return false
 	}
 
-	if this.cStruct.Language != English {
-		return false
-	}
+	// if this.cStruct.Language != English {
+	// 	return false
+	// }
 
 	// Todo ... why do these not line up as expected.. is our struct out of alignment ?
 	/*
@@ -214,6 +214,7 @@ func NewFullParams(cstruct *_FullParams) *FullParams {
 }
 
 func _newFullParams_cStruct() *_FullParams {
+
 	return &_FullParams{
 
 		strategy: 0,
diff --git a/pkg/whisper/context.go b/pkg/whisper/context.go
index 8c469f6..3de77fa 100644
--- a/pkg/whisper/context.go
+++ b/pkg/whisper/context.go
@@ -207,6 +207,7 @@ func (context *IContext) FullDefaultParams(strategy eSamplingStrategy) (*FullPar
 		return nil, errors.New("FullDefaultParams did not return params")
 	}
 	ParamObj := NewFullParams(params)
+	// ParamObj.SetLanguage(Polish)
 
 	if ParamObj.TestDefaultsOK() {
 		return ParamObj, nil