// whisper-api-server/pkg/whisper/FullParams.go
//
// 249 lines
// 5.0 KiB
// Go

package whisper
import (
"syscall"
"unsafe"
)
// https://github.com/Const-me/Whisper/blob/master/Whisper/API/sFullParams.h
// https://github.com/Const-me/Whisper/blob/master/WhisperNet/API/Parameters.cs
// eSamplingStrategy is the 32-bit enumeration stored in the strategy field
// of _FullParams.
// NOTE(review): the values presumably mirror the sampling-strategy enum of
// the native sFullParams.h header linked above — confirm before renumbering.
type eSamplingStrategy uint32

// Sampling strategy values. The numbering is written out explicitly so that
// it cannot shift when the list is edited.
const (
	SsGreedy     eSamplingStrategy = 0
	SsBeamSearch eSamplingStrategy = 1
	SsINVALIDARG eSamplingStrategy = 2
)
// eFullParamsFlags is the 32-bit bit set stored in the Flags field of
// _FullParams.
type eFullParamsFlags uint32

// FlagNone is the empty flag set. It is the only constant in this group that
// carries the eFullParamsFlags type.
const FlagNone eFullParamsFlags = 0

// Individual flag bits, one bit per constant starting at bit 0. These are
// untyped integer constants, so they convert implicitly wherever an
// eFullParamsFlags (or another integer type) is expected.
const (
	FlagTranslate = 1 << iota
	FlagNoContext
	FlagSingleSegment
	FlagPrintSpecial
	FlagPrintProgress
	FlagPrintRealtime
	FlagPrintTimestamps
	FlagTokenTimestamps // Experimental
	FlagSpeedupAudio
)
// EWhisperHWND is the pointer-sized status code returned by the callback
// types declared in this file.
// NOTE(review): the C typedef quoted beside NewSegmentCallback_Type returns
// an HRESULT, so the "HWND" in this name looks like a misnomer — confirm.
type EWhisperHWND uintptr

// Status codes a callback may return. The encoder-begin callback returns
// S_OK to proceed or S_FALSE to stop.
const (
	// S_OK has the value 0.
	S_OK EWhisperHWND = iota
	// S_FALSE has the value 1.
	S_FALSE
)
// FullParams wraps a pointer to a _FullParams value. Its methods read and
// modify the wrapped struct through that pointer.
type FullParams struct {
// cStruct is the wrapped parameter struct; it may be nil.
cStruct *_FullParams
}
// CpuThreads returns the cpuThreads value of the wrapped struct.
// It returns 0 when the receiver or its underlying struct is nil.
func (p *FullParams) CpuThreads() int32 {
	if p == nil || p.cStruct == nil {
		return 0
	}
	return p.cStruct.cpuThreads
}
// setCpuThreads stores val in the cpuThreads field of the wrapped struct.
// The call is a no-op when the receiver or its underlying struct is nil.
func (p *FullParams) setCpuThreads(val int32) {
	if p == nil || p.cStruct == nil {
		return
	}
	p.cStruct.cpuThreads = val
}
// SetMaxTextCTX stores val in the n_max_text_ctx field of the wrapped struct.
// Like the other accessors on FullParams, it is a no-op when the receiver or
// its underlying struct is nil, rather than panicking on a nil dereference.
func (p *FullParams) SetMaxTextCTX(val int32) {
	if p == nil || p.cStruct == nil {
		return
	}
	p.cStruct.n_max_text_ctx = val
}
// AddFlags turns on every bit of newflag in the Flags field of the wrapped
// struct; bits that are already set stay set.
// The call is a no-op when the receiver or its underlying struct is nil.
func (p *FullParams) AddFlags(newflag eFullParamsFlags) {
	if p == nil || p.cStruct == nil {
		return
	}
	p.cStruct.Flags |= newflag
}
// RemoveFlags turns off every bit of newflag in the Flags field of the
// wrapped struct; all other bits are left untouched.
// The call is a no-op when the receiver or its underlying struct is nil.
func (p *FullParams) RemoveFlags(newflag eFullParamsFlags) {
	if p == nil || p.cStruct == nil {
		return
	}
	// Bit-clear (&^) rather than XOR: XOR toggles, so "removing" a flag that
	// was not set would switch it on instead.
	p.cStruct.Flags &^= newflag
}
// NewSegmentCallback_Type is the Go signature used for the native
// new-segment callback, declared on the C side as:
//
//	using pfnNewSegment = HRESULT( __cdecl* )( iContext* ctx, uint32_t n_new, void* user_data ) noexcept;
type NewSegmentCallback_Type func(context *IContext, n_new uint32, user_data unsafe.Pointer) EWhisperHWND

// SetNewSegmentCallback converts cb into a native callback pointer with
// syscall.NewCallback and stores it in the new_segment_callback field of the
// wrapped struct. The call is a no-op when the receiver or its underlying
// struct is nil.
//
// Per the syscall documentation, callbacks created this way are never
// released and only a limited number can exist per process, so avoid calling
// this repeatedly with fresh closures.
// NOTE(review): the C typedef is __cdecl while syscall.NewCallback yields a
// stdcall callback; these coincide on 64-bit Windows — confirm if a 32-bit
// build is ever targeted.
func (p *FullParams) SetNewSegmentCallback(cb NewSegmentCallback_Type) {
	if p == nil || p.cStruct == nil {
		return
	}
	callbackPtr := syscall.NewCallback(cb)
	p.cStruct.new_segment_callback = callbackPtr
}
// EncoderBeginCallback_Type is the Go signature used for the encoder-begin
// callback. Return S_OK to proceed, or S_FALSE to stop the process.
type EncoderBeginCallback_Type func(context *IContext, user_data unsafe.Pointer) EWhisperHWND

// SetEncoderBeginCallback converts cb into a native callback pointer with
// syscall.NewCallback and stores it in the encoder_begin_callback field of
// the wrapped struct. The call is a no-op when the receiver or its
// underlying struct is nil.
//
// Per the syscall documentation, callbacks created this way are never
// released and only a limited number can exist per process, so avoid calling
// this repeatedly with fresh closures.
func (p *FullParams) SetEncoderBeginCallback(cb EncoderBeginCallback_Type) {
	if p == nil || p.cStruct == nil {
		return
	}
	callbackPtr := syscall.NewCallback(cb)
	p.cStruct.encoder_begin_callback = callbackPtr
}
// TestDefaultsOK reports whether the wrapped struct holds the expected
// default values: n_max_text_ctx of 16384, flags equal to
// FlagPrintProgress|FlagPrintTimestamps, both thold_pt and thold_ptsum equal
// to 0.01, and Language equal to English.
// It returns false when the receiver or its underlying struct is nil.
func (p *FullParams) TestDefaultsOK() bool {
	if p == nil || p.cStruct == nil {
		return false
	}
	c := p.cStruct

	// TODO: the strategy-specific fields are not checked yet because they do
	// not line up as expected — is our struct out of alignment?
	/*
		if c.strategy == ssGreedy {
			if c.beam_search.n_past != -1 ||
				c.beam_search.beam_width != -1 ||
				c.beam_search.n_best != -1 {
				return false
			}
		} else if c.strategy == ssBeamSearch {
			if c.greedy.n_past != -1 ||
				c.beam_search.beam_width != 10 ||
				c.beam_search.n_best != 5 {
				return false
			}
		}
	*/

	defaultsMatch := c.n_max_text_ctx == 16384 &&
		c.Flags == (FlagPrintProgress|FlagPrintTimestamps) &&
		c.thold_pt == 0.01 &&
		c.thold_ptsum == 0.01 &&
		c.Language == English
	return defaultsMatch
}
// _FullParams is the raw parameter record wrapped by FullParams.
// NOTE(review): the field order and widths presumably mirror the native
// sFullParams struct from the header linked at the top of this file, and the
// TODO in TestDefaultsOK suspects the layout may be out of alignment —
// verify against the header before reordering, adding or removing fields.
type _FullParams struct {
strategy eSamplingStrategy
// cpuThreads is read and written through CpuThreads / setCpuThreads.
cpuThreads int32
// n_max_text_ctx is written by SetMaxTextCTX; TestDefaultsOK expects 16384.
n_max_text_ctx int32
offset_ms int32
duration_ms int32
// Flags is modified by AddFlags / RemoveFlags; TestDefaultsOK expects
// FlagPrintProgress | FlagPrintTimestamps.
Flags eFullParamsFlags
// Language: TestDefaultsOK expects English.
Language eLanguage
// thold_pt and thold_ptsum: TestDefaultsOK expects 0.01 for both.
thold_pt float32
thold_ptsum float32
max_len int32
max_tokens int32
greedy struct{ n_past int32 }
beam_search struct {
n_past int32
beam_width int32
n_best int32
}
audio_ctx int32 // overwrite the audio context size (0 = use default)
prompt_tokens uintptr
prompt_n_tokens int32
// new_segment_callback holds the pointer produced by syscall.NewCallback
// in SetNewSegmentCallback.
new_segment_callback uintptr
// Not set by any method visible in this file section.
new_segment_callback_user_data uintptr
// encoder_begin_callback holds the pointer produced by syscall.NewCallback
// in SetEncoderBeginCallback.
encoder_begin_callback uintptr
// Not set by any method visible in this file section.
encoder_begin_callback_user_data uintptr
// Are these needed ?? Jay
// setFlag uintptr
}
// NewFullParams returns a new FullParams wrapping cstruct. The pointer is
// stored as given (it may be nil); the struct it points to is not copied.
func NewFullParams(cstruct *_FullParams) *FullParams {
	return &FullParams{cStruct: cstruct}
}
// _newFullParams_cStruct allocates a fresh _FullParams in which every field,
// including the nested greedy and beam_search structs, holds its zero value.
func _newFullParams_cStruct() *_FullParams {
	return new(_FullParams)
}