-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Integrate Whisper CPP and write a wrapper module in Aprapipes (#324)
* Add custom port vcpkg for whisper * Add whisper stream * Add whisper stream header * Add whisper cpp to Cmake list * Add test frame type and minor changes * Add whisper to vcpkg * Add vcpkg custom overlay ports to thirdparty * Modify with whisper option * Send whisper output as text frames * revert changes to sound record test * Add whisper UT * Fix PS to remove whisper from vcpkg json * Revert changes to OPTIONS section, remove WHISPER option, rename Whisper source files to generic AudioToTextXForm * Move pcm to git lfs * Add pcm and model bin file to lfs * Fix UT name * Throw AIP exception for unknown strategy * Revert sound_record_tests.cpp changes * Revert changes to vcpkg indentation and remove Whisper option * Linux -> OFF to ON Windows ON -> OFF * Add reserve statement for vector Move constructor impl * update submodule for pipeline to run * Update whisper port with install fix * update submodule * Update vcpkg version * Add changes to handle props change * Improve UT and refactor for changing sample strategy during run time. * Add apt-get install libx11-dev libgles2-mesa-dev for libepoxy error * Add memory type check in validate input pins and throw exception if model path changes. * update submodule * update vcpkg mysys2 * update submodule * Address nits * Export env variable overlay port for building in arm64 * added fix-for-arm64.patch for whisper * update fix-vcpkg-json.ps1 * update CMakeLists.txt * update vcpkg url for build * update whisper tests threshold * update code formatting * update whisper test * added EOS for small buffer size --------- Co-authored-by: Kushal Jain <[email protected]> Co-authored-by: Vinayak Y-B <[email protected]>
- Loading branch information
1 parent
110a2e2
commit 5358310
Showing
22 changed files
with
797 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
#pragma once | ||
|
||
#include "Module.h" | ||
|
||
// size of audio to process should be a parameter. | ||
// Cache variable to collect frames for processing | ||
|
||
// Configuration properties for the AudioToTextXForm module.
// Extends ModuleProps with a decoder sampling strategy, a model path and a
// buffer size; all three are supplied through the constructor.
class AudioToTextXFormProps : public ModuleProps | ||
{ | ||
public: | ||
// Selects how the decoder samples its output.
// NOTE(review): presumably maps onto the greedy / beam-search sampling
// strategies of the underlying speech-to-text library - confirm in the .cpp.
enum DecoderSamplingStrategy { | ||
GREEDY, | ||
BEAM_SEARCH | ||
}; | ||
|
||
// Sampling strategy chosen for the decoder.
DecoderSamplingStrategy samplingStrategy; | ||
// Path to the model file.
// NOTE(review): format and loading are not visible in this header.
std::string modelPath; | ||
// Size of the audio buffer.
// NOTE(review): unit (samples vs. bytes) is not visible in this header - confirm.
int bufferSize; | ||
|
||
// Builds the props from the three values above; defined out of line.
AudioToTextXFormProps( | ||
DecoderSamplingStrategy _samplingStrategy, | ||
std::string _modelPath, | ||
int _bufferSize); | ||
// Declared here, defined out of line.
// NOTE(review): presumably reports the serialized size of these props - confirm.
size_t getSerializeSize(); | ||
|
||
|
||
private: | ||
// Lets Boost.Serialization reach the private serialize() member below.
friend class boost::serialization::access; | ||
|
||
// Boost.Serialization hook; the definition is not in this header.
template <class Archive> | ||
void serialize(Archive& ar, const unsigned int version); | ||
}; | ||
|
||
// Pipeline module, derived from Module, that is configured with
// AudioToTextXFormProps. All member functions are declared here and defined
// out of line.
// NOTE(review): the name suggests audio frames in and text frames out -
// confirm the exact input/output frame types against the implementation.
class AudioToTextXForm : public Module | ||
{ | ||
|
||
public: | ||
// Constructs the module from a copy of the given props.
AudioToTextXForm(AudioToTextXFormProps _props); | ||
virtual ~AudioToTextXForm(); | ||
// Lifecycle entry points.
// NOTE(review): presumably override Module::init / Module::term - confirm in Module.h.
bool init(); | ||
bool term(); | ||
// Setter / getter for the module's properties; getProps returns by value.
void setProps(AudioToTextXFormProps& props); | ||
AudioToTextXFormProps getProps(); | ||
|
||
protected: | ||
// Frame-handling and pin-validation hooks.
// NOTE(review): these look like overrides of Module virtuals, but the base
// class is not visible here - confirm the signatures match Module.h.
bool process(frame_container& frames); | ||
bool processSOS(frame_sp& frame); | ||
bool validateInputPins(); | ||
bool validateOutputPins(); | ||
void addInputPin(framemetadata_sp& metadata, string& pinId); | ||
bool handlePropsChange(frame_sp& frame); | ||
|
||
private: | ||
void setMetadata(framemetadata_sp& metadata); | ||
// Detail is only forward-declared here; its definition lives elsewhere and
// the module holds it through a boost::shared_ptr.
class Detail; | ||
boost::shared_ptr<Detail> mDetail; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.