From 4a84457c253713cb6c56a7a725bd88d3c11b8815 Mon Sep 17 00:00:00 2001 From: Jaro Habiger Date: Wed, 11 Oct 2023 02:41:01 +0200 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=93=A3=20better=20logging?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/ipc/ipc_main.ts | 7 +--- app/main_process/index.ts | 10 ++++- app/main_process/server.ts | 5 ++- app/scripts/dev.js | 25 +++--------- app/src/index.tsx | 4 +- app/src/util/log.ts | 81 ++++++++++++++++++-------------------- 6 files changed, 59 insertions(+), 73 deletions(-) diff --git a/app/ipc/ipc_main.ts b/app/ipc/ipc_main.ts index a1d3c7e5..2abec826 100644 --- a/app/ipc/ipc_main.ts +++ b/app/ipc/ipc_main.ts @@ -6,7 +6,7 @@ import { setMenuBar, showMenuBar, showContextMenu } from '../main_process/menu'; import { sendAll } from '../main_process/windowList'; import { serverInfo } from '../main_process/server'; import { ServerInfo } from '../main_process/types'; -import { logLine, logFilePath, LogLevel, LogSource } from '../src/util/log'; +import { logFilePath } from '../src/util/log'; ipcMain.handle('open-file', (event, options) => { const win = BrowserWindow.fromWebContents(event.sender); @@ -104,8 +104,3 @@ export function exportDebugLog(window: BrowserWindow): void { ipcMain.handle('get-home-path', () => { return app.getPath('home'); }); - -ipcMain.handle('log-line', (_event, source: LogSource, level: LogLevel, ...args: any[]) => { - assertSome(logLine); - logLine(source, level, ...args.map((x) => JSON.stringify(x))); -}); diff --git a/app/main_process/index.ts b/app/main_process/index.ts index 601d4fd1..1a578553 100644 --- a/app/main_process/index.ts +++ b/app/main_process/index.ts @@ -24,6 +24,14 @@ export const createWindow = (): void => { show: false, }); + let dontSendLog = false; + window.webContents.on('console-message', (_e, level, message) => { + if (message == 'server stderr') dontSendLog = true; + logLine && !dontSendLog && logLine(LogSource.RendererProcess, NumericLogLevels[level], message); + + if (message == 'console.groupEnd') dontSendLog = false; + }); + window.webContents.on('new-window', (event, url, frameName, disposition, options) => { if (frameName === 'modal') { event.preventDefault(); @@ -151,4 +159,4 @@ import './server'; import { windowList } from './windowList'; import { applyMenuBar, setMenuBar } from './menu'; import { isRunningInTest } from '../src/util'; -import { initMainProcessLog } from '../src/util/log'; +import { LogSource, NumericLogLevels, initMainProcessLog, logLine } from '../src/util/log'; diff --git a/app/main_process/server.ts b/app/main_process/server.ts index 85b53d45..9137da2e 100644 --- a/app/main_process/server.ts +++ b/app/main_process/server.ts @@ -6,6 +6,7 @@ import { app, dialog } from 'electron'; import { publishServerInfo, publishServerStderr } from '../ipc/ipc_main'; import { ServerInfo } from './types'; import { isRunningInTest } from '../src/util'; +import { LogLevel, LogSource, logLine } from '../src/util/log'; function findServer() { const possibilities = [ @@ -74,7 +75,7 @@ function startServer() { return; } serverProcess.stdout.on('data', (data: Buffer) => { - console.log('server-stdout', data.toString()); + logLine && logLine(LogSource.ServerProcess, LogLevel.Log, data); try { const parsed_data: ServerStartingMessage | ServerStartedMessage = JSON.parse(data.toString()); if (parsed_data.msg == 'server_starting') { @@ -88,7 +89,7 @@ function startServer() { }); serverProcess.stderr.on('data', (data: Buffer) => { - console.log(`server-stderr: \n${data}`); + logLine && logLine(LogSource.ServerProcess, LogLevel.Error, data); publishServerStderr(data.toString()); }); diff --git a/app/scripts/dev.js b/app/scripts/dev.js index 36eee4d9..2e3f5e79 100644 --- a/app/scripts/dev.js +++ b/app/scripts/dev.js @@ -1,14 +1,13 @@ -const { createServer, build, createLogger } = require('vite'); +const { createServer, build } = require('vite'); const electronPath = require('electron'); const { spawn } = require('child_process'); const mode = (process.env.MODE = process.env.MODE || 'development'); -const LOG_LEVEL = 'warn'; const sharedConfig = { mode, build: { watch: {}, }, - logLevel: LOG_LEVEL, + logLevel: 'warn', }; const getWatcher = ({ name, configFile, writeBundle }) => { @@ -29,10 +28,6 @@ const setupMainPackageWatcher = (viteDevServer) => { process.env.VITE_DEV_SERVER_URL = `${protocol}//${host}:${port}${path}`; } - const logger = createLogger(LOG_LEVEL, { - prefix: '[main]', - }); - let spawnProcess = null; return getWatcher({ @@ -44,18 +39,10 @@ const setupMainPackageWatcher = (viteDevServer) => { spawnProcess = null; } - spawnProcess = spawn(String(electronPath), [ - `${dir}/start.cjs.js`, - `--remote-debugging-port=${process.env.DEBUGGER_PORT}`, - ]); - - spawnProcess.stdout.on( - 'data', - (d) => d.toString().trim() && logger.warn(d.toString(), { timestamp: true }) - ); - spawnProcess.stderr.on( - 'data', - (d) => d.toString().trim() && logger.error(d.toString(), { timestamp: true }) + spawnProcess = spawn( + String(electronPath), + [`${dir}/start.cjs.js`, `--remote-debugging-port=${process.env.DEBUGGER_PORT}`], + { stdio: 'inherit' } ); }, }); diff --git a/app/src/index.tsx b/app/src/index.tsx index e8c20ad5..2da3faf1 100644 --- a/app/src/index.tsx +++ b/app/src/index.tsx @@ -4,11 +4,9 @@ import * as ReactDOM from 'react-dom'; import './index.css'; import App from './components/App'; -import { exportDebugLogsToDisk, initRendererLog } from './util/log'; +import { exportDebugLogsToDisk } from './util/log'; import { subscribeExportDebugLog } from '../ipc/ipc_renderer'; -initRendererLog(); - subscribeExportDebugLog((event, mainProcessLogPath) => exportDebugLogsToDisk(mainProcessLogPath)); const anyModule = module as any; diff --git a/app/src/util/log.ts b/app/src/util/log.ts index d243528a..21459ef3 100644 --- a/app/src/util/log.ts +++ b/app/src/util/log.ts @@ -1,29 +1,41 @@ import fs, { createWriteStream } from 'fs'; import path from 'path'; import JSZip from 'jszip'; -import { getHomePath, saveFile, sendLogLine } from '../../ipc/ipc_renderer'; +import { getHomePath, saveFile } from '../../ipc/ipc_renderer'; import { isRunningInTest } from './index'; import glob from 'glob'; import { app } from 'electron'; export enum LogLevel { Log, - Trace, - Debug, Info, Warn, Error, - GroupCollapsed, - GroupEnd, } +export const NumericLogLevels = [LogLevel.Log, LogLevel.Info, LogLevel.Warn, LogLevel.Error]; + export enum LogSource { MainProcess, RendererProcess, + ServerProcess, } export let logFilePath: string | null = null; -let oldLog: ((...args: any[]) => void) | null = null; + +const buffer: string[] = []; +function write(str: string) { + buffer.push(str); + + const try_fn = () => { + if (process.stdout.writableLength == 0) { + process.stdout.write(buffer.shift() || ''); + } else { + setTimeout(try_fn, 10); + } + }; + try_fn(); +} function log(file: number, source: LogSource, level: LogLevel, ...args: any[]) { const date = new Date().toISOString(); @@ -36,7 +48,23 @@ function log(file: number, source: LogSource, level: LogLevel, ...args: any[]) { level: level_str, args: string_args, }); - if (oldLog !== null) oldLog(log_line); + + const FgGreen = '\x1b[32m'; + const FgBlue = '\x1b[34m'; + const FgYellow = '\x1b[33m'; + const Reset = '\x1b[0m'; + const source_color = [FgGreen, FgBlue, FgYellow][source]; + + write( + args + .join('\n') + .split('\n') + .map( + (line) => + `${source_color}[${source_str.substring(0, 4)}]${Reset} ${level_str.padEnd(5)} | ${line}` + ) + .join('\n') + '\n' + ); fs.writeSync(file, log_line + '\n'); fs.fsyncSync(file); } @@ -64,24 +92,15 @@ export function initMainProcessLog(): void { logFilePath = path.join(log_dir, fileName); const file = fs.openSync(logFilePath, 'w'); console.log('Init logging into', logFilePath); - oldLog = console.log; console.log = (...args) => log(file, LogSource.MainProcess, LogLevel.Log, ...args); - console.trace = (...args) => log(file, LogSource.MainProcess, LogLevel.Trace, ...args); - console.debug = (...args) => log(file, LogSource.MainProcess, LogLevel.Debug, ...args); + console.trace = (...args) => log(file, LogSource.MainProcess, LogLevel.Log, ...args); + console.debug = (...args) => log(file, LogSource.MainProcess, LogLevel.Log, ...args); console.info = (...args) => log(file, LogSource.MainProcess, LogLevel.Info, ...args); console.warn = (...args) => log(file, LogSource.MainProcess, LogLevel.Warn, ...args); console.error = (...args) => log(file, LogSource.MainProcess, LogLevel.Error, ...args); logLine = (...args) => log(file, ...args); - const oldGroupCollapsed = console.groupCollapsed; - console.groupCollapsed = (...args) => { - log(file, LogSource.MainProcess, LogLevel.GroupCollapsed, ...args); - oldGroupCollapsed(...args); - }; - const oldGroupEnd = console.groupEnd; - console.groupEnd = (...args) => { - log(file, LogSource.MainProcess, LogLevel.GroupEnd, ...args); - oldGroupEnd(...args); - }; + console.groupCollapsed = () => {}; + console.groupEnd = () => {}; } export async function exportDebugLogsToDisk(file: string): Promise { @@ -108,25 +127,3 @@ export async function exportDebugLogsToDisk(file: string): Promise { .on('error', reject); }); } - -type KeyOfType = keyof { - [P in keyof T as T[P] extends V ? P : never]: any; -}; - -function _mapLogFn(key: KeyOfType void>, level: LogLevel) { - const _oldFn: (...args: any[]) => void = console[key]; - console[key] = (...args: any[]) => { - _oldFn(...args); - sendLogLine(level, ...args); - }; -} -export function initRendererLog(): void { - _mapLogFn('log', LogLevel.Log); - _mapLogFn('trace', LogLevel.Trace); - _mapLogFn('debug', LogLevel.Debug); - _mapLogFn('info', LogLevel.Info); - _mapLogFn('warn', LogLevel.Warn); - _mapLogFn('error', LogLevel.Error); - _mapLogFn('groupCollapsed', LogLevel.GroupCollapsed); - _mapLogFn('groupEnd', LogLevel.GroupEnd); -} From 6d1b7f34b5902441757535c5c7d7b678c3138117 Mon Sep 17 00:00:00 2001 From: Jaro Habiger Date: Wed, 11 Oct 2023 19:47:20 +0200 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=90=9B=20fix=20bugs=20in=20LanguageSe?= =?UTF-8?q?ttings=20page?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/src/pages/LanguageSettings.tsx | 40 ++++++++++++++++-------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/app/src/pages/LanguageSettings.tsx b/app/src/pages/LanguageSettings.tsx index 88839b9c..d93c290b 100644 --- a/app/src/pages/LanguageSettings.tsx +++ b/app/src/pages/LanguageSettings.tsx @@ -103,21 +103,23 @@ function ModelTable({ isDefault={null} action={ - dispatch(cancelDownload(model.task_uuid))} - hoverChild={} - defaultChild={ - - } - /> + + dispatch(cancelDownload(model.task_uuid))} + hoverChild={} + defaultChild={ + + } + /> + } key={model.model_id} @@ -221,19 +223,19 @@ function HoverSwitcher({ hoverChild: JSX.Element; defaultChild: JSX.Element; }): JSX.Element { - const [shownChild, setShownChild] = useState(defaultChild); + const [hover, setHover] = useState(false); return ( { - setShownChild(defaultChild); + setHover(false); }} onMouseOver={() => { - setShownChild(hoverChild); + setHover(true); }} > - {shownChild} + {hover ? hoverChild : defaultChild} ); } From cae71b5dc39e1bd0405ae67e24dd12bf69c40edf Mon Sep 17 00:00:00 2001 From: Jaro Habiger Date: Wed, 11 Oct 2023 19:50:52 +0200 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=91=82=20add=20whisper=20models=20for?= =?UTF-8?q?=20downloading?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/src/pages/LanguageSettings.tsx | 10 + app/src/pages/ModelManager.tsx | 6 + app/src/state/models.ts | 1 + server/app/models.py | 95 ++++++---- server/app/models.yml | 248 ++++++++++++++++++------- server/scripts/generate_models_list.py | 40 +++- 6 files changed, 299 insertions(+), 101 deletions(-) diff --git a/app/src/pages/LanguageSettings.tsx b/app/src/pages/LanguageSettings.tsx index d93c290b..0a491d5c 100644 --- a/app/src/pages/LanguageSettings.tsx +++ b/app/src/pages/LanguageSettings.tsx @@ -264,6 +264,16 @@ export function LanguageSettingsPage(): JSX.Element { + + Whisper Models for {language.lang} + + + Transcription Models for {language.lang} diff --git a/app/src/pages/ModelManager.tsx b/app/src/pages/ModelManager.tsx index db6410e2..08f278b8 100644 --- a/app/src/pages/ModelManager.tsx +++ b/app/src/pages/ModelManager.tsx @@ -41,6 +41,7 @@ export function ModelManagerPage(): JSX.Element { Language Transcription Models + Whisper Models @@ -61,6 +62,11 @@ export function ModelManagerPage(): JSX.Element { lang={lang.lang} downloaded={downloaded} /> + diff --git a/app/src/state/models.ts b/app/src/state/models.ts index 672f1e13..db36bf6b 100644 --- a/app/src/state/models.ts +++ b/app/src/state/models.ts @@ -25,6 +25,7 @@ export interface Model { export interface Language { lang: string; transcription_models: Model[]; + whisper_models: Model[]; } export type DownloadingModel = Model & { diff --git a/server/app/models.py b/server/app/models.py index 0f0bcbc6..139805b3 100644 --- a/server/app/models.py +++ b/server/app/models.py @@ -8,6 +8,7 @@ from urllib.parse import urlparse from zipfile import ZipFile +import huggingface_hub import requests import yaml from vosk import Model @@ -40,7 +41,7 @@ class ModelDescription: size: str type: str lang: str - compressed: bool = field(default=False) + download_type: str = field(default=False) model_id: str = field(default=None) def __post_init__(self): @@ -58,9 +59,10 @@ def is_downloaded(self) -> bool: class Language: lang: str transcription_models: List[ModelDescription] = field(default_factory=list) + whisper_models: List[ModelDescription] = field(default_factory=list) def all_models(self): - return self.transcription_models + return self.transcription_models + self.whisper_models class ModelDefaultDict(defaultdict): @@ -81,6 +83,8 @@ def __init__(self): models[model_description.model_id] = model_description if model["type"] == "transcription": languages[lang].transcription_models.append(model_description) + elif model["type"] == "whisper": + languages[lang].whisper_models.append(model_description) self.available = dict(languages) self.model_descriptions = models @@ -122,38 +126,61 @@ def get(self, model_id: str) -> Union[Model]: def download(self, model_id: str, task_uuid: str): task: DownloadModelTask = tasks.get(task_uuid) model = self.get_model_description(model_id) - with tempfile.TemporaryFile(dir=CACHE_DIR) as f: - response = requests.get(model.url, stream=True) - task.total = int(response.headers.get("content-length")) - task.state = DownloadModelState.DOWNLOADING - - for data in response.iter_content( - chunk_size=max(int(task.total / 1000), 1024 * 1024) - ): - task.add_progress(len(data)) - - f.write(data) - if task.canceled: - return - - task.state = DownloadModelState.EXTRACTING - if model.compressed: - with ZipFile(f) as archive: - target_dir = model.path() - for info in archive.infolist(): - if info.is_dir(): - continue - path = target_dir / Path("/".join(info.filename.split("/")[1:])) - path.parent.mkdir(exist_ok=True, parents=True) - - source = archive.open(info.filename) - target = open(path, "wb") - with source, target: - shutil.copyfileobj(source, target) - else: - f.seek(0) - with open(model.path(), "wb") as target: - shutil.copyfileobj(f, target) + + if model.download_type.startswith("http"): + with tempfile.TemporaryFile(dir=CACHE_DIR) as f: + response = requests.get(model.url, stream=True) + task.total = int(response.headers.get("content-length")) + task.state = DownloadModelState.DOWNLOADING + + for data in response.iter_content( + chunk_size=max(int(task.total / 1000), 1024 * 1024) + ): + task.add_progress(len(data)) + + f.write(data) + if task.canceled: + return + + task.state = DownloadModelState.EXTRACTING + if model.download_type.endswith("+zip"): + with ZipFile(f) as archive: + target_dir = model.path() + for info in archive.infolist(): + if info.is_dir(): + continue + path = target_dir / Path( + "/".join(info.filename.split("/")[1:]) + ) + path.parent.mkdir(exist_ok=True, parents=True) + + source = archive.open(info.filename) + target = open(path, "wb") + with source, target: + shutil.copyfileobj(source, target) + else: + f.seek(0) + with open(model.path(), "wb") as target: + shutil.copyfileobj(f, target) + elif model.download_type == "huggingface": + api = huggingface_hub.HfApi() + repo_info = api.repo_info(model.url, files_metadata=True) + task.total = sum(f.size for f in repo_info.siblings) + with tempfile.TemporaryDirectory(dir=CACHE_DIR) as dir: + for f in repo_info.siblings: + url = huggingface_hub.hf_hub_url(model.url, f.rfilename) + with open(Path(dir) / f.rfilename, "wb") as file: + task.state = DownloadModelState.DOWNLOADING + response = requests.get(url, stream=True) + for data in response.iter_content( + chunk_size=max(int(task.total / 1000), 1024 * 1024) + ): + task.add_progress(len(data)) + + file.write(data) + if task.canceled: + return + shutil.copytree(dir, model.path()) task.state = DownloadModelState.DONE diff --git a/server/app/models.yml b/server/app/models.yml index 65659f2a..0560f6cd 100644 --- a/server/app/models.yml +++ b/server/app/models.yml @@ -1,253 +1,377 @@ # this file is autogenerated by the ../scripts/generate_models_list.py script. # do not edit manually! +Universal: +- name: whisper-tiny + url: guillaumekln/faster-whisper-tiny + description: Whisper model doing both transcription and punctuation reconstruction + size: 74M + type: whisper + download_type: huggingface +- name: whisper-base + url: guillaumekln/faster-whisper-base + description: Whisper model doing both transcription and punctuation reconstruction + size: 141M + type: whisper + download_type: huggingface +- name: whisper-small + url: guillaumekln/faster-whisper-small + description: Whisper model doing both transcription and punctuation reconstruction + size: 463M + type: whisper + download_type: huggingface +- name: whisper-medium + url: guillaumekln/faster-whisper-medium + description: Whisper model doing both transcription and punctuation reconstruction + size: 1459M + type: whisper + download_type: huggingface +- name: whisper-large-v1 + url: guillaumekln/faster-whisper-large-v1 + description: Whisper model doing both transcription and punctuation reconstruction + size: 2946M + type: whisper + download_type: huggingface +- name: whisper-large-v2 + url: guillaumekln/faster-whisper-large-v2 + description: Whisper model doing both transcription and punctuation reconstruction + size: 2946M + type: whisper + download_type: huggingface English: +- name: whisper-tiny.en + url: guillaumekln/faster-whisper-tiny.en + description: Whisper model doing both transcription and punctuation reconstruction + size: 74M + type: whisper + download_type: huggingface +- name: whisper-base.en + url: guillaumekln/faster-whisper-base.en + description: Whisper model doing both transcription and punctuation reconstruction + size: 140M + type: whisper + download_type: huggingface +- name: whisper-small.en + url: guillaumekln/faster-whisper-small.en + description: Whisper model doing both transcription and punctuation reconstruction + size: 463M + type: whisper + download_type: huggingface +- name: whisper-medium.en + url: guillaumekln/faster-whisper-medium.en + description: Whisper model doing both transcription and punctuation reconstruction + size: 1459M + type: whisper + download_type: huggingface - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip description: Lightweight wideband model for Android and RPi size: 40M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip description: Accurate generic US English model size: 1.8G type: transcription - compressed: true + download_type: http+zip - name: lgraph url: https://alphacephei.com/vosk/models/vosk-model-en-us-0.22-lgraph.zip description: Big US English model with dynamic graph size: 128M type: transcription - compressed: true + download_type: http+zip +- name: big-2 + url: https://alphacephei.com/vosk/models/vosk-model-en-us-0.42-gigaspeech.zip + description: Accurate generic US English model trained by Kaldi on Gigaspeech. + Mostly for podcasts, not for telephony + size: 2.3G + type: transcription + download_type: http+zip +- name: big-3 + url: https://alphacephei.com/vosk/models/vosk-model-en-us-daanzu-20200905.zip + description: Wideband model for dictation from Kaldi-active-grammar + project + size: 1.0G + type: transcription + download_type: http+zip +- name: lgraph-2 + url: https://alphacephei.com/vosk/models/vosk-model-en-us-daanzu-20200905-lgraph.zip + description: Wideband model for dictation from Kaldi-active-grammar + project with configurable graph + size: 129M + type: transcription + download_type: http+zip +- name: big-4 + url: https://alphacephei.com/vosk/models/vosk-model-en-us-librispeech-0.2.zip + description: Repackaged Librispeech model from Kaldi, + not very accurate + size: 845M + type: transcription + download_type: http+zip +- name: small-2 + url: https://alphacephei.com/vosk/models/vosk-model-small-en-us-zamia-0.5.zip + description: Repackaged Zamia model f_250, mainly for research + size: 49M + type: transcription + download_type: http+zip +- name: big-5 + url: https://alphacephei.com/vosk/models/vosk-model-en-us-aspire-0.2.zip + description: Kaldi original ASPIRE model, not very accurate + size: 1.4G + type: transcription + download_type: http+zip +- name: big-6 + url: https://alphacephei.com/vosk/models/vosk-model-en-us-0.21.zip + description: Wideband model previous generation + size: 1.6G + type: transcription + download_type: http+zip Indian English: - name: big url: https://alphacephei.com/vosk/models/vosk-model-en-in-0.5.zip description: Generic Indian English model for telecom and broadcast size: 1G type: transcription - compressed: true + download_type: http+zip - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-en-in-0.4.zip description: Lightweight Indian English model for mobile applications size: 36M type: transcription - compressed: true + download_type: http+zip Chinese: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip description: Lightweight model for Android and RPi size: 42M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-cn-0.22.zip description: Big generic Chinese model for server processing size: 1.3G type: transcription - compressed: true -Chinese Other: -- name: big + download_type: http+zip +- name: big-2 url: https://alphacephei.com/vosk/models/vosk-model-cn-kaldi-multicn-0.15.zip description: Original Wideband Kaldi multi-cn model from Kaldi with Vosk LM size: 1.5G type: transcription - compressed: true + download_type: http+zip Russian: - name: big url: https://alphacephei.com/vosk/models/vosk-model-ru-0.42.zip description: Big mixed band Russian model for servers size: 1.8G type: transcription - compressed: true + download_type: http+zip - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-ru-0.22.zip description: Lightweight wideband model for Android/iOS and RPi size: 45M type: transcription - compressed: true -Russian Other: -- name: big + download_type: http+zip +- name: big-2 url: https://alphacephei.com/vosk/models/vosk-model-ru-0.22.zip description: Big mixed band Russian model for servers size: 1.5G type: transcription - compressed: true -- name: big-2 + download_type: http+zip +- name: big-3 url: https://alphacephei.com/vosk/models/vosk-model-ru-0.10.zip description: Big narrowband Russian model for servers size: 2.5G type: transcription - compressed: true + download_type: http+zip French: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-fr-0.22.zip description: Lightweight wideband model for Android/iOS and RPi size: 41M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-fr-0.22.zip description: Big accurate model for servers size: 1.4G type: transcription - compressed: true -French Other: -- name: small + download_type: http+zip +- name: small-2 url: https://alphacephei.com/vosk/models/vosk-model-small-fr-pguyot-0.3.zip description: Lightweight wideband model for Android and RPi trained by Paul Guyot size: 39M type: transcription - compressed: true + download_type: http+zip - name: linto-2.2 url: https://alphacephei.com/vosk/models/vosk-model-fr-0.6-linto-2.2.0.zip description: Model from LINTO project size: 1.5G type: transcription - compressed: true + download_type: http+zip German: - name: big url: https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip description: Big German model for telephony and server size: 1.9G type: transcription - compressed: true + download_type: http+zip - name: big-2 url: https://alphacephei.com/vosk/models/vosk-model-de-tuda-0.6-900k.zip description: Latest big wideband model from Tuda-DE project size: 4.4G type: transcription - compressed: true + download_type: http+zip - name: small + url: https://alphacephei.com/vosk/models/vosk-model-small-de-zamia-0.3.zip + description: Zamia f_250 small model repackaged (not recommended) + size: 49M + type: transcription + download_type: http+zip +- name: small-2 url: https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip description: Lightweight wideband model for Android and RPi size: 45M type: transcription - compressed: true + download_type: http+zip Spanish: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip description: Lightweight wideband model for Android and RPi size: 39M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-es-0.42.zip description: Big model for Spanish size: 1.4G type: transcription - compressed: true + download_type: http+zip Portuguese/Brazilian Portuguese: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-pt-0.3.zip description: Lightweight wideband model for Android and RPi size: 31M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-pt-fb-v0.1.1-20220516_2113.zip description: Big model from FalaBrazil size: 1.6G type: transcription - compressed: true + download_type: http+zip +Greek: +- name: big + url: https://alphacephei.com/vosk/models/vosk-model-el-gr-0.7.zip + description: Big narrowband Greek model for server processing, not extremely accurate + though + size: 1.1G + type: transcription + download_type: http+zip Turkish: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-tr-0.3.zip description: Lightweight wideband model for Android and RPi size: 35M type: transcription - compressed: true + download_type: http+zip Vietnamese: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-vn-0.4.zip description: Lightweight Vietnamese model size: 32M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-vn-0.4.zip description: Bigger Vietnamese model for server size: 78M type: transcription - compressed: true + download_type: http+zip Italian: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-it-0.22.zip description: Lightweight model for Android and RPi size: 48M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-it-0.22.zip description: Big generic Italian model for servers size: 1.2G type: transcription - compressed: true + download_type: http+zip Dutch: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-nl-0.22.zip description: Lightweight model for Dutch size: 39M type: transcription - compressed: true -Dutch Other: + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-nl-spraakherkenning-0.6.zip description: Medium Dutch model from Kaldi_NL size: 860M type: transcription - compressed: true + download_type: http+zip - name: lgraph url: https://alphacephei.com/vosk/models/vosk-model-nl-spraakherkenning-0.6-lgraph.zip description: Smaller model with dynamic graph size: 100M type: transcription - compressed: true + download_type: http+zip Catalan: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-ca-0.4.zip description: Lightweight wideband model for Android and RPi for Catalan size: 42M type: transcription - compressed: true + download_type: http+zip Arabic: - name: big url: https://alphacephei.com/vosk/models/vosk-model-ar-mgb2-0.4.zip description: Repackaged Arabic model trained on MGB2 dataset from Kaldi size: 318M type: transcription - compressed: true + download_type: http+zip - name: big-2 url: https://alphacephei.com/vosk/models/vosk-model-ar-0.22-linto-1.1.0.zip description: Big model from LINTO project size: 1.3G type: transcription - compressed: true + download_type: http+zip Farsi: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-fa-0.4.zip description: Lightweight wideband model for Android and RPi for Farsi (Persian) size: 47M type: transcription - compressed: true + download_type: http+zip +- name: big + url: https://alphacephei.com/vosk/models/vosk-model-fa-0.5.zip + description: Model with large vocabulary, not yet accurate but better than before + (Persian) + size: 1G + type: transcription + download_type: http+zip - name: small-2 url: https://alphacephei.com/vosk/models/vosk-model-small-fa-0.5.zip description: Bigger small model for desktop application (Persian) size: 60M type: transcription - compressed: true + download_type: http+zip Filipino: - name: big url: https://alphacephei.com/vosk/models/vosk-model-tl-ph-generic-0.6.zip description: Medium wideband model for Filipino (Tagalog) by feddybear size: 320M type: transcription - compressed: true + download_type: http+zip Ukrainian: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-uk-v3-nano.zip @@ -255,41 +379,41 @@ Ukrainian: Recognition for Ukrainian size: 73M type: transcription - compressed: true + download_type: http+zip - name: small-2 url: https://alphacephei.com/vosk/models/vosk-model-small-uk-v3-small.zip description: Small model from Speech Recognition for Ukrainian size: 133M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-uk-v3.zip description: Bigger model from Speech Recognition for Ukrainian size: 343M type: transcription - compressed: true + download_type: http+zip - name: lgraph url: https://alphacephei.com/vosk/models/vosk-model-uk-v3-lgraph.zip description: Big dynamic model from Speech Recognition for Ukrainian size: 325M type: transcription - compressed: true + download_type: http+zip Kazakh: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-kz-0.15.zip description: Small mobile model from SAIDA_Kazakh size: 42M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-kz-0.15.zip description: Bigger wideband model SAIDA_Kazakh size: 378M type: transcription - compressed: true + download_type: http+zip Swedish: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-sv-rhasspy-0.15.zip @@ -297,68 +421,68 @@ Swedish: project size: 289M type: transcription - compressed: true + download_type: http+zip Japanese: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-ja-0.22.zip description: Lightweight wideband model for Japanese size: 48M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-ja-0.22.zip description: Big model for Japanese size: 1Gb type: transcription - compressed: true + download_type: http+zip Esperanto: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-eo-0.42.zip description: Lightweight model for Esperanto size: 42M type: transcription - compressed: true + download_type: http+zip Hindi: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-hi-0.22.zip description: Lightweight model for Hindi size: 42M type: transcription - compressed: true + download_type: http+zip - name: big url: https://alphacephei.com/vosk/models/vosk-model-hi-0.22.zip description: Big accurate model for servers size: 1.5Gb type: transcription - compressed: true + download_type: http+zip Czech: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-cs-0.4-rhasspy.zip description: Lightweight model for Czech from Rhasspy project size: 44M type: transcription - compressed: true + download_type: http+zip Polish: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-pl-0.22.zip description: Lightweight model for Polish size: 50M type: transcription - compressed: true + download_type: http+zip Uzbek: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-uz-0.22.zip description: Lightweight model for Uzbek size: 49M type: transcription - compressed: true + download_type: http+zip Korean: - name: small url: https://alphacephei.com/vosk/models/vosk-model-small-ko-0.22.zip description: Lightweight model for Korean size: 82M type: transcription - compressed: true + download_type: http+zip Breton: - name: big url: https://alphacephei.com/vosk/models/vosk-model-br-0.8.zip @@ -366,4 +490,4 @@ Breton: project size: 70M type: transcription - compressed: true + download_type: http+zip diff --git a/server/scripts/generate_models_list.py b/server/scripts/generate_models_list.py index 2d3737ef..2092a5d6 100644 --- a/server/scripts/generate_models_list.py +++ b/server/scripts/generate_models_list.py @@ -1,12 +1,45 @@ from collections import defaultdict from pathlib import Path +import huggingface_hub import requests import yaml from bs4 import BeautifulSoup +WHISPER_MODELS = { + "tiny": "guillaumekln/faster-whisper-tiny", + "base": "guillaumekln/faster-whisper-base", + "small": "guillaumekln/faster-whisper-small", + "medium": "guillaumekln/faster-whisper-medium", + "large-v1": "guillaumekln/faster-whisper-large-v1", + "large-v2": "guillaumekln/faster-whisper-large-v2", + "tiny.en": "guillaumekln/faster-whisper-tiny.en", + "base.en": "guillaumekln/faster-whisper-base.en", + "small.en": "guillaumekln/faster-whisper-small.en", + "medium.en": "guillaumekln/faster-whisper-medium.en", +} + HARDCODED_MODELS = [] +models = [] + +api = huggingface_hub.HfApi() +for name, url in WHISPER_MODELS.items(): + repo_info = api.repo_info(url, files_metadata=True) + models.append( + { + "lang": "English" if name.endswith(".en") else "Universal", + "name": f"whisper-{name}", + "url": url, + "description": "Whisper model doing both transcription and punctuation reconstruction", + "size": f"{int(sum(f.size for f in repo_info.siblings) / 1024 / 1024)}M", + "type": "whisper", + "download_type": "huggingface", + }, + ) + +models.extend(HARDCODED_MODELS) + r = requests.get("https://alphacephei.com/vosk/models") assert r.status_code == 200 soup = BeautifulSoup(r.content, "html.parser") @@ -14,8 +47,6 @@ columns = [x.text for x in table.find_all("th")] rows = table.find("tbody").find_all("tr") - -models = HARDCODED_MODELS current_lang = None for row in rows: if strong := row.find("strong"): @@ -26,8 +57,7 @@ ), "no previous language heading found, probably the format changed :(" raw = {k: v for k, v in zip(columns, row.find_all("td"))} - if current_lang == "English Other" or "not" in raw["Notes"].text.lower(): - continue + current_lang = current_lang.replace("Other", "").strip() if current_lang == "Speaker identification model": continue @@ -46,7 +76,7 @@ description=raw["Notes"].decode_contents(), size=raw["Size"].text, type="transcription", - compressed=True, + download_type="http+zip", ) models += [model]