Add files via upload
workspace/api.py (new file, 40 lines)
@@ -0,0 +1,40 @@
import math
import time
import uuid

import torch
import uvicorn
from fastapi import FastAPI, status, Path, Query, File, UploadFile, Form, Request
from fastapi.responses import HTMLResponse
from pydantic import BaseModel, Field  # BaseModel was used but never imported

from app import *

# The original file referenced an undefined global `request_id`; a per-process
# UUID is generated here as a fallback so the module is importable as-is.
request_id = uuid.uuid4().hex

app = FastAPI()


class HealthCheck(BaseModel):
    """Response model to validate and return when performing a health check."""

    status: str = "OK"
    sttid: str = request_id
    # default_factory evaluates per response, not once at import time.
    timestamp: int = Field(default_factory=lambda: math.floor(time.time()))


@app.get(
    "/health",
    tags=["healthcheck"],
    summary="Perform a Health Check",
    response_description="Return HTTP Status Code 200 (OK)",
    status_code=status.HTTP_200_OK,
    response_model=HealthCheck,
)
async def get_health() -> HealthCheck:
    """
    ## Perform a Health Check

    Endpoint to perform a health check on. It is primarily intended for
    Docker, to ensure robust container orchestration and management are in
    place. Other services that rely on the API functioning properly will not
    deploy if this endpoint returns any HTTP status code other than 200 (OK).

    Returns:
        HealthCheck: a JSON response with the health status
    """
    return HealthCheck(status="OK", sttid=request_id, timestamp=math.floor(time.time()))


@app.get("/")
def read_root():
    return {"hello": "world!"}
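Note that api.py imports uvicorn but never starts a server. A minimal way to serve the app (a sketch; the host and port below are illustrative, not taken from the commit):

import uvicorn

if __name__ == "__main__":
    # Serve the FastAPI app defined in workspace/api.py.
    uvicorn.run("api:app", host="0.0.0.0", port=8000)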
workspace/app.py (new file, 2 lines)
@@ -0,0 +1,2 @@
import numpy as np
import torch
workspace/config.json5 (new file, 147 lines)
@@ -0,0 +1,147 @@
{
    "models": [
        // Configuration for the built-in models. You can remove any of these
        // if you don't want to use the default models.
        {
            "name": "tiny",
            "url": "tiny"
        },
        {
            "name": "base",
            "url": "base"
        },
        {
            "name": "small",
            "url": "small"
        },
        {
            "name": "medium",
            "url": "medium"
        },
        {
            "name": "large",
            "url": "large"
        },
        {
            "name": "large-v2",
            "url": "large-v2"
        },
        {
            "name": "large-v3",
            "url": "large-v3"
        },
        // Uncomment to add custom Japanese models
        //{
        //    "name": "whisper-large-v2-mix-jp",
        //    "url": "vumichien/whisper-large-v2-mix-jp",
        //    // The type of the model. Can be "huggingface" or "whisper" - "whisper" is the default.
        //    // HuggingFace models are loaded using the HuggingFace transformers library and then converted to Whisper models.
        //    "type": "huggingface",
        //},
        //{
        //    "name": "local-model",
        //    "url": "path/to/local/model",
        //},
        //{
        //    "name": "remote-model",
        //    "url": "https://example.com/path/to/model",
        //}
    ],
    // Configuration options that will be used if they are not specified in the command line arguments.

    // * WEBUI options *

    // Maximum audio file length in seconds, or -1 for no limit. Ignored by CLI.
    "input_audio_max_duration": 600,
    // True to share the app on HuggingFace.
    "share": false,
    // The host or IP to bind to. If None, bind to localhost.
    "server_name": null,
    // The port to bind to.
    "server_port": 7860,
    // The number of workers to use for the web server. Use -1 to disable queueing.
    "queue_concurrency_count": 1,
    // Whether or not to automatically delete all uploaded files, to save disk space.
    "delete_uploaded_files": true,

    // * General options *

    // The default implementation to use for Whisper. Can be "whisper" or "faster-whisper".
    // Note that you must install the requirements for either faster-whisper (requirements-fasterWhisper.txt)
    // or whisper (requirements.txt).
    "whisper_implementation": "whisper",
    // "diarization": true

    // The default model name.
    "default_model_name": "medium",
    // The default VAD.
    "default_vad": "silero-vad",
    // A comma-delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.
    "vad_parallel_devices": "",
    // The number of CPU cores to use for VAD pre-processing.
    "vad_cpu_cores": 1,
    // The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or None for no timeout.
    "vad_process_timeout": 1800,
    // True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.
    "auto_parallel": false,
    // Directory to save the outputs (CLI will use the current directory if not specified)
    "output_dir": null,
    // The path to save model files; uses ~/.cache/whisper by default
    "model_dir": null,
    // Device to use for PyTorch inference, or null to use the default device
    "device": null,
    // Whether to print out progress and debug messages
    "verbose": true,
    // Whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')
    "task": "transcribe",
    // Language spoken in the audio; leave empty/None to perform language detection
    // "language": "ko",
    "language": "",
    // The window size (in seconds) to merge voice segments
    "vad_merge_window": 5,
    // The maximum size (in seconds) of a voice segment
    "vad_max_merge_size": 30,
    // The padding (in seconds) to add to each voice segment
    "vad_padding": 1,
    // Whether to prepend the initial prompt to each VAD segment (prepend_all_segments) or just the first segment (prepend_first_segment)
    "vad_initial_prompt_mode": "prepend_first_segment",
    // The window size of the prompt to pass to Whisper
    "vad_prompt_window": 3,
    // Temperature to use for sampling
    "temperature": 0,
    // Number of candidates when sampling with non-zero temperature
    "best_of": 5,
    // Number of beams in beam search; only applicable when temperature is zero
    "beam_size": 5,
    // Optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424; the default (1.0) is equivalent to conventional beam search
    "patience": 1,
    // Optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144; uses simple length normalization by default
    "length_penalty": null,
    // Comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuation
    "suppress_tokens": "-1",
    // Optional text to provide as a prompt for the first window
    "initial_prompt": null,
    // If true, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop
    "condition_on_previous_text": true,
    // Whether to perform inference in fp16; true by default
    "fp16": true,
    // The compute type used by faster-whisper. Can be "auto", "int8", "int16", or "float16".
    "compute_type": "auto",
    // Temperature to increase by when falling back because the decoding fails to meet either of the thresholds below
    "temperature_increment_on_fallback": 0.2,
    // If the gzip compression ratio is higher than this value, treat the decoding as failed
    "compression_ratio_threshold": 2.4,
    // If the average log probability is lower than this value, treat the decoding as failed
    "logprob_threshold": -1.0,
    // If the probability of the <no-speech> token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence
    "no_speech_threshold": 0.6,

    // (experimental) extract word-level timestamps and refine the results based on them
    "word_timestamps": false,
    // If word_timestamps is true, merge these punctuation symbols with the next word
    "prepend_punctuations": "\"\'“¿([{-",
    // If word_timestamps is true, merge these punctuation symbols with the previous word
    "append_punctuations": "\"\'.。,,!!??::”)]}、",
    // (requires --word_timestamps True) underline each word as it is spoken in srt and vtt
    "highlight_words": false,
}
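Since this config relies on JSON5 features (comments, trailing commas) that the standard json module rejects, it presumably has to be read with a JSON5 parser. A minimal sketch, assuming the third-party json5 package, which is not shown in this commit:

import json5  # assumed dependency: pip install json5

# Parse the JSON5 config into a plain dict.
with open("workspace/config.json5", "r", encoding="utf-8") as f:
    config = json5.load(f)

print(config["default_model_name"])  # -> "medium"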
workspace/option.py (new file, 11 lines)
@@ -0,0 +1,11 @@
import torch

# Use the first CUDA GPU if available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class dict_options:
    # Default options; note these are class attributes, shared by all instances.
    opt = {}
    opt["device"] = device
    opt["report_host"] = "http://localhost:7890"

    def call_default(self):
        return self.opt
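Intended usage is presumably along these lines (a sketch; since opt is a class attribute, every instance returns the same shared dict):

from option import dict_options

opts = dict_options().call_default()
print(opts["device"], opts["report_host"])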