Add files via upload
workspace/api.py (new file, 40 lines)
@@ -0,0 +1,40 @@
import math
import time
import uuid

import torch
import uvicorn
from fastapi import FastAPI, status, Path, Query, File, UploadFile, Form, Request
from fastapi.responses import HTMLResponse
from pydantic import BaseModel, Field  # BaseModel was used but never imported

from app import *

# The original file referenced an undefined global `request_id`; a per-process
# UUID is generated here as a fallback so the module is importable as-is.
request_id = uuid.uuid4().hex

app = FastAPI()


class HealthCheck(BaseModel):
    """Response model to validate and return when performing a health check."""

    status: str = "OK"
    sttid: str = request_id
    # default_factory evaluates per response, not once at import time.
    timestamp: int = Field(default_factory=lambda: math.floor(time.time()))


@app.get(
    "/health",
    tags=["healthcheck"],
    summary="Perform a Health Check",
    response_description="Return HTTP Status Code 200 (OK)",
    status_code=status.HTTP_200_OK,
    response_model=HealthCheck,
)
async def get_health() -> HealthCheck:
    """
    ## Perform a Health Check

    Endpoint to perform a health check on. It is primarily intended for
    Docker, to ensure robust container orchestration and management are in
    place. Other services that rely on the API functioning properly will not
    deploy if this endpoint returns any HTTP status code other than 200 (OK).

    Returns:
        HealthCheck: a JSON response with the health status
    """
    return HealthCheck(status="OK", sttid=request_id, timestamp=math.floor(time.time()))


@app.get("/")
def read_root():
    return {"hello": "world!"}
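Note that api.py imports uvicorn but never starts a server. A minimal way to serve the app (a sketch; the host and port below are illustrative, not taken from the commit):

import uvicorn

if __name__ == "__main__":
    # Serve the FastAPI app defined in workspace/api.py.
    uvicorn.run("api:app", host="0.0.0.0", port=8000)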
workspace/app.py (new file, 2 lines)
@@ -0,0 +1,2 @@
import numpy as np
import torch
workspace/config.json5 (new file, 147 lines)
@@ -0,0 +1,147 @@
{
    "models": [
        // Configuration for the built-in models. You can remove any of these
        // if you don't want to use the default models.
        {
            "name": "tiny",
            "url": "tiny"
        },
        {
            "name": "base",
            "url": "base"
        },
        {
            "name": "small",
            "url": "small"
        },
        {
            "name": "medium",
            "url": "medium"
        },
        {
            "name": "large",
            "url": "large"
        },
        {
            "name": "large-v2",
            "url": "large-v2"
        },
        {
            "name": "large-v3",
            "url": "large-v3"
        },
        // Uncomment to add custom Japanese models
        //{
        //    "name": "whisper-large-v2-mix-jp",
        //    "url": "vumichien/whisper-large-v2-mix-jp",
        //    // The type of the model. Can be "huggingface" or "whisper" - "whisper" is the default.
        //    // HuggingFace models are loaded using the HuggingFace transformers library and then converted to Whisper models.
        //    "type": "huggingface",
        //},
        //{
        //    "name": "local-model",
        //    "url": "path/to/local/model",
        //},
        //{
        //    "name": "remote-model",
        //    "url": "https://example.com/path/to/model",
        //}
    ],
    // Configuration options that will be used if they are not specified in the command line arguments.

    // * WEBUI options *

    // Maximum audio file length in seconds, or -1 for no limit. Ignored by CLI.
    "input_audio_max_duration": 600,
    // True to share the app on HuggingFace.
    "share": false,
    // The host or IP to bind to. If None, bind to localhost.
    "server_name": null,
    // The port to bind to.
    "server_port": 7860,
    // The number of workers to use for the web server. Use -1 to disable queueing.
    "queue_concurrency_count": 1,
    // Whether or not to automatically delete all uploaded files, to save disk space.
    "delete_uploaded_files": true,

    // * General options *

    // The default implementation to use for Whisper. Can be "whisper" or "faster-whisper".
    // Note that you must install the requirements for either faster-whisper (requirements-fasterWhisper.txt)
    // or whisper (requirements.txt).
    "whisper_implementation": "whisper",
    // "diarization": true

    // The default model name.
    "default_model_name": "medium",
    // The default VAD.
    "default_vad": "silero-vad",
    // A comma-delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.
    "vad_parallel_devices": "",
    // The number of CPU cores to use for VAD pre-processing.
    "vad_cpu_cores": 1,
    // The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or None for no timeout.
    "vad_process_timeout": 1800,
    // True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.
    "auto_parallel": false,
    // Directory to save the outputs (CLI will use the current directory if not specified)
    "output_dir": null,
    // The path to save model files; uses ~/.cache/whisper by default
    "model_dir": null,
    // Device to use for PyTorch inference, or null to use the default device
    "device": null,
    // Whether to print out progress and debug messages
    "verbose": true,
    // Whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')
    "task": "transcribe",
    // Language spoken in the audio; leave empty/None to perform language detection
    // "language": "ko",
    "language": "",
    // The window size (in seconds) to merge voice segments
    "vad_merge_window": 5,
    // The maximum size (in seconds) of a voice segment
    "vad_max_merge_size": 30,
    // The padding (in seconds) to add to each voice segment
    "vad_padding": 1,
    // Whether to prepend the initial prompt to each VAD segment (prepend_all_segments) or just the first segment (prepend_first_segment)
    "vad_initial_prompt_mode": "prepend_first_segment",
    // The window size of the prompt to pass to Whisper
    "vad_prompt_window": 3,
    // Temperature to use for sampling
    "temperature": 0,
    // Number of candidates when sampling with non-zero temperature
    "best_of": 5,
    // Number of beams in beam search; only applicable when temperature is zero
    "beam_size": 5,
    // Optional patience value to use in beam decoding, as in https://arxiv.org/abs/2204.05424; the default (1.0) is equivalent to conventional beam search
    "patience": 1,
    // Optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144; uses simple length normalization by default
    "length_penalty": null,
    // Comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuation
    "suppress_tokens": "-1",
    // Optional text to provide as a prompt for the first window
    "initial_prompt": null,
    // If true, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop
    "condition_on_previous_text": true,
    // Whether to perform inference in fp16; true by default
    "fp16": true,
    // The compute type used by faster-whisper. Can be "auto", "int8", "int16", or "float16".
    "compute_type": "auto",
    // Temperature to increase by when falling back because the decoding fails to meet either of the thresholds below
    "temperature_increment_on_fallback": 0.2,
    // If the gzip compression ratio is higher than this value, treat the decoding as failed
    "compression_ratio_threshold": 2.4,
    // If the average log probability is lower than this value, treat the decoding as failed
    "logprob_threshold": -1.0,
    // If the probability of the <no-speech> token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence
    "no_speech_threshold": 0.6,

    // (experimental) extract word-level timestamps and refine the results based on them
    "word_timestamps": false,
    // If word_timestamps is true, merge these punctuation symbols with the next word
    "prepend_punctuations": "\"\'“¿([{-",
    // If word_timestamps is true, merge these punctuation symbols with the previous word
    "append_punctuations": "\"\'.。,,!!??::”)]}、",
    // (requires --word_timestamps True) underline each word as it is spoken in srt and vtt
    "highlight_words": false,
}
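Since this config relies on JSON5 features (comments, trailing commas) that the standard json module rejects, it presumably has to be read with a JSON5 parser. A minimal sketch, assuming the third-party json5 package, which is not shown in this commit:

import json5  # assumed dependency: pip install json5

# Parse the JSON5 config into a plain dict.
with open("workspace/config.json5", "r", encoding="utf-8") as f:
    config = json5.load(f)

print(config["default_model_name"])  # -> "medium"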
workspace/option.py (new file, 11 lines)
@@ -0,0 +1,11 @@
import torch

# Use the first CUDA GPU if available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class dict_options:
    # Default options; note these are class attributes, shared by all instances.
    opt = {}
    opt["device"] = device
    opt["report_host"] = "http://localhost:7890"

    def call_default(self):
        return self.opt
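Intended usage is presumably along these lines (a sketch; since opt is a class attribute, every instance returns the same shared dict):

from option import dict_options

opts = dict_options().call_default()
print(opts["device"], opts["report_host"])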