first commit

2025-01-08 14:01:53 +09:00
commit e387382951
21 changed files with 72432 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,26 @@
+## editors
+/.idea
+/.vscode
+
+## system files
+.DS_Store
+
+## npm
+/node_modules/
+/npm-debug.log
+
+## testing
+/coverage/
+
+## temp folders
+/.tmp/
+
+# build
+/_site/
+/dist/
+/out-tsc/
+
+storybook-static
+custom-elements.json
+
+yarn.lock
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Ed Summers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -0,0 +1,36 @@
+# whisper-transcript
+
+이 웹 컴포넌트는 Whisper의 JSON 출력 결과를 그래픽으로 시각화하여 각 단어의 신뢰도와 타이밍을 확인할 수 있게 해줍니다.
+
+## 설치
+
+```bash
+yarn add whisper-transcript
+```
+
+## 사용법
+
+모듈을 로드합니다:
+
+```html
+<script type="module" src="https://cdn.jsdelivr.net/npm/whisper-transcript@latest/dist/index.js"></script>
+```
+
+그리고 아래와 같이 사용합니다:
+
+```html
+<whisper-transcript audio="media.mp3" url="media.json"></whisper-transcript>
+```
+
+비디오 파일도 가능합니다:
+
+```html
+<whisper-transcript video="media.mp4" url="media.json"></whisper-transcript>
+```
+
+## Develop
+
+```bash
+yarn install
+yarn start
+```
+
+이 명령어는 demo 디렉토리에 있는 페이지를 엽니다. 필요하다면 audio.mp3와 audio.json 파일을 교체하여 생성한 스크립트를 테스트하고 확인할 수 있습니다.
--- a/demo/audio.json
+++ b/demo/audio.json
--- a/demo/audio.mp3
+++ b/demo/audio.mp3
--- a/demo/index.html
+++ b/demo/index.html
@@ -0,0 +1,33 @@
+<!doctype html>
+<html lang="en-GB">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0, viewport-fit=cover" />
+  <style>
+    body {
+      background: #fafafa;
+      font-size: 1.5em;
+      font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Inter,Helvetica,Arial,sans-serif;
+      margin: 5% 10% 5% 10%;
+      background-color: black;
+    }
+  </style>
+</head>
+<body>
+  <div id="demo"></div>
+
+  <script type="module">
+    import { html, render } from 'lit';
+    import '../whisper-transcript.js';
+
+    render(
+      html`
+        <tooltip-color-guide></tooltip-color-guide>
+
+        <whisper-transcript audio="audio.mp3" url="audio.json"></whisper-transcript>
+      `,
+      document.querySelector('#demo')
+    );
+  </script>
+</body>
+</html>
--- a/docs/audio.json
+++ b/docs/audio.json
--- a/docs/audio.mp3
+++ b/docs/audio.mp3
--- a/docs/index.html
+++ b/docs/index.html
@@ -0,0 +1,51 @@
+<!doctype html>
+<html lang="en-GB">
+  <head>
+    <meta charset="utf-8">
+    <title>whisper-transcript demo</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0, viewport-fit=cover" />
+    <style>
+      body {
+        background: #fafafa;
+        font-size: 1.5em;
+        font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Inter,Helvetica,Arial,sans-serif;
+        margin: 2% 10% 5% 10%;
+      }
+
+      h1 {
+        text-align: center;
+      }
+    </style>
+    <script type="module" src="https://cdn.jsdelivr.net/npm/whisper-transcript@latest/dist/index.js"></script>
+  </head>
+
+  <body>
+    <h1>&lt;whisper-transcript&gt;</h1>
+
+    <p>This is an example of using the <a
+      href="https://github.com/edsu/whisper-transcript">whisper-transcript</a>
+    <a href="https://en.wikipedia.org/wiki/Web_Components">web component</a> to
+    display a <a href="http://github.com/openai/whisper">Whisper</a>
+    speech-to-text transcript that was generated using word
+    timestamps in order to visualize the confidence scores in the context of the
+    words and their sound:</p>
+
+    <pre><code>$ whisper --model small audio.mp3 --word_timestamps True</code></pre>
+
+    <p>Running this command generates an <a href="audio.json">audio.json</a> file which contains
+    detailed timestamp information for each word, and also a probability score
+    that indicates the confidence in Whisper's transcription. Note: I
+    intentionally ran with the small model in order to get more varied
+    probabilities in this example.</p>
+
+    <p>View the HTML source to see how to use the whisper-transcript web
+    component in your own pages. But it's basically: </p>
+
+    <pre><code>
+      &lt;script type="module" src="https://cdn.jsdelivr.net/npm/whisper-transcript@latest/dist/index.js"&gt;&lt;/script&gt;
+      &lt;whisper-transcript audio="audio.mp3" url="audio.json"&gt;&lt;/whisper-transcript&gt;
+    </code></pre>
+
+    <whisper-transcript audio="audio.mp3" url="audio.json"></whisper-transcript>
+  </body>
+</html>
--- a/docs/index.js.map
+++ b/docs/index.js.map
--- a/docs/whisper-transcript.min.js
+++ b/docs/whisper-transcript.min.js
--- a/index.js
+++ b/index.js
@@ -0,0 +1,6 @@
+import './whisper-transcript'; 
+
+export { WhisperTranscript } from './src/WhisperTranscript.js';
+export { WhisperSegment } from './src/WhisperSegment.js';
+export { WhisperWord } from './src/WhisperWord.js';
+export { WhisperMedia } from './src/WhisperMedia.js';
--- a/package.json
+++ b/package.json
@@ -0,0 +1,32 @@
+{
+  "name": "whisper-transcript",
+  "description": "A web-component for viewing a Whisper JSON transcript",
+  "repository": "github:edsu/whisper-transcript",
+  "license": "MIT",
+  "author": "whisper-transcript",
+  "version": "0.0.7",
+  "type": "module",
+  "main": "index.js",
+  "module": "index.js",
+  "exports": {
+    ".": "./index.js",
+    "./whisper-transcript.js": "./whisper-transcript.js"
+  },
+  "scripts": {
+    "analyze": "cem analyze --litelement",
+    "start": "web-dev-server",
+    "build": "rollup -c"
+  },
+  "dependencies": {
+    "lit": "^3.1.4"
+  },
+  "devDependencies": {
+    "@custom-elements-manifest/analyzer": "^0.10.3",
+    "@web/dev-server": "^0.4.6",
+    "koa-range": "^0.3.0",
+    "rollup": "^4.27.3",
+    "rollup-plugin-node-resolve": "^5.2.0"
+  },
+  "customElements": "custom-elements.json"
+}
--- a/rollup.config.js
+++ b/rollup.config.js
@@ -0,0 +1,14 @@
+import resolve from 'rollup-plugin-node-resolve';
+
+
+export default {
+  input: ['index.js'],
+  output: {
+    file: 'dist/index.js', 
+    format: 'es',
+    sourcemap: true
+  },
+  plugins: [
+    resolve()
+  ]
+};
--- a/src/WhisperMedia.js
+++ b/src/WhisperMedia.js
@@ -0,0 +1,51 @@
+import { html, css, LitElement } from 'lit';
+
+export class WhisperMedia extends LitElement {
+  static styles = css`
+    audio {
+      width: 100%;
+    }
+
+    video {
+      max-height: 200px;
+      margin-left: auto;
+      margin-right: auto;
+    }
+  `;
+
+  static properties = {
+    audio: {type: String},
+    video: {type: String}
+  };
+
+  updateTime(time) {
+    window.dispatchEvent(
+      new CustomEvent("update-time", {
+        detail: {
+          time
+        }
+      })
+    );
+  }
+
+  render() {
+    let media = null;
+    if (this.audio) {
+      media = document.createElement('audio', this.audio);
+      media.src = this.audio;
+    } else {
+      media = document.createElement('video', this.video);
+      media.src = this.video;
+    }
+
+    if (media) {
+      media.controls = true;
+      media.preload = "auto";
+      media.ontimeupdate = (_) => this.updateTime(media.currentTime);
+      window.addEventListener('update-player-time', e => media.currentTime = e.detail.time);
+
+      return html`${media}`;
+    }
+  }
+}
+
--- a/src/WhisperSegment.js
+++ b/src/WhisperSegment.js
@@ -0,0 +1,82 @@
+import { html, css, LitElement } from 'lit';
+
+export class WhisperSegment extends LitElement {
+  static properties = {
+    text: { type: String },
+    start: { type: Number },
+    end: { type: Number },
+    words: { type: Array },
+    selected: { type: Boolean }
+  };
+
+  static styles = css`
+    .segment {
+      border: 2px solid #333;
+      padding: 5px;
+      margin: 2px;
+      border-radius: 5px;
+      display: flex;
+      flex-direction: row;
+    }
+
+    .selected {
+      background-color: #555;
+      border-color: black;
+    }
+
+    .times {
+      width: 325px;
+      float: left;
+      color: lightgray;
+      margin-right: 10px;
+    }
+
+    .words {
+      width: 100%;
+    }
+  `
+
+  constructor() {
+    super();
+
+    this.selected = false;
+
+    const that = this;
+    window.addEventListener('update-time', e => that.updateTime(e.detail.time));
+  }
+
+  updateTime(time) {
+    if ((time >= this.start) && (time <= this.end)) {
+      this.selected = true;
+    } else {
+      this.selected = false;
+    }
+  }
+
+  render() {
+    if (this.words) {
+      return html`
+        <li class="${this.selected ? 'selected' : ''} segment">
+          <div class="times">${hms(this.start)} - ${hms(this.end)}</div>
+          <div class="words">
+          ${this.words.map(w =>
+            html`<whisper-word title="${w.confidence || w.probability}" word="${w.text || w.word}" start="${w.start}" end="${w.end}" probability="${w.confidence}" />`
+          )}
+          </div>
+        </li>
+      `;
+    }
+  }
+}
+
+function hms(secs) {
+  const h = Math.trunc(secs / 60 / 60);
+  const m = Math.trunc((secs - (h * 60)) / 60);
+  const s = Math.trunc(secs) - (h * 60 + m * 60);
+
+  return `${pad(h)}:${pad(m)}:${pad(s)}`;
+}
+
+function pad(i) { // Stringify `i` and left-pad it with '0' to a minimum width of two characters.
+  return String(i).padStart(2, '0');
+}
--- a/src/WhisperTranscript.js
+++ b/src/WhisperTranscript.js
@@ -0,0 +1,79 @@
+import { html, css, LitElement } from 'lit';
+
+export class WhisperTranscript extends LitElement {
+  static styles = css`
+    :host {
+      display: block;
+      padding: 25px;
+      color: var(--whisper-transcript-text-color, #000);
+    }
+
+    ul {
+      list-style: none;
+      padding-left: 0;
+    }
+
+    .media {
+      text-align: center;
+    }
+
+    .whisper-transcript {
+      background: black;
+      color: white;
+    }
+  `;
+
+  static properties = {
+    url: {type: String},
+    audio: {type: String},
+    video: {type: String},
+    transcript: {type: Object, attribute: false},
+    time: {type: Number}
+  };
+
+  constructor() {
+    super();
+    this.time = 0;
+  }
+
+  connectedCallback() {
+    super.connectedCallback();
+    this.getTranscript();
+
+    const that = this;
+    window.addEventListener('update-time', e => that.time = e.detail.time);
+  }
+
+  async getTranscript() {
+    const resp = await fetch(this.url);
+    this.transcript = await resp.json();
+  }
+
+  render() {
+    if (! this.transcript) {
+      return html`Loading...`;
+    }
+
+    let media = null;
+
+
+    if (this.audio) {
+      media = html`<whisper-media audio="${this.audio}"></whisper-media>`;
+    } else {
+      media = html`<whisper-media video="${this.video}"></whisper-media>`;
+    }
+
+    return html`
+      <div class="whisper-transcript">
+        <div class="media">
+          ${media}
+        </div>
+        <ul>
+          ${this.transcript.segments.map(s =>
+            html`<whisper-segment .words="${s.words}" start="${s.start}" end="${s.end}" text="${s.text}" />`
+          )}
+        </ul>
+      </div>
+    `;
+  }
+}
--- a/src/WhisperWord.js
+++ b/src/WhisperWord.js
@@ -0,0 +1,91 @@
+import { html, css, LitElement } from 'lit';
+
+export class WhisperWord extends LitElement {
+
+  static properties = {
+    word: {type: String},
+    probability: {type: Number},
+    start: {type: Number},
+    end: {type: Number},
+    selected: {type: Boolean}
+  }
+
+  static styles = css`
+    span.word {
+      cursor: pointer;
+    }
+
+    span.selected {
+      text-decoration: underline;
+    }
+
+    span.mediocre {
+      color: yellow;
+    }
+
+    span.poor {
+      color: orange;
+    }
+
+    span.terrible {
+      color: red;
+    }
+  `;
+
+  constructor() {
+    super();
+
+    this.addEventListener('click', _ => this.updatePlayerTime());
+  }
+
+  connectedCallback() {
+    super.connectedCallback();
+
+    const that = this;
+    window.addEventListener('update-time', e => that.updateTime(e.detail.time));
+  }
+
+  updatePlayerTime() {
+    window.dispatchEvent(
+      new CustomEvent("update-player-time", {
+        detail: {
+          time: this.start
+        }
+      })
+    );
+  }
+
+  updateTime(time) {
+    if ((time >= this.start) && (time <= this.end)) {
+      this.selected = true;
+    } else {
+      this.selected = false;
+    }
+  }
+
+  getCssClass() {
+    let style = this.selected ? 'selected ' : '';
+    if (this.probability > .9) {
+      style += 'good';
+    } else if (this.probability > .7) {
+      style += 'mediocre';
+    } else if (this.probability > .5) {
+      style += 'poor';
+    } else {
+      style += 'terrible';
+    }
+
+    return style;
+  }
+
+  render() {
+    return html`
+      <span
+        data-start="${this.start}"
+        data-end="${this.end}"
+        class="word ${this.getCssClass()}">
+        ${this.word}
+      </span>
+    `
+  }
+}
--- a/src/tooltip.js
+++ b/src/tooltip.js
@@ -0,0 +1,56 @@
+import { html, css, LitElement } from 'lit';
+
+export class TooltipColorGuide extends LitElement { // "Color Guide" label that reveals a legend of probability thresholds and colors on hover.
+  static styles = css`
+    .tooltip {
+      position: relative;
+      display: inline-block;
+      cursor: pointer;
+    }
+
+    .tooltip-text {
+      visibility: hidden;
+      width: 200px;
+      background-color: black;
+      color: white;
+      text-align: left;
+      border-radius: 5px;
+      padding: 10px;
+      position: absolute;
+      z-index: 1;
+      top: 125%;
+      left: 50%;
+      transform: translateX(-50%);
+      opacity: 0;
+      transition: opacity 0.3s;
+    }
+    .tooltip span {
+      color : white;
+    }
+    .tooltip:hover .tooltip-text {
+      visibility: visible;
+      opacity: 1;
+    }
+
+    .color-box {
+      display: inline-block;
+      width: 12px;
+      height: 12px;
+      margin-right: 5px;
+    }
+  `;
+
+  render() { // Static markup only: the legend text is hard-coded and reads no properties.
+    return html`
+      <div class="tooltip">
+        <span>Color Guide</span>
+        <div class="tooltip-text">
+          <div><span class="color-box" style="background-color: white;"></span>this.probability > 0.9: (White)</div>
+          <div><span class="color-box" style="background-color: yellow;"></span>this.probability > 0.7: (Yellow)</div>
+          <div><span class="color-box" style="background-color: orange;"></span>this.probability > 0.5: (Orange)</div>
+          <div><span class="color-box" style="background-color: red;"></span>this.probability ≤ 0.5: (Red)</div>
+        </div>
+      </div>
+    `;
+  }
+}
--- a/web-dev-server.config.js
+++ b/web-dev-server.config.js
@@ -0,0 +1,33 @@
+// import { hmrPlugin, presets } from '@open-wc/dev-server-hmr';
+import range from 'koa-range';
+
+/** Use Hot Module replacement by adding --hmr to the start command */
+const hmr = process.argv.includes('--hmr');
+
+export default /** @type {import('@web/dev-server').DevServerConfig} */ ({
+  open: '/demo/',
+  /** Use regular watch mode if HMR is not enabled. */
+  watch: !hmr,
+  /** Resolve bare module imports */
+  nodeResolve: {
+    exportConditions: ['browser', 'development'],
+  },
+  middleware: [
+    // needed for audio to be served properly for chrome so that it can jump
+    // around in the audio stream when the user clicks
+    range
+  ],
+
+  /** Compile JS for older browsers. Requires @web/dev-server-esbuild plugin */
+  // esbuildTarget: 'auto'
+
+  /** Set appIndex to enable SPA routing */
+  // appIndex: 'demo/index.html',
+
+  plugins: [
+    /** Use Hot Module Replacement by uncommenting. Requires @open-wc/dev-server-hmr plugin */
+    // hmr && hmrPlugin({ exclude: ['**/*/node_modules/**/*'], presets: [presets.lit] }),
+  ],
+
+  // See documentation for all available options
+});
--- a/whisper-transcript.js
+++ b/whisper-transcript.js
@@ -0,0 +1,11 @@
+import { WhisperTranscript } from './src/WhisperTranscript.js';
+import { WhisperSegment } from './src/WhisperSegment.js';
+import { WhisperWord } from './src/WhisperWord.js';
+import { WhisperMedia } from './src/WhisperMedia.js';
+import { TooltipColorGuide } from './src/tooltip.js';
+
+window.customElements.define('tooltip-color-guide', TooltipColorGuide);
+window.customElements.define('whisper-transcript', WhisperTranscript);
+window.customElements.define('whisper-segment', WhisperSegment);
+window.customElements.define('whisper-word', WhisperWord);
+window.customElements.define('whisper-media', WhisperMedia);