first commit
This commit is contained in:
26
.gitignore
vendored
Normal file
26
.gitignore
vendored
Normal file
@@ -0,0 +1,26 @@
|
||||
## editors
|
||||
/.idea
|
||||
/.vscode
|
||||
|
||||
## system files
|
||||
.DS_Store
|
||||
|
||||
## npm
|
||||
/node_modules/
|
||||
/npm-debug.log
|
||||
|
||||
## testing
|
||||
/coverage/
|
||||
|
||||
## temp folders
|
||||
/.tmp/
|
||||
|
||||
## build
|
||||
/_site/
|
||||
/dist/
|
||||
/out-tsc/
|
||||
|
||||
storybook-static
|
||||
custom-elements.json
|
||||
|
||||
yarn.lock
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 Ed Summers
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
36
README.md
Normal file
36
README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# whisper-transcript
|
||||
|
||||
이 웹 컴포넌트는 Whisper의 JSON 출력 결과를 그래픽으로 시각화하여 각 단어의 신뢰도와 타이밍을 확인할 수 있게 해줍니다.
|
||||
|
||||
## 설치
|
||||
|
||||
```bash
|
||||
yarn add whisper-transcript
|
||||
```
|
||||
|
||||
## 사용법
|
||||
|
||||
모듈을 로드합니다:
|
||||
|
||||
```html
|
||||
<script type="module" src="https://cdn.jsdelivr.net/npm/whisper-transcript@latest/dist/index.js"></script>
|
||||
|
||||
```

그리고 아래와 같이 사용합니다:

```html
|
||||
|
||||
<whisper-transcript audio="media.mp3" url="media.json"></whisper-transcript>
|
||||
```
|
||||
|
||||
비디오 파일도 가능합니다:
|
||||
|
||||
```html
|
||||
<whisper-transcript video="media.mp4" url="media.json"></whisper-transcript>
|
||||
```
|
||||
|
||||
## 개발
|
||||
|
||||
```bash
|
||||
yarn install
|
||||
yarn start
|
||||
```
|
||||
|
||||
이 명령어는 demo 디렉토리에 있는 페이지를 엽니다. 필요하다면 audio.mp3와 audio.json 파일을 교체하여 생성한 스크립트를 테스트하고 확인할 수 있습니다.
|
||||
7209
demo/audio.json
Normal file
7209
demo/audio.json
Normal file
File diff suppressed because one or more lines are too long
BIN
demo/audio.mp3
Normal file
BIN
demo/audio.mp3
Normal file
Binary file not shown.
33
demo/index.html
Normal file
33
demo/index.html
Normal file
@@ -0,0 +1,33 @@
|
||||
<!doctype html>
|
||||
<html lang="en-GB">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, viewport-fit=cover" />
|
||||
<style>
|
||||
body {
|
||||
background: #fafafa;
|
||||
font-size: 1.5em;
|
||||
font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Inter,Helvetica,Arial,sans-serif;
|
||||
margin: 5% 10% 5% 10%;
|
||||
background-color: black;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div id="demo"></div>
|
||||
|
||||
<script type="module">
|
||||
import { html, render } from 'lit';
|
||||
import '../whisper-transcript.js';
|
||||
|
||||
render(
|
||||
html`
|
||||
<tooltip-color-guide></tooltip-color-guide>
|
||||
|
||||
<whisper-transcript audio="audio.mp3" url="audio.json"></whisper-transcript>
|
||||
`,
|
||||
document.querySelector('#demo')
|
||||
);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
64278
docs/audio.json
Normal file
64278
docs/audio.json
Normal file
File diff suppressed because one or more lines are too long
BIN
docs/audio.mp3
Normal file
BIN
docs/audio.mp3
Normal file
Binary file not shown.
51
docs/index.html
Normal file
51
docs/index.html
Normal file
@@ -0,0 +1,51 @@
|
||||
<!doctype html>
|
||||
<html lang="en-GB">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>whisper-transcript demo</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, viewport-fit=cover" />
|
||||
<style>
|
||||
body {
|
||||
background: #fafafa;
|
||||
font-size: 1.5em;
|
||||
font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Inter,Helvetica,Arial,sans-serif;
|
||||
margin: 2% 10% 5% 10%;
|
||||
}
|
||||
|
||||
h1 {
|
||||
text-align: center;
|
||||
}
|
||||
</style>
|
||||
<script type="module" src="https://cdn.jsdelivr.net/npm/whisper-transcript@latest/dist/index.js"></script>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1><whisper-transcript></h1>
|
||||
|
||||
<p>This is an example of using the <a
|
||||
href="https://github.com/edsu/whisper-transcript">whisper-transcript</a>
|
||||
<a href="https://en.wikipedia.org/wiki/Web_Components">web component</a> to
|
||||
display a <a href="http://github.com/openai/whisper">Whisper</a>
|
||||
speech-to-text transcript that was generated using word
|
||||
timestamps in order to visualize the confidence scores in the context of the
|
||||
words and their sound:</p>
|
||||
|
||||
<pre><code>$ whisper --model small audio.mp3 --word_timestamps True</code></pre>
|
||||
|
||||
<p>Running this command generates an <a href="audio.json">audio.json</a> file which contains
|
||||
detailed timestamp information for each word, and also a probability score
|
||||
that indicates the confidence in Whisper's transcription. Note: I
|
||||
intentionally ran with the small model in order to get more varied
|
||||
probabilities in this example.</p>
|
||||
|
||||
<p>View the HTML source to see how to use the whisper-transcript web
|
||||
component in your own pages. But it's basically: </p>
|
||||
|
||||
<pre><code>
|
||||
<script type="module" src="https://cdn.jsdelivr.net/npm/whisper-transcript@latest/dist/index.js"></script>
|
||||
<whisper-transcript audio="audio.mp3" url="audio.json"></whisper-transcript>
|
||||
</code></pre>
|
||||
|
||||
<whisper-transcript audio="audio.mp3" url="audio.json"></whisper-transcript>
|
||||
</body>
|
||||
</html>
|
||||
1
docs/index.js.map
Normal file
1
docs/index.js.map
Normal file
File diff suppressed because one or more lines are too long
322
docs/whisper-transcript.min.js
vendored
Normal file
322
docs/whisper-transcript.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
6
index.js
Normal file
6
index.js
Normal file
@@ -0,0 +1,6 @@
|
||||
import './whisper-transcript';
|
||||
|
||||
export { WhisperTranscript } from './src/WhisperTranscript.js';
|
||||
export { WhisperSegment } from './src/WhisperSegment.js';
|
||||
export { WhisperWord } from './src/WhisperWord.js';
|
||||
export { WhisperMedia } from './src/WhisperMedia.js';
|
||||
32
package.json
Normal file
32
package.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"name": "whisper-transcript",
|
||||
"description": "A web-component for viewing a Whisper JSON transcript",
|
||||
"repository": "github:edsu/whisper-transcript",
|
||||
"license": "MIT",
|
||||
"author": "whisper-transcript",
|
||||
"version": "0.0.7",
|
||||
"type": "module",
|
||||
"main": "index.js",
|
||||
"module": "index.js",
|
||||
"exports": {
|
||||
".": "./index.js",
|
||||
"./whisper-transcript.js": "./whisper-transcript.js"
|
||||
},
|
||||
"scripts": {
|
||||
"analyze": "cem analyze --litelement",
|
||||
"start": "web-dev-server",
|
||||
"build": "rollup -c"
|
||||
},
|
||||
"dependencies": {
|
||||
"lit": "^3.1.4",
|
||||
"whisper-transcript": "^0.0.7"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@custom-elements-manifest/analyzer": "^0.10.3",
|
||||
"@web/dev-server": "^0.4.6",
|
||||
"koa-range": "^0.3.0",
|
||||
"rollup": "^4.27.3",
|
||||
"rollup-plugin-node-resolve": "^5.2.0"
|
||||
},
|
||||
"customElements": "custom-elements.json"
|
||||
}
|
||||
14
rollup.config.js
Normal file
14
rollup.config.js
Normal file
@@ -0,0 +1,14 @@
|
||||
import resolve from 'rollup-plugin-node-resolve';
|
||||
|
||||
|
||||
// Where the single ES-module bundle (plus its source map) is written.
const bundleOutput = {
  file: 'dist/index.js',
  format: 'es',
  sourcemap: true
};

/**
 * Rollup build configuration: bundles `index.js` into one ES module, using
 * the node-resolve plugin so bare module specifiers can be resolved.
 */
const buildConfig = {
  input: ['index.js'],
  output: bundleOutput,
  plugins: [resolve()]
};

export default buildConfig;
|
||||
51
src/WhisperMedia.js
Normal file
51
src/WhisperMedia.js
Normal file
@@ -0,0 +1,51 @@
|
||||
import { html, css, LitElement } from 'lit';
|
||||
|
||||
/**
 * `<whisper-media>` renders the audio or video player for a transcript.
 *
 * It talks to the other components through two window-level events:
 *  - dispatches `update-time` (detail: `{ time }`) as playback progresses;
 *  - listens for `update-player-time` (detail: `{ time }`) and seeks the
 *    player to that position.
 *
 * Properties / attributes:
 *  - `audio`: URL of an audio file (takes precedence over `video`).
 *  - `video`: URL of a video file.
 */
export class WhisperMedia extends LitElement {
  static styles = css`
    audio {
      width: 100%;
    }

    video {
      max-height: 200px;
      margin-left: auto;
      margin-right: auto;
    }
  `;

  static properties = {
    audio: {type: String},
    video: {type: String}
  };

  // Kept as a stable reference so the exact same function can be removed
  // from `window` again when the element is disconnected.
  #seekPlayer = (e) => {
    const player = this.renderRoot?.querySelector('audio, video');
    if (player) {
      player.currentTime = e.detail.time;
    }
  };

  // Forwards the player's native `timeupdate` event to the rest of the page.
  #reportTime = (e) => this.updateTime(e.target.currentTime);

  connectedCallback() {
    super.connectedCallback();
    // Registered once per connection rather than once per render, so
    // re-rendering no longer stacks up duplicate listeners.
    window.addEventListener('update-player-time', this.#seekPlayer);
  }

  disconnectedCallback() {
    window.removeEventListener('update-player-time', this.#seekPlayer);
    super.disconnectedCallback();
  }

  /**
   * Broadcast the current playback position.
   * @param {number} time - playback position as reported by the player's `currentTime`.
   */
  updateTime(time) {
    window.dispatchEvent(
      new CustomEvent("update-time", {
        detail: {
          time
        }
      })
    );
  }

  /**
   * Render the player declaratively so Lit reuses the same element across
   * updates. Nothing is rendered when neither `audio` nor `video` is set.
   */
  render() {
    if (this.audio) {
      return html`<audio src="${this.audio}" controls preload="auto" @timeupdate="${this.#reportTime}"></audio>`;
    }

    if (this.video) {
      return html`<video src="${this.video}" controls preload="auto" @timeupdate="${this.#reportTime}"></video>`;
    }

    return html``;
  }
}
|
||||
|
||||
82
src/WhisperSegment.js
Normal file
82
src/WhisperSegment.js
Normal file
@@ -0,0 +1,82 @@
|
||||
import { html, css, LitElement } from 'lit';
|
||||
|
||||
/**
 * `<whisper-segment>` displays one transcript segment: its time range and
 * the words it contains (each rendered as a `<whisper-word>`).
 *
 * The segment highlights itself while the playback position, received via
 * the window-level `update-time` event, falls inside `[start, end]`.
 *
 * Properties:
 *  - `text`: segment text (not rendered directly).
 *  - `start` / `end`: bounds of the segment, compared against the playback time.
 *  - `words`: array of word objects; each may use `text` or `word` for the
 *    label and `confidence` or `probability` for the score.
 *  - `selected`: whether the segment is currently highlighted.
 */
export class WhisperSegment extends LitElement {
  static properties = {
    text: { type: String },
    start: { type: Number },
    end: { type: Number },
    words: { type: Array },
    selected: { type: Boolean }
  };

  static styles = css`
    .segment {
      border: 2px solid #333;
      padding: 5px;
      margin: 2px;
      border-radius: 5px;
      display: flex;
      flex-direction: row;
    }

    .selected {
      background-color: #555;
      border-color: black;
    }

    .times {
      width: 325px;
      float: left;
      color: lightgray;
      margin-right: 10px;
    }

    .words {
      width: 100%;
    }
  `;

  // Stable reference so the listener can be removed on disconnect.
  #onUpdateTime = (e) => this.updateTime(e.detail.time);

  constructor() {
    super();

    this.selected = false;
  }

  connectedCallback() {
    super.connectedCallback();
    window.addEventListener('update-time', this.#onUpdateTime);
  }

  disconnectedCallback() {
    // Without this, every segment ever created would stay reachable from
    // `window` and keep reacting to playback after leaving the document.
    window.removeEventListener('update-time', this.#onUpdateTime);
    super.disconnectedCallback();
  }

  /**
   * Mark the segment as selected when `time` lies within its bounds.
   * @param {number} time - current playback position.
   */
  updateTime(time) {
    this.selected = (time >= this.start) && (time <= this.end);
  }

  render() {
    if (!this.words) {
      return undefined;
    }

    return html`
      <li class="${this.selected ? 'selected' : ''} segment">
        <div class="times">${hms(this.start)} - ${hms(this.end)}</div>
        <div class="words">
          ${this.words.map(w => {
            // Fall back to `probability` when `confidence` is absent, so the
            // colour class is computed from the same score as the tooltip.
            const score = w.confidence ?? w.probability;
            const label = w.text || w.word;
            return html`<whisper-word title="${score}" word="${label}" start="${w.start}" end="${w.end}" probability="${score}"></whisper-word>`;
          })}
        </div>
      </li>
    `;
  }
}
|
||||
|
||||
/**
 * Format a duration in seconds as `HH:MM:SS` (fractions are truncated).
 *
 * @param {number} secs - duration in seconds.
 * @returns {string} zero-padded `HH:MM:SS` string.
 */
function hms(secs) {
  const total = Math.trunc(secs);
  // An hour is 3600 seconds; the previous code subtracted only `h * 60`,
  // which produced values such as "01:60:01" for durations past one hour.
  const h = Math.trunc(total / 3600);
  const m = Math.trunc((total % 3600) / 60);
  const s = total % 60;

  return `${pad(h)}:${pad(m)}:${pad(s)}`;
}

/**
 * Left-pad a value with zeros to at least two characters.
 *
 * @param {number|string} i - value to format.
 * @returns {string} the value as a string of width >= 2.
 */
function pad(i) {
  return String(i).padStart(2, '0');
}
|
||||
79
src/WhisperTranscript.js
Normal file
79
src/WhisperTranscript.js
Normal file
@@ -0,0 +1,79 @@
|
||||
import { html, css, LitElement } from 'lit';
|
||||
|
||||
/**
 * `<whisper-transcript>` fetches a Whisper JSON transcript and renders it as
 * a media player followed by a list of `<whisper-segment>` elements.
 *
 * Properties / attributes:
 *  - `url`: location of the transcript JSON (fetched when the element connects).
 *  - `audio`: URL of the audio file (takes precedence over `video`).
 *  - `video`: URL of the video file.
 *  - `transcript`: the parsed transcript object (property only).
 *  - `time`: last playback position received via the `update-time` event.
 */
export class WhisperTranscript extends LitElement {
  static styles = css`
    :host {
      display: block;
      padding: 25px;
      color: var(--whisper-transcript-text-color, #000);
    }

    ul {
      list-style: none;
      padding-left: 0;
    }

    .media {
      text-align: center;
    }

    .whisper-transcript {
      background: black;
      color: white;
    }
  `;

  static properties = {
    url: {type: String},
    audio: {type: String},
    video: {type: String},
    transcript: {type: Object, attribute: false},
    time: {type: Number}
  };

  // Last transcript loading failure, or null when loading has not failed.
  #loadError = null;

  // Stable reference so the listener can be removed on disconnect.
  #trackTime = (e) => {
    this.time = e.detail.time;
  };

  constructor() {
    super();
    this.time = 0;
  }

  connectedCallback() {
    super.connectedCallback();
    window.addEventListener('update-time', this.#trackTime);

    this.#loadError = null;
    // The promise is handled here so a failed download is reported and shown
    // instead of becoming an unhandled rejection with "Loading..." forever.
    this.getTranscript().catch((err) => {
      this.#loadError = err;
      console.error(`whisper-transcript: unable to load transcript from "${this.url}"`, err);
      this.requestUpdate();
    });
  }

  disconnectedCallback() {
    window.removeEventListener('update-time', this.#trackTime);
    super.disconnectedCallback();
  }

  /**
   * Fetch and parse the transcript JSON from `url` into `transcript`.
   * Does nothing when `url` is not set.
   *
   * @returns {Promise<void>}
   * @throws {Error} when the request fails, the server answers with a
   *   non-2xx status, or the body is not valid JSON.
   */
  async getTranscript() {
    if (!this.url) {
      return;
    }

    const resp = await fetch(this.url);
    if (!resp.ok) {
      throw new Error(`Transcript request failed: ${resp.status} ${resp.statusText}`);
    }
    this.transcript = await resp.json();
  }

  render() {
    if (!this.transcript) {
      return this.#loadError
        ? html`Unable to load transcript.`
        : html`Loading...`;
    }

    // Only render a player when a media source was actually provided.
    let media = '';
    if (this.audio) {
      media = html`<whisper-media audio="${this.audio}"></whisper-media>`;
    } else if (this.video) {
      media = html`<whisper-media video="${this.video}"></whisper-media>`;
    }

    // Tolerate transcripts that lack a `segments` array.
    const segments = this.transcript.segments ?? [];

    return html`
      <div class="whisper-transcript">
        <div class="media">
          ${media}
        </div>
        <ul>
          ${segments.map(s =>
            html`<whisper-segment .words="${s.words}" start="${s.start}" end="${s.end}" text="${s.text}"></whisper-segment>`
          )}
        </ul>
      </div>
    `;
  }
}
|
||||
91
src/WhisperWord.js
Normal file
91
src/WhisperWord.js
Normal file
@@ -0,0 +1,91 @@
|
||||
import { html, css, LitElement } from 'lit';
|
||||
|
||||
/**
 * `<whisper-word>` renders a single transcribed word, coloured by its
 * probability score and underlined while it is being spoken.
 *
 * Clicking the word dispatches a window-level `update-player-time` event
 * carrying the word's `start`; the word listens for `update-time` events to
 * decide whether it is currently selected.
 *
 * Properties / attributes:
 *  - `word`: the text to display.
 *  - `probability`: score used to choose the colour class.
 *  - `start` / `end`: bounds of the word, compared against the playback time.
 *  - `selected`: whether the word is currently highlighted.
 */
export class WhisperWord extends LitElement {

  static properties = {
    word: {type: String},
    probability: {type: Number},
    start: {type: Number},
    end: {type: Number},
    selected: {type: Boolean}
  }

  static styles = css`
    span.word {
      cursor: pointer;
    }

    span.selected {
      text-decoration: underline;
    }

    span.mediocre {
      color: yellow;
    }

    span.poor {
      color: orange;
    }

    span.terrible {
      color: red;
    }
  `;

  // Stable reference so the listener can be removed on disconnect.
  #onUpdateTime = (e) => this.updateTime(e.detail.time);

  constructor() {
    super();

    this.selected = false;
    // A listener on the host itself needs no cleanup: it goes away together
    // with the element.
    this.addEventListener('click', _ => this.updatePlayerTime());
  }

  connectedCallback() {
    super.connectedCallback();
    window.addEventListener('update-time', this.#onUpdateTime);
  }

  disconnectedCallback() {
    // A transcript creates one element per word; without this removal each
    // of them would stay registered on `window` after leaving the document.
    window.removeEventListener('update-time', this.#onUpdateTime);
    super.disconnectedCallback();
  }

  /** Ask the media player to seek to the start of this word. */
  updatePlayerTime() {
    window.dispatchEvent(
      new CustomEvent("update-player-time", {
        detail: {
          time: this.start
        }
      })
    );
  }

  /**
   * Mark the word as selected when `time` lies within its bounds.
   * @param {number} time - current playback position.
   */
  updateTime(time) {
    this.selected = (time >= this.start) && (time <= this.end);
  }

  /**
   * Build the class list for the word: `selected` when highlighted, plus a
   * quality bucket (`good` > 0.9, `mediocre` > 0.7, `poor` > 0.5, otherwise
   * `terrible`; a missing or NaN probability also ends up as `terrible`).
   * @returns {string} space-separated CSS class names.
   */
  getCssClass() {
    let style = this.selected ? 'selected ' : '';
    if (this.probability > .9) {
      style += 'good';
    } else if (this.probability > .7) {
      style += 'mediocre';
    } else if (this.probability > .5) {
      style += 'poor';
    } else {
      style += 'terrible';
    }

    return style;
  }

  render() {
    return html`
      <span
        data-start="${this.start}"
        data-end="${this.end}"
        class="word ${this.getCssClass()}">
        ${this.word}
      </span>
    `
  }
}
|
||||
56
src/tooltip.js
Normal file
56
src/tooltip.js
Normal file
@@ -0,0 +1,56 @@
|
||||
import { html, css, LitElement } from 'lit';
|
||||
|
||||
/**
 * `<tooltip-color-guide>` shows a "Color Guide" label that reveals, on hover,
 * a legend mapping word colours to probability ranges.
 */
export class TooltipColorGuide extends LitElement {
  static styles = css`
    .tooltip {
      position: relative;
      display: inline-block;
      cursor: pointer;
    }

    .tooltip-text {
      visibility: hidden;
      width: 200px;
      background-color: black;
      color: white;
      text-align: left;
      border-radius: 5px;
      padding: 10px;
      position: absolute;
      z-index: 1;
      top: 125%;
      left: 50%;
      transform: translateX(-50%);
      opacity: 0;
      transition: opacity 0.3s;
    }
    .tooltip span {
      color : white;
    }
    .tooltip:hover .tooltip-text {
      visibility: visible;
      opacity: 1;
    }

    .color-box {
      display: inline-block;
      width: 12px;
      height: 12px;
      margin-right: 5px;
    }
  `;

  render() {
    // One [swatch colour, caption] pair per legend row, in display order.
    const legend = [
      ['white', 'this.probability > 0.9: (White)'],
      ['yellow', 'this.probability > 0.7: (Yellow)'],
      ['orange', 'this.probability > 0.5: (Orange)'],
      ['red', 'this.probability ≤ 0.5: (Red)'],
    ];

    const rows = legend.map(([swatch, caption]) =>
      html`<div><span class="color-box" style="background-color: ${swatch};"></span>${caption}</div>`
    );

    return html`
      <div class="tooltip">
        <span>Color Guide</span>
        <div class="tooltip-text">
          ${rows}
        </div>
      </div>
    `;
  }
}
|
||||
33
web-dev-server.config.js
Normal file
33
web-dev-server.config.js
Normal file
@@ -0,0 +1,33 @@
|
||||
// import { hmrPlugin, presets } from '@open-wc/dev-server-hmr';
|
||||
import range from 'koa-range';
|
||||
|
||||
/** Hot Module Replacement is requested by adding --hmr to the start command. */
const hmrRequested = process.argv.includes('--hmr');

/** @type {import('@web/dev-server').DevServerConfig} */
const devServerConfig = {
  // Page opened in the browser when the server starts.
  open: '/demo/',

  // Fall back to regular watch mode whenever HMR was not requested.
  watch: !hmrRequested,

  // Resolve bare module imports.
  nodeResolve: {
    exportConditions: ['browser', 'development'],
  },

  middleware: [
    // Needed for audio to be served properly for Chrome, so that it can jump
    // around in the audio stream when the user clicks.
    range
  ],

  // Compile JS for older browsers (requires the @web/dev-server-esbuild plugin):
  // esbuildTarget: 'auto'

  // Set appIndex to enable SPA routing:
  // appIndex: 'demo/index.html',

  plugins: [
    // Enable Hot Module Replacement by uncommenting (requires the
    // @open-wc/dev-server-hmr plugin and its import at the top of the file):
    // hmrRequested && hmrPlugin({ exclude: ['**/*/node_modules/**/*'], presets: [presets.lit] }),
  ],

  // See the @web/dev-server documentation for all available options.
};

export default devServerConfig;
|
||||
11
whisper-transcript.js
Normal file
11
whisper-transcript.js
Normal file
@@ -0,0 +1,11 @@
|
||||
import { WhisperTranscript } from './src/WhisperTranscript.js';
|
||||
import { WhisperSegment } from './src/WhisperSegment.js';
|
||||
import { WhisperWord } from './src/WhisperWord.js';
|
||||
import { WhisperMedia } from './src/WhisperMedia.js';
|
||||
import { TooltipColorGuide } from './src/tooltip.js';
|
||||
|
||||
// Tag name -> element class, listed in registration order.
const whisperElements = [
  ['tooltip-color-guide', TooltipColorGuide],
  ['whisper-transcript', WhisperTranscript],
  ['whisper-segment', WhisperSegment],
  ['whisper-word', WhisperWord],
  ['whisper-media', WhisperMedia],
];

// `customElements.define` throws when a tag name is already registered, which
// happens if this module is evaluated twice (for example a CDN bundle plus a
// local import). Skip tags that are already defined instead of crashing.
for (const [tagName, elementClass] of whisperElements) {
  if (!window.customElements.get(tagName)) {
    window.customElements.define(tagName, elementClass);
  }
}
|
||||
Reference in New Issue
Block a user