Add mode switching: chunk mode vs overlap mode for seamless transcription
index.html (CHANGED: +320 -73)
@@ -101,19 +101,19 @@
}
}

-.
+.control-container {
    margin: 20px 0;
    padding: 15px;
    background: #16213e;
    border-radius: 8px;
}

-.
+.control-container label {
    display: block;
    margin-bottom: 10px;
}

-.
+.control-container input[type="range"] {
    width: 100%;
    cursor: pointer;
}
@@ -126,6 +126,42 @@
    margin-top: 5px;
}

+.mode-switch {
+    display: flex;
+    gap: 10px;
+    margin-bottom: 15px;
+}
+
+.mode-switch button {
+    flex: 1;
+    width: auto;
+    margin: 0;
+    padding: 10px 15px;
+    font-size: 14px;
+    border-radius: 8px;
+    background: #0f0f23;
+    color: #888;
+}
+
+.mode-switch button.active {
+    background: linear-gradient(135deg, #00d4ff, #00ff88);
+    color: #1a1a2e;
+    font-weight: bold;
+}
+
+.mode-switch button:disabled {
+    opacity: 0.5;
+}
+
+.mode-description {
+    font-size: 12px;
+    color: #888;
+    margin-top: 10px;
+    padding: 10px;
+    background: #0f0f23;
+    border-radius: 6px;
+}
+
#transcript {
    background: #16213e;
    border-radius: 12px;
@@ -213,9 +249,19 @@

    <button id="startBtn" disabled>読み込み中...</button>

-    <div class="
+    <div class="control-container">
+        <div class="mode-switch">
+            <button id="modeChunk" class="active">区切りモード</button>
+            <button id="modeOverlap">オーバーラップモード</button>
+        </div>
+        <div id="modeDescription" class="mode-description">
+            指定間隔ごとに録音を区切って処理。シンプルだが境界で言葉が途切れる可能性あり。
+        </div>
+    </div>
+
+    <div class="control-container">
        <label for="intervalSlider">
-
+            <span id="intervalLabel">録音間隔</span>: <span id="intervalValue">3</span>秒
        </label>
        <input type="range" id="intervalSlider" min="1" max="6" step="0.5" value="3">
        <div class="slider-labels">
@@ -230,9 +276,9 @@
    <div class="info">
        <strong>使い方:</strong><br>
        1. モデルの読み込みを待つ(初回は数分かかります)<br>
-        2.
-        3.
-        4.
+        2. モードを選択(オーバーラップ推奨)<br>
+        3. 「録音開始」ボタンをクリック<br>
+        4. マイクに向かって話す<br><br>
        <strong>モデル:</strong> <a href="https://huggingface.co/wmoto-ai/moonshine-tiny-ja-ONNX" target="_blank">wmoto-ai/moonshine-tiny-ja-ONNX</a><br>
        <strong>ベース:</strong> <a href="https://huggingface.co/UsefulSensors/moonshine-tiny-ja" target="_blank">UsefulSensors/moonshine-tiny-ja</a>
    </div>
@@ -257,21 +303,67 @@
const progressBar = document.getElementById('progressBar');
const intervalSlider = document.getElementById('intervalSlider');
const intervalValue = document.getElementById('intervalValue');
-
-
-
-
+const intervalLabel = document.getElementById('intervalLabel');
+const modeChunkBtn = document.getElementById('modeChunk');
+const modeOverlapBtn = document.getElementById('modeOverlap');
+const modeDescription = document.getElementById('modeDescription');

let model = null;
let processor = null;
let tokenizer = null;
let isRecording = false;
+let isProcessing = false;
+let currentMode = 'chunk';
+
+// Chunk mode variables
let mediaRecorder = null;
-let audioContext = null;
let audioChunks = [];

+// Overlap mode variables
+let audioContext = null;
+let mediaStream = null;
+let scriptProcessor = null;
+let audioBuffer = [];
+let processTimer = null;
+let lastTranscript = '';
+
+const SAMPLE_RATE = 16000;
+const WINDOW_SEC = 4;
const MODEL_ID = 'wmoto-ai/moonshine-tiny-ja-ONNX';

+// Mode switching
+function setMode(mode) {
+    if (isRecording) return;
+    currentMode = mode;
+
+    if (mode === 'chunk') {
+        modeChunkBtn.classList.add('active');
+        modeOverlapBtn.classList.remove('active');
+        modeDescription.textContent = '指定間隔ごとに録音を区切って処理。シンプルだが境界で言葉が途切れる可能性あり。';
+        intervalLabel.textContent = '録音間隔';
+        intervalSlider.min = '1';
+        intervalSlider.max = '6';
+        intervalSlider.value = '3';
+        intervalValue.textContent = '3';
+    } else {
+        modeChunkBtn.classList.remove('active');
+        modeOverlapBtn.classList.add('active');
+        modeDescription.textContent = '連続バッファ + オーバーラップ処理。境界での途切れを防ぎ、滑らかな文字起こしを実現。';
+        intervalLabel.textContent = '処理間隔';
+        intervalSlider.min = '1';
+        intervalSlider.max = '4';
+        intervalSlider.value = '2';
+        intervalValue.textContent = '2';
+    }
+}
+
+modeChunkBtn.addEventListener('click', () => setMode('chunk'));
+modeOverlapBtn.addEventListener('click', () => setMode('overlap'));
+
+intervalSlider.addEventListener('input', () => {
+    intervalValue.textContent = intervalSlider.value;
+});
+
async function loadModel() {
    try {
        statusEl.textContent = 'モデルを読み込み中... (初回は数分かかることがあります)';
@@ -298,31 +390,69 @@
        startBtn.textContent = '録音開始';
        startBtn.disabled = false;
    } catch (error) {
-        console.error('Model loading error:', error);
        statusEl.textContent = `エラー: ${error.message}`;
        statusEl.className = 'status error';
    }
}

-
-
+// ============ Common transcription function ============
+async function transcribe(audioData) {
+    if (!model || !processor || !tokenizer) return null;
+
+    if (audioData.length < 1600) return null;
+
+    let maxLevel = 0;
+    let sumSquares = 0;
+    for (let i = 0; i < audioData.length; i++) {
+        const abs = Math.abs(audioData[i]);
+        if (abs > maxLevel) maxLevel = abs;
+        sumSquares += audioData[i] * audioData[i];
+    }
+    const rms = Math.sqrt(sumSquares / audioData.length);
+
+    if (rms < 0.01 || maxLevel < 0.05) return null;
+
+    const inputs = await processor(audioData);

+    const audioDuration = audioData.length / SAMPLE_RATE;
+    const maxTokens = Math.min(Math.round(audioDuration * 25), 150);
+    const outputs = await model.generate({
+        ...inputs,
+        max_new_tokens: maxTokens,
+    });
+
+    let text = tokenizer.decode(outputs[0], { skip_special_tokens: true }).trim();
+
+    const repeatPattern = /(.{2,}?)\1{4,}/;
+    if (repeatPattern.test(text)) {
+        text = text.replace(/(.{2,}?)\1{3,}/g, '$1');
+    }
+
+    const hallucinations = ['彼は私', '彼女は私', 'そう、そう'];
+    const isHallucination = hallucinations.some(h => text.includes(h) && text.length > 30);
+
+    if (isHallucination) return null;
+
+    return text;
+}
+
+// ============ Chunk Mode ============
+async function transcribeAudioBlob(audioBlob) {
    try {
        currentTextEl.textContent = '処理中...';

        const arrayBuffer = await audioBlob.arrayBuffer();

        if (!audioContext) {
-            audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate:
+            audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: SAMPLE_RATE });
        }

        const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

-        const targetSampleRate = 16000;
        const offlineCtx = new OfflineAudioContext(
            1,
-            Math.ceil(audioBuffer.duration *
-
+            Math.ceil(audioBuffer.duration * SAMPLE_RATE),
+            SAMPLE_RATE
        );

        const source = offlineCtx.createBufferSource();
@@ -333,65 +463,23 @@
        const resampled = await offlineCtx.startRendering();
        const audioData = resampled.getChannelData(0);

-
-            currentTextEl.textContent = '(音声が短すぎます)';
-            return;
-        }
-
-        let maxLevel = 0;
-        let sumSquares = 0;
-        for (let i = 0; i < audioData.length; i++) {
-            const abs = Math.abs(audioData[i]);
-            if (abs > maxLevel) maxLevel = abs;
-            sumSquares += audioData[i] * audioData[i];
-        }
-        const rms = Math.sqrt(sumSquares / audioData.length);
-
-        if (rms < 0.01 || maxLevel < 0.05) {
-            currentTextEl.textContent = '(音声が小さすぎます)';
-            return;
-        }
-
-        const inputs = await processor(audioData);
-
-        const intervalSec = parseFloat(intervalSlider.value);
-        const maxTokens = Math.min(Math.round(intervalSec * 25), 150);
-        const outputs = await model.generate({
-            ...inputs,
-            max_new_tokens: maxTokens,
-        });
-
-        let text = tokenizer.decode(outputs[0], { skip_special_tokens: true }).trim();
+        const text = await transcribe(audioData);

-
-        if (repeatPattern.test(text)) {
-            text = text.replace(/(.{2,}?)\1{3,}/g, '$1');
-        }
-
-        const hallucinations = ['彼は私', '彼女は私', 'そう、そう'];
-        const isHallucination = hallucinations.some(h => text.includes(h) && text.length > 30);
-
-        if (text && !isHallucination) {
+        if (text) {
            currentTextEl.textContent = text;
            transcriptEl.textContent += text + '\n';
-        } else if (isHallucination) {
-            currentTextEl.textContent = '(ノイズ検出)';
        } else {
            currentTextEl.textContent = '(音声が検出されませんでした)';
        }
    } catch (error) {
-        console.error('Transcription error:', error);
        currentTextEl.textContent = `エラー: ${error.message}`;
    }
}

-async function
+async function startChunkRecording() {
    try {
        const stream = await navigator.mediaDevices.getUserMedia({
-            audio: {
-                channelCount: 1,
-                sampleRate: 16000,
-            }
+            audio: { channelCount: 1, sampleRate: SAMPLE_RATE }
        });

        audioChunks = [];
@@ -416,7 +504,7 @@
        const audioBlob = new Blob(audioChunks, { type: 'audio/webm;codecs=opus' });
        audioChunks = [];

-        await
+        await transcribeAudioBlob(audioBlob);

        if (isRecording && mediaRecorder.stream.active) {
            const intervalMs = parseFloat(intervalSlider.value) * 1000;
@@ -432,30 +520,189 @@
        const intervalMs = parseFloat(intervalSlider.value) * 1000;
        setTimeout(processAndRestart, intervalMs);

-
-        statusEl.className = 'status recording';
-        startBtn.textContent = '録音停止';
-        startBtn.classList.add('recording');
+        updateRecordingUI();
    } catch (error) {
-        console.error('Recording error:', error);
        statusEl.textContent = `マイクエラー: ${error.message}`;
        statusEl.className = 'status error';
    }
}

-function
+function stopChunkRecording() {
    if (mediaRecorder && mediaRecorder.state !== 'inactive') {
        mediaRecorder.stop();
        mediaRecorder.stream.getTracks().forEach(track => track.stop());
    }
    isRecording = false;
    audioChunks = [];
+}
+
+// ============ Overlap Mode ============
+function removeDuplicateText(prevText, newText) {
+    if (!prevText || !newText) return newText;

+    const minOverlap = 2;
+    const maxOverlap = Math.min(prevText.length, newText.length, 20);
+
+    for (let len = maxOverlap; len >= minOverlap; len--) {
+        const prevEnd = prevText.slice(-len);
+        if (newText.startsWith(prevEnd)) {
+            return newText.slice(len);
+        }
+    }
+
+    return newText;
+}
+
+async function processAudioWindow() {
+    if (!model || !processor || !tokenizer || !isRecording) return;
+    if (isProcessing) return;
+
+    isProcessing = true;
+
+    try {
+        const windowSamples = WINDOW_SEC * SAMPLE_RATE;
+
+        if (audioBuffer.length < windowSamples * 0.5) {
+            currentTextEl.textContent = '(音声を収集中...)';
+            return;
+        }
+
+        const startIdx = Math.max(0, audioBuffer.length - windowSamples);
+        const audioData = new Float32Array(audioBuffer.slice(startIdx));
+
+        currentTextEl.textContent = '処理中...';
+
+        const text = await transcribe(audioData);
+
+        if (text) {
+            const uniqueText = removeDuplicateText(lastTranscript, text);
+
+            if (uniqueText && uniqueText.length > 0) {
+                currentTextEl.textContent = text;
+                transcriptEl.textContent += uniqueText;
+                lastTranscript = text;
+            }
+        } else {
+            currentTextEl.textContent = '(音声が検出されませんでした)';
+        }
+    } catch (error) {
+        currentTextEl.textContent = `エラー: ${error.message}`;
+    } finally {
+        isProcessing = false;
+    }
+}
+
+async function startOverlapRecording() {
+    try {
+        mediaStream = await navigator.mediaDevices.getUserMedia({
+            audio: { channelCount: 1, sampleRate: SAMPLE_RATE }
+        });
+
+        audioContext = new (window.AudioContext || window.webkitAudioContext)({
+            sampleRate: SAMPLE_RATE
+        });
+
+        const source = audioContext.createMediaStreamSource(mediaStream);
+
+        const bufferSize = 4096;
+        scriptProcessor = audioContext.createScriptProcessor(bufferSize, 1, 1);
+
+        const maxBufferSize = SAMPLE_RATE * 10;
+
+        scriptProcessor.onaudioprocess = (e) => {
+            if (!isRecording) return;
+
+            const inputData = e.inputBuffer.getChannelData(0);
+
+            for (let i = 0; i < inputData.length; i++) {
+                audioBuffer.push(inputData[i]);
+            }
+
+            while (audioBuffer.length > maxBufferSize) {
+                audioBuffer.shift();
+            }
+        };
+
+        source.connect(scriptProcessor);
+        scriptProcessor.connect(audioContext.destination);
+
+        audioBuffer = [];
+        lastTranscript = '';
+        isRecording = true;
+        isProcessing = false;
+
+        const intervalMs = parseFloat(intervalSlider.value) * 1000;
+        processTimer = setInterval(processAudioWindow, intervalMs);
+
+        updateRecordingUI();
+    } catch (error) {
+        statusEl.textContent = `マイクエラー: ${error.message}`;
+        statusEl.className = 'status error';
+    }
+}
+
+function stopOverlapRecording() {
+    isRecording = false;
+
+    if (processTimer) {
+        clearInterval(processTimer);
+        processTimer = null;
+    }
+
+    if (scriptProcessor) {
+        scriptProcessor.disconnect();
+        scriptProcessor = null;
+    }
+
+    if (audioContext) {
+        audioContext.close();
+        audioContext = null;
+    }
+
+    if (mediaStream) {
+        mediaStream.getTracks().forEach(track => track.stop());
+        mediaStream = null;
+    }
+
+    audioBuffer = [];
+}
+
+// ============ UI Helpers ============
+function updateRecordingUI() {
+    statusEl.textContent = '録音中... マイクに向かって話してください';
+    statusEl.className = 'status recording';
+    startBtn.textContent = '録音停止';
+    startBtn.classList.add('recording');
+    modeChunkBtn.disabled = true;
+    modeOverlapBtn.disabled = true;
+}
+
+function updateStoppedUI() {
    statusEl.textContent = '録音停止。再開するにはボタンをクリック';
    statusEl.className = 'status ready';
    startBtn.textContent = '録音開始';
    startBtn.classList.remove('recording');
    currentTextEl.textContent = '';
+    modeChunkBtn.disabled = false;
+    modeOverlapBtn.disabled = false;
+}
+
+// ============ Main Controls ============
+function startRecording() {
+    if (currentMode === 'chunk') {
+        startChunkRecording();
+    } else {
+        startOverlapRecording();
+    }
+}
+
+function stopRecording() {
+    if (currentMode === 'chunk') {
+        stopChunkRecording();
+    } else {
+        stopOverlapRecording();
+    }
+    updateStoppedUI();
}

startBtn.addEventListener('click', () => {
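Note: the overlap mode's deduplication can be exercised on its own. The sketch below copies removeDuplicateText verbatim from the diff above and calls it with made-up sample strings (illustrative only, not model output). It trims the longest boundary overlap (2 to 20 characters) between the end of the previous window's transcript and the start of the new window's transcript, so overlapping windows do not repeat words in the accumulated text.

// Standalone copy of removeDuplicateText from the diff above, runnable in Node or a browser console.
function removeDuplicateText(prevText, newText) {
    if (!prevText || !newText) return newText;

    const minOverlap = 2;
    const maxOverlap = Math.min(prevText.length, newText.length, 20);

    // Try the longest possible overlap first, shrinking until a match is found.
    for (let len = maxOverlap; len >= minOverlap; len--) {
        const prevEnd = prevText.slice(-len);
        if (newText.startsWith(prevEnd)) {
            return newText.slice(len);
        }
    }

    return newText;
}

// Hypothetical sample inputs:
console.log(removeDuplicateText('こんにちは、今日は', '今日はいい天気ですね')); // => 'いい天気ですね'
console.log(removeDuplicateText('おはよう', 'ありがとう'));                     // => 'ありがとう' (no overlap, text kept as-is)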