import {pipeline} from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers';

export class OnDeviceService {
  constructor({modelName = '', quantization = 'fp32'} = {}) {
    this.modelName = modelName;
    this.modelQuantization = quantization;
    this._ready = false;
    this._model = null;
  }
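
  // Note (assumption): `quantization` is passed straight through as the transformers.js `dtype`
  // option; values such as 'fp32', 'fp16', 'q8', or 'q4' are commonly used, but consult the
  // installed transformers.js version for the exact set it supports.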

  async load(progressCb) {
    console.log(`⬇️ Downloading model '${this.modelName}'...`);

    // Fallback progress logger used when the caller does not supply a callback.
    const defaultProgressCb = (progress) => {
      if (progress && typeof progress === 'object') {
        if (progress.status) {
          console.log(`[Model Loading] ${progress.status}`);
        }
        if (progress.loaded && progress.total) {
          const percent = ((progress.loaded / progress.total) * 100).toFixed(1);
          console.log(`[Model Loading] ${percent}% (${progress.loaded}/${progress.total} bytes)`);
        }
      } else {
        console.log(`[Model Loading] Progress:`, progress);
      }
    };

    // Create a text-generation pipeline on WebGPU with the configured quantization.
    this._model = await pipeline('text-generation', this.modelName, {
      progress_callback: progressCb || defaultProgressCb,
      device: 'webgpu',
      dtype: this.modelQuantization,
    });

    console.log(`✅ Model '${this.modelName}' loaded and ready.`);
    this._ready = true;
  }
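
  // Example (illustrative): calling load() with a caller-supplied progress callback, e.g. to drive
  // a UI progress bar instead of the console logger above. `progressBar` is a hypothetical element.
  //   await service.load((p) => {
  //     if (p.status === 'progress' && p.total) progressBar.value = p.loaded / p.total;
  //   });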

  isReady() {
    return this._ready;
  }

  async infer(prompt, {maxNewTokens = 50} = {}) {
    if (!this._ready || !this._model) {
      console.error("Model not ready:", this._ready, this._model);
      throw new Error('Model not loaded. Call load() first.');
    }
    console.log("🚀 Running inference on-device for prompt:\n", prompt);

    const messages = [
      { role: "user", content: prompt },
    ];

    const output = await this._model(messages, {
      max_new_tokens: maxNewTokens,
      temperature: 0.2,
    });

    console.log("✅ Completed inference on-device for prompt:\n", prompt);

    // The pipeline returns the full chat transcript; the assistant reply is the last message.
    const generated_output = output[0]?.generated_text;
    const text = generated_output?.[generated_output.length - 1]?.content?.trim() || '';

    return {answer: text, stats: {input_tokens: undefined, output_tokens: undefined}};
  }

  updateConfig({modelName, quantization} = {}) {
    // Note: changes only take effect on the next load(); the currently loaded model is untouched.
    if (modelName) this.modelName = modelName;
    if (quantization) this.modelQuantization = quantization;
  }

  getModelName() {
    return this.modelName;
  }
}
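
// Usage sketch (illustrative). Assumes a WebGPU-capable browser and a Hub model id that ships
// ONNX weights usable by transformers.js text generation; the model id below is a placeholder.
//
//   const service = new OnDeviceService({
//     modelName: 'your-org/your-text-generation-model',  // placeholder, not a real model id
//     quantization: 'q4',
//   });
//   await service.load();
//   if (service.isReady()) {
//     const {answer} = await service.infer('Explain WebGPU in one sentence.', {maxNewTokens: 64});
//     console.log(answer);
//   }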