ONNX quantized file for arm64 (#7) — opened by wkpark
# Export a dynamically quantized (qint8) ONNX model for arm64.
# Tested on macOS (Apple Silicon / M1).
from sentence_transformers import SentenceTransformer, export_dynamic_quantized_onnx_model

MODEL_ID = "dragonkue/BGE-m3-ko"
# Local directory the quantized model is written to; the export helper
# places the file under "<OUTPUT_DIR>/onnx/".
OUTPUT_DIR = "models/BGE-m3-ko"

# Load the model with the ONNX backend. CPUExecutionProvider is the
# portable choice on arm64 (no CUDA/CoreML provider required here).
onnx_model = SentenceTransformer(
    MODEL_ID,
    backend="onnx",
    model_kwargs={"provider": "CPUExecutionProvider"},
)

# Apply the "arm64" dynamic-quantization preset and save the result as
# <OUTPUT_DIR>/onnx/model_qint8_arm64.onnx.
export_dynamic_quantized_onnx_model(
    onnx_model,
    quantization_config="arm64",
    model_name_or_path=OUTPUT_DIR,
)
You can find the quantized ONNX model, named model_qint8_arm64.onnx, in the models/BGE-m3-ko/onnx/ directory.
wkpark changed the pull request status to open.