metadata
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:7059200
- loss:MultipleNegativesRankingLoss
base_model: Shuu12121/CodeModernBERT-Owl-3.0
widget:
- source_sentence: >-
The maximum value of the slider. (default 0) <P>
@return Returns the value of the attribute, or 0, if it hasn't been set by
the JSF file.
sentences:
- |-
@Override
public UpdateSmsChannelResult updateSmsChannel(UpdateSmsChannelRequest request) {
request = beforeClientExecution(request);
return executeUpdateSmsChannel(request);
}
- |-
async function isValidOrigin(origin, sourceOrigin) {
// This will fetch the caches from https://cdn.ampproject.org/caches.json the first time it's
// called. Subsequent calls will receive a cached version.
const officialCacheList = await caches.list();
// Calculate the cache specific origin
const cacheSubdomain = `https://${await createCacheSubdomain(sourceOrigin)}.`;
// Check all caches listed on ampproject.org
for (const cache of officialCacheList) {
const cachedOrigin = cacheSubdomain + cache.cacheDomain;
if (origin === cachedOrigin) {
return true;
}
}
return false;
}
- "public java.lang.Object getMin() {\n\t\treturn (java.lang.Object) getStateHelper().eval(PropertyKeys.min, 0);\n\t}"
- source_sentence: >-
The Method from the Date.getMinutes is deprecated. This is a
helper-Method.
@param date
The Date-object to get the minutes.
@return The minutes from the Date-object.
sentences:
- "public static int getMinutes(final Date date)\n\t{\n\t\tfinal Calendar calendar = Calendar.getInstance();\n\t\tcalendar.setTime(date);\n\t\treturn calendar.get(Calendar.MINUTE);\n\t}"
- "func (opts BeeOptions) Bind(name string, dst interface{}) error {\n\tv := opts.Value(name)\n\tif v == nil {\n\t\treturn errors.New(\"Option with name \" + name + \" not found\")\n\t}\n\n\treturn ConvertValue(v, dst)\n}"
- >-
public function createFor(Customer $customer, array $options = [], array
$filters = [])
{
$this->parentId = $customer->id;
return parent::rest_create($options, $filters);
}
- source_sentence: |-
Return a list of all dates from 11/12/2015 to the present.
Args:
boo: if true, list contains Numbers (20151230); if false, list contains Strings ("2015-12-30")
Returns:
list of either Numbers or Strings
sentences:
- |-
def all_days(boo):
earliest = datetime.strptime(('2015-11-12').replace('-', ' '), '%Y %m %d')
latest = datetime.strptime(datetime.today().date().isoformat().replace('-', ' '), '%Y %m %d')
num_days = (latest - earliest).days + 1
all_days = [latest - timedelta(days=x) for x in range(num_days)]
all_days.reverse()
output = []
if boo:
# Return as Integer, yyyymmdd
for d in all_days:
output.append(int(str(d).replace('-', '')[:8]))
else:
# Return as String, yyyy-mm-dd
for d in all_days:
output.append(str(d)[:10])
return output
- "public void setColSize3(Integer newColSize3) {\n\t\tInteger oldColSize3 = colSize3;\n\t\tcolSize3 = newColSize3;\n\t\tif (eNotificationRequired())\n\t\t\teNotify(new ENotificationImpl(this, Notification.SET, AfplibPackage.COLOR_SPECIFICATION__COL_SIZE3, oldColSize3, colSize3));\n\t}"
- >-
public function
deleteCompanyBusinessUnitStoreAddress(CompanyBusinessUnitStoreAddressTransfer
$companyBusinessUnitStoreAddressTransfer): void
{
$this->getFactory()
->createFosCompanyBusinessUnitStoreAddressQuery()
->findOneByIdCompanyBusinessUnitStoreAddress($companyBusinessUnitStoreAddressTransfer->getIdCompanyBusinessUnitStoreAddress())
->delete();
}
- source_sentence: |-
Returns array of basket oxarticle objects
@return array
sentences:
- |-
public function visit(NodeVisitorInterface $visitor)
{
foreach ($this->children as $child)
{
$child->visit($visitor);
}
}
- "func GetColDefaultValue(ctx sessionctx.Context, col *model.ColumnInfo) (types.Datum, error) {\n\treturn getColDefaultValue(ctx, col, col.GetDefaultValue())\n}"
- |-
public function getBasketArticles()
{
$aBasketArticles = [];
/** @var \oxBasketItem $oBasketItem */
foreach ($this->_aBasketContents as $sItemKey => $oBasketItem) {
try {
$oProduct = $oBasketItem->getArticle(true);
if (\OxidEsales\Eshop\Core\Registry::getConfig()->getConfigParam('bl_perfLoadSelectLists')) {
// marking chosen select list
$aSelList = $oBasketItem->getSelList();
if (is_array($aSelList) && ($aSelectlist = $oProduct->getSelectLists($sItemKey))) {
reset($aSelList);
foreach ($aSelList as $conkey => $iSel) {
$aSelectlist[$conkey][$iSel]->selected = 1;
}
$oProduct->setSelectlist($aSelectlist);
}
}
} catch (\OxidEsales\Eshop\Core\Exception\NoArticleException $oEx) {
\OxidEsales\Eshop\Core\Registry::getUtilsView()->addErrorToDisplay($oEx);
$this->removeItem($sItemKey);
$this->calculateBasket(true);
continue;
} catch (\OxidEsales\Eshop\Core\Exception\ArticleInputException $oEx) {
\OxidEsales\Eshop\Core\Registry::getUtilsView()->addErrorToDisplay($oEx);
$this->removeItem($sItemKey);
$this->calculateBasket(true);
continue;
}
$aBasketArticles[$sItemKey] = $oProduct;
}
return $aBasketArticles;
}
- source_sentence: get test root
sentences:
- |-
@CheckReturnValue
@SchedulerSupport(SchedulerSupport.NONE)
public final Maybe<T> doOnDispose(Action onDispose) {
return RxJavaPlugins.onAssembly(new MaybePeek<T>(this,
Functions.emptyConsumer(), // onSubscribe
Functions.emptyConsumer(), // onSuccess
Functions.emptyConsumer(), // onError
Functions.EMPTY_ACTION, // onComplete
Functions.EMPTY_ACTION, // (onSuccess | onError | onComplete) after
ObjectHelper.requireNonNull(onDispose, "onDispose is null")
));
}
- >-
protected Object parseKeyElement(Element keyEle, BeanDefinition bd,
String defaultKeyTypeName) {
NodeList nl = keyEle.getChildNodes();
Element subElement = null;
for (int i = 0; i < nl.getLength(); i++) {
Node node = nl.item(i);
if (node instanceof Element) {
// Child element is what we're looking for.
if (subElement != null)
error("<key> element must not contain more than one value sub-element", keyEle);
else subElement = (Element) node;
}
}
return parsePropertySubElement(subElement, bd, defaultKeyTypeName);
}
- |-
function getRootPath(){
var rootPath = path.resolve('.');
while(rootPath){
if(fs.existsSync(rootPath + '/config.json')){
break;
}
rootPath = rootPath.substring(0, rootPath.lastIndexOf(path.sep));
}
return rootPath;
}
pipeline_tag: sentence-similarity
library_name: sentence-transformers
datasets:
- code-search-net/code_search_net
- Shuu12121/python-codesearch-dedupe-filtered-v4
- Shuu12121/javascript-codesearch-dedupe-filtered-v4
- Shuu12121/java-codesearch-dedupe-filtered-v4
- Shuu12121/typescript-codesearch-dedupe-filtered-v4
- Shuu12121/php-codesearch-dedupe-filtered-v4
- Shuu12121/go-codesearch-dedupe-filtered-v4
- Shuu12121/ruby-codesearch-dedupe-filtered-v4
- Shuu12121/rust-codesearch-dedupe-filtered-v4
license: apache-2.0
language:
- en
🦉 CodeModernBERT‑Owl 3.0 SentenceTransformer
多言語・長文コードを対象としたエンコーダ CodeModernBERT‑Owl 3.0 をベースに、Sentence Transformers 形式で微調整したモデルです。1024 トークンまでのソースコード/自然言語を 768 次元の密ベクトルに写像し、コード検索・類似度計算・クラスタリングなど幅広い下流タスクに活用できます。
A multilingual, long‑context SentenceTransformer fine‑tuned from CodeModernBERT‑Owl 3.0. It encodes code and natural‑language snippets (≤ 1024 tokens) into 768‑dimensional vectors for semantic search, similarity, clustering, and more.
🔥 ハイライト / Highlights
| ⚙️ 仕様 | 値 |
|---|---|
| 最大シーケンス長 | 1024 tokens |
| 埋め込み次元 | 768 次元(類似度関数: Cosine Similarity) |
| プーリング | CLS トークン(pooling_mode_cls_token = True) |
| 学習データ | 7,059,200 正例ペア(CodeSearchNet + 自作データセット) |
| ロス関数 | MultipleNegativesRankingLoss (scale = 20.0) |
| 学習エポック | 3 epochs (@ batch size 200, fp16) |
| 基盤モデル | Shuu12121/CodeModernBERT-Owl-3.0 |
📊 評価結果 / Evaluation
MTEB CodeSearchNet (CSN) の評価結果(COIR 版 / CSN):
| Metric | COIR Version | CSN |
|---|---|---|
| Main Score (NDCG@10) | 0.8023 | 0.8928 |
| NDCG@1 | 0.7175 | 0.8125 |
| NDCG@3 | 0.7795 | 0.8798 |
| NDCG@5 | 0.7917 | 0.8879 |
| NDCG@20 | 0.8085 | 0.8950 |
| MAP@10 | 0.7759 | 0.8707 |
| Recall@10 | 0.8839 | 0.9593 |
| MRR@10 | 0.7759 | 0.8707 |
どちらのタスクでも、公式スコアとして提出済みの CodeSearch-ModernBERT-Crow-Plus と同等以上の成績を残しています。
🚀 使い方 / Quick Start
```python
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer("Shuu12121/CodeModernBERT-Owl-3.0-ST")
queries = ["get test root"]
docs = [
"function getRootPath(){ … }",
"protected Object parseKeyElement(Element keyEle, …)",
]
q_emb = model.encode(queries, normalize_embeddings=True)
d_emb = model.encode(docs, normalize_embeddings=True)
scores = util.cos_sim(q_emb, d_emb)
print(scores)
```
🛠️ ファインチューニング / Fine‑tuning
ロス関数:
MultipleNegativesRankingLoss はミニバッチ内のネガティブを暗黙的に構成するため、大規模なペア生成が不要です。

ハイパーパラメータ(主要):

- learning_rate: 5e-5
- per_device_train_batch_size: 200
- fp16: true
- warmup_ratio: 0.0
- max_grad_norm: 1.0
⚖️ ライセンス / License
Apache 2.0