Commit
·
f9987db
1
Parent(s):
4aeaa3b
Add language_threshold_percent parameter to predict_language and pass it through to predict_rows
Browse files
main.py
CHANGED
|
@@ -251,6 +251,7 @@ async def predict_language(
|
|
| 251 |
int, Query(title="Max number of requests to datasets server", gt=0, le=30)
|
| 252 |
] = 10,
|
| 253 |
number_of_rows: int = 1000,
|
|
|
|
| 254 |
) -> dict[Any, Any] | None:
|
| 255 |
is_valid = datasets_server_valid_rows(hub_id)
|
| 256 |
if not is_valid:
|
|
@@ -289,7 +290,11 @@ async def predict_language(
|
|
| 289 |
split,
|
| 290 |
)
|
| 291 |
logger.info(f"Predicting language for {len(random_rows)} rows")
|
| 292 |
-
predictions = predict_rows(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
predictions["hub_id"] = hub_id
|
| 294 |
predictions["config"] = config
|
| 295 |
predictions["split"] = split
|
|
|
|
| 251 |
int, Query(title="Max number of requests to datasets server", gt=0, le=30)
|
| 252 |
] = 10,
|
| 253 |
number_of_rows: int = 1000,
|
| 254 |
+
language_threshold_percent: float = 0.2,
|
| 255 |
) -> dict[Any, Any] | None:
|
| 256 |
is_valid = datasets_server_valid_rows(hub_id)
|
| 257 |
if not is_valid:
|
|
|
|
| 290 |
split,
|
| 291 |
)
|
| 292 |
logger.info(f"Predicting language for {len(random_rows)} rows")
|
| 293 |
+
predictions = predict_rows(
|
| 294 |
+
random_rows,
|
| 295 |
+
target_column,
|
| 296 |
+
language_threshold_percent=language_threshold_percent,
|
| 297 |
+
)
|
| 298 |
predictions["hub_id"] = hub_id
|
| 299 |
predictions["config"] = config
|
| 300 |
predictions["split"] = split
|