Spaces:
Runtime error
Runtime error
| import sys | |
| from pathlib import Path | |
| from typing import Annotated, Tuple | |
| from pydantic import BaseModel, PlainValidator | |
| from pydantic_settings import BaseSettings, SettingsConfigDict | |
| def _validate_page_range(v: Tuple[int, int]) -> Tuple[int, int]: | |
| if v[0] < 1 or v[1] < v[0]: | |
| raise ValueError( | |
| "Invalid page range: start must be ≥ 1 and end must be ≥ start." | |
| ) | |
| return v | |
# Type alias for a ``(start, end)`` page range. The check in
# ``_validate_page_range`` is attached as a pydantic ``PlainValidator``.
PageRange = Annotated[
    Tuple[int, int],
    PlainValidator(_validate_page_range),
]

# Default range: starts at page 1 and ends at ``sys.maxsize``.
DEFAULT_PAGE_RANGE: PageRange = (1, sys.maxsize)
class DocumentLimits(BaseModel):
    """Page-count, file-size and page-range limits for a document.

    Both numeric limits default to ``sys.maxsize`` and the page range
    defaults to ``DEFAULT_PAGE_RANGE``. Where and how the limits are
    enforced is not visible in this module.
    """

    # Maximum number of pages.
    max_num_pages: int = sys.maxsize

    # Maximum file size; presumably in bytes -- TODO confirm at the call site.
    max_file_size: int = sys.maxsize

    # (start, end) pair checked by _validate_page_range:
    # start must be >= 1 and end must be >= start.
    page_range: PageRange = DEFAULT_PAGE_RANGE
class BatchConcurrencySettings(BaseModel):
    """Batch-size and concurrency knobs.

    NOTE(review): the exact meaning of each knob is defined by the code that
    consumes these settings, which is not visible here. From the names they
    appear to control batching at document, page and element level.
    """

    # Document-level batching.
    doc_batch_size: int = 2
    doc_batch_concurrency: int = 2

    # Page-level batching.
    page_batch_size: int = 4
    page_batch_concurrency: int = 2

    # Element-level batching.
    elements_batch_size: int = 16

    # Reference only (not active): alternative values left by the original
    # author, with every batch size and concurrency set to 1.
    #   doc_batch_size: int = 1
    #   doc_batch_concurrency: int = 1
    #   page_batch_size: int = 1
    #   page_batch_concurrency: int = 1
    #
    # Reference only (not active): a setting that is currently disabled.
    #   model_concurrency: int = 2
    #
    # To force models into single core: export OMP_NUM_THREADS=1
class DebugSettings(BaseModel):
    """Debug switches, all disabled by default.

    NOTE(review): what each ``visualize_*`` flag renders is decided by the
    code that reads it, which is not visible in this module.
    """

    # Visualisation toggles.
    visualize_cells: bool = False
    visualize_ocr: bool = False
    visualize_layout: bool = False
    visualize_raw_layout: bool = False
    visualize_tables: bool = False

    # Timing/profiling toggle.
    profile_pipeline_timings: bool = False

    # Path used to output debug information. The default is computed once,
    # when this class body is executed, from the working directory at that
    # moment.
    debug_output_path: str = str(Path.cwd().joinpath("debug"))
class AppSettings(BaseSettings):
    """Application settings, overridable via ``DOCLING_*`` environment variables.

    Top-level fields are read from ``DOCLING_<FIELD>`` (for example
    ``DOCLING_CACHE_DIR``). Fields of the nested ``perf`` and ``debug`` models
    are read from ``DOCLING_<GROUP>_<FIELD>`` (for example
    ``DOCLING_PERF_PAGE_BATCH_SIZE``).
    """

    # The nested delimiter is a single "_", which also occurs inside field
    # names. ``env_nested_max_split=1`` makes pydantic-settings split only at
    # the first delimiter after the prefix, so DOCLING_PERF_PAGE_BATCH_SIZE
    # resolves to ``perf.page_batch_size`` rather than ``perf.page.batch.size``.
    # NOTE(review): this option needs a pydantic-settings release that
    # supports it (2.8+ -- confirm against the project's dependency pin).
    model_config = SettingsConfigDict(
        env_prefix="DOCLING_",
        env_nested_delimiter="_",
        env_nested_max_split=1,
    )

    # Defaults let the class be built without arguments, so the environment
    # can contribute values for the nested models. Passing ``perf=`` or
    # ``debug=`` explicitly is still supported.
    perf: BatchConcurrencySettings = BatchConcurrencySettings()
    debug: DebugSettings = DebugSettings()

    cache_dir: Path = Path.home() / ".cache" / "docling"


# Module-level singleton. It is built without init arguments on purpose:
# init arguments take priority over environment variables, so passing fresh
# ``perf``/``debug`` instances here would override any DOCLING_PERF_* or
# DOCLING_DEBUG_* values.
settings = AppSettings()