{"$defs": {"AuthConfig": {"properties": {"key_prefix": {"default": "lmos", "title": "Key Prefix", "type": "string"}}, "title": "AuthConfig", "type": "object"}, "ExllamaV2Runner": {"description": "ExllamaV2 runner config", "properties": {"name": {"description": "Name of the service", "title": "Name", "type": "string"}, "alias": {"description": "List of aliases for the service", "items": {"type": "string"}, "title": "Alias", "type": "array"}, "model": {"description": "Path to the model folder or HF repository", "title": "Model", "type": "string"}, "type": {"const": "exl2", "enum": ["exl2"], "title": "Type", "type": "string"}}, "required": ["name", "model", "type"], "title": "ExllamaV2Runner", "type": "object"}, "ExternalService": {"description": "Config for external services", "properties": {"name": {"description": "Name of the service", "title": "Name", "type": "string"}, "alias": {"description": "List of aliases for the service", "items": {"type": "string"}, "title": "Alias", "type": "array"}, "type": {"const": "external", "enum": ["external"], "title": "Type", "type": "string"}, "api_key": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "description": "Optional API key for the service", "title": "Api Key"}, "endpoint": {"description": "Endpoint URL for the service", "format": "uri", "minLength": 1, "title": "Endpoint", "type": "string"}}, "required": ["name", "type", "endpoint"], "title": "ExternalService", "type": "object"}, "FasterWhisper": {"description": "FasterWhisper runner config", "properties": {"name": {"description": "Name of the service", "title": "Name", "type": "string"}, "alias": {"description": "List of aliases for the service", "items": {"type": "string"}, "title": "Alias", "type": "array"}, "model": {"description": "Path to the model folder or HF repository", "title": "Model", "type": "string"}, "type": {"const": "fasterwhisper", "enum": ["fasterwhisper"], "title": "Type", "type": "string"}}, "required": ["name", 
"model", "type"], "title": "FasterWhisper", "type": "object"}, "InternalConfiguration": {"description": "Config for services LMOS depends on.", "properties": {"redis": {"$ref": "#/$defs/RedisConfig", "description": "Redis Connection Configuration"}, "database": {"$ref": "#/$defs/RelationalDatabaseConfig", "description": "Database Connection Configuration"}}, "required": ["redis", "database"], "title": "InternalConfiguration", "type": "object"}, "RedisConfig": {"description": "Redis Configuration. This tells LMOS how to connect to your redis instance.", "properties": {"url": {"description": "Redis Connection URL. This Should start with `redis://`", "format": "uri", "minLength": 1, "title": "Url", "type": "string"}}, "required": ["url"], "title": "RedisConfig", "type": "object"}, "RelationalDatabaseConfig": {"description": "Database Configuration. This tells LMOS how to connect to your relational database.\n[Engine Configuration](https://docs.sqlalchemy.org/en/20/core/engines.html)", "properties": {"url": {"description": "Database Connection URL. 
This should be any SQLAlchemy-supported URL or similar", "format": "uri", "minLength": 1, "title": "Url", "type": "string"}}, "required": ["url"], "title": "RelationalDatabaseConfig", "type": "object"}, "RouterConfig": {"description": "Configuration for the LMOS Router which delegates requests to the appropriate service.\n\n`log_request_dump_max_queue_size` and `log_request_dump_queue_timeout` are used to determine when to insert usage logs into the RDB.\nLMOS Router will queue usage logs internally to prevent delays in the request response cycle.\n\nIt is recommended that you set the `log_request_dump_max_queue_size` such that it frequently triggers prior to reaching the time limit specified by `log_request_dump_queue_timeout`.", "properties": {"log_request_dump_max_queue_size": {"default": 1000, "description": "In number of entries: Threshold for worker to insert usage logs into RDB", "title": "Log Request Dump Max Queue Size", "type": "integer"}, "log_request_dump_queue_timeout": {"default": 1000, "description": "In Seconds: The max time between worker inserting into RDB", "title": "Log Request Dump Queue Timeout", "type": "integer"}}, "title": "RouterConfig", "type": "object"}, "Services": {"description": "Configuration for all services.", "properties": {"router": {"$ref": "#/$defs/RouterConfig", "description": "Configuration for the LMOS Router"}, "llm_runner": {"anyOf": [{"items": {"discriminator": {"mapping": {"exl2": "#/$defs/ExllamaV2Runner", "external": "#/$defs/ExternalService", "sglang": "#/$defs/SglangRunner", "vllm": "#/$defs/vLLMRunner"}, "propertyName": "type"}, "oneOf": [{"$ref": "#/$defs/ExllamaV2Runner"}, {"$ref": "#/$defs/vLLMRunner"}, {"$ref": "#/$defs/SglangRunner"}, {"$ref": "#/$defs/ExternalService"}]}, "type": "array"}, {"type": "null"}], "description": "List of LLM runner services", "title": "Llm Runner"}, "stt_runner": {"anyOf": [{"items": {"discriminator": {"mapping": {"external": "#/$defs/ExternalService", "fasterwhisper": 
"#/$defs/FasterWhisper"}, "propertyName": "type"}, "oneOf": [{"$ref": "#/$defs/FasterWhisper"}, {"$ref": "#/$defs/ExternalService"}]}, "type": "array"}, {"type": "null"}], "description": "List of STT runner services", "title": "Stt Runner"}, "tts_runner": {"anyOf": [{"items": {"discriminator": {"mapping": {"external": "#/$defs/ExternalService"}, "propertyName": "type"}, "oneOf": [{"$ref": "#/$defs/ExternalService"}]}, "type": "array"}, {"type": "null"}], "description": "List of TTS runner services", "title": "Tts Runner"}, "rerank_runner": {"anyOf": [{"items": {"discriminator": {"mapping": {"external": "#/$defs/ExternalService"}, "propertyName": "type"}, "oneOf": [{"$ref": "#/$defs/ExternalService"}]}, "type": "array"}, {"type": "null"}], "description": "List of re-ranker services", "title": "Rerank Runner"}}, "title": "Services", "type": "object"}, "SglangRunner": {"description": "sglang runner config", "properties": {"tokenizer_path": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Tokenizer Path"}, "host": {"default": "127.0.0.1", "title": "Host", "type": "string"}, "port": {"default": 8000, "title": "Port", "type": "integer"}, "tokenizer_mode": {"default": "auto", "title": "Tokenizer Mode", "type": "string"}, "skip_tokenizer_init": {"default": false, "title": "Skip Tokenizer Init", "type": "boolean"}, "load_format": {"default": "auto", "title": "Load Format", "type": "string"}, "trust_remote_code": {"default": false, "title": "Trust Remote Code", "type": "boolean"}, "dtype": {"default": "auto", "title": "Dtype", "type": "string"}, "kv_cache_dtype": {"default": "auto", "title": "Kv Cache Dtype", "type": "string"}, "quantization": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Quantization"}, "context_length": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Context Length"}, "device": {"default": "cuda", "title": "Device", "type": "string"}, "chat_template": {"anyOf": 
[{"type": "string"}, {"type": "null"}], "default": null, "title": "Chat Template"}, "is_embedding": {"default": false, "title": "Is Embedding", "type": "boolean"}, "mem_fraction_static": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Mem Fraction Static"}, "max_running_requests": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Running Requests"}, "max_total_tokens": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Total Tokens"}, "chunked_prefill_size": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Chunked Prefill Size"}, "max_prefill_tokens": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Prefill Tokens"}, "schedule_policy": {"default": "lpm", "title": "Schedule Policy", "type": "string"}, "schedule_conservativeness": {"default": 1.0, "title": "Schedule Conservativeness", "type": "number"}, "tp_size": {"default": 1, "title": "Tp Size", "type": "integer"}, "stream_interval": {"default": 1, "title": "Stream Interval", "type": "integer"}, "random_seed": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Random Seed"}, "watchdog_timeout": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Watchdog Timeout"}, "log_level": {"default": "info", "title": "Log Level", "type": "string"}, "log_level_http": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Log Level Http"}, "log_requests": {"default": false, "title": "Log Requests", "type": "boolean"}, "show_time_cost": {"default": false, "title": "Show Time Cost", "type": "boolean"}, "enable_metrics": {"default": false, "title": "Enable Metrics", "type": "boolean"}, "decode_log_interval": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Decode Log Interval"}, "api_key": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Api Key"}, 
"file_storage_pth": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "File Storage Pth"}, "enable_cache_report": {"default": false, "title": "Enable Cache Report", "type": "boolean"}, "dp_size": {"default": 1, "title": "Dp Size", "type": "integer"}, "load_balance_method": {"default": "round_robin", "title": "Load Balance Method", "type": "string"}, "dist_init_addr": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Dist Init Addr"}, "nnodes": {"default": 1, "title": "Nnodes", "type": "integer"}, "node_rank": {"default": 0, "title": "Node Rank", "type": "integer"}, "json_model_override_args": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Json Model Override Args"}, "enable_double_sparsity": {"default": false, "title": "Enable Double Sparsity", "type": "boolean"}, "ds_channel_config_path": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Ds Channel Config Path"}, "ds_heavy_channel_num": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Ds Heavy Channel Num"}, "ds_heavy_token_num": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Ds Heavy Token Num"}, "ds_heavy_channel_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Ds Heavy Channel Type"}, "ds_sparse_decode_threshold": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Ds Sparse Decode Threshold"}, "lora_paths": {"anyOf": [{"items": {"type": "string"}, "type": "array"}, {"type": "null"}], "default": null, "title": "Lora Paths"}, "max_loras_per_batch": {"default": 8, "title": "Max Loras Per Batch", "type": "integer"}, "attention_backend": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Attention Backend"}, "sampling_backend": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Sampling Backend"}, "grammar_backend": {"anyOf": [{"type": 
"string"}, {"type": "null"}], "default": null, "title": "Grammar Backend"}, "disable_flashinfer": {"default": false, "title": "Disable Flashinfer", "type": "boolean"}, "disable_flashinfer_sampling": {"default": false, "title": "Disable Flashinfer Sampling", "type": "boolean"}, "disable_radix_cache": {"default": false, "title": "Disable Radix Cache", "type": "boolean"}, "disable_jump_forward": {"default": false, "title": "Disable Jump Forward", "type": "boolean"}, "disable_cuda_graph": {"default": false, "title": "Disable Cuda Graph", "type": "boolean"}, "disable_cuda_graph_padding": {"default": false, "title": "Disable Cuda Graph Padding", "type": "boolean"}, "disable_disk_cache": {"default": false, "title": "Disable Disk Cache", "type": "boolean"}, "disable_custom_all_reduce": {"default": false, "title": "Disable Custom All Reduce", "type": "boolean"}, "disable_mla": {"default": false, "title": "Disable Mla", "type": "boolean"}, "disable_penalizer": {"default": false, "title": "Disable Penalizer", "type": "boolean"}, "disable_nan_detection": {"default": false, "title": "Disable Nan Detection", "type": "boolean"}, "enable_overlap_schedule": {"default": false, "title": "Enable Overlap Schedule", "type": "boolean"}, "enable_mixed_chunk": {"default": false, "title": "Enable Mixed Chunk", "type": "boolean"}, "enable_torch_compile": {"default": false, "title": "Enable Torch Compile", "type": "boolean"}, "torch_compile_max_bs": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Torch Compile Max Bs"}, "cuda_graph_max_bs": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Cuda Graph Max Bs"}, "torchao_config": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Torchao Config"}, "enable_p2p_check": {"default": false, "title": "Enable P2P Check", "type": "boolean"}, "triton_attention_reduce_in_fp32": {"default": false, "title": "Triton Attention Reduce In Fp32", "type": "boolean"}, 
"num_continuous_decode_steps": {"default": 1, "title": "Num Continuous Decode Steps", "type": "integer"}, "delete_ckpt_after_loading": {"default": false, "title": "Delete Ckpt After Loading", "type": "boolean"}, "name": {"description": "Name of the service", "title": "Name", "type": "string"}, "alias": {"description": "List of aliases for the service", "items": {"type": "string"}, "title": "Alias", "type": "array"}, "model": {"description": "Path to the model folder or HF repository", "title": "Model", "type": "string"}, "type": {"const": "sglang", "enum": ["sglang"], "title": "Type", "type": "string"}}, "required": ["name", "model", "type"], "title": "SglangRunner", "type": "object"}, "vLLMRunner": {"description": "vLLM runner config", "properties": {"tokenizer": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Tokenizer"}, "skip_tokenizer_init": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Skip Tokenizer Init"}, "revision": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Revision"}, "code_revision": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Code Revision"}, "tokenizer_revision": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Tokenizer Revision"}, "tokenizer_mode": {"anyOf": [{"enum": ["auto", "slow", "mistral"], "type": "string"}, {"type": "null"}], "default": null, "title": "Tokenizer Mode"}, "trust_remote_code": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Trust Remote Code"}, "download_dir": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Download Dir"}, "load_format": {"anyOf": [{"enum": ["auto", "pt", "safetensors", "npcache", "dummy", "tensorizer", "sharded_state", "gguf", "bitsandbytes", "mistral"], "type": "string"}, {"type": "null"}], "default": null, "title": "Load Format"}, "config_format": {"anyOf": [{"enum": ["auto", "hf", "mistral"], 
"type": "string"}, {"type": "null"}], "default": null, "title": "Config Format"}, "dtype": {"anyOf": [{"enum": ["auto", "half", "float16", "bfloat16", "float", "float32"], "type": "string"}, {"type": "null"}], "default": null, "title": "Dtype"}, "kv_cache_dtype": {"anyOf": [{"enum": ["auto", "fp8", "fp8_e5m2", "fp8_e4m3"], "type": "string"}, {"type": "null"}], "default": null, "title": "Kv Cache Dtype"}, "quantization_param_path": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Quantization Param Path"}, "max_model_len": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Model Len"}, "guided_decoding_backend": {"anyOf": [{"enum": ["outlines", "lm-format-enforcer"], "type": "string"}, {"type": "null"}], "default": null, "title": "Guided Decoding Backend"}, "distributed_executor_backend": {"anyOf": [{"enum": ["ray", "mp"], "type": "string"}, {"type": "null"}], "default": null, "title": "Distributed Executor Backend"}, "worker_use_ray": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Worker Use Ray"}, "pipeline_parallel_size": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Pipeline Parallel Size"}, "tensor_parallel_size": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Tensor Parallel Size"}, "max_parallel_loading_workers": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Parallel Loading Workers"}, "ray_workers_use_nsight": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Ray Workers Use Nsight"}, "block_size": {"anyOf": [{"enum": [8, 16, 32], "type": "integer"}, {"type": "null"}], "default": null, "title": "Block Size"}, "enable_prefix_caching": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Enable Prefix Caching"}, "disable_sliding_window": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Disable 
Sliding Window"}, "use_v2_block_manager": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Use V2 Block Manager"}, "num_lookahead_slots": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Num Lookahead Slots"}, "seed": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Seed"}, "swap_space": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Swap Space"}, "cpu_offload_gb": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Cpu Offload Gb"}, "gpu_memory_utilization": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Gpu Memory Utilization"}, "num_gpu_blocks_override": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Num Gpu Blocks Override"}, "max_num_batched_tokens": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Num Batched Tokens"}, "max_num_seqs": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Num Seqs"}, "max_logprobs": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Logprobs"}, "disable_log_stats": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Disable Log Stats"}, "quantization": {"anyOf": [{"enum": ["aqlm", "awq", "deepspeedfp", "tpu_int8", "fp8", "fbgemm_fp8", "modelopt", "marlin", "gguf", "gptq_marlin_24", "gptq_marlin", "awq_marlin", "gptq", "compressed-tensors", "bitsandbytes", "qqq", "experts_int8", "neuron_quant", "ipex", null]}, {"type": "null"}], "default": null, "title": "Quantization"}, "rope_scaling": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Rope Scaling"}, "rope_theta": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Rope Theta"}, "enforce_eager": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Enforce Eager"}, 
"max_context_len_to_capture": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Context Len To Capture"}, "max_seq_len_to_capture": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Seq Len To Capture"}, "disable_custom_all_reduce": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Disable Custom All Reduce"}, "tokenizer_pool_size": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Tokenizer Pool Size"}, "tokenizer_pool_type": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Tokenizer Pool Type"}, "tokenizer_pool_extra_config": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Tokenizer Pool Extra Config"}, "limit_mm_per_prompt": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Limit Mm Per Prompt"}, "mm_processor_kwargs": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Mm Processor Kwargs"}, "enable_lora": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Enable Lora"}, "max_loras": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Loras"}, "max_lora_rank": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Lora Rank"}, "lora_extra_vocab_size": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Lora Extra Vocab Size"}, "lora_dtype": {"anyOf": [{"enum": ["auto", "float16", "bfloat16", "float32"], "type": "string"}, {"type": "null"}], "default": null, "title": "Lora Dtype"}, "long_lora_scaling_factors": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Long Lora Scaling Factors"}, "max_cpu_loras": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Cpu Loras"}, "fully_sharded_loras": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Fully 
Sharded Loras"}, "enable_prompt_adapter": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Enable Prompt Adapter"}, "max_prompt_adapters": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Prompt Adapters"}, "max_prompt_adapter_token": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Max Prompt Adapter Token"}, "device": {"anyOf": [{"enum": ["auto", "cuda", "neuron", "cpu", "openvino", "tpu", "xpu"], "type": "string"}, {"type": "null"}], "default": null, "title": "Device"}, "num_scheduler_steps": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Num Scheduler Steps"}, "multi_step_stream_outputs": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Multi Step Stream Outputs"}, "scheduler_delay_factor": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Scheduler Delay Factor"}, "enable_chunked_prefill": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Enable Chunked Prefill"}, "speculative_model": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Speculative Model"}, "speculative_model_quantization": {"anyOf": [{"enum": ["aqlm", "awq", "deepspeedfp", "tpu_int8", "fp8", "fbgemm_fp8", "modelopt", "marlin", "gguf", "gptq_marlin_24", "gptq_marlin", "awq_marlin", "gptq", "compressed-tensors", "bitsandbytes", "qqq", "experts_int8", "neuron_quant", "ipex", null]}, {"type": "null"}], "default": null, "title": "Speculative Model Quantization"}, "num_speculative_tokens": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Num Speculative Tokens"}, "speculative_disable_mqa_scorer": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Speculative Disable Mqa Scorer"}, "speculative_draft_tensor_parallel_size": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Speculative Draft 
Tensor Parallel Size"}, "speculative_max_model_len": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Speculative Max Model Len"}, "speculative_disable_by_batch_size": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Speculative Disable By Batch Size"}, "ngram_prompt_lookup_max": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Ngram Prompt Lookup Max"}, "ngram_prompt_lookup_min": {"anyOf": [{"type": "integer"}, {"type": "null"}], "default": null, "title": "Ngram Prompt Lookup Min"}, "spec_decoding_acceptance_method": {"anyOf": [{"enum": ["rejection_sampler", "typical_acceptance_sampler"], "type": "string"}, {"type": "null"}], "default": null, "title": "Spec Decoding Acceptance Method"}, "typical_acceptance_sampler_posterior_threshold": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Typical Acceptance Sampler Posterior Threshold"}, "typical_acceptance_sampler_posterior_alpha": {"anyOf": [{"type": "number"}, {"type": "null"}], "default": null, "title": "Typical Acceptance Sampler Posterior Alpha"}, "disable_logprobs_during_spec_decoding": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Disable Logprobs During Spec Decoding"}, "model_loader_extra_config": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Model Loader Extra Config"}, "ignore_patterns": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Ignore Patterns"}, "preemption_mode": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Preemption Mode"}, "served_model_name": {"anyOf": [{"items": {"type": "string"}, "type": "array"}, {"type": "null"}], "default": null, "title": "Served Model Name"}, "qlora_adapter_name_or_path": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Qlora Adapter Name Or Path"}, "otlp_traces_endpoint": {"anyOf": [{"type": "string"}, 
{"type": "null"}], "default": null, "title": "Otlp Traces Endpoint"}, "collect_detailed_traces": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Collect Detailed Traces"}, "disable_async_output_proc": {"anyOf": [{"type": "boolean"}, {"type": "null"}], "default": null, "title": "Disable Async Output Proc"}, "override_neuron_config": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": null, "title": "Override Neuron Config"}, "scheduling_policy": {"anyOf": [{"enum": ["fcfs", "priority"], "type": "string"}, {"type": "null"}], "default": null, "title": "Scheduling Policy"}, "name": {"description": "Name of the service", "title": "Name", "type": "string"}, "alias": {"description": "List of aliases for the service", "items": {"type": "string"}, "title": "Alias", "type": "array"}, "model": {"description": "Path to the model folder or HF repository", "title": "Model", "type": "string"}, "type": {"const": "vllm", "enum": ["vllm"], "title": "Type", "type": "string"}}, "required": ["name", "model", "type"], "title": "vLLMRunner", "type": "object"}}, "description": "The global config for the entire LMOS system.\n\nThis file is mapped and provided to all containers on boot,\nand is used to configure all aspects of the system.\n\nThe Router config is automatically derived from the services config.", "properties": {"internal_configuration": {"$ref": "#/$defs/InternalConfiguration", "description": "Internal configuration for assets"}, "auth": {"$ref": "#/$defs/AuthConfig"}, "services": {"$ref": "#/$defs/Services", "description": "Service configurations"}}, "required": ["internal_configuration", "services"], "title": "Global LMOS Config", "type": "object"}