time=2026-05-19T09:14:06.929-04:00 level=INFO source=routes.go:1802 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GGML_VK_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:0 OLLAMA_DEBUG:INFO OLLAMA_DEBUG_LOG_REQUESTS:false OLLAMA_EDITOR: OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MAX_TRANSFER_STREAMS:4 OLLAMA_MODELS:X:\\AI Models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NO_CLOUD:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false OLLAMA_VULKAN:false ROCR_VISIBLE_DEVICES:]" time=2026-05-19T09:14:06.941-04:00 level=INFO source=routes.go:1804 msg="Ollama cloud disabled: false" time=2026-05-19T09:14:07.071-04:00 level=INFO source=images.go:517 msg="total blobs: 29" time=2026-05-19T09:14:07.072-04:00 level=INFO source=images.go:524 msg="total unused blobs removed: 0" time=2026-05-19T09:14:07.075-04:00 level=INFO source=routes.go:1864 msg="Listening on 127.0.0.1:11434 (version 0.24.0)" time=2026-05-19T09:14:07.080-04:00 level=INFO source=runner.go:67 msg="discovering available GPUs..." time=2026-05-19T09:14:07.096-04:00 level=INFO source=server.go:433 msg="starting runner" cmd="C:\\Users\\luctr\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --ollama-engine --port 10759" time=2026-05-19T09:14:07.245-04:00 level=INFO source=model_recommendations.go:177 msg="model recommendations cache sleep scheduled" wait=4h26m12.987266245s consecutive_failures=0 time=2026-05-19T09:14:09.613-04:00 level=INFO source=server.go:433 msg="starting runner" cmd="C:\\Users\\luctr\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --ollama-engine --port 10791" time=2026-05-19T09:14:10.439-04:00 level=INFO source=server.go:433 msg="starting runner" cmd="C:\\Users\\luctr\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --ollama-engine --port 10812" time=2026-05-19T09:14:10.980-04:00 level=INFO source=runner.go:106 msg="experimental Vulkan support disabled. To enable, set OLLAMA_VULKAN=1" time=2026-05-19T09:14:10.982-04:00 level=INFO source=server.go:433 msg="starting runner" cmd="C:\\Users\\luctr\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --ollama-engine --port 10842" time=2026-05-19T09:14:10.982-04:00 level=INFO source=server.go:433 msg="starting runner" cmd="C:\\Users\\luctr\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --ollama-engine --port 10843" time=2026-05-19T09:14:11.460-04:00 level=INFO source=types.go:42 msg="inference compute" id=GPU-11fc7c41-6e2e-c103-928c-30d7e1af7171 filter_id="" library=CUDA compute=8.6 name=CUDA0 description="NVIDIA GeForce RTX 3070" libdirs=ollama,cuda_v13 driver=13.2 pci_id=0000:01:00.0 type=discrete total="8.0 GiB" available="5.0 GiB" time=2026-05-19T09:14:11.460-04:00 level=INFO source=routes.go:1914 msg="vram-based default context" total_vram="8.0 GiB" default_num_ctx=4096 [GIN] 2026/05/19 - 09:14:11 | 200 | 523.2µs | 127.0.0.1 | GET "/api/version" time=2026-05-19T13:40:20.371-04:00 level=INFO source=model_recommendations.go:177 msg="model recommendations cache sleep scheduled" wait=3h27m25.298593085s consecutive_failures=0 time=2026-05-19T17:07:45.822-04:00 level=INFO source=model_recommendations.go:177 msg="model recommendations cache sleep scheduled" wait=3h36m56.960735566s consecutive_failures=0 [GIN] 2026/05/19 - 19:41:16 | 200 | 0s | 127.0.0.1 | GET "/api/version" [GIN] 2026/05/19 - 19:42:09 | 200 | 0s | 127.0.0.1 | HEAD "/" time=2026-05-19T19:42:09.924-04:00 level=INFO source=download.go:179 msg="downloading c5ad996bda6e in 1 556 B part(s)" [GIN] 2026/05/19 - 19:42:11 | 200 | 1.802237s | 127.0.0.1 | POST "/api/pull" [GIN] 2026/05/19 - 19:42:38 | 200 | 0s | 127.0.0.1 | HEAD "/" [GIN] 2026/05/19 - 19:42:39 | 200 | 142.3099ms | 127.0.0.1 | POST "/api/show" [GIN] 2026/05/19 - 19:42:39 | 200 | 1.5989ms | 127.0.0.1 | POST "/api/show" time=2026-05-19T19:42:39.258-04:00 level=INFO source=server.go:433 msg="starting runner" cmd="C:\\Users\\luctr\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --ollama-engine --port 20718" time=2026-05-19T19:42:39.970-04:00 level=INFO source=cpu_windows.go:148 msg=packages count=1 time=2026-05-19T19:42:39.970-04:00 level=INFO source=cpu_windows.go:195 msg="" package=0 cores=8 efficiency=0 threads=16 llama_model_loader: loaded meta data with 26 key-value pairs and 339 tensors from X:\AI Models\blobs\sha256-96c415656d377afbff962f6cdb2394ab092ccbcbaab4b82525bc4ca800fe8a49 (version GGUF V3 (latest)) llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. llama_model_loader: - kv 0: general.architecture str = qwen2 llama_model_loader: - kv 1: general.type str = model llama_model_loader: - kv 2: general.name str = DeepSeek R1 Distill Qwen 7B llama_model_loader: - kv 3: general.basename str = DeepSeek-R1-Distill-Qwen llama_model_loader: - kv 4: general.size_label str = 7B llama_model_loader: - kv 5: qwen2.block_count u32 = 28 llama_model_loader: - kv 6: qwen2.context_length u32 = 131072 llama_model_loader: - kv 7: qwen2.embedding_length u32 = 3584 llama_model_loader: - kv 8: qwen2.feed_forward_length u32 = 18944 llama_model_loader: - kv 9: qwen2.attention.head_count u32 = 28 llama_model_loader: - kv 10: qwen2.attention.head_count_kv u32 = 4 llama_model_loader: - kv 11: qwen2.rope.freq_base f32 = 10000.000000 llama_model_loader: - kv 12: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001 llama_model_loader: - kv 13: general.file_type u32 = 15 llama_model_loader: - kv 14: tokenizer.ggml.model str = gpt2 llama_model_loader: - kv 15: tokenizer.ggml.pre str = qwen2 llama_model_loader: - kv 16: tokenizer.ggml.tokens arr[str,152064] = ["!", "\"", "#", "$", "%", "&", "'", ... llama_model_loader: - kv 17: tokenizer.ggml.token_type arr[i32,152064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... llama_model_loader: - kv 18: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",... llama_model_loader: - kv 19: tokenizer.ggml.bos_token_id u32 = 151646 llama_model_loader: - kv 20: tokenizer.ggml.eos_token_id u32 = 151643 llama_model_loader: - kv 21: tokenizer.ggml.padding_token_id u32 = 151643 llama_model_loader: - kv 22: tokenizer.ggml.add_bos_token bool = true llama_model_loader: - kv 23: tokenizer.ggml.add_eos_token bool = false llama_model_loader: - kv 24: tokenizer.chat_template str = {% if not add_generation_prompt is de... llama_model_loader: - kv 25: general.quantization_version u32 = 2 llama_model_loader: - type f32: 141 tensors llama_model_loader: - type q4_K: 169 tensors llama_model_loader: - type q6_K: 29 tensors print_info: file format = GGUF V3 (latest) print_info: file type = Q4_K - Medium print_info: file size = 4.36 GiB (4.91 BPW) load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect load: printing all EOG tokens: load: - 151643 ('<|end▁of▁sentence|>') load: - 151662 ('<|fim_pad|>') load: - 151663 ('<|repo_name|>') load: - 151664 ('<|file_sep|>') load: special tokens cache size = 22 load: token to piece cache size = 0.9310 MB print_info: arch = qwen2 print_info: vocab_only = 1 print_info: no_alloc = 0 print_info: model type = ?B print_info: model params = 7.62 B print_info: general.name = DeepSeek R1 Distill Qwen 7B print_info: vocab type = BPE print_info: n_vocab = 152064 print_info: n_merges = 151387 print_info: BOS token = 151646 '<|begin▁of▁sentence|>' print_info: EOS token = 151643 '<|end▁of▁sentence|>' print_info: EOT token = 151643 '<|end▁of▁sentence|>' print_info: PAD token = 151643 '<|end▁of▁sentence|>' print_info: LF token = 198 'Ċ' print_info: FIM PRE token = 151659 '<|fim_prefix|>' print_info: FIM SUF token = 151661 '<|fim_suffix|>' print_info: FIM MID token = 151660 '<|fim_middle|>' print_info: FIM PAD token = 151662 '<|fim_pad|>' print_info: FIM REP token = 151663 '<|repo_name|>' print_info: FIM SEP token = 151664 '<|file_sep|>' print_info: EOG token = 151643 '<|end▁of▁sentence|>' print_info: EOG token = 151662 '<|fim_pad|>' print_info: EOG token = 151663 '<|repo_name|>' print_info: EOG token = 151664 '<|file_sep|>' print_info: max token length = 256 llama_model_load: vocab only - skipping tensors time=2026-05-19T19:42:40.297-04:00 level=INFO source=server.go:433 msg="starting runner" cmd="C:\\Users\\luctr\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --model X:\\AI Models\\blobs\\sha256-96c415656d377afbff962f6cdb2394ab092ccbcbaab4b82525bc4ca800fe8a49 --port 22121" time=2026-05-19T19:42:40.301-04:00 level=INFO source=sched.go:484 msg="system memory" total="63.9 GiB" free="40.0 GiB" free_swap="30.1 GiB" time=2026-05-19T19:42:40.301-04:00 level=INFO source=sched.go:491 msg="gpu memory" id=GPU-11fc7c41-6e2e-c103-928c-30d7e1af7171 library=CUDA available="3.2 GiB" free="3.6 GiB" minimum="457.0 MiB" overhead="0 B" time=2026-05-19T19:42:40.301-04:00 level=INFO source=server.go:532 msg="loading model" "model layers"=29 requested=-1 time=2026-05-19T19:42:40.302-04:00 level=INFO source=device.go:240 msg="model weights" device=CUDA0 size="2.1 GiB" time=2026-05-19T19:42:40.302-04:00 level=INFO source=device.go:245 msg="model weights" device=CPU size="2.0 GiB" time=2026-05-19T19:42:40.302-04:00 level=INFO source=device.go:251 msg="kv cache" device=CUDA0 size="128.0 MiB" time=2026-05-19T19:42:40.302-04:00 level=INFO source=device.go:256 msg="kv cache" device=CPU size="96.0 MiB" time=2026-05-19T19:42:40.302-04:00 level=INFO source=device.go:262 msg="compute graph" device=CUDA0 size="730.4 MiB" time=2026-05-19T19:42:40.302-04:00 level=INFO source=device.go:272 msg="total memory" size="5.0 GiB" time=2026-05-19T19:42:40.483-04:00 level=INFO source=runner.go:965 msg="starting go runner" load_backend: loaded CPU backend from C:\Users\luctr\AppData\Local\Programs\Ollama\lib\ollama\ggml-cpu-haswell.dll ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 CUDA devices: Device 0: NVIDIA GeForce RTX 3070, compute capability 8.6, VMM: yes, ID: GPU-11fc7c41-6e2e-c103-928c-30d7e1af7171 load_backend: loaded CUDA backend from C:\Users\luctr\AppData\Local\Programs\Ollama\lib\ollama\cuda_v13\ggml-cuda.dll time=2026-05-19T19:42:40.604-04:00 level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 CUDA.0.ARCHS=750,800,860,870,890,900,1000,1030,1100,1200,1210 CUDA.0.USE_GRAPHS=1 CUDA.0.PEER_MAX_BATCH_SIZE=128 compiler=cgo(clang) time=2026-05-19T19:42:40.605-04:00 level=INFO source=runner.go:1001 msg="Server listening on 127.0.0.1:22121" time=2026-05-19T19:42:40.609-04:00 level=INFO source=runner.go:895 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:Auto KvSize:4096 KvCacheType: NumThreads:8 GPULayers:16[ID:GPU-11fc7c41-6e2e-c103-928c-30d7e1af7171 Layers:16(12..27)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}" time=2026-05-19T19:42:40.610-04:00 level=INFO source=server.go:1385 msg="waiting for llama runner to start responding" time=2026-05-19T19:42:40.610-04:00 level=INFO source=server.go:1428 msg="waiting for server to become available" status="llm server loading model" ggml_backend_cuda_device_get_memory device GPU-11fc7c41-6e2e-c103-928c-30d7e1af7171 utilizing NVML memory reporting free: 3901542400 total: 8589934592 llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3070) (0000:01:00.0) - 3720 MiB free llama_model_loader: loaded meta data with 26 key-value pairs and 339 tensors from X:\AI Models\blobs\sha256-96c415656d377afbff962f6cdb2394ab092ccbcbaab4b82525bc4ca800fe8a49 (version GGUF V3 (latest)) llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. llama_model_loader: - kv 0: general.architecture str = qwen2 llama_model_loader: - kv 1: general.type str = model llama_model_loader: - kv 2: general.name str = DeepSeek R1 Distill Qwen 7B llama_model_loader: - kv 3: general.basename str = DeepSeek-R1-Distill-Qwen llama_model_loader: - kv 4: general.size_label str = 7B llama_model_loader: - kv 5: qwen2.block_count u32 = 28 llama_model_loader: - kv 6: qwen2.context_length u32 = 131072 llama_model_loader: - kv 7: qwen2.embedding_length u32 = 3584 llama_model_loader: - kv 8: qwen2.feed_forward_length u32 = 18944 llama_model_loader: - kv 9: qwen2.attention.head_count u32 = 28 llama_model_loader: - kv 10: qwen2.attention.head_count_kv u32 = 4 llama_model_loader: - kv 11: qwen2.rope.freq_base f32 = 10000.000000 llama_model_loader: - kv 12: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001 llama_model_loader: - kv 13: general.file_type u32 = 15 llama_model_loader: - kv 14: tokenizer.ggml.model str = gpt2 llama_model_loader: - kv 15: tokenizer.ggml.pre str = qwen2 llama_model_loader: - kv 16: tokenizer.ggml.tokens arr[str,152064] = ["!", "\"", "#", "$", "%", "&", "'", ... llama_model_loader: - kv 17: tokenizer.ggml.token_type arr[i32,152064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... llama_model_loader: - kv 18: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",... llama_model_loader: - kv 19: tokenizer.ggml.bos_token_id u32 = 151646 llama_model_loader: - kv 20: tokenizer.ggml.eos_token_id u32 = 151643 llama_model_loader: - kv 21: tokenizer.ggml.padding_token_id u32 = 151643 llama_model_loader: - kv 22: tokenizer.ggml.add_bos_token bool = true llama_model_loader: - kv 23: tokenizer.ggml.add_eos_token bool = false llama_model_loader: - kv 24: tokenizer.chat_template str = {% if not add_generation_prompt is de... llama_model_loader: - kv 25: general.quantization_version u32 = 2 llama_model_loader: - type f32: 141 tensors llama_model_loader: - type q4_K: 169 tensors llama_model_loader: - type q6_K: 29 tensors print_info: file format = GGUF V3 (latest) print_info: file type = Q4_K - Medium print_info: file size = 4.36 GiB (4.91 BPW) load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect load: printing all EOG tokens: load: - 151643 ('<|end▁of▁sentence|>') load: - 151662 ('<|fim_pad|>') load: - 151663 ('<|repo_name|>') load: - 151664 ('<|file_sep|>') load: special tokens cache size = 22 load: token to piece cache size = 0.9310 MB print_info: arch = qwen2 print_info: vocab_only = 0 print_info: no_alloc = 0 print_info: n_ctx_train = 131072 print_info: n_embd = 3584 print_info: n_embd_inp = 3584 print_info: n_layer = 28 print_info: n_head = 28 print_info: n_head_kv = 4 print_info: n_rot = 128 print_info: n_swa = 0 print_info: is_swa_any = 0 print_info: n_embd_head_k = 128 print_info: n_embd_head_v = 128 print_info: n_gqa = 7 print_info: n_embd_k_gqa = 512 print_info: n_embd_v_gqa = 512 print_info: f_norm_eps = 0.0e+00 print_info: f_norm_rms_eps = 1.0e-06 print_info: f_clamp_kqv = 0.0e+00 print_info: f_max_alibi_bias = 0.0e+00 print_info: f_logit_scale = 0.0e+00 print_info: f_attn_scale = 0.0e+00 print_info: n_ff = 18944 print_info: n_expert = 0 print_info: n_expert_used = 0 print_info: n_expert_groups = 0 print_info: n_group_used = 0 print_info: causal attn = 1 print_info: pooling type = -1 print_info: rope type = 2 print_info: rope scaling = linear print_info: freq_base_train = 10000.0 print_info: freq_scale_train = 1 print_info: n_ctx_orig_yarn = 131072 print_info: rope_yarn_log_mul= 0.0000 print_info: rope_finetuned = unknown print_info: model type = 7B print_info: model params = 7.62 B print_info: general.name = DeepSeek R1 Distill Qwen 7B print_info: vocab type = BPE print_info: n_vocab = 152064 print_info: n_merges = 151387 print_info: BOS token = 151646 '<|begin▁of▁sentence|>' print_info: EOS token = 151643 '<|end▁of▁sentence|>' print_info: EOT token = 151643 '<|end▁of▁sentence|>' print_info: PAD token = 151643 '<|end▁of▁sentence|>' print_info: LF token = 198 'Ċ' print_info: FIM PRE token = 151659 '<|fim_prefix|>' print_info: FIM SUF token = 151661 '<|fim_suffix|>' print_info: FIM MID token = 151660 '<|fim_middle|>' print_info: FIM PAD token = 151662 '<|fim_pad|>' print_info: FIM REP token = 151663 '<|repo_name|>' print_info: FIM SEP token = 151664 '<|file_sep|>' print_info: EOG token = 151643 '<|end▁of▁sentence|>' print_info: EOG token = 151662 '<|fim_pad|>' print_info: EOG token = 151663 '<|repo_name|>' print_info: EOG token = 151664 '<|file_sep|>' print_info: max token length = 256 load_tensors: loading model tensors, this can take a while... (mmap = false) load_tensors: offloading 16 repeating layers to GPU load_tensors: offloaded 16/29 layers to GPU load_tensors: CPU model buffer size = 292.36 MiB load_tensors: CUDA0 model buffer size = 2138.13 MiB load_tensors: CUDA_Host model buffer size = 2029.96 MiB llama_context: constructing llama_context llama_context: n_seq_max = 1 llama_context: n_ctx = 4096 llama_context: n_ctx_seq = 4096 llama_context: n_batch = 512 llama_context: n_ubatch = 512 llama_context: causal_attn = 1 llama_context: flash_attn = auto llama_context: kv_unified = false llama_context: freq_base = 10000.0 llama_context: freq_scale = 1 llama_context: n_ctx_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized llama_context: CPU output buffer size = 0.59 MiB llama_kv_cache: CPU KV buffer size = 96.00 MiB llama_kv_cache: CUDA0 KV buffer size = 128.00 MiB llama_kv_cache: size = 224.00 MiB ( 4096 cells, 28 layers, 1/1 seqs), K (f16): 112.00 MiB, V (f16): 112.00 MiB llama_context: Flash Attention was auto, set to enabled llama_context: CUDA0 compute buffer size = 737.36 MiB llama_context: CUDA_Host compute buffer size = 15.01 MiB llama_context: graph nodes = 959 llama_context: graph splits = 172 (with bs=512), 3 (with bs=1) time=2026-05-19T19:43:26.445-04:00 level=INFO source=server.go:1432 msg="llama runner started in 46.14 seconds" time=2026-05-19T19:43:26.445-04:00 level=INFO source=sched.go:561 msg="loaded runners" count=1 time=2026-05-19T19:43:26.445-04:00 level=INFO source=server.go:1385 msg="waiting for llama runner to start responding" time=2026-05-19T19:43:26.446-04:00 level=INFO source=server.go:1432 msg="llama runner started in 46.14 seconds" [GIN] 2026/05/19 - 19:43:26 | 200 | 47.3011417s | 127.0.0.1 | POST "/api/generate" [GIN] 2026/05/19 - 19:45:01 | 200 | 0s | 127.0.0.1 | HEAD "/" [GIN] 2026/05/19 - 19:45:01 | 200 | 2.1243ms | 127.0.0.1 | POST "/api/show" [GIN] 2026/05/19 - 19:45:39 | 200 | 38.3762063s | 127.0.0.1 | POST "/api/generate" [GIN] 2026/05/19 - 19:47:17 | 200 | 0s | 127.0.0.1 | HEAD "/" [GIN] 2026/05/19 - 19:47:17 | 200 | 1.6007ms | 127.0.0.1 | POST "/api/show" [GIN] 2026/05/19 - 19:49:37 | 200 | 2m19s | 127.0.0.1 | POST "/api/generate" [GIN] 2026/05/19 - 19:52:41 | 200 | 0s | 127.0.0.1 | HEAD "/" [GIN] 2026/05/19 - 19:52:41 | 200 | 2.3108ms | 127.0.0.1 | POST "/api/show" [GIN] 2026/05/19 - 19:52:52 | 200 | 11.5537759s | 127.0.0.1 | POST "/api/generate" [GIN] 2026/05/19 - 19:53:07 | 200 | 0s | 127.0.0.1 | HEAD "/" [GIN] 2026/05/19 - 19:53:07 | 200 | 1.6165ms | 127.0.0.1 | POST "/api/show" [GIN] 2026/05/19 - 19:54:28 | 200 | 1m20s | 127.0.0.1 | POST "/api/generate" time=2026-05-19T19:59:28.018-04:00 level=INFO source=server.go:433 msg="starting runner" cmd="C:\\Users\\luctr\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --ollama-engine --port 62416" time=2026-05-19T19:59:28.579-04:00 level=INFO source=server.go:433 msg="starting runner" cmd="C:\\Users\\luctr\\AppData\\Local\\Programs\\Ollama\\ollama.exe runner --ollama-engine --port 62443"