From 27b95e7c051dfc82d21d248b0266127a137762d1 Mon Sep 17 00:00:00 2001
From: Wai Hon Law
Date: Tue, 10 Jun 2025 11:22:08 -0700
Subject: [PATCH] Add estimated peak memory usage for models

---
 model_allowlist.json | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/model_allowlist.json b/model_allowlist.json
index ba347ac..4d205f1 100644
--- a/model_allowlist.json
+++ b/model_allowlist.json
@@ -6,6 +6,7 @@
       "modelFile": "gemma-3n-E2B-it-int4.task",
       "description": "Preview version of [Gemma 3n E2B](https://ai.google.dev/gemma/docs/gemma-3n) ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference). The current checkpoint only supports text and vision input, with 4096 context length.",
       "sizeInBytes": 3136226711,
+      "estimatedPeakMemoryInBytes": 5905580032,
       "version": "20250520",
       "llmSupportImage": true,
       "defaultConfig": {
@@ -23,6 +24,7 @@
       "modelFile": "gemma-3n-E4B-it-int4.task",
       "description": "Preview version of [Gemma 3n E4B](https://ai.google.dev/gemma/docs/gemma-3n) ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference). The current checkpoint only supports text and vision input, with 4096 context length.",
       "sizeInBytes": 4405655031,
+      "estimatedPeakMemoryInBytes": 6979321856,
       "version": "20250520",
       "llmSupportImage": true,
       "defaultConfig": {
@@ -40,6 +42,7 @@
       "modelFile": "Gemma3-1B-IT_multi-prefill-seq_q4_ekv2048.task",
       "description": "A variant of [google/Gemma-3-1B-IT](https://huggingface.co/google/Gemma-3-1B-IT) with 4-bit quantization ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference)",
       "sizeInBytes": 554661246,
+      "estimatedPeakMemoryInBytes": 2147483648,
       "version": "20250514",
       "defaultConfig": {
         "topK": 64,
@@ -56,6 +59,7 @@
       "modelFile": "Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task",
       "description": "A variant of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) with 8-bit quantization ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference)",
       "sizeInBytes": 1625493432,
+      "estimatedPeakMemoryInBytes": 2684354560,
       "version": "20250514",
       "defaultConfig": {
         "topK": 40,
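
For context, a field like "estimatedPeakMemoryInBytes" lets a client gate model downloads against device RAM before fetching multi-gigabyte files. Below is a minimal Kotlin sketch of such a check; the field names come from this patch, but the helper name, the fallback to "sizeInBytes", and the 80% headroom factor are illustrative assumptions, not the app's actual logic.

import android.app.ActivityManager
import android.content.Context
import org.json.JSONObject

// Hypothetical helper: decides whether a model entry from
// model_allowlist.json is likely to fit in memory on this device.
fun isModelLikelyToFit(context: Context, modelEntry: JSONObject): Boolean {
    // Prefer the new peak-memory estimate; fall back to the download size
    // for older allowlist entries that do not carry the field yet.
    val peakBytes = modelEntry.optLong(
        "estimatedPeakMemoryInBytes",
        modelEntry.optLong("sizeInBytes", 0L)
    )

    val activityManager =
        context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager
    val memInfo = ActivityManager.MemoryInfo()
    activityManager.getMemoryInfo(memInfo)

    // totalMem is the device's total RAM in bytes; leave headroom for the
    // OS and other apps (the 0.8 factor is an arbitrary assumption here).
    return peakBytes <= (memInfo.totalMem * 0.8).toLong()
}

The values in the patch are round GiB figures (e.g. 2147483648 = 2 GiB, 5905580032 = 5.5 GiB), consistent with coarse estimates rather than measured peaks, so a conservative headroom factor on the client side seems prudent.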