mirror of
https://github.com/google-ai-edge/gallery.git
synced 2025-07-04 13:47:04 -04:00
Add estimated peak memory usage for models
This commit is contained in:
parent
ebb605131d
commit
27b95e7c05
1 changed file with 4 additions and 0 deletions
|
@ -6,6 +6,7 @@
|
|||
"modelFile": "gemma-3n-E2B-it-int4.task",
|
||||
"description": "Preview version of [Gemma 3n E2B](https://ai.google.dev/gemma/docs/gemma-3n) ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference). The current checkpoint only supports text and vision input, with 4096 context length.",
|
||||
"sizeInBytes": 3136226711,
|
||||
"estimatedPeakMemoryInBytes": 5905580032,
|
||||
"version": "20250520",
|
||||
"llmSupportImage": true,
|
||||
"defaultConfig": {
|
||||
|
@ -23,6 +24,7 @@
|
|||
"modelFile": "gemma-3n-E4B-it-int4.task",
|
||||
"description": "Preview version of [Gemma 3n E4B](https://ai.google.dev/gemma/docs/gemma-3n) ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference). The current checkpoint only supports text and vision input, with 4096 context length.",
|
||||
"sizeInBytes": 4405655031,
|
||||
"estimatedPeakMemoryInBytes": 6979321856,
|
||||
"version": "20250520",
|
||||
"llmSupportImage": true,
|
||||
"defaultConfig": {
|
||||
|
@ -40,6 +42,7 @@
|
|||
"modelFile": "Gemma3-1B-IT_multi-prefill-seq_q4_ekv2048.task",
|
||||
"description": "A variant of [google/Gemma-3-1B-IT](https://huggingface.co/google/Gemma-3-1B-IT) with 4-bit quantization ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference)",
|
||||
"sizeInBytes": 554661246,
|
||||
"estimatedPeakMemoryInBytes": 2147483648,
|
||||
"version": "20250514",
|
||||
"defaultConfig": {
|
||||
"topK": 64,
|
||||
|
@ -56,6 +59,7 @@
|
|||
"modelFile": "Qwen2.5-1.5B-Instruct_multi-prefill-seq_q8_ekv1280.task",
|
||||
"description": "A variant of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) with 8-bit quantization ready for deployment on Android using the [MediaPipe LLM Inference API](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference)",
|
||||
"sizeInBytes": 1625493432,
|
||||
"estimatedPeakMemoryInBytes": 2684354560,
|
||||
"version": "20250514",
|
||||
"defaultConfig": {
|
||||
"topK": 40,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue