A fully quantized version of the latest Gemma 4 model from Google's TensorFlow library.
Here is the JSON configuration for running this notebook on Google Colab:
{
"project": "google-colab",
"job_name": "gemma-4-31b-fq",
"runtime_version": "2.12.0",
"region": "us-central1",
"machine_type": "N1_HIGH_PERFORMANCE",
"accelerator_type": "GPU",
"image_project_id": "us-core-1",
"image_version": "latest",
"environment_version": "2.12.0",
"extra_resources": {
"gpu_memory_training_mode": true,
"num_cpu_cores": 16,
"cpu_threads_per_worker": 1
},
"network_config": {
"network": "my-project"
},
"service_account_email": "your-account@my-project.iam.gserviceaccount.com",
"additional_flags": [
"--use_torch_xla",
"--disable_client_library"
],
"dockerfile": "FROM nvidia/cuda:11.2-base\nRUN apt-get update && apt-get install -y --no-install-recommends python3-pip\nWORKDIR /opt/python\nENV http_proxy=:8080\nENV https_proxy=:8080\nCOPY requirements.txt requirements.txt\nRUN pip3 install --no-cache-dir -r requirements.txt\nCMD [\"python3\", \"run.py\"]",
"requirements": [
"torch==1.12.1+cu113",
"transformers==4.25.0",
"einops==0.3.0",
"bitsandbytes==0.4.0",
"unzip",
"git",
"pyyaml"
]
}