Use the right layer name and disable auto saving in the XLA notebook
- Fix the FSDP config to use the correct layer name (Gemma2DecoderLayer) and improve memory efficiency
- Disable automatic checkpoint saving in SFTConfig
- Remove an unused comment
kinarr committed Dec 17, 2024
1 parent ed01f67 commit 3d3b2c7
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions Gemma/Finetune_with_Torch_XLA.ipynb
@@ -903,7 +903,7 @@
 "# Maximum sequence length to use\n",
 "max_seq_length = 512 # @param {\"type\":\"slider\",\"min\":32,\"max\":1024,\"step\":2}\n",
 "# Pack multiple short examples in the same input sequence to increase efficiency\n",
-"packing = True # False"
+"packing = True"
 ]
 },
 {
@@ -1205,7 +1205,7 @@
 "# Set up the FSDP config. To enable FSDP via SPMD, set xla_fsdp_v2 to True.\n",
 "fsdp_config = {\n",
 "    \"fsdp_transformer_layer_cls_to_wrap\": [\n",
-"        \"GemmaDecoderLayer\"\n",
+"        \"Gemma2DecoderLayer\"\n",
 "    ],\n",
 "    \"xla\": True,\n",
 "    \"xla_fsdp_v2\": True,\n",
@@ -1245,6 +1245,7 @@
 "training_arguments = SFTConfig(\n",
 "    output_dir=output_dir,\n",
 "    overwrite_output_dir=True,\n",
+"    save_strategy=\"no\",\n",
 "    # Training\n",
 "    num_train_epochs=num_train_epochs,\n",
 "    # This is the global train batch size for SPMD\n",
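
For reference, here is a minimal sketch of how the updated cells fit together after this change. It assumes the notebook wires fsdp_config into SFTConfig through the fsdp / fsdp_config arguments inherited from TrainingArguments; output_dir, num_train_epochs, and the fsdp="full_shard" setting are placeholders for illustration, not the notebook's actual values.

from trl import SFTConfig

# Placeholder values for illustration only; the notebook defines its own.
output_dir = "./gemma2-xla-sft"
num_train_epochs = 1

# FSDP via SPMD on XLA: wrap Gemma 2's decoder layers. The class is
# Gemma2DecoderLayer (fixed in this commit), not GemmaDecoderLayer.
fsdp_config = {
    "fsdp_transformer_layer_cls_to_wrap": ["Gemma2DecoderLayer"],
    "xla": True,
    "xla_fsdp_v2": True,
}

training_arguments = SFTConfig(
    output_dir=output_dir,
    overwrite_output_dir=True,
    save_strategy="no",  # disable automatic checkpoint saving (this commit)
    num_train_epochs=num_train_epochs,
    fsdp="full_shard",       # assumed pairing with fsdp_config, not from the diff
    fsdp_config=fsdp_config,
)

Setting save_strategy to "no" means the Trainer writes no intermediate checkpoints during training, which avoids checkpoint I/O while fine-tuning on the XLA device.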