Update gpt.py
gpt.py (changed)
@@ -657,6 +657,22 @@ class GPT(nn.Module):
         # report number of parameters
         print("number of parameters: %.2fM" % (self.get_num_params() / 1e6,))
 
+    @torch.no_grad()
+    def _initialize_weights(self, module: nn.Module) -> None:
+        """
+        Compatibility shim for newer `transformers` versions.
+
+        `transformers.PreTrainedModel.initialize_weights()` will treat any submodule that
+        defines `_init_weights` as a nested "sub-model" and will recursively call that
+        submodule's `_initialize_weights`. Our core `GPT` module historically only
+        implemented `_init_weights`, so we provide this wrapper to match HF's contract.
+        """
+        if getattr(module, "_is_hf_initialized", False):
+            return
+        self._init_weights(module)
+        module._is_hf_initialized = True
+
+
     def get_num_params(self, non_embedding=True):
         """
         Return the number of parameters in the model.
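For reference, here is the guard pattern in isolation as a minimal, runnable sketch. The `TinyModel` class and its single `nn.Linear` layer are illustrative stand-ins for `GPT`, not part of this patch; the init style mirrors what `_init_weights` typically does for linear layers.

import torch
import torch.nn as nn

class TinyModel(nn.Module):
    """Toy stand-in for GPT, used only to illustrate the shim's guard logic."""
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 4)

    def _init_weights(self, module: nn.Module) -> None:
        # Illustrative per-module init, similar in spirit to GPT's _init_weights.
        if isinstance(module, nn.Linear):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    @torch.no_grad()
    def _initialize_weights(self, module: nn.Module) -> None:
        # Same pattern as the patch: skip modules HF has already handled.
        if getattr(module, "_is_hf_initialized", False):
            return
        self._init_weights(module)
        module._is_hf_initialized = True

m = TinyModel()
m._initialize_weights(m.fc)   # initializes weights and sets the flag
m._initialize_weights(m.fc)   # no-op: the guard returns early
assert m.fc._is_hf_initialized

Because each module carries its own `_is_hf_initialized` flag, repeated or overlapping initialization passes (such as HF's recursive `initialize_weights()` visiting a submodule from more than one path) become no-ops after the first call.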