Changes from all commits
23 commits
2401210
Bump transformers to 5.0
kevalmorabia97 Mar 4, 2026
b269824
Fix Bert Gradnas tracing for transformers 5.0
kevalmorabia97 Mar 6, 2026
66ec553
Add more fixes
kevalmorabia97 Mar 24, 2026
bdaa515
Fix Bert and DBRX unit tests
kevalmorabia97 Mar 24, 2026
c72454c
Fix transformers load and test_llm_qat
kevalmorabia97 Mar 24, 2026
46348a0
Remove tokenizer.batch_encode_plus
kevalmorabia97 Mar 24, 2026
1d9155b
Remove deprecated transformers arguments
kevalmorabia97 Mar 24, 2026
ee51fd7
Rename torch_dtype to dtype
kevalmorabia97 Mar 24, 2026
aa6c3ce
Remove hard-coded trust_remote_code=True
kevalmorabia97 Mar 24, 2026
7343c4f
Fix unit tests
kevalmorabia97 Mar 25, 2026
31efc36
Enable some quantizer manual tests
kevalmorabia97 Mar 25, 2026
f69d9fa
fix test
kevalmorabia97 Mar 25, 2026
2dc3140
Set min transformers 5.0
kevalmorabia97 Mar 25, 2026
b37545b
Fix more tests
kevalmorabia97 Mar 25, 2026
1024528
Fix for TRT-LLM
kevalmorabia97 Mar 25, 2026
1e45639
Let PTQ example tests run with transformers<5.0
kevalmorabia97 Mar 25, 2026
38e26e3
fix tests
kevalmorabia97 Mar 26, 2026
26cf04a
minor fixes
kevalmorabia97 Mar 30, 2026
6d3af7c
Remove transformers 5.0 compatibility patch for trtllm; disable MOE c…
kevalmorabia97 Mar 31, 2026
f707ce5
fix for cppimport container test
kevalmorabia97 Mar 31, 2026
d5b61cb
Fix spec dec example tests
kevalmorabia97 Mar 31, 2026
22e9d4d
minor
kevalmorabia97 Apr 1, 2026
fdeb1ab
Add back windows accuracy_benchmark dependencies + trust_remote_code fix
kevalmorabia97 Apr 1, 2026
4 changes: 0 additions & 4 deletions .github/workflows/_example_tests_runner.yml
Original file line number Diff line number Diff line change
@@ -47,10 +47,6 @@ jobs:
echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
- name: Install dependencies
run: |
# Install git-lfs for Daring-Anteater dataset
Collaborator:
Are these changes related to the transformers + torch upgrade?

Collaborator Author:
The spec dec tests were silently skipped because the dataset setup logic was recently changed. With the new logic and those tests fixed, this install step is no longer needed.

apt-get update && apt-get install -y git-lfs
git lfs install --system
# use `python -m pip` instead of `pip` to avoid conflicts with system pip for nemo containers
python -m pip install ".${{ inputs.pip_install_extras }}"
47 changes: 0 additions & 47 deletions .github/workflows/delete_outdated_pr_branches.yml

This file was deleted.

12 changes: 6 additions & 6 deletions .github/workflows/example_tests.yml
@@ -70,9 +70,9 @@ jobs:
uses: ./.github/workflows/_example_tests_runner.yml
secrets: inherit
with:
docker_image: "nvcr.io/nvidia/pytorch:${{ matrix.docker_image || '26.01' }}-py3"
docker_image: "nvcr.io/nvidia/pytorch:${{ matrix.docker_image || '26.03' }}-py3"
example: ${{ matrix.example }}
timeout_minutes: 30
timeout_minutes: 45
pip_install_extras: "[hf,dev-test]"
runner: linux-amd64-gpu-h100-latest-1

@@ -82,9 +82,9 @@ jobs:
uses: ./.github/workflows/_example_tests_runner.yml
secrets: inherit
with:
docker_image: "nvcr.io/nvidia/pytorch:${{ matrix.docker_image || '26.01' }}-py3"
docker_image: "nvcr.io/nvidia/pytorch:${{ matrix.docker_image || '26.03' }}-py3"
example: ${{ matrix.example }}
timeout_minutes: 30
timeout_minutes: 45
pip_install_extras: "[hf,dev-test]"
runner: linux-amd64-gpu-rtxpro6000-latest-2

@@ -99,7 +99,7 @@ jobs:
uses: ./.github/workflows/_example_tests_runner.yml
secrets: inherit
with:
docker_image: "nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc5"
docker_image: "nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc10"
example: ${{ matrix.example }}
pip_install_extras: "[hf,dev-test]"
runner: linux-amd64-gpu-rtxpro6000-latest-1
@@ -113,7 +113,7 @@ jobs:
uses: ./.github/workflows/_example_tests_runner.yml
secrets: inherit
with:
docker_image: "nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc5"
docker_image: "nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc10"
example: ${{ matrix.example }}
pip_install_extras: "[hf,dev-test]"
runner: linux-amd64-gpu-rtxpro6000-latest-2
5 changes: 3 additions & 2 deletions .github/workflows/gpu_tests.yml
@@ -65,18 +65,19 @@ jobs:
- example: gpu
timeout: 45
container_image: pytorch:26.01-py3
# tests/gpu/_extensions/test_onnx_extensions.py fails for newer containers until https://github.com/tbenthompson/cppimport/pull/98
- example: gpu-megatron
timeout: 45
container_image: pytorch:26.01-py3
- example: gpu-trtllm
timeout: 30
container_image: tensorrt-llm/release:1.3.0rc5
container_image: tensorrt-llm/release:1.3.0rc10
runs-on: linux-amd64-gpu-rtxpro6000-latest-1
timeout-minutes: ${{ matrix.timeout }}
container: &gpu_container
image: nvcr.io/nvidia/${{ matrix.container_image }}
env:
GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
GIT_DEPTH: 1000 # For correct version
PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
HF_TOKEN: ${{ secrets.HF_TOKEN }}
steps: &gpu_steps
12 changes: 8 additions & 4 deletions .github/workflows/unit_tests.yml
@@ -38,7 +38,7 @@ jobs:
- uses: actions/checkout@v6
- uses: ./.github/actions/ubuntu-setup
- name: Run unit tests
run: pip install tox && COV_ARGS="--cov" tox -e py312-torch210-tf_latest-unit
run: pip install tox && COV_ARGS="--cov" tox -e py312-torch211-tf_latest-unit
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5
with:
@@ -64,6 +64,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
py: [10, 11, 13]
steps:
@@ -72,15 +73,16 @@ jobs:
with:
python-version: "3.${{ matrix.py }}"
- name: Run unit tests
run: pip install tox && tox -e py3${{ matrix.py }}-torch210-tf_latest-unit
run: pip install tox && tox -e py3${{ matrix.py }}-torch211-tf_latest-unit
multi-torch:
if: github.event_name == 'pull_request'
needs: [linux]
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
torch: [26, 27, 28, 29]
torch: [28, 29, 210]
steps:
- uses: actions/checkout@v6
- uses: ./.github/actions/ubuntu-setup
@@ -92,13 +94,14 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
tf: [min]
steps:
- uses: actions/checkout@v6
- uses: ./.github/actions/ubuntu-setup
- name: Run unit tests
run: pip install tox && tox -e py312-torch210-tf_${{ matrix.tf }}-unit
run: pip install tox && tox -e py312-torch211-tf_${{ matrix.tf }}-unit
launcher:
if: github.event_name == 'pull_request'
needs: [linux]
@@ -122,6 +125,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
test-env: [onnx, torch]
steps:
10 changes: 8 additions & 2 deletions CHANGELOG.rst
@@ -1,5 +1,6 @@
NVIDIA Model Optimizer Changelog
================================
Changelog
=========

0.44 (2026-05-xx)
^^^^^^^^^^^^^^^^^

@@ -15,6 +16,11 @@ NVIDIA Model Optimizer Changelog

- Fix Minitron pruning (``mcore_minitron``) for MoE models. Importance estimation hooks were incorrectly registered for MoE modules and NAS step was hanging before this.

**Misc**

- Bump minimum required PyTorch version to 2.8.
- Add experimental support for transformers>=5.0. Unified Hugging Face checkpoint export for quantized checkpoints may not work for some models with transformers>=5.0 yet.

0.43 (2026-04-09)
^^^^^^^^^^^^^^^^^

2 changes: 1 addition & 1 deletion docs/source/getting_started/_installation_for_Linux.rst
@@ -16,7 +16,7 @@ Latest Model Optimizer (``nvidia-modelopt``) currently has the following system
+-------------------------+-----------------------------+
| CUDA | 12.x, 13.x |
+-------------------------+-----------------------------+
| PyTorch | >=2.6 |
| PyTorch | >=2.8 |
+-------------------------+-----------------------------+
| TensorRT-LLM (Optional) | >=1.0 |
+-------------------------+-----------------------------+
6 changes: 3 additions & 3 deletions examples/gpt-oss/configs/sft_full.yaml
@@ -16,7 +16,7 @@ per_device_train_batch_size: 2
per_device_eval_batch_size: 2
gradient_accumulation_steps: 2
max_length: 4096
warmup_ratio: 0.03
warmup_steps: 0.03 # use warmup_ratio if using transformers<5.0
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
@@ -30,6 +30,6 @@ eval_steps: 8
dataset_test_split: test

# ModelOpt Quantization Parameters
quant_cfg: # Examples: MXFP4_MLP_WEIGHT_ONLY_CFG, NVFP4_MLP_WEIGHT_ONLY_CFG, NVFP4_MLP_ONLY_CFG
# For the full list of supported configs, do: mtq.config.choices
quant_cfg: # Examples: MXFP4_MLP_WEIGHT_ONLY_CFG, NVFP4_MLP_WEIGHT_ONLY_CFG, NVFP4_MLP_ONLY_CFG
# For the full list of supported configs, do: mtq.config.choices
calib_size: 128
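The `warmup_ratio` → `warmup_steps` swap above reflects that transformers 5.0 folds fractional warmup into `warmup_steps`; a minimal sketch of selecting the right key by version (the helper name and version strings are illustrative, not part of this PR):

```python
from packaging.version import Version


def warmup_kwargs(tf_version: str, ratio: float = 0.03) -> dict:
    """Pick the warmup argument name expected by the installed transformers.

    Assumes transformers>=5.0 accepts a fractional `warmup_steps` (as the
    config above does), while older releases use `warmup_ratio`.
    """
    if Version(tf_version) >= Version("5.0"):
        return {"warmup_steps": ratio}
    return {"warmup_ratio": ratio}
```

Pinning the choice on the installed version keeps one config usable across both release lines.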
6 changes: 3 additions & 3 deletions examples/gpt-oss/configs/sft_lora.yaml
@@ -21,7 +21,7 @@ lora_alpha: 16
lora_dropout: 0.0
lora_target_modules: all-linear
max_length: 4096
warmup_ratio: 0.03
warmup_steps: 0.03 # use warmup_ratio if using transformers<5.0
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
@@ -35,6 +35,6 @@ eval_steps: 8
dataset_test_split: test

# ModelOpt Quantization Parameters
quant_cfg: # Examples: MXFP4_MLP_WEIGHT_ONLY_CFG, NVFP4_MLP_WEIGHT_ONLY_CFG, NVFP4_MLP_ONLY_CFG
# For the full list of supported configs, do: mtq.config.choices
quant_cfg: # Examples: MXFP4_MLP_WEIGHT_ONLY_CFG, NVFP4_MLP_WEIGHT_ONLY_CFG, NVFP4_MLP_ONLY_CFG
# For the full list of supported configs, do: mtq.config.choices
calib_size: 128
14 changes: 8 additions & 6 deletions examples/gpt-oss/convert_oai_mxfp4_weight_only.py
@@ -95,21 +95,23 @@ def convert_and_save(model, tokenizer, output_path: str):

def create_parser():
parser = argparse.ArgumentParser(description=__doc__)

parser.add_argument("--model_path", type=str, help="path to the fake-quantized model from QAT.")

parser.add_argument(
"--trust_remote_code",
help="Set trust_remote_code for Huggingface models and tokenizers",
default=False,
action="store_true",
)
parser.add_argument(
"--lora_path",
type=str,
help="path to the LoRA-QAT adapter weights. You can only specify lora_path or model_path, not both.",
)

parser.add_argument(
"--base_path",
type=str,
help="path to the base model used for LoRA-QAT. Only used if lora_path is specified.",
)

parser.add_argument(
"--output_path", type=str, required=True, help="location to save converted model."
)
@@ -121,7 +123,7 @@ def create_parser():
parser = create_parser()
args = parser.parse_args()

kwargs = {"device_map": "auto", "torch_dtype": "auto", "trust_remote_code": True}
kwargs = {"device_map": "auto", "dtype": "auto", "trust_remote_code": args.trust_remote_code}
if args.lora_path:
assert args.model_path is None, "You can only specify lora_path or model_path, not both."
model_path = args.base_path
@@ -140,7 +142,7 @@ def create_parser():
gc.collect()

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=args.trust_remote_code)

# Quantize and save model
convert_and_save(model, tokenizer, args.output_path)
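The `torch_dtype` → `dtype` rename in this file follows the transformers 5.0 kwarg change; a hedged sketch of building version-appropriate `from_pretrained` kwargs (the helper is illustrative, not part of the example script):

```python
from packaging.version import Version


def build_load_kwargs(tf_version: str, trust_remote_code: bool = False) -> dict:
    """Build `AutoModelForCausalLM.from_pretrained` kwargs for a given
    transformers version; >=5.0 renamed `torch_dtype` to `dtype`."""
    kwargs = {"device_map": "auto", "trust_remote_code": trust_remote_code}
    key = "dtype" if Version(tf_version) >= Version("5.0") else "torch_dtype"
    kwargs[key] = "auto"
    return kwargs
```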
2 changes: 1 addition & 1 deletion examples/gpt-oss/qat-finetune-transformers.ipynb
@@ -207,7 +207,7 @@
" per_device_eval_batch_size=1,\n",
" gradient_accumulation_steps=2,\n",
" max_length=4096,\n",
" warmup_ratio=0.03,\n",
" warmup_steps=0.03, # use warmup_ratio if using transformers<5.0\n",
" eval_strategy=\"steps\",\n",
" eval_on_start=True,\n",
" logging_steps=10,\n",
2 changes: 0 additions & 2 deletions examples/gpt-oss/requirements.txt
@@ -1,5 +1,3 @@
kernels>=0.9.0
torch>2.7.1
trackio
transformers>=4.55.0
trl>=0.21.0
2 changes: 1 addition & 1 deletion examples/gpt-oss/sft.py
@@ -72,7 +72,7 @@ def main(script_args, training_args, model_args, quant_args):
"revision": model_args.model_revision,
"trust_remote_code": model_args.trust_remote_code,
"attn_implementation": model_args.attn_implementation,
"torch_dtype": getattr(model_args, "dtype", "bfloat16"),
"dtype": getattr(model_args, "dtype", "bfloat16"),
"use_cache": not training_args.gradient_checkpointing,
}

12 changes: 10 additions & 2 deletions examples/llm_autodeploy/run_auto_quantize.py
@@ -118,18 +118,19 @@ def modelopt_ptq(
auto_quantize_bits: float | None = None,
calib_dataset: str = "cnn_dailymail",
calib_batch_size: int = 8,
trust_remote_code: bool = False,
) -> torch.nn.Module:
"""Quantize the model with modelopt."""
model = AutoModelForCausalLM.from_pretrained(
model_path, trust_remote_code=True, torch_dtype="auto", device_map="auto"
model_path, trust_remote_code=trust_remote_code, dtype="auto", device_map="auto"
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(
model_path,
model_max_length=2048,
padding_side="left",
trust_remote_code=True,
trust_remote_code=trust_remote_code,
)
# sanitize tokenizer
if tokenizer.pad_token != "<unk>":
@@ -203,6 +204,12 @@ def modelopt_ptq(
"regular quantization without auto_quantize search will be applied."
),
)
parser.add_argument(
"--trust_remote_code",
help="Set trust_remote_code for Huggingface models and tokenizers",
default=False,
action="store_true",
)

args = parser.parse_args()

@@ -213,4 +220,5 @@
args.num_samples,
auto_quantize_bits=args.effective_bits,
calib_batch_size=args.calib_batch_size,
trust_remote_code=args.trust_remote_code,
)
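The new `--trust_remote_code` flag added in this file uses the standard argparse store-true pattern, so remote code is only trusted on explicit opt-in; a self-contained sketch of the same pattern:

```python
import argparse


def create_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser()
    # Off by default: remote model/tokenizer code runs only on explicit opt-in
    parser.add_argument(
        "--trust_remote_code",
        help="Set trust_remote_code for Huggingface models and tokenizers",
        default=False,
        action="store_true",
    )
    return parser


parser = create_parser()
assert parser.parse_args([]).trust_remote_code is False
assert parser.parse_args(["--trust_remote_code"]).trust_remote_code is True
```

Replacing the previous hard-coded `trust_remote_code=True` with this flag makes the safer behavior the default.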
1 change: 0 additions & 1 deletion examples/llm_distill/requirements.txt
@@ -1,4 +1,3 @@
pyarrow
torchao>=0.14.1
transformers<5.0
trl>=0.23.0
3 changes: 1 addition & 2 deletions examples/llm_eval/lm_eval_hf.py
@@ -38,6 +38,7 @@
# limitations under the License.
import warnings

import datasets
from lm_eval import utils
from lm_eval.__main__ import cli_evaluate, parse_eval_args, setup_parser
from lm_eval.api.model import T
@@ -180,8 +181,6 @@ def setup_parser_with_modelopt_args():
model_args = utils.simple_parse_args_string(args.model_args)

if args.trust_remote_code:
import datasets

datasets.config.HF_DATASETS_TRUST_REMOTE_CODE = True
model_args["trust_remote_code"] = True
args.trust_remote_code = None