# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest.mock import MagicMock, patch

import pytest
import torch

from nemo.lightning.pytorch.callbacks.nsys import NsysCallback
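
# The tests below exercise NsysCallback's profiling-window logic: cudaProfilerStart /
# cudaProfilerStop should fire only on the configured ranks and only at the configured
# start/end steps, and emit_nvtx should record shapes when gen_shape=True. CUDA, cudart,
# emit_nvtx, and get_rank are all mocked, so no GPU is required to run this suite.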

class TestNsysCallback:
    # Applied to every test: patches CUDA, cudart, emit_nvtx, and get_rank so the suite
    # runs without a GPU, and exposes a shared cudart mock as self.fixed_cudart.
    @pytest.fixture(autouse=True)
    def setup_mocks(self):
        self.cuda_mock = patch('torch.cuda')
        self.cudart_mock = patch('torch.cuda.cudart')
        self.emit_nvtx_mock = patch('torch.autograd.profiler.emit_nvtx')
        self.get_rank_mock = patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
        self.cuda_mock.start()
        self.cudart_mock.start()
        self.emit_nvtx_mock.start()
        self.get_rank_mock.start()

        # Mock CUDA availability
        torch.cuda.is_available = MagicMock(return_value=True)
        torch.cuda.current_device = MagicMock(return_value=0)

        # Set up a fixed cudart mock shared by all assertions within a test
        self.fixed_cudart = MagicMock()
        torch.cuda.cudart = MagicMock(return_value=self.fixed_cudart)

        yield

        self.cuda_mock.stop()
        self.cudart_mock.stop()
        self.emit_nvtx_mock.stop()
        self.get_rank_mock.stop()

    @pytest.fixture
    def mock_trainer(self):
        trainer = MagicMock()
        trainer.strategy.root_device.type = 'cuda'
        return trainer

    @pytest.fixture
    def mock_pl_module(self):
        return MagicMock()

    def test_init_valid_params(self):
        """Test initialization with valid parameters."""
        callback = NsysCallback(start_step=10, end_step=20, ranks=[0, 1], gen_shape=True)
        assert callback._nsys_profile_start_step == 10
        assert callback._nsys_profile_end_step == 20
        assert callback._nsys_profile_ranks == [0, 1]
        assert callback._nsys_profile_gen_shape is True

    def test_init_invalid_params(self):
        """Test initialization with invalid parameters."""
        with pytest.raises(AssertionError):
            NsysCallback(start_step='10', end_step=20)
        with pytest.raises(AssertionError):
            NsysCallback(start_step=10, end_step='20')
        with pytest.raises(AssertionError):
            NsysCallback(start_step=20, end_step=10)

    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
    @patch('torch.autograd.profiler.emit_nvtx')
    def test_on_train_batch_start_profiling(self, mock_emit_nvtx, mock_get_rank, mock_trainer, mock_pl_module):
        """Profiling starts at start_step on a profiled rank, with shape recording enabled."""
        # The mocked cudart comes from the autouse fixture (self.fixed_cudart)
        mock_get_rank.return_value = 0
        callback = NsysCallback(start_step=10, end_step=20, ranks=[0], gen_shape=True)
        mock_trainer.strategy.current_epoch_step = 10

        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 10)

        self.fixed_cudart.cudaProfilerStart.assert_called_once()
        mock_emit_nvtx.assert_called_once_with(record_shapes=True)

    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
    def test_on_train_batch_start_no_profiling(self, mock_get_rank, mock_trainer, mock_pl_module):
        """Profiling does not start before start_step."""
        mock_get_rank.return_value = 0
        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])
        mock_trainer.strategy.current_epoch_step = 9

        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 9)

        self.fixed_cudart.cudaProfilerStart.assert_not_called()

    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
    @patch('torch.autograd.profiler.emit_nvtx')
    def test_on_train_batch_end_profiling(self, mock_emit_nvtx, mock_get_rank, mock_trainer, mock_pl_module):
        """Profiling stops at end_step once it has been enabled."""
        mock_get_rank.return_value = 0
        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])
        mock_trainer.strategy.current_epoch_step = 20
        assert callback._has_nsys_enabled is False
        callback._has_nsys_enabled = True

        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 20)

        self.fixed_cudart.cudaProfilerStop.assert_called_once()

    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
    @patch('torch.autograd.profiler.emit_nvtx')
    def test_on_train_batch_end_no_profiling(self, mock_emit_nvtx, mock_get_rank, mock_trainer, mock_pl_module):
        """Profiling does not stop before end_step."""
        mock_get_rank.return_value = 0
        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])
        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 19)
        self.fixed_cudart.cudaProfilerStop.assert_not_called()

    def test_non_cuda_device(self, mock_trainer, mock_pl_module):
        """Test behavior when the device is not CUDA."""
        mock_trainer.strategy.root_device.type = 'cpu'
        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])

        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 10)
        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 20)

        # No exceptions should be raised, and no profiling calls should be made
        self.fixed_cudart.cudaProfilerStart.assert_not_called()
        self.fixed_cudart.cudaProfilerStop.assert_not_called()

    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
    def test_rank_not_in_profile_ranks(self, mock_get_rank, mock_trainer, mock_pl_module):
        """Test behavior when the current rank is not in the profile ranks."""
        mock_get_rank.return_value = 1
        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])

        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 10)
        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 20)

        # No profiling calls should be made
        self.fixed_cudart.cudaProfilerStart.assert_not_called()
        self.fixed_cudart.cudaProfilerStop.assert_not_called()

    @pytest.mark.parametrize(
        "start_step,end_step,batch_idx,expected_call",
        [
            # Representative cases (assumed; the original parametrization was not preserved).
            # The callback starts profiling only when the current step equals start_step.
            (10, 20, 9, False),
            (10, 20, 10, True),
            (10, 20, 25, False),
        ],
    )
    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
    @patch('torch.autograd.profiler.emit_nvtx')
    def test_profiling_range(
        self,
        mock_emit_nvtx,
        mock_get_rank,
        start_step,
        end_step,
        batch_idx,
        expected_call,
        mock_trainer,
        mock_pl_module,
    ):
        """cudaProfilerStart/emit_nvtx fire only at the configured start step."""
        mock_get_rank.return_value = 0
        callback = NsysCallback(start_step=start_step, end_step=end_step, ranks=[0])
        mock_trainer.strategy.current_epoch_step = batch_idx

        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, batch_idx)

        if expected_call:
            self.fixed_cudart.cudaProfilerStart.assert_called_once()
            mock_emit_nvtx.assert_called_once()
        else:
            self.fixed_cudart.cudaProfilerStart.assert_not_called()
            mock_emit_nvtx.assert_not_called()

    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
    def test_single_profile_range(self, mock_get_rank, mock_trainer, mock_pl_module):
        """A single profiling window starts once, does not restart mid-range, and stops once."""
        mock_get_rank.return_value = 0
        callback = NsysCallback(start_step=10, end_step=40, ranks=[0])

        # Ensure the device type is 'cuda'
        mock_trainer.strategy.root_device.type = 'cuda'

        # Start of range
        mock_trainer.strategy.current_epoch_step = 10
        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 10)
        assert self.fixed_cudart.cudaProfilerStart.call_count == 1, "cudaProfilerStart was not called"

        # Middle of range
        mock_trainer.strategy.current_epoch_step = 25
        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 25)
        assert self.fixed_cudart.cudaProfilerStart.call_count == 1, "cudaProfilerStart was called again"

        # End of range
        mock_trainer.strategy.current_epoch_step = 40
        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 40)
        assert self.fixed_cudart.cudaProfilerStop.call_count == 1, "cudaProfilerStop was not called"
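
# Usage note (illustrative, not part of the test suite): in real training, NsysCallback is
# attached to a Lightning Trainer, e.g. Trainer(callbacks=[NsysCallback(start_step=10,
# end_step=20, ranks=[0])]), and the script is launched under Nsight Systems with a
# CUDA-profiler-API capture range, e.g. `nsys profile -c cudaProfilerApi ... python train.py`
# (exact nsys flags depend on the Nsight Systems version).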