Add codespell to pre-commit checks (microsoft#1717)
aphedges authored Jan 22, 2022
1 parent 09c065b commit 4cf970e
Showing 35 changed files with 83 additions and 70 deletions.
13 changes: 13 additions & 0 deletions .pre-commit-config.yaml
@@ -32,3 +32,16 @@ repos:
  hooks:
    - id: clang-format # formatter of C/C++ code based on a style guide: LLVM, Google, Chromium, Mozilla, and WebKit available
      args: []

+- repo: https://github.com/codespell-project/codespell
+  rev: v2.1.0
+  hooks:
+    - id: codespell
+      args: [
+          # Do not check files that are automatically generated
+          '--skip=docs/Gemfile.lock,tests/unit/gpt2-merges.txt,tests/unit/gpt2-vocab.json',
+          '--ignore-regex=\\n', # Do not count the 'n' in an escaped newline as part of a word
+          '--ignore-words-list=unsupport', # Word used in error messages that need rewording
+          --check-filenames,
+          --check-hidden
+      ]
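For reference, a hedged sketch of running the same check by hand (the arguments mirror the hook configuration above; the equivalent one-liner is `pre-commit run codespell --all-files`):

```python
import subprocess

# Invoke codespell (v2.1.0, `pip install codespell`) with the hook's arguments.
result = subprocess.run(
    [
        "codespell",
        "--skip=docs/Gemfile.lock,tests/unit/gpt2-merges.txt,tests/unit/gpt2-vocab.json",
        r"--ignore-regex=\\n",  # regex matching a literal '\n' escape in source text
        "--ignore-words-list=unsupport",
        "--check-filenames",
        "--check-hidden",
    ],
    capture_output=True,
    text=True,
)
# codespell prints one "path:line: word ==> correction" entry per finding
# and exits non-zero when anything was found.
print(result.stdout, end="")
print("clean" if result.returncode == 0 else "misspellings found")
```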
2 changes: 1 addition & 1 deletion .pylintrc
@@ -55,7 +55,7 @@ confidence=
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
-# disable everything first and then reenable specific checks. For example, if
+# disable everything first and then re-enable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
4 changes: 2 additions & 2 deletions csrc/aio/py_lib/py_ds_aio.cpp
@@ -11,9 +11,9 @@ Functionality for swapping optimizer tensors to/from (NVMe) storage devices.

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
m.def("aio_read", &deepspeed_py_aio_read, "DeepSpeed Asynchornous I/O Read");
m.def("aio_read", &deepspeed_py_aio_read, "DeepSpeed Asynchronous I/O Read");

m.def("aio_write", &deepspeed_py_aio_write, "DeepSpeed Asynchornous I/O Write");
m.def("aio_write", &deepspeed_py_aio_write, "DeepSpeed Asynchronous I/O Write");

m.def("deepspeed_memcpy", &deepspeed_py_memcpy, "DeepSpeed Memory Copy");

4 changes: 2 additions & 2 deletions csrc/lamb/fused_lamb_cuda.cpp
@@ -61,15 +61,15 @@ at::Tensor lamb(at::Tensor& p,

// intermediate for weight L2 reduction
// make sure that the threads per block is at least 512 during the kernel launch otherwise the
-// behavious is unexpected
+// behaviour is unexpected
at::Tensor w_l2_i = at::empty(
{512},
p.options().dtype(p.type().scalarType() == at::ScalarType::Half ? at::ScalarType::Float
: p.type().scalarType()));

// intermediate for update L2 reduction
// make sure that the threads per block is at least 512 during the kernel launch otherwise the
-// behavious is unexpected
+// behaviour is unexpected
at::Tensor u_l2_i = at::empty(
{512},
p.options().dtype(p.type().scalarType() == at::ScalarType::Half ? at::ScalarType::Float
2 changes: 1 addition & 1 deletion deepspeed/autotuning/README.md
@@ -167,7 +167,7 @@ For example, the following section in the DeepSpeed configuration file limits th
}
```

-The entry bellow asks the Autotuner to use `4` as the micro-batch size per GPU in tuning (micro-batch size per GPU is fixed as 4). Note that it's different from using ` "train_micro_batch_size_per_gpu": [4]` which asks the Autotuner to tune micro-batch size per GPU starting from `4`.
+The entry below asks the Autotuner to use `4` as the micro-batch size per GPU in tuning (micro-batch size per GPU is fixed as 4). Note that it's different from using ` "train_micro_batch_size_per_gpu": [4]` which asks the Autotuner to tune micro-batch size per GPU starting from `4`.
```json
{
"train_micro_batch_size_per_gpu": [4],
```
10 changes: 5 additions & 5 deletions deepspeed/autotuning/autotuner.py
@@ -341,7 +341,7 @@ def _generate_experiments(self, tuning_space, max_train_batch_size_per_gpu):
else:
return exps

-# replace the corresponding parameter values if the user specfies them in the DeepSpeed configuration file
+# replace the corresponding parameter values if the user specifies them in the DeepSpeed configuration file
replace_dict(tuning_space,
self.user_config,
[ZERO_OPTIMIZATION,
@@ -511,7 +511,7 @@ def tune_space(self,
max_train_batch_size_per_gpu = 0
tuning_micro_batch_sizes_overwritten = False

-# calcuate max micro batch size using gpu memory, model instatiation memory and activation memory
+# calculate max micro batch size using gpu memory, model instantiation memory and activation memory
# calculated_max_micro_batch_size = (memory_per_gpu - instantiation_memory) // activation_memory_micro_batch_size_1
calculated_max_micro_batch_size = int(
self.gpu_mem -
@@ -584,11 +584,11 @@ def tune_space(self,
logger.info(f"End tuning for space: {tuning_space_name}")
return max_micro_batch_size, fast_best_mbs, fast_best_metric_val

-# if the best metric or the micro batch size for that best metric in the current Zero stage after tuning micro batch size is less than the corrresponding value in the prevous Zero stage, return, do not tune other Zero configuration paramerts
+# if the best metric or the micro batch size for that best metric in the current Zero stage after tuning micro batch size is less than the corresponding value in the previous Zero stage, return, do not tune other Zero configuration parameters
if stage > 0:
if fast_best_mbs <= prev_best_mbs or fast_best_metric_val < prev_best_metric_val:
logger.info(
f"End tuning for space: {tuning_space_name}. No need to tune other Zero configuration paramerts."
f"End tuning for space: {tuning_space_name}. No need to tune other Zero configuration parameters."
)
return max_micro_batch_size, fast_best_mbs, fast_best_metric_val

@@ -665,7 +665,7 @@ def model_info_profile_run(self):
"""Does a model information profling experiment that collects the number of model parameters and activation memory.\
The experiment produces a "profile_model_info" folder under self.results_dir.
Returns:
-[dict]: a model inforation dictionary, e.g., {"num_params": 335144976, "trainable_num_params": 335144976, "activation_mem_per_gpu": 324358144, "rank": 0}
+[dict]: a model information dictionary, e.g., {"num_params": 335144976, "trainable_num_params": 335144976, "activation_mem_per_gpu": 324358144, "rank": 0}
"""
logger.info("Starting model info profile run.")
model_info = self.autotuning_config.model_info
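The comment corrected in the second hunk states the bound the Autotuner uses: `calculated_max_micro_batch_size = (memory_per_gpu - instantiation_memory) // activation_memory_micro_batch_size_1`. A hedged sketch of that arithmetic with made-up numbers (the variable names and figures are illustrative, not the Autotuner's actual attributes):

```python
# Illustrative only: plug assumed figures into the Autotuner's bound.
gpu_mem = 40 * 2**30                  # assumed: 40 GiB of memory per GPU
instantiation_memory = 6 * 2**30      # assumed: model instantiation footprint
activation_mem_batch_1 = 2 * 2**30    # assumed: activation memory at micro-batch size 1

max_micro_batch_size = (gpu_mem - instantiation_memory) // activation_mem_batch_1
print(max_micro_batch_size)  # 17 samples per GPU under these assumptions
```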
2 changes: 1 addition & 1 deletion deepspeed/autotuning/constants.py
@@ -137,7 +137,7 @@
}

#########################################
-# autotunner serach space constants
+# autotunner search space constants
#########################################

DEFAULT_HF_CONFIG = {
2 changes: 1 addition & 1 deletion deepspeed/autotuning/scheduler.py
@@ -241,7 +241,7 @@ def parse_results(self, metric):
for exp_id, (exp, err) in self.finished_experiments.items():
if err:
logger.info(
f"The experiment exp_id = {exp_id}, exp_name = {exp['name']}, did not run succesfully with error = {err}, thus a metrics.txt does not exist for it. Check the stderr.log in {exp['result_dir']}"
f"The experiment exp_id = {exp_id}, exp_name = {exp['name']}, did not run successfully with error = {err}, thus a metrics.txt does not exist for it. Check the stderr.log in {exp['result_dir']}"
)
continue

2 changes: 1 addition & 1 deletion deepspeed/autotuning/tuner/README.md
@@ -1,7 +1,7 @@
# Tuner


-`exps` is a list of experiment descriptions (dictionarys).
+`exps` is a list of experiment descriptions (dictionaries).
An experimentation description has a `ds_config` field that stores the DeepSpeed configuration to be used in the experiment.

A tuner is based on BaseTuner and at least implements the `next_batch` method. It can implement a different `tune` method from the BaseTuner's.
2 changes: 1 addition & 1 deletion deepspeed/autotuning/tuner/model_based_tuner.py
@@ -120,7 +120,7 @@ def update(self):
feature_val = []
if err:
logger.info(
f"Skipping exp_id = {exp_id}, exp_name = {exp['name']}, the experiment did not run succesfully with error = {err}, thus a metrics.txt does not exist for it. Please check the stderr.log in {exp['result_dir']}"
f"Skipping exp_id = {exp_id}, exp_name = {exp['name']}, the experiment did not run successfully with error = {err}, thus a metrics.txt does not exist for it. Please check the stderr.log in {exp['result_dir']}"
)
ds_config = exp["ds_config"]
flattened_ds_config = flatten(ds_config)
2 changes: 1 addition & 1 deletion deepspeed/autotuning/utils.py
@@ -317,7 +317,7 @@ def canonical_name(config: dict, tuning_keys=None, prefix="", omit_val=False):
Args:
config (dict): the config dict used to generate the name
tuning_keys (list, optional): the tuning keys used to generate the name. Defaults to None.
-prefix (str, optional): a string added to the begining of the name. Defaults to None.
+prefix (str, optional): a string added to the beginning of the name. Defaults to None.
"""
if TRAIN_MICRO_BATCH_SIZE_PER_GPU not in tuning_keys:
tuning_keys.append(TRAIN_MICRO_BATCH_SIZE_PER_GPU)
2 changes: 1 addition & 1 deletion deepspeed/env_report.py
@@ -107,7 +107,7 @@ def parse_arguments():
'--hide_operator_status',
action='store_true',
help=
-'Suppress display of installation and compatiblity statuses of DeepSpeed operators. '
+'Suppress display of installation and compatibility statuses of DeepSpeed operators. '
)
parser.add_argument('--hide_errors_and_warnings',
action='store_true',
10 changes: 5 additions & 5 deletions deepspeed/profiling/flops_profiler/README.md
@@ -16,15 +16,15 @@ Below is an example output for BERT-Large(NVIDIA) on an A100 GPU with batch size
-------------------------- DeepSpeed Flops Profiler --------------------------
Profile Summary at step 10:
Notations:
-data parallel size (dp_size), model paralel size(mp_size),
+data parallel size (dp_size), model parallel size(mp_size),
number of parameters (params), number of multiply-accumulate operations(MACs),
number of floating-point operations (flops), floating-point operations per second (FLOPS),
fwd latency (forward propagation latency), bwd latency (backward propagation latency),
step (weights update latency), iter latency (sum of fwd, bwd and step latency)

world size: 1
data parallel size: 1
-model paralel size: 1
+model parallel size: 1
batch size per GPU: 80
params per gpu: 336.23 M
params of model = params per GPU * mp_size: 336.23 M
@@ -160,7 +160,7 @@ The DeepSpeed Flops Profiler can be used with the DeepSpeed runtime or as a stan
- [Example Training Workflow](#example-training-workflow)
### Usage With the DeepSpeed Runtime

-When using DeepSpeed for model training, the profiler can be configured in the deepspeed configuration file. No explict API calls are needed to use the profiler. The profiler can be enabled by adding the following field to the `deepspeed_config` json file. Refer to [flops profiler](https://www.deepspeed.ai/docs/config-json/#flops-profiler) for details.
+When using DeepSpeed for model training, the profiler can be configured in the deepspeed configuration file. No explicit API calls are needed to use the profiler. The profiler can be enabled by adding the following field to the `deepspeed_config` json file. Refer to [flops profiler](https://www.deepspeed.ai/docs/config-json/#flops-profiler) for details.

```json
{
```

@@ -185,15 +185,15 @@ An example output of 12-layer Megatron-LM model (`hidden_size = 8192, num_attent
-------------------------- DeepSpeed Flops Profiler --------------------------
Profile Summary at step 10:
Notations:
-data parallel size (dp_size), model paralel size(mp_size),
+data parallel size (dp_size), model parallel size(mp_size),
number of parameters (params), number of multiply-accumulate operations(MACs),
number of floating-point operations (flops), floating-point operations per second (FLOPS),
fwd latency (forward propagation latency), bwd latency (backward propagation latency),
step (weights update latency), iter latency (sum of fwd, bwd and step latency)

world size: 1
data parallel size: 1
-model paralel size: 1
+model parallel size: 1
batch size per GPU: 1024
params per gpu: 1.29 M
params of model = params per GPU * mp_size: 1.29 M
2 changes: 1 addition & 1 deletion deepspeed/runtime/comm/coalesced_collectives.py
@@ -47,7 +47,7 @@ def reduce_scatter_coalesced(
"""simultaneously reduce-scatter a list of tensors - this can be done more
efficiently than individual reduce scatter calls
-TODO. see if PyTorch team wants a c++ verson of this for ProcessGroupNCCL
+TODO. see if PyTorch team wants a c++ version of this for ProcessGroupNCCL
"""
this_rank = torch.distributed.get_rank(group)
world_sz = torch.distributed.get_world_size(group)
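For readers unfamiliar with the collective documented in this hunk, a plain-Python sketch of reduce-scatter semantics (what each rank ends up holding), not the NCCL implementation:

```python
# Two ranks each contribute a 4-element vector; after reduce-scatter,
# rank r holds the elementwise sum of chunk r from every rank.
inputs = [
    [1, 2, 3, 4],      # rank 0's input
    [10, 20, 30, 40],  # rank 1's input
]
world_sz = len(inputs)
chunk = len(inputs[0]) // world_sz
outputs = [
    [sum(v[r * chunk + i] for v in inputs) for i in range(chunk)]
    for r in range(world_sz)
]
print(outputs)  # [[11, 22], [33, 44]]: rank 0 keeps [11, 22], rank 1 keeps [33, 44]
```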
6 changes: 3 additions & 3 deletions deepspeed/runtime/engine.py
@@ -861,7 +861,7 @@ def _set_distributed_vars(self, args):
def _configure_with_arguments(self, args, mpu):
# After the distributed backend is initialized we are guaranteed the LOCAL_RANK
# environment variable is set. We must align args.local_rank to this value for
-# backwards compatability with scripts relying on [args|self].local_rank containing
+# backwards compatibility with scripts relying on [args|self].local_rank containing
# the correct local rank info. _do_args_sanity_check will ensure this is the case.

if "OMPI_COMM_WORLD_LOCAL_RANK" in os.environ:
@@ -2005,7 +2005,7 @@ def _autotuning_exit(self):
msg["latency"]
print_json_dist(msg, [0], path=self.autotuning_metric_path())
import atexit
-atexit.register(print, "Autotuning: done with runing current ds config.")
+atexit.register(print, "Autotuning: done with running current ds config.")
exit()

def _write_tensorboard(self):
@@ -2290,7 +2290,7 @@ def load_moe_state_dict(self, checkpoint_path, tag, state_dict, old_moe_load):
global_expert_id = expp_rank * num_local_experts + local_expert_id
expert_state_dict = torch.load(self._get_expert_ckpt_name(
checkpoint_path,
--1, # -1 means ingore layer_id
+-1, # -1 means ignore layer_id
global_expert_id,
tag),
map_location=torch.device('cpu'))
2 changes: 1 addition & 1 deletion deepspeed/runtime/fp16/unfused_optimizer.py
@@ -55,7 +55,7 @@ def __init__(self,
#copied to fp16 weights
fp32_group = [p.clone().float().detach() for p in param_group['params']]

-#incase the internal optimizer needs it
+#in case the internal optimizer needs it
for p in fp32_group:
p.requires_grad = True

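The fixed comment sits in the fp32 master-weight setup shown in this hunk. A small standalone sketch of that pattern (not the optimizer's actual code path):

```python
import torch

# Keep an fp32 master copy of an fp16 parameter, as in the hunk above.
fp16_param = torch.ones(4, dtype=torch.float16)
fp32_copy = fp16_param.clone().float().detach()
fp32_copy.requires_grad = True  # in case the internal optimizer needs it
print(fp32_copy.dtype, fp32_copy.requires_grad)  # torch.float32 True
```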
4 changes: 2 additions & 2 deletions deepspeed/runtime/lr_schedules.py
@@ -314,7 +314,7 @@ class LRRangeTest(object):
the paper `A disciplined approach to neural network hyper-parameters: Part1`_.
LRRT policy is used for finding maximum LR that trains a model without divergence, and can be used to
-configure the LR boundaries for Cylic LR schedules.
+configure the LR boundaries for Cyclic LR schedules.
LRRT changes the learning rate after every batch.
`step` should be called after a batch has been used for training.
@@ -325,7 +325,7 @@ class LRRangeTest(object):
lower boundary in the range test for each parameter group.
lr_range_test_step_size (int): Interval of training steps to increase learning rate. Default: 2000
lr_range_test_step_rate (float): Scaling rate for range test. Default: 1.0
-lr_range_test_staircase (bool): Scale in staircase fashion, rather than continous. Default: False.
+lr_range_test_staircase (bool): Scale in staircase fashion, rather than continuous. Default: False.
last_batch_iteration (int): The index of the last batch. This parameter is used when
resuming a training job. Since `step()` should be invoked after each
batch instead of after each epoch, this number represents the total
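The docstring fixes above touch LRRangeTest's continuous-versus-staircase option. An illustrative schedule showing the distinction (an assumed formula for illustration, not the class's actual implementation):

```python
import math

def range_test_lr(min_lr, step, step_size=2000, step_rate=1.0, staircase=False):
    # Grow the LR as steps pass; staircase holds it constant within each interval.
    interval = step / step_size
    if staircase:
        interval = math.floor(interval)
    return min_lr * (1 + step_rate * interval)

print(range_test_lr(1e-4, 3000))                  # continuous: 2.5e-04
print(range_test_lr(1e-4, 3000, staircase=True))  # staircase:  2.0e-04
```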
2 changes: 1 addition & 1 deletion deepspeed/runtime/pipe/engine.py
@@ -156,7 +156,7 @@ def __init__(self, has_bool_tensors=False, *super_args, **super_kwargs):
f'TOTAL_PARAMS={total_params} ({total_params/1e6:0.3f}M) '
f'UNIQUE_PARAMS={unique_params} ({unique_params/1e6:0.3f}M)')

-#intialize peer-2-peer communication and allreduce groups
+#initialize peer-2-peer communication and allreduce groups
if self.is_pipe_parallel:
p2p.init_process_groups(self.grid)

4 changes: 2 additions & 2 deletions deepspeed/runtime/pipe/p2p.py
@@ -47,7 +47,7 @@ def _is_valid_send_recv(src_stage, dest_stage):

def send(tensor, dest_stage, async_op=False):
global _groups
-assert async_op == False, "Doesnt support async_op true"
+assert async_op == False, "Doesn't support async_op true"
src_stage = _grid.get_stage_id()
_is_valid_send_recv(src_stage, dest_stage)

@@ -68,7 +68,7 @@ def send(tensor, dest_stage, async_op=False):

def recv(tensor, src_stage, async_op=False):
global _groups
-assert async_op == False, "Doesnt support async_op true"
+assert async_op == False, "Doesn't support async_op true"
dest_stage = _grid.get_stage_id()
_is_valid_send_recv(src_stage, dest_stage)

2 changes: 1 addition & 1 deletion deepspeed/runtime/pipe/topology.py
@@ -191,7 +191,7 @@ def _filter_helper(x):
return True

coords = filter(_filter_helper, self.mapping.keys())
-return [self.mapping[coo] for coo in coords]
+return [self.mapping[coord] for coord in coords]

def get_axis_list(self, axis, idx):
"""Returns the list of global ranks whose coordinate in an axis is idx.
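The list comprehension renamed here filters a coordinate-to-rank mapping. A self-contained sketch of the same idea on a hypothetical 2 x 2 pipe/data grid (not DeepSpeed's actual `ProcessTopology` class):

```python
from collections import namedtuple

Coord = namedtuple("Coord", ["pipe", "data"])
# Hypothetical grid: four ranks laid out over pipe and data axes.
mapping = {Coord(0, 0): 0, Coord(0, 1): 1, Coord(1, 0): 2, Coord(1, 1): 3}

def get_axis_list(axis, idx):
    # Global ranks whose coordinate along `axis` equals `idx`.
    coords = filter(lambda c: getattr(c, axis) == idx, mapping.keys())
    return [mapping[coord] for coord in coords]

print(get_axis_list("pipe", 0))  # [0, 1]
print(get_axis_list("data", 1))  # [1, 3]
```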
2 changes: 1 addition & 1 deletion deepspeed/runtime/zero/constants.py
@@ -125,7 +125,7 @@
ZERO_OPTIMIZATION_IGNORE_UNUSED_PARAMETERS = 'ignore_unused_parameters'
ZERO_OPTIMIZATION_IGNORE_UNUSED_PARAMETERS_DEFAULT = True

-# Use deepspeed < v0.3.17 zero stage 1, kept for backwards compatability reasons
+# Use deepspeed < v0.3.17 zero stage 1, kept for backwards compatibility reasons
ZERO_OPTIMIZATION_LEGACY_STAGE1 = "legacy_stage1"
ZERO_OPTIMIZATION_LEGACY_STAGE1_DEFAULT = False

4 changes: 2 additions & 2 deletions deepspeed/runtime/zero/contiguous_memory_allocator.py
@@ -96,7 +96,7 @@ def release_tensor(self, tensor):
print_rank_0(
f"Free before release {free_before}. Released {tensor.numel()}. Total free after {self.total_free}."
)
-assert self.total_free - tensor_size == free_before, "Release bookeeping error"
+assert self.total_free - tensor_size == free_before, "Release bookkeeping error"

def release_tensor_with_id(self, tensor_id):
free_before = self.total_free
@@ -109,7 +109,7 @@ def release_tensor_with_id(self, tensor_id):
print_rank_0(
f"Free before release {free_before}. Released {tensor.numel()}. Total free after {self.total_free}."
)
-assert self.total_free - tensor_size == free_before, "Release bookeeping error"
+assert self.total_free - tensor_size == free_before, "Release bookkeeping error"

#shows the current memory allocation at specified resolution
def print_allocation(self, resolution=200):
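The assertion being fixed guards a simple invariant: releasing a tensor must grow the free counter by exactly the tensor's size. A toy sketch of that bookkeeping (a hypothetical class, not the real allocator):

```python
class ToyAllocator:
    # Minimal free-count bookkeeping, mirroring the invariant in the asserts above.
    def __init__(self, numel):
        self.total_free = numel

    def allocate(self, numel):
        assert numel <= self.total_free, "Out of contiguous memory"
        self.total_free -= numel

    def release(self, numel):
        free_before = self.total_free
        self.total_free += numel
        assert self.total_free - numel == free_before, "Release bookkeeping error"

alloc = ToyAllocator(1024)
alloc.allocate(256)
alloc.release(256)
print(alloc.total_free)  # 1024
```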
2 changes: 1 addition & 1 deletion deepspeed/runtime/zero/partition_parameters.py
@@ -691,7 +691,7 @@ def get_model():

self._validate_remote_device(remote_device, _ds_config)

-# Remote device is the device where parameter partiitons are stored
+# Remote device is the device where parameter partitions are stored
# It can be same as local_device or it could be CPU or NVMe.
self.remote_device = self.local_device if remote_device is None else remote_device
self.pin_memory = pin_memory if (self.remote_device