diff --git a/python/paddle/amp/auto_cast.py b/python/paddle/amp/auto_cast.py
index 0286a668d10f56..5a271171e09cea 100644
--- a/python/paddle/amp/auto_cast.py
+++ b/python/paddle/amp/auto_cast.py
@@ -53,7 +53,7 @@ def __init__(self):
         self.model_parameters = []
         self.use_master_grad = False
         self.already_register_final_backward_hook = False
-        self.already_classify_params_meshs = False  # For dist
+        self.already_classify_params_meshes = False  # For dist
         self.mesh2params = {}  # For dist
         self.amp_dtype = 'float32'
 
@@ -471,7 +471,7 @@ def master_grad_hook():
                 # NOTE(lizhiyu): To support semi-auto of dygraph mode, we must
                 # classify the params of model into different calsses according to their process_mesh.
                 # Otherwise, fault will occur.
-                if not amp_global_state().already_classify_params_meshs:
+                if not amp_global_state().already_classify_params_meshes:
                     for param in amp_global_state().model_parameters:
                         if param is not None and param.process_mesh is not None:
                             if (
@@ -485,7 +485,7 @@ def master_grad_hook():
                                 amp_global_state().mesh2params[
                                     param.process_mesh
                                 ].append(param)
-                    amp_global_state().already_classify_params_meshs = True
+                    amp_global_state().already_classify_params_meshes = True
 
                 if len(amp_global_state().mesh2params):
                     for _, params in amp_global_state().mesh2params.items():
diff --git a/python/paddle/amp/debugging.py b/python/paddle/amp/debugging.py
index 0fd8fce8fe5f8f..974daa0a90697e 100644
--- a/python/paddle/amp/debugging.py
+++ b/python/paddle/amp/debugging.py
@@ -270,7 +270,7 @@ def _set_seed(self, flag):
         self.seed = self.initial_seed
 
         if self.seed > np.iinfo(np.uint32).max or self.seed < 0:
-            print("[Warnning: Seed must be between 0 and 2**32 - 1")
+            print("Warning: Seed must be between 0 and 2**32 - 1")
             self.seed = 123
 
         # get random seed
@@ -616,7 +616,7 @@ def compare_accuracy(
             ...     [1, 5, 2, 0], dtype="float32"
             ... )
             ... z1 = x + y
-            ... out_excel = "compary_accuracy_out_excel.csv"
+            ... out_excel = "compare_accuracy_out_excel.csv"
             ... paddle.amp.debugging.compare_accuracy(
             ...     path, path, out_excel, loss_scale=1, dump_all_tensors=False
             ... )
diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py
index 5ddf610bb032ba..2843560f4a878c 100644
--- a/python/paddle/autograd/py_layer.py
+++ b/python/paddle/autograd/py_layer.py
@@ -18,7 +18,7 @@
 __all__ = []
 
 
-def with_mateclass(meta, *bases):
+def with_metaclass(meta, *bases):
     class impl(meta):
         def __new__(cls, name, temp_bases, attrs):
             return meta(name, bases, attrs)
@@ -267,7 +267,7 @@ def __init__(cls, name, bases, attrs):
         return super().__init__(name, bases, attrs)
 
 
-class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
+class PyLayer(with_metaclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
     """
     Paddle implements Python custom operators on the PaddlePaddle framework by creating
     a subclass of ``PyLayer``, which must comply with the following rules:
diff --git a/python/paddle/base/dygraph/tensor_patch_methods.py b/python/paddle/base/dygraph/tensor_patch_methods.py
index 7c7a3d60ebf45c..275ab3a232d96c 100644
--- a/python/paddle/base/dygraph/tensor_patch_methods.py
+++ b/python/paddle/base/dygraph/tensor_patch_methods.py
@@ -104,7 +104,7 @@ def _to_static_var(self, to_parameter=False, **kwargs):
         """
 
         # Note: getattr(self, attr, None) will call x.grad=x.gradient(), but gradient() only available in dygraph.
-        # It will fail. So, for propery that different between dynamic and static graph, should not getattr(self, attr, None).
+        # It will fail. So, for properties that differ between dynamic and static graph, do not getattr(self, attr, None).
         attr_not_need_keys = [
             'grad',
             'T',
@@ -227,7 +227,7 @@ def set_value(self, value):
 
         # NOTE(wuweilong): self could be Tensor, the subsequent behavior are defined in different files
         # if self is Tensor, method value() return self that defined in this file, get_tensor() defined in eager_method.cc
-        # this Interface behavior will be unifed in the future.
+        # this Interface behavior will be unified in the future.
         if self.is_dist():
             if isinstance(value, paddle.Tensor) and value.is_dist():
                 from paddle.distributed.auto_parallel.placement_type import (
@@ -702,7 +702,7 @@ def get_device_dtype_from_tensor(other):
 
     if size_args + size_kwargs > 3 or size_args + size_kwargs == 0:
         raise TypeError(
-            "to() received too mant arguments - expected one of:\n \
+            "to() received too many arguments - expected one of:\n \
             * (Union[str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace(), paddle.XPUPlace(), paddle.CustomPlace()] \
             device, Union[str, paddle.dtype, numpy.dtype] dtype, bool blocking)\n \
             * (Union[str, paddle.dtype, numpy.dtype] dtype, bool blocking)\n \
@@ -976,7 +976,7 @@ def __array__(self, dtype=None):
         return array
 
     def pre_deal_index(self, item):
-        # since in pybind there is no effiency way to transfer Py_Tuple/Py_List/Py_Range to Tensor
+        # since in pybind there is no efficient way to transfer Py_Tuple/Py_List/Py_Range to Tensor
         # we call this function in python level.
         item = list(item) if isinstance(item, tuple) else [item]
         for i, slice_item in enumerate(item):
diff --git a/python/paddle/base/incubate/checkpoint/auto_checkpoint.py b/python/paddle/base/incubate/checkpoint/auto_checkpoint.py
index 742289acd27f1a..329cdc25ab0831 100644
--- a/python/paddle/base/incubate/checkpoint/auto_checkpoint.py
+++ b/python/paddle/base/incubate/checkpoint/auto_checkpoint.py
@@ -419,7 +419,7 @@ def _serialize(self, pop_keys=["restored_from", "checkpoint_epoch_no"]):
         for k in pop_keys:
             d.pop(k, None)
 
-        # registerd exes
+        # registered exes
        d["exe_status"] = {}
         e = d["exe_status"]
         for k, t in self._exe_status.items():
@@ -625,7 +625,7 @@ def train_epoch_range(max_epoch_num, save_checkpoint_inter=None):
     global g_acp_type
     if not _get_checker().valid():
         logger.warning(
-            "auto checkpoint will take effect automaticly on PaddleCloud"
+            "auto checkpoint will take effect automatically on PaddleCloud"
         )
         for i in _normal_yield(max_epoch_num):
             yield i
diff --git a/python/paddle/base/layers/io.py b/python/paddle/base/layers/io.py
index 51f5b10fe06188..de9725ec28fac0 100644
--- a/python/paddle/base/layers/io.py
+++ b/python/paddle/base/layers/io.py
@@ -74,7 +74,7 @@ def __create_shared_decorated_reader__(op_type, reader, attrs):
     var_name = unique_name(op_type)
     startup_blk = default_startup_program().current_block()
     startup_var = startup_blk.create_var(name=var_name)
-    startop_op = startup_blk.append_op(
+    startup_op = startup_blk.append_op(
         type=op_type,
         inputs={'UnderlyingReader': reader},
         outputs={'Out': [startup_var]},
@@ -83,7 +83,7 @@ def __create_shared_decorated_reader__(op_type, reader, attrs):
     startup_var.persistable = True
     main_prog_block = default_main_program().current_block()
     main_prog_var = _copy_reader_var_(main_prog_block, startup_var)
-    _copy_reader_create_op_(main_prog_block, startop_op)
+    _copy_reader_create_op_(main_prog_block, startup_op)
     return monkey_patch_reader_methods(main_prog_var)
diff --git a/python/paddle/base/layers/layer_function_generator.py b/python/paddle/base/layers/layer_function_generator.py
index 009cb2ae49a6b7..a8128603e05cda 100644
--- a/python/paddle/base/layers/layer_function_generator.py
+++ b/python/paddle/base/layers/layer_function_generator.py
@@ -86,7 +86,7 @@ def _generate_doc_string_(
             buf.write(" (Tensor): ")
             buf.write(escape_math(each_input.comment))
             if each_input.duplicable:
-                buf.write(" Duplicatable.")
+                buf.write(" Duplicable.")
             if each_input.dispensable:
                 buf.write(" Optional.")
             buf.write('\n')
@@ -327,7 +327,7 @@ def func(x, name=None):
                 and x.is_view_var
             ):
                 raise ValueError(
-                    'Sorry about what\'s happend. In to_static mode, {}\'s output variable {} is a viewed Tensor in dygraph. This will result in inconsistent calculation behavior between dynamic and static graphs. You must find the location of the strided API be called, and call {} = {}.assign().'.format(
+                    'Sorry about what\'s happened. In to_static mode, {}\'s output variable {} is a viewed Tensor in dygraph. This will result in inconsistent calculation behavior between dynamic and static graphs. You must find the location where the strided API is called, and call {} = {}.assign().'.format(
                         inplace_op_type, x.name, x.name, x.nameb
                     )
                 )
diff --git a/python/paddle/base/reader.py b/python/paddle/base/reader.py
index e90378249da03f..d5695aec5b220e 100644
--- a/python/paddle/base/reader.py
+++ b/python/paddle/base/reader.py
@@ -137,7 +137,7 @@ def _check_input_array(cls, item):
         arr = np.asarray(item)
         if arr.dtype == np.object_:
             raise TypeError(
-                "\n\tFaild to convert input data to a regular ndarray :\n\t* Usually "
+                "\n\tFailed to convert input data to a regular ndarray :\n\t* Usually "
                 "this means the input data contains nested lists with different lengths. "
                 "\n\t* Check the reader function passed to 'decorate_batch_generator'"
                 " to locate the data causes this issue.\n\t* Please consider using "
@@ -532,7 +532,7 @@ def __init__(
         # NOTE: the C++ LoDTensorBlockingQueue instance
         self._blocking_queue = None
         # NOTE: 1. In multiprocess mode, this thread is used to get next batch data from
-        # self._data_queue, then push it into self._blocking_queue; 2. In singleprocess
+        # self._data_queue, then push it into self._blocking_queue; 2. In single process
         # mode, this thread is used to get next batch data from self._batch_reader, then
         # push it into self._blocking_queue
         self._thread = None
diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py
index 7618590b376b76..328f3e0078052a 100644
--- a/python/paddle/hapi/model.py
+++ b/python/paddle/hapi/model.py
@@ -293,7 +293,7 @@ def _update_input_info(inputs):
 
 class StaticGraphAdapter:
     """
-    Model traning/inference with a static graph.
+    Model training/inference with a static graph.
 
     """
 
@@ -633,7 +633,7 @@ def _make_program(self, mode):
         prog = self._orig_prog.clone()
         # NOTE: When defining learning rate scheduling in static-graph, ops to
         # increase the global step var and calculate learning rate would be
-        # prepended into _orig_prog. test program maked by `_orig_prog.clone`
+        # prepended into _orig_prog. test program made by `_orig_prog.clone`
         # also would include these ops. Thus must prune these ops in test
         # program, otherwise the global step would be changed in test.
         if mode != 'train':
@@ -794,16 +794,16 @@ def __init__(self, model):
 
         if self._nranks > 1:
             dist.init_parallel_env()
-            stradegy = paddle.distributed.parallel.ParallelStrategy()
-            stradegy.nranks = paddle.distributed.ParallelEnv().nranks
-            stradegy.local_rank = paddle.distributed.ParallelEnv().local_rank
-            stradegy.trainer_endpoints = (
+            strategy = paddle.distributed.parallel.ParallelStrategy()
+            strategy.nranks = paddle.distributed.ParallelEnv().nranks
+            strategy.local_rank = paddle.distributed.ParallelEnv().local_rank
+            strategy.trainer_endpoints = (
                 paddle.distributed.ParallelEnv().trainer_endpoints
             )
-            stradegy.current_endpoint = (
+            strategy.current_endpoint = (
                 paddle.distributed.ParallelEnv().current_endpoint
             )
-            self.ddp_model = paddle.DataParallel(self.model.network, stradegy)
+            self.ddp_model = paddle.DataParallel(self.model.network, strategy)
 
     @property
     def mode(self):
@@ -879,7 +879,7 @@ def eval_batch(self, inputs, labels=None):
 
         outputs = self.model.network(*[paddle.to_tensor(x) for x in inputs])
 
-        # Transfrom data to expected device
+        # Transform data to expected device
         expected_device = paddle.device.get_device()
         for o in to_list(outputs):
             o._to(device=expected_device)
@@ -966,7 +966,7 @@ def load(self, param_state_pairs, optim_state, scaler_state=None):
             if scaler_state:
                 self.model._scaler.load_state_dict(scaler_state)
 
-        # resotre optimizer states
+        # restore optimizer states
         if not self.model._optimizer or not optim_state:
             return
 
@@ -1077,7 +1077,7 @@ class Model:
             or dict ({name: InputSpec}), and it couldn't be None in static
             graph. Default: None.
         labels (InputSpec|list|tuple|None, optional): `labels`, entry points of network,
-            could be a InputSpec instnace or list/tuple of InputSpec instances,
+            could be an InputSpec instance or list/tuple of InputSpec instances,
             or None. For static graph, if labels is required in loss,
             labels must be set. Otherwise, it could be None. Default: None.
 
@@ -1676,7 +1676,7 @@ def prepare(
     ):
         """
 
-        Configures the model before runing.
+        Configures the model before running.
 
         Args:
             optimizer (Optimizer|None, optional): Optimizer must be set in training
@@ -1777,16 +1777,16 @@ def fit(
         Args:
             train_data (Dataset|DataLoader, optional): An iterable data loader is used for
                 train. An instance of paddle paddle.io.Dataset or
-                paddle.io.Dataloader is recomended. Default: None.
+                paddle.io.Dataloader is recommended. Default: None.
             eval_data (Dataset|DataLoader, optional): An iterable data loader is used for
                 evaluation at the end of epoch. If None, will not do evaluation.
                 An instance of paddle.io.Dataset or paddle.io.Dataloader
-                is recomended. Default: None.
+                is recommended. Default: None.
             batch_size (int|list, optional): The batch size of train_data and eval_data.
                 When train_data and eval_data are both the instance of Dataloader, this
                 parameter will be ignored. Default: 1.
             epochs (int, optional): The number of epochs to train the model. Default: 1.
-            eval_freq (int, optional): The frequency, in number of epochs, an evalutation
+            eval_freq (int, optional): The frequency, in number of epochs, an evaluation
                 is performed. Default: 1.
             log_freq (int, optional): The frequency, in number of steps, the training logs
                 are printed. Default: 10.
@@ -1800,7 +1800,7 @@ def fit(
                 train_data when dataset size is not divisible by the batch size. When train_data
                 is an instance of Dataloader, this parameter will be ignored. Default: False.
-            shuffle (bool, optional): Whther to shuffle train_data. When train_data is
+            shuffle (bool, optional): Whether to shuffle train_data. When train_data is
                 an instance of Dataloader, this parameter will be ignored.
                 Default: True.
             num_workers (int, optional): The number of subprocess to load data, 0 for no
@@ -1810,7 +1810,7 @@ def fit(
             callbacks (Callback|None, optional): A list of `Callback` instances to apply
                 during training. If None, :ref:`api_paddle_callbacks_ProgBarLogger` and
                 :ref:`api_paddle_callbacks_ModelCheckpoint` are automatically inserted. Default: None.
-            accumulate_grad_batches (int, optional): The number of batches to accumulate gradident
+            accumulate_grad_batches (int, optional): The number of batches to accumulate gradient
                 during training process before optimizer updates. It can mimic large batch
                 size. Default: 1.
             num_iters (int|None, optional): The number of iterations to evaluate the model.
@@ -2016,7 +2016,7 @@ def evaluate(
         Args:
             eval_data (Dataset|DataLoader): An iterable data loader is used for
                 evaluation. An instance of paddle.io.Dataset or
-                paddle.io.Dataloader is recomended.
+                paddle.io.Dataloader is recommended.
             batch_size (int, optional): The batch size of train_data and eval_data.
                 When eval_data is the instance of Dataloader, this argument will be
                 ignored. Default: 1.
@@ -2126,7 +2126,7 @@ def predict(
         Args:
             test_data (Dataset|DataLoader): An iterable data loader is used for predict.
                 An instance of paddle.io.Dataset or paddle.io.Dataloader
-                is recomended.
+                is recommended.
             batch_size (int, optional): The batch size of test_data. When test_data is the
                 instance of Dataloader, this argument will be ignored. Default: 1.
             num_workers (int, optional): The number of subprocess to load data, 0 for no subprocess
@@ -2300,13 +2300,13 @@ def _run_one_epoch(
             # Data might come from different types of data_loader and have
             # different format, as following:
             # 1. DataLoader in static graph:
-            #    [[input1, input2, ..., label1, lable2, ...]]
+            #    [[input1, input2, ..., label1, label2, ...]]
             # 2. DataLoader in dygraph
-            #    [input1, input2, ..., label1, lable2, ...]
+            #    [input1, input2, ..., label1, label2, ...]
             # 3. custumed iterator yield concated inputs and labels:
-            #    [input1, input2, ..., label1, lable2, ...]
+            #    [input1, input2, ..., label1, label2, ...]
             # 4. custumed iterator yield separated inputs and labels:
-            #    ([input1, input2, ...], [label1, lable2, ...])
+            #    ([input1, input2, ...], [label1, label2, ...])
             # To handle all of these, flatten (nested) list to list.
             data = paddle.utils.flatten(data)
             # LoDTensor.shape is callable, where LoDTensor comes from
diff --git a/python/paddle/incubate/asp/supported_layer_list.py b/python/paddle/incubate/asp/supported_layer_list.py
index 0ebc6ea2d31280..7720a1cf7127c9 100644
--- a/python/paddle/incubate/asp/supported_layer_list.py
+++ b/python/paddle/incubate/asp/supported_layer_list.py
@@ -35,16 +35,16 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
     shape = weight_nparray.shape
     weight_pruned_nparray = copy.deepcopy(weight_nparray)
     weight_sparse_mask = np.ones_like(weight_pruned_nparray)
-    exlude_cond_shape2 = len(shape) == 2 and shape[0] < m
-    exlude_cond_shape4 = len(shape) == 4 and shape[1] < m
-    if exlude_cond_shape2:
+    exclude_cond_shape2 = len(shape) == 2 and shape[0] < m
+    exclude_cond_shape4 = len(shape) == 4 and shape[1] < m
+    if exclude_cond_shape2:
         _logger.warning(
             '{} is not pruned because the first dimension of {} is smaller than {}'.format(
                 param_name, shape, m
             )
         )
         return weight_pruned_nparray, weight_sparse_mask
-    if exlude_cond_shape4:
+    if exclude_cond_shape4:
         _logger.warning(
             '{} is not pruned because the second dimension of {} is smaller than {}'.format(
                 param_name, shape, m
@@ -58,12 +58,12 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
     # SPMMA in cuSparseLt: D = (AxB) + C, where matrix A (mxk) is sparse matrix.
     # cuSparseLt would prune matrix A along k dimension.
     # In sparse training, layer weight matrices is viewed sparse matrix A, so
-    # the math fomula should be 'Act(WX + b)'. However, default fomula in PaddlePaddle
+    # the math formula should be 'Act(WX + b)'. However, default formula in PaddlePaddle
     # is 'Act(XW + b)'. For enabling SPMMA, weights and inputs should be transposed
     # for computing, Act( (W^T X^T)^T + b). Therefore, we have to prune alog k dimension
-    # of W^T, which is m dimension of W. Moreove, all mask generating functions in
+    # of W^T, which is m dimension of W. Moreover, all mask generating functions in
     # asp/utils is row-major pruning. That is the reason we have to transpose weight
-    # matrices beforce invoking create_mask. Then we transpose the result mask to make
+    # matrices before invoking create_mask. Then we transpose the result mask to make
     # sure its shape to be the same as the input weight.
     weight_sparse_mask = asp.create_mask(
         weight_nparray.T, func_name=func_name, n=n, m=m
diff --git a/python/paddle/incubate/asp/utils.py b/python/paddle/incubate/asp/utils.py
index 4ed8d7e74d56ed..f8918a5ed0ced9 100644
--- a/python/paddle/incubate/asp/utils.py
+++ b/python/paddle/incubate/asp/utils.py
@@ -171,11 +171,11 @@ def check_mask_1d(mat, n, m):
         True
     """
     if len(mat.shape) <= 1:
-        mat_flattern, shape = _reshape_1d(mat.reshape(1, mat.shape[0]), m)
+        mat_flatten, shape = _reshape_1d(mat.reshape(1, mat.shape[0]), m)
     else:
-        mat_flattern, shape = _reshape_1d(mat, m)
+        mat_flatten, shape = _reshape_1d(mat, m)
 
-    for sub_mat in mat_flattern:
+    for sub_mat in mat_flatten:
         if np.nonzero(sub_mat)[0].size > (m - n):
             return False
     return True
@@ -210,12 +210,12 @@ def get_mask_1d(mat, n, m):
         >>> print(y)
         True
     """
-    mat_flattern, shape = _reshape_1d(mat, m)
+    mat_flatten, shape = _reshape_1d(mat, m)
 
-    mask_flattern = np.ones_like(mat_flattern)
+    mask_flattern = np.ones_like(mat_flatten)
     mask = np.ones_like(mat)
-    for i in range(mat_flattern.shape[0]):
-        sub_mat = mat_flattern[i]
+    for i in range(mat_flatten.shape[0]):
+        sub_mat = mat_flatten[i]
         min_order_indices = np.argsort(np.absolute(sub_mat))
         mask_flattern[i, min_order_indices[:n].tolist()] = 0
     mask_flattern = mask_flattern.reshape(shape)
@@ -252,7 +252,7 @@ def _reshape_2d(mat, m):
     mat_padded = np.zeros(new_shape)
     mat_padded[: mat.shape[0], : mat.shape[1]] = mat
 
-    mat_flattern = np.empty(new_shape).reshape(-1, m * m)
+    mat_flatten = np.empty(new_shape).reshape(-1, m * m)
     curr_idx = 0
     for row_start in range(0, mat_padded.shape[0], m):
         row_end = row_start + m
@@ -261,9 +261,9 @@ def _reshape_2d(mat, m):
             sub_mat = np.squeeze(
                 mat_padded[row_start:row_end, col_start:col_end].reshape(-1)
             )
-            mat_flattern[curr_idx] = sub_mat
+            mat_flatten[curr_idx] = sub_mat
             curr_idx += 1
-    return mat_flattern, mat_padded.shape
+    return mat_flatten, mat_padded.shape
 
 
 def check_mask_2d(mat, n, m):
@@ -400,7 +400,7 @@ def get_mask_2d_greedy(mat, n, m):
 
 def _compute_valid_2d_patterns(n, m):
     r"""
-    Compute all vaild 2D `n:m` sparse patterns.
+    Compute all valid 2D `n:m` sparse patterns.
 
     2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
     under the constraint of at least :attr:`n` zeros for each row and column.
@@ -409,7 +409,7 @@ def _compute_valid_2d_patterns(n, m):
         n (int): n of `n:m` sparse pattern.
         m (int): m of `n:m` sparse pattern.
     Returns:
-        dictionary: A dictionary with key: *m_n* (string) and value: all vaild 2D `n:m` sparse patterns.
+        dictionary: A dictionary with key: *m_n* (string) and value: all valid 2D `n:m` sparse patterns.
     """
     global _valid_2d_patterns_lock
     global _valid_2d_patterns
@@ -442,7 +442,7 @@ def get_mask_2d_best(mat, n, m):
     r"""
     Generate 2D `n:m` sparse pattern mask of the input matrix :attr:`mat`
-    to form sparse matrix with maximun L1 norm .This function would pad each
+    to form sparse matrix with maximum L1 norm. This function would pad each
     dimension of :attr:`mat` by zero to be a multiples of :attr:`m` before mask generation.
 
     2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
@@ -475,10 +475,10 @@ def get_mask_2d_best(mat, n, m):
     """
     patterns = _compute_valid_2d_patterns(n, m)
 
-    mat_flattern, shape = _reshape_2d(mat, m)
-    mask_flattern = np.ones_like(mat_flattern).reshape(-1, m, m)
+    mat_flatten, shape = _reshape_2d(mat, m)
+    mask_flattern = np.ones_like(mat_flatten).reshape(-1, m, m)
     pmax = np.argmax(
-        np.matmul(mat_flattern, patterns.reshape(patterns.shape[0], m * m).T),
+        np.matmul(mat_flatten, patterns.reshape(patterns.shape[0], m * m).T),
         axis=1,
     )
 
@@ -502,7 +502,7 @@ def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4):
 
     Args:
         tensor (nparray): The input tensor.
-        func_name (MaskAlgo, optional): The function name to generate spase mask. Default is `MaskAlgo.MASK_1D`. All options please refer to `MaskAlgo`.
+        func_name (MaskAlgo, optional): The function name to generate sparse mask. Default is `MaskAlgo.MASK_1D`. For all options, please refer to `MaskAlgo`.
         n (int, optional): n of `n:m` sparse pattern. Default is 2.
         m (int, optional): m of `n:m` sparse pattern. Default is 4.
     Returns:
@@ -573,7 +573,7 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4):
 
     Args:
         tensor (nparray): The input tensor.
-        func_name (CheckMethod, optional): The function name to generate spase mask. Default is `CheckMethod.CHECK_1D`. All options please refer to `CheckMethod`.
+        func_name (CheckMethod, optional): The function name to generate sparse mask. Default is `CheckMethod.CHECK_1D`. For all options, please refer to `CheckMethod`.
         n (int, optional): n of `n:m` sparse pattern. Default is 2.
         m (int, optional): m of `n:m` sparse pattern. Default is 4.
     Returns:
@@ -605,7 +605,7 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4):
     t = tensor.astype(float)
 
     assert type(func_name) == CheckMethod, (
-        "func_name argumet of check_sparsity is only accepted as type CheckMethod. "
+        "func_name argument of check_sparsity is only accepted as type CheckMethod. "
         f"But got {type(func_name)}"
     )
     func = getattr(sys.modules[__name__], func_name.value, None)
diff --git a/python/paddle/incubate/autograd/primapi.py b/python/paddle/incubate/autograd/primapi.py
index 9f62d1f5835c7f..d0c7d41ef194d0 100644
--- a/python/paddle/incubate/autograd/primapi.py
+++ b/python/paddle/incubate/autograd/primapi.py
@@ -74,13 +74,13 @@ def forward_grad(outputs, inputs, grad_inputs=None):
 
     if not isinstance(outputs, (framework.Variable, typing.Sequence)):
         raise TypeError(
-            f'Expected outputs is Tensor|Sequence[Tesnor], '
+            f'Expected outputs is Tensor|Sequence[Tensor], '
             f'but got {type(outputs)}.'
         )
 
     if not isinstance(inputs, (framework.Variable, typing.Sequence)):
         raise TypeError(
-            f'Expected inputs is Tensor|Sequence[Tesnor], '
+            f'Expected inputs is Tensor|Sequence[Tensor], '
             f'but got {type(inputs)}.'
         )
 
@@ -165,13 +165,13 @@ def grad(outputs, inputs, grad_outputs=None):
 
     if not isinstance(outputs, (framework.Variable, typing.Sequence)):
         raise TypeError(
-            f'Expected outputs is Tensor|Sequence[Tesnor], '
+            f'Expected outputs is Tensor|Sequence[Tensor], '
             f'but got {type(outputs)}.'
         )
 
     if not isinstance(inputs, (framework.Variable, typing.Sequence)):
         raise TypeError(
-            f'Expected inputs is Tensor|Sequence[Tesnor], '
+            f'Expected inputs is Tensor|Sequence[Tensor], '
             f'but got {type(inputs)}.'
         )
diff --git a/python/paddle/incubate/autotune.py b/python/paddle/incubate/autotune.py
index 745ac9fc69c071..c99b3498946c4f 100644
--- a/python/paddle/incubate/autotune.py
+++ b/python/paddle/incubate/autotune.py
@@ -136,10 +136,10 @@ def set_config(config=None):
             )
     if "dataloader" in config_dict:
         dataloader_config = config_dict["dataloader"]
-        use_autoune = False
+        use_autotune = False
         if "enable" in dataloader_config:
             if isinstance(dataloader_config['enable'], bool):
-                use_autoune = dataloader_config['enable']
+                use_autotune = dataloader_config['enable']
             else:
                 warnings.warn(
                     "The auto-tuning configuration of the dataloader is incorrect."
@@ -148,11 +148,11 @@ def set_config(config=None):
         if "tuning_steps" in dataloader_config:
             if isinstance(dataloader_config['tuning_steps'], int):
                 paddle.io.reader.set_autotune_config(
-                    use_autoune, dataloader_config['tuning_steps']
+                    use_autotune, dataloader_config['tuning_steps']
                 )
             else:
                 warnings.warn(
                     "The auto-tuning configuration of the dataloader is incorrect."
                     "The `tuning_steps` should be int. Use default parameter instead."
                 )
-                paddle.io.reader.set_autotune_config(use_autoune)
+                paddle.io.reader.set_autotune_config(use_autotune)
diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py b/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py
index f810014e93b3b3..c6b6eec0251074 100644
--- a/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py
+++ b/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py
@@ -257,14 +257,14 @@ def _init_transpiler_server(self, model_dir=None):
             sparse_varnames = self.compiled_config.get_sparse_varname_on_ps(
                 True
             )
-            distribtued_varnames = (
+            distributed_varnames = (
                 self.compiled_config.get_sparse_varname_on_ps(False)
             )
 
             remaining_vars = list(
                 filter(
                     FleetTranspiler.__exclude_vars(
-                        sparse_varnames + distribtued_varnames
+                        sparse_varnames + distributed_varnames
                     ),
                     self.main_program.list_vars(),
                 )
@@ -282,7 +282,7 @@ def _init_transpiler_server(self, model_dir=None):
             )
 
         # todo(tangwei12) load distributed vars
-        # self._load_sparse_params(dirname=model_dir, varnames=distribtued_varnames)
+        # self._load_sparse_params(dirname=model_dir, varnames=distributed_varnames)
 
     def init_server(self, model_dir=None, **kwargs):
         """
diff --git a/python/paddle/jit/dy2static/transformers/decorator_transformer.py b/python/paddle/jit/dy2static/transformers/decorator_transformer.py
index 143d1fb1e14d7d..c19ce1f95b587c 100644
--- a/python/paddle/jit/dy2static/transformers/decorator_transformer.py
+++ b/python/paddle/jit/dy2static/transformers/decorator_transformer.py
@@ -56,13 +56,13 @@ def visit_FunctionDef(self, node):
 
         # every decorator will append a node
         decofun_nodes = []
-        # func to be decoed next time
+        # func to be decorated next time
         deco_target = '_orig_' + node.name
-        # last decoed func
-        decoed_func = ''
+        # last decorated func
+        decorated_func = ''
 
         for deco in reversed(deco_list):
-            # skip INGNORE_NAMES
+            # skip IGNORE_NAMES
             deco_full_name = ast_to_source_code(deco).strip()
             if isinstance(deco, gast.Call):
                 # match case like :
@@ -90,7 +90,7 @@ def visit_FunctionDef(self, node):
                     "Dy2Static : A context manager decorator is used, this may not work correctly after transform."
                 )
 
-            decoed_func = '_decoedby_' + deco_name
+            decorated_func = '_decoratedby_' + deco_name
 
             # get function after decoration
             if isinstance(deco, gast.Call):
@@ -104,7 +104,7 @@ def visit_FunctionDef(self, node):
                     re_args = rematch.group(2)
                     re_args_with_func = deco_target + ', ' + re_args
                     decofun_str = 'try:\n\t{0} = _jst.Call({1})({2})\nexcept:\n\t{0} = _jst.Call({1})({3})({4})'.format(
-                        decoed_func,
+                        decorated_func,
                         re_name,
                         re_args_with_func,
                         re_args,
@@ -117,7 +117,7 @@ def visit_FunctionDef(self, node):
                     re_args = rematch.group(2)
                     re_args_with_func = deco_target + ', ' + re_args
                     decofun_str = 'try:\n\t{0} = {1}({2})\nexcept:\n\t{0} = {1}({3})({4})'.format(
-                        decoed_func,
+                        decorated_func,
                         re_name,
                         re_args_with_func,
                         re_args,
@@ -126,11 +126,11 @@ def visit_FunctionDef(self, node):
 
             else:
                 decofun_str = '{} = _jst.Call({})({})'.format(
-                    decoed_func, deco_full_name, deco_target
+                    decorated_func, deco_full_name, deco_target
                 )
 
             decofun_nodes.extend(gast.parse(decofun_str).body)
-            deco_target = decoed_func
+            deco_target = decorated_func
 
         if not decofun_nodes:
             return node
@@ -146,7 +146,7 @@ def visit_FunctionDef(self, node):
         args = [arg.id for arg in node.args.args]
         arg_str = ','.join(args)
-        callfun_str = f'return {decoed_func}({arg_str})'
+        callfun_str = f'return {decorated_func}({arg_str})'
         callfun_node = gast.parse(callfun_str).body[0]
 
         node.body = [orig_func_node] + decofun_nodes + [callfun_node]
diff --git a/python/paddle/jit/dy2static/transformers/tensorhook_transformer.py b/python/paddle/jit/dy2static/transformers/tensorhook_transformer.py
index b0a5c56063ab41..04abaa34ef38bd 100644
--- a/python/paddle/jit/dy2static/transformers/tensorhook_transformer.py
+++ b/python/paddle/jit/dy2static/transformers/tensorhook_transformer.py
@@ -38,7 +38,7 @@ def transform(self):
         self.visit(self.root)
 
     def reorder_block_statements(self, stmts):
-        regisiter_hook_nodes = [
+        register_hook_nodes = [
             n
             for n in stmts
             for stmt in gast.walk(n)
@@ -46,7 +46,7 @@ def reorder_block_statements(self, stmts):
         ]
         # Analyze the register_hook nodes name dependency
         dependents = {}
-        for n in regisiter_hook_nodes:
+        for n in register_hook_nodes:
             if n not in stmts:
                 continue
             for load_node in get_loads(n):
diff --git a/python/paddle/jit/dy2static/utils.py b/python/paddle/jit/dy2static/utils.py
index 582dd370aa4b4f..ce1c26afcb333f 100644
--- a/python/paddle/jit/dy2static/utils.py
+++ b/python/paddle/jit/dy2static/utils.py
@@ -309,7 +309,7 @@ def func_prefix(func):
 
     global DEL_TEMP_DIR
     if delete_on_exit and DEL_TEMP_DIR:
-        # Clear temporary files in TEMP_DIR while exitting Python process
+        # Clear temporary files in TEMP_DIR while exiting Python process
         atexit.register(remove_if_exit, dir_path=temp_dir)
         DEL_TEMP_DIR = False
 
@@ -576,16 +576,16 @@ def name_judge():
 @signature_safe_contextmanager
 def backend_guard(backend):
     core.check_and_set_prim_all_enabled()
-    orign_fwd = core._is_fwd_prim_enabled()
-    orign_bwd = core._is_bwd_prim_enabled()
+    origin_fwd = core._is_fwd_prim_enabled()
+    origin_bwd = core._is_bwd_prim_enabled()
     if backend == 'CINN':
         core._set_prim_all_enabled(True)
     try:
         yield
     finally:
-        core._set_prim_forward_enabled(orign_fwd)
-        core._set_prim_backward_enabled(orign_bwd)
+        core._set_prim_forward_enabled(origin_fwd)
+        core._set_prim_backward_enabled(origin_bwd)
 
 
 def construct_grad_names(grad_info_map, x_vars, param_vars, out_vars):
diff --git a/python/paddle/jit/sot/symbolic/export.py b/python/paddle/jit/sot/symbolic/export.py
index 720ef70730d20e..39b06eca1891c5 100644
--- a/python/paddle/jit/sot/symbolic/export.py
+++ b/python/paddle/jit/sot/symbolic/export.py
@@ -31,8 +31,8 @@ def __init__(self, *lines):
 
     def get_lines(self, prefix=""):
         lines = [prefix + line for line in self.lines]
-        for statment in self.sub_statement:
-            lines.extend(statment.get_lines(self.tab + prefix))
+        for statement in self.sub_statement:
+            lines.extend(statement.get_lines(self.tab + prefix))
         return lines
 
     def add_sub(self, *lines):
@@ -302,7 +302,7 @@ def create_tail(self):
         )
 
     def init_sub_layer(self, layer, layer_name):
-        # TODO @wuzhanfei need more effecient way to create a sub layer
+        # TODO @wuzhanfei need more efficient way to create a sub layer
         # now, we just close call_Layer behavior
         raise ExportError("Not support create sub layer now.")
 
@@ -385,4 +385,6 @@ def export(SIR, path):
     with open(os.path.join(path, f"{SIR.name}.py"), "w") as f:
         f.write(string)
 
-    print(f"[SOT] Export {SIR.name} Sucess with size {len(SIR.statements)}")
+    print(
+        f"[SOT] Export {SIR.name} Success with size {len(SIR.statements)}"
+    )
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index f057a261e9da79..a931912ae95727 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -1132,7 +1132,7 @@ def multiply_(x, y, name=None):
     return _C_ops.multiply_(x, y)
 
 
-def _elementwise_op_with_axis(x, y, axis=-1, name=None, op_type="Undifined"):
+def _elementwise_op_with_axis(x, y, axis=-1, name=None, op_type="Undefined"):
     assert (
         in_dynamic_or_pir_mode()
     ), "You can only call `_elementwise_op_with_axis` function within in_dynamic_or_pir_mode"
diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py
index 0ea8bb96566abe..40c4e68d2e2a57 100644
--- a/python/paddle/utils/cpp_extension/cpp_extension.py
+++ b/python/paddle/utils/cpp_extension/cpp_extension.py
@@ -589,7 +589,7 @@ def win_custom_spawn(cmd):
             finally:
                 self.compiler.spawn = original_spawn
 
-        def object_filenames_with_cuda(origina_func, build_directory):
+        def object_filenames_with_cuda(original_func, build_directory):
             """
             Decorated the function to add customized naming mechanism.
             Originally, both .cc/.cu will have .o object output that will
@@ -598,7 +598,7 @@ def wrapper(source_filenames, strip_dir=0, output_dir=''):
                 try:
-                    objects = origina_func(
+                    objects = original_func(
                         source_filenames, strip_dir, output_dir
                     )
                     for i, source in enumerate(source_filenames):
@@ -618,7 +618,7 @@ def wrapper(source_filenames, strip_dir=0, output_dir=''):
                     # ensure to use abspath
                     objects = [os.path.abspath(obj) for obj in objects]
                 finally:
-                    self.compiler.object_filenames = origina_func
+                    self.compiler.object_filenames = original_func
 
                 return objects