Fix, test=document_fix
co63oc committed Feb 23, 2024
1 parent 2e3dc91 commit 1e401bb
Showing 86 changed files with 142 additions and 142 deletions.
2 changes: 1 addition & 1 deletion paddle/cinn/backends/codegen_cuda_dev.cc
@@ -436,7 +436,7 @@ void CodeGenCUDA_Dev::Visit(const ir::Let *op) {
str_ += " ";
IrPrinter::Visit(op->symbol);
vectorized_tensor_names_.insert(utils::GetStreamCnt(op->symbol));
// skip "=0" in "half8 temp = 0;" sincethe operator= of half8 may not
// skip "=0" in "half8 temp = 0;" since the operator= of half8 may not
// overloaded.
if (op->body.As<ir::IntImm>() && op->body.As<ir::IntImm>()->value == 0) {
return;
2 changes: 1 addition & 1 deletion paddle/cinn/frontend/interpreter.cc
@@ -108,7 +108,7 @@ void Interpreter::Impl::Build(const Target& target,
const std::string& model_name) {
CHECK(!var_map_.empty());
VLOG(3) << "Program:\n" << *program_;
- // applay frontend pass
+ // apply frontend pass
std::unordered_set<std::string> fetch_var_ids;
for (auto& name : fetch_names_) {
CHECK(var_map_.count(name)) << "var_map finds no fetch var " << name;
@@ -496,7 +496,7 @@ bool CanOpMergeNode(
}

// TODO(phlrain): need update here
- // diffrent loop range can merge, like [128, 128, 1], with [128, 128]
+ // different loop range can merge, like [128, 128, 1], with [128, 128]
if ((cinn::hlir::framework::pir::CompatibleInfo::OpKind(*cur_op) !=
cinn::hlir::framework::kBroadcast) &&
(op_path_info.at(cur_op).loop_ranges !=
@@ -517,7 +517,7 @@ bool ShouldOutputPreNode(
}

// TODO(phlrain): need update here
- // diffrent loop range can merge, like [128, 128, 1], with [128, 128]
+ // different loop range can merge, like [128, 128, 1], with [128, 128]
if ((cinn::hlir::framework::pir::CompatibleInfo::OpKind(*cur_op) !=
cinn::hlir::framework::kBroadcast) &&
(op_path_info.at(cur_op).loop_ranges !=
@@ -592,7 +592,7 @@ std::vector<GroupClusterNode> GroupSplit(cinn::dialect::GroupOp group_op) {
}
// stage 2 merge
// for now we merge node in same pass
- // only for vertial fuse
+ // only for vertical fuse
std::vector<GroupClusterNode> second_stage_output = first_stage_output;
while (true) {
bool fused = false;
4 changes: 2 additions & 2 deletions paddle/cinn/hlir/framework/graph.h
@@ -189,7 +189,7 @@ class Graph : public cinn::common::Graph {
SharedGroupHasher,
SharedGroupComparator>
producer_groups_;
- // output grous
+ // output groups
std::unordered_set<std::shared_ptr<Group>,
SharedGroupHasher,
SharedGroupComparator>
@@ -271,7 +271,7 @@ class Graph : public cinn::common::Graph {
const std::unordered_set<std::string>& fetch_var_ids = {});

/**
- * \brief Genereate the python test code for group test
+ * \brief Generate the python test code for group test
*/
std::string GenerateGroupPythonCode(
const std::vector<Node*>& group,
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/framework/instruction_test.cc
@@ -104,7 +104,7 @@ TEST(Instruction, RunWithRawPodArgs) {
const auto& shape = Shape({M, N});

std::map<std::string, cinn_pod_value_t> name2podargs;
- // case 1: create cinn_pod_value_t arguments dicrectly
+ // case 1: create cinn_pod_value_t arguments directly
std::vector<cinn_buffer_t> args_buffer(
3); // store {"x", "y", "z"} buffer objects
auto* default_memory_mng = MemoryManager::Global().RetrieveSafely(
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/framework/memory.h
@@ -37,7 +37,7 @@ class MemoryInterface {
};

/**
- * MemoryManager holds a map of MemoryInterface for each articture.
+ * MemoryManager holds a map of MemoryInterface for each architecture.
*/
class MemoryManager final {
public:
4 changes: 2 additions & 2 deletions paddle/cinn/hlir/framework/node.h
@@ -77,7 +77,7 @@ class Node : public cinn::common::GraphNode {
std::tuple<cinn::common::GraphEdge *, cinn::common::GraphEdge *> LinkTo(
NodeData *other);

- // This node determines another node, which means the other node depeneds on
+ // This node determines another node, which means the other node depends on
// this node.
void Controls(NodeData *other);

@@ -161,7 +161,7 @@ class NodeData : public cinn::common::GraphNode {
std::tuple<cinn::common::GraphEdge *, cinn::common::GraphEdge *> LinkTo(
Node *other);

- // This node determines another node, which means the other node depeneds on
+ // This node determines another node, which means the other node depends on
// this node.
void Controls(Node *other);

2 changes: 1 addition & 1 deletion paddle/cinn/hlir/framework/op_lowering_impl.cc
@@ -547,7 +547,7 @@ ir::Expr OpLowererImpl::DoGroupSchedule(
<< ir_sch.GetModule().GetExprs().at(0);
continue;
}
- // find master to computeat.
+ // find master to compute at.
auto master = GetMasterToComputeAt(node,
nodes_in_order,
nodes_inline,
4 changes: 2 additions & 2 deletions paddle/cinn/hlir/framework/op_lowering_impl.h
@@ -29,7 +29,7 @@

// Fusion Op lowering, there are four kinds of lowering function:
// Elementwise/Broadcast/Injective,Reduce,OutEWiseFusable,NonFusible.
- // Elementwise/Broadcast/Injective Ops is with same shcedule.
+ // Elementwise/Broadcast/Injective Ops is with same schedule.
// Reduce,OutEWiseFusable,NonFusible are using different schedule.

namespace cinn {
@@ -183,7 +183,7 @@ class OpLowererImpl : public OpLowererImplBase<GroupPtr> {
const absl::flat_hash_map<std::string, Type>& type_dict_;
const absl::flat_hash_map<std::string, shape_t>& shape_dict_;

- // fucntion name prefix
+ // function name prefix
const std::string func_name_prefix = "fn_";
};

2 changes: 1 addition & 1 deletion paddle/cinn/hlir/framework/op_lowering_impl_base.h
@@ -20,7 +20,7 @@

// Fusion Op lowering, there are four kinds of lowering function:
// Elementwise/Broadcast/Injective,Reduce,OutEWiseFusable,NonFusible.
- // Elementwise/Broadcast/Injective Ops is with same shcedule.
+ // Elementwise/Broadcast/Injective Ops is with same schedule.
// Reduce,OutEWiseFusable,NonFusible are using different schedule.

namespace cinn {
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/framework/op_lowering_test.cc
@@ -208,7 +208,7 @@ TEST(OP_LOWERING, Reduce_Fuse_Broadcast_Softmax) {
{
// softmax
auto A = net_builder.CreateInput(Float(32), {h, w}, "A");
- // redece max
+ // reduce max
auto B = net_builder.ReduceMax(A, {1});
// broadcast
auto C = net_builder.BroadcastTo(B, {h, w}, {0});
4 changes: 2 additions & 2 deletions paddle/cinn/hlir/framework/op_lowering_util.cc
@@ -622,7 +622,7 @@ void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch, // NOLINT
// the loop size at axis is 1, need remove
axes_shift_num[j] = -1;
} else if (axes[j] > idx) {
- // the axies value need left shift
+ // the axes value need left shift
axes_shift_num[j]++;
}
}
@@ -902,7 +902,7 @@ Node* GetMasterToComputeAt(
done_schedule.insert(tmp);
}
}
- // remove all consuemr reducer node of node from done_schedule.
+ // remove all consumer reducer node of node from done_schedule.
std::unordered_set<Node*> visited;
std::queue<Node*> candidates;
candidates.push(node);
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/framework/pir_compiler.h
@@ -28,7 +28,7 @@ namespace hlir {
namespace framework {

// TODO(Aurelius84): Need abstract this logic to implement Proxy for
- // the co-existance with GraphCompiler.
+ // the co-existence with GraphCompiler.
class PirCompiler final {
public:
PirCompiler(const ::pir::Program& prog,
@@ -71,7 +71,7 @@ class DefaultHorizontalFusePass final : public HorizontalFusePass {
break;
}

- // if can't fuse to othors Groups, new Groups.
+ // if can't fuse to other Groups, new Groups.
if (!fusionable) {
fusionable_consumers.push_back({candidate});
}
@@ -72,7 +72,7 @@ class DefaultInputFusePass final : public InputFusePass {
break;
}

- // if can't fuse to othors Groups, new Groups.
+ // if can't fuse to other Groups, new Groups.
if (!fusionable) {
fusionable_consumers.push_back({candidate});
}
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/pass/op_fusion_pass.cc
@@ -361,7 +361,7 @@ class OpFusionPassHelper : public FusionHelperBase {
struct FusionRelation {
// producer -> consumer
std::unordered_set<framework::OpPatternKind> op_kind = {};
- // producer -> fusion sonsumer
+ // producer -> fusion consumer
std::unordered_map<framework::OpPatternKind, ConditionFunction>
fusion_op_kind = {};
};
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/pe/load_x86_params.cc
@@ -156,7 +156,7 @@ void LoadX86DefaultParams(
InputX86Param(model_data,
"X86ScheduleConv input 1 256 56 56 weight 512 256 1 1 stride 2 "
"2 padding 0 0 dilation 1 1",
- // Todo: tempory fix, enhance alterlayout and test performance
+ // Todo: temporary fix, enhance alterlayout and test performance
{{"ic_bn", {1, 256}},
{"oc_bn", {16, 32}},
{"ow_bn", {7, 4}},
4 changes: 2 additions & 2 deletions paddle/cinn/hlir/pe/reduction.cc
@@ -287,7 +287,7 @@ std::vector<Tensor> WarpReduce(const ir::Tensor& A,
reduce_width = reduce_width * A->shape[idx].as_int32();
}

- // comput tmp output shape.
+ // compute tmp output shape.
std::vector<Expr> tmp_shape(A->shape.begin(),
A->shape.begin() + shape_size_without_reduce_dim);
tmp_shape.push_back(Expr(32));
@@ -390,7 +390,7 @@ std::vector<ir::Tensor> BlockReduceInternal(const ir::Tensor& A,
auto tmp_out = Compute(
tmp_shape,
[=](const std::vector<Expr>& indexs) -> Expr {
- // comput index map from output to input.
+ // compute index map from output to input.
auto last_index = indexs.back();
std::vector<Expr> input_indexs(indexs.begin(),
indexs.begin() + indexs.size() - 1);
14 changes: 7 additions & 7 deletions paddle/cinn/hlir/pe/schedule.cc
@@ -220,7 +220,7 @@ void MatmulScheduleCPU(poly::StageMap stages,
int packed_last_dim = packedB->shape[packedB_dims - 1].as_int32();
int packedB_split_factor =
GetBetterSplitFactor(packed_last_dim, basic_split_factor);
- // tempory solution for indivisible case
+ // temporary solution for indivisible case
if (packedB_split_factor >= 8 &&
packed_last_dim % packedB_split_factor == 0) {
stages[packedB]->Vectorize(packedB_dims - 1, packedB_split_factor);
@@ -243,7 +243,7 @@ void MatmulScheduleCPU(poly::StageMap stages,
std::vector<poly::Iterator> all_axes_inner;
bool is_m_splited = false;
bool is_n_splited = false;
- // tempory solution for isl for1 wrong elimination
+ // temporary solution for isl for1 wrong elimination
if (bm >= 4 && M != bm) {
auto axes = stages[output]->Split(i_axis, bm);
all_axes_outer.push_back(std::get<0>(axes));
@@ -305,7 +305,7 @@ void MatmulScheduleCPU(poly::StageMap stages,
std::swap(all_axes[out_axis_dims - 1], all_axes[out_axis_dims - 2]);
}
stages[output]->Reorder(all_axes);
- // vectorize output's last dimemsion
+ // vectorize output's last dimension
auto out_domain = stages[output]->transformed_domain();
auto range =
poly::isl_set_get_axis_range(out_domain.get(), out_axis_dims - 1);
@@ -315,7 +315,7 @@ void MatmulScheduleCPU(poly::StageMap stages,
int out_last_dim = max.get_num_si() + 1;
int output_split_factor =
GetBetterSplitFactor(out_last_dim, basic_split_factor);
- // tempory solution for indivisible case
+ // temporary solution for indivisible case
if (output_split_factor >= 8 && packed_last_dim % output_split_factor == 0) {
stages[output]->Vectorize(out_axis_dims - 1, output_split_factor);
}
@@ -945,7 +945,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages,
// oh_inner, ow, oc_inner, ic, kh, kw]
stages[CC]->ComputeAt2(stages[packed_out], 0);
VLOG(3) << "cache write shape: " << utils::Join(CC->shape, ", ");
- // tempory solution because reorder may be wrong before ComputeAt
+ // temporary solution because reorder may be wrong before ComputeAt
// reorder: [batch_oc_outer_oh_outer_fused, oh_inner, ow_outer, ow_inner,
// oc_inner] -> [batch_oc_outer_oh_outer_fused, ow_outer, oh_inner, ow_inner,
// oc_inner]
@@ -1082,7 +1082,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages,
<< stages[packed_out]->transformed_domain();
VLOG(3) << "stages[CC]->transformed_domain()"
<< stages[CC]->transformed_domain();
- // tempory solution because reordering before computeAt may be wrong
+ // temporary solution because reordering before computeAt may be wrong
// reorder: [batch, oc_outer, oh_outer, oh_inner, ow_outer, ow_inner,
// oc_inner] -> [batch, oc_outer, oh_outer, ow_outer, oh_inner, ow_inner,
// oc_inner]
@@ -2700,7 +2700,7 @@ void CudaScheduleInjectiveWithVectorize(poly::Stage *stage,
// the first bind position from tail
int bind_idx = stage->n_out_dims() - 1;
// it will add a new dim by split before vectorize, but the new dim will
- // be eleminated when vectorizng, so the bind_idx does't need to increase
+ // be eliminated when vectorizing, so the bind_idx does't need to increase
if (vector_width > 1) {
stage->Split(bind_idx, vector_width);
}
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/pe/schedule.h
@@ -182,7 +182,7 @@ void CudaScheduleMul(poly::StageMap stages,
const std::vector<int> &output_shape,
const cinn::common::Target &target);

- // reduce shedules.
+ // reduce schedules.
void CudaReduceSchedule(poly::StageMap stages,
ir::Tensor output,
int last_dimension_num,
6 changes: 3 additions & 3 deletions paddle/cinn/hlir/pe/transform.h
@@ -154,7 +154,7 @@ ir::Tensor Reverse(const ir::Tensor& input,
/**
* @brief Perform meta op Transpose
* @param input The input tensor
- * @param axis tranpsoe axis
+ * @param axis transpose axis
* @param output_name the name of the output tensor
*/
ir::Tensor Transpose(
@@ -197,8 +197,8 @@ ir::Tensor SliceSymbolic(const ir::Tensor& A,
* @param input The input tensor
* @param assign The assign tensor
* @param axis select axis
- * @param starts select reigon starts
- * @param strides select reigon strides
+ * @param starts select region starts
+ * @param strides select region strides
* @param output_name the name of the output tensor
*/
ir::Tensor SliceAssign(
2 changes: 1 addition & 1 deletion paddle/cinn/ir/buffer.h
@@ -83,7 +83,7 @@ class _Buffer_ : public ExprNode<_Buffer_> {
int offset_factor{0};
//! The place the buffer locates.
Target target{UnkTarget()};
- //! Aignment requirement of data pointer in bytes.
+ //! Alignment requirement of data pointer in bytes.
mutable int data_alignment{0};
//! The memory type of the buffer.
MemoryType memory_type{MemoryType::Heap};
2 changes: 1 addition & 1 deletion paddle/cinn/ir/group_schedule/st_shape_group_scheduler.cc
@@ -373,7 +373,7 @@ void StaticShapeGroupScheduler::DoLoopAlignment() {
source_loops = {source_loop};
}

- // 3. Rerorder loops to match the target loops
+ // 3. Reorder loops to match the target loops
if (total_source_extent == total_master_loop_extents) {
ir_sch_->Reorder(node->id(), recover_loop_order);
}
@@ -32,7 +32,7 @@ void ComputeInlineTactic::Init(ScheduleContext* context) {

void ComputeInlineTactic::Apply(ir::IRSchedule* sch,
const std::string& block_id) {
- // TODO(LiuYang): Compute of ops will be rewrited so that we
+ // TODO(LiuYang): Compute of ops will be rewritten so that we
// don't use it in dynamic group_schedule rules temporarily.
// if (IsProhibitScheduleExternCallBlock(node->Block())) {
// return;
2 changes: 1 addition & 1 deletion paddle/cinn/ir/schedule/impl/loop_transformation.cc
@@ -166,7 +166,7 @@ std::vector<Expr> DyScheduleImpl::Split(const Expr& loop,
CINN_IR_SCHEDULE_END(this->err_msg_level_);
}

- // TODO(@LiuYang): now -1 can't exsit in factors,
+ // TODO(@LiuYang): now -1 can't exist in factors.
std::vector<Expr> DyScheduleImpl::Split(const Expr& loop,
const std::vector<Expr>& factors) {
CINN_IR_SCHEDULE_BEGIN();
4 changes: 2 additions & 2 deletions paddle/cinn/ir/schedule/ir_schedule_util.h
@@ -128,7 +128,7 @@ void ReplaceExpr(Expr* source,
* and change -1 to positive integer.
* @param factors The original factors.
* @param total_extent The extent of the loop to be splitted.
- * @return return The valiated factors.
+ * @return return The validated factors.
*/
std::vector<int> ValidateFactors(const std::vector<int>& factors,
int total_extent,
@@ -312,7 +312,7 @@ IterRange RangeUnion(const IterRange& range1, const IterRange& range2);
* block
* \param is_store_provided Whether Store nodes of the block provide the
* tensor, true means it is in compute_at case, otherwise false means in
- * reverse_compuate_at case
+ * reverse_compute_at case
* \return Each index's range and can_keep_loop flag of block's tensor.
* Indicating the buffer region being required.
*/
6 changes: 3 additions & 3 deletions paddle/cinn/ir/schedule/schedule_desc.h
@@ -31,10 +31,10 @@ namespace ir {
// records all transform/getting operations executed by a corresponding
// ir::IRSchedule. A ScheduleDesc can be serialized to JSON format and saved to
// file. For deserializing, it can be re-applied to a new IRSchedule that is
- // initialzied by a semantics-equal original ir::ModuleExpr, and then achieves
+ // initialized by a semantics-equal original ir::ModuleExpr, and then achieves
// the same result.

- class IRSchedule;  // forward declartion to avoid cross-reference
+ class IRSchedule;  // forward declaration to avoid cross-reference
class ScheduleDesc {
public:
// each operation executed through IRSchedule is recorded as a step
@@ -77,7 +77,7 @@ class ScheduleDesc {
void Pop();

/**
- * \brief Replay this description to a new IRSchedule that is initialzied by a
+ * \brief Replay this description to a new IRSchedule that is initialized by a
* semantics-equal original ModuleExpr.
* @param schedule The original IRSchedule to be replayed the description on.
* @param without_post_schedule Determine whether to delete the post