Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' into winters009
Browse files Browse the repository at this point in the history
  • Loading branch information
WintersMontagne10335 committed Oct 28, 2023
2 parents ea1e6fc + dbba655 commit 9f95328
Show file tree
Hide file tree
Showing 616 changed files with 17,623 additions and 7,812 deletions.
3 changes: 1 addition & 2 deletions cmake/external/jemalloc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ set(JEMALLOC_DOWNLOAD_DIR
set(JEMALLOC_PROJECT "extern_jemalloc")
set(JEMALLOC_BUILD ${THIRD_PARTY_PATH}/jemalloc/src/extern_jemalloc)
set(JEMALLOC_PREFIX_DIR ${THIRD_PARTY_PATH}/jemalloc)
set(JEMALLOC_URL
${GIT_URL}/jemalloc/jemalloc/releases/download/5.1.0/jemalloc-5.1.0.tar.bz2)
set(JEMALLOC_URL https://paddle-ci.gz.bcebos.com/jemalloc-5.1.0.tar.bz2)
set(JEMALLOC_INSTALL ${THIRD_PARTY_PATH}/install/jemalloc)
set(JEMALLOC_INCLUDE_DIR ${JEMALLOC_INSTALL}/include)

Expand Down
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ set(XPU_XFT_LIB_NAME "libxft.so")
set(XPU_XPTI_LIB_NAME "libxpti.so")

if(NOT DEFINED XPU_BASE_DATE)
set(XPU_BASE_DATE "20230926")
set(XPU_BASE_DATE "20231023")
endif()
set(XPU_XCCL_BASE_VERSION "1.0.53.6")
if(NOT DEFINED XPU_XFT_BASE_VERSION)
Expand Down
16 changes: 10 additions & 6 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -499,12 +499,15 @@ function(cc_test_run TARGET_NAME)
NAME ${TARGET_NAME}
COMMAND ${cc_test_COMMAND} ${cc_test_ARGS}
WORKING_DIRECTORY ${cc_test_DIR})
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT
FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT
FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT
FLAGS_cudnn_deterministic=true)
set_property(
TEST ${TARGET_NAME}
PROPERTY
ENVIRONMENT
FLAGS_cpu_deterministic=true
FLAGS_init_allocated_mem=true
FLAGS_cudnn_deterministic=true
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_BINARY_DIR}/python/paddle/libs:${PADDLE_BINARY_DIR}/python/paddle/base
)
# No unit test should exceed 2 minutes.
if(WIN32)
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150)
Expand Down Expand Up @@ -726,6 +729,7 @@ function(nv_test TARGET_NAME)
# 2. cuda_add_executable does not support ccache.
# Reference: https://cmake.org/cmake/help/v3.10/module/FindCUDA.html
add_executable(${TARGET_NAME} ${nv_test_SRCS})
target_compile_definitions(${TARGET_NAME} PUBLIC STATIC_PADDLE)
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS}
${os_dependency_modules} paddle_gtest_main phi)
Expand Down
5 changes: 5 additions & 0 deletions cmake/hip.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ list(APPEND HIP_CXX_FLAGS -Wno-unused-value)
list(APPEND HIP_CXX_FLAGS -Wno-braced-scalar-init)
list(APPEND HIP_CXX_FLAGS -Wno-return-type)
list(APPEND HIP_CXX_FLAGS -Wno-pragma-once-outside-header)
list(APPEND HIP_CXX_FLAGS -Wno-deprecated-builtins)
list(APPEND HIP_CXX_FLAGS -Wno-switch)
list(APPEND HIP_CXX_FLAGS -Wno-literal-conversion)
list(APPEND HIP_CXX_FLAGS -Wno-constant-conversion)
list(APPEND HIP_CXX_FLAGS -Wno-defaulted-function-deleted)

if(WITH_CINN)
list(APPEND HIP_CXX_FLAGS -std=c++14)
Expand Down
3 changes: 3 additions & 0 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,9 @@ function(prune_pybind_h)
list(APPEND op_list "load_combine")
list(APPEND op_list "tensorrt_engine")

# TODO(ming1753): conditional_block_infer is temporarily reserved here to avoid link errors in functions of standalone_executor
list(APPEND op_list "conditional_block_infer")

# add fused_op in op_list
list(APPEND op_list "fc")
list(APPEND op_list "conv2d_fusion")
Expand Down
121 changes: 121 additions & 0 deletions paddle/cinn/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
```
___ ___ ___
/\__\ /\ \ /\ \
/:/ / ___ \:\ \ \:\ \
/:/ / /\__\ \:\ \ \:\ \
/:/ / ___ /:/__/ _____\:\ \ _____\:\ \
/:/__/ /\__\/::\ \ /::::::::\__\/::::::::\__\
\:\ \ /:/ /\/\:\ \__\:\~~\~~\/__/\:\~~\~~\/__/
\:\ /:/ / \:\/\__\\:\ \ \:\ \
\:\/:/ / \::/ / \:\ \ \:\ \
\::/ / /:/ / \:\__\ \:\__\
\/__/ \/__/ \/__/ \/__/
```


# CINN : Compiler Infrastructure for Neural Networks

CINN is a machine learning compiler and executor for multiple hardware backends.
It is designed to provide multiple layers of APIs that make tensor computation easier to define, faster to execute, and more convenient to extend to new hardware backends.
Currently, it targets x86 CPUs and NVIDIA GPUs.

This project is under active development.

## How it works

CINN lowers a traditional DNN model into a two-level intermediate representation (IR): the high-level IR (HLIR) and the CINN IR.

The HLIR is used to define domain-specific computations and to perform global optimizations on the IR graph;
the CINN IR is used to represent computation semantics and is finally lowered to a hardware backend.

Both levels of IR have similar SSA graph, analysis, and optimization facilities.
Schedule transformations are applied to the CINN IR to optimize it.

For more details, you can refer to:
https://github.com/PaddlePaddle/docs/tree/develop/docs/guides/cinn

## Getting Started

### Compile

Clone PaddlePaddle first.

```
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
mkdir build
cd build
```

Build paddle with cinn:

```
cmake .. -DCINN_ONLY=OFF -DWITH_CINN=ON -DWITH_GPU=ON
```

Build cinn only:

```
cmake .. -DCINN_ONLY=ON -DWITH_CINN=ON -DWITH_GPU=ON
```

And then

```
make -j
```

### Install

Install paddle with cinn:

```
pip install python/dist/paddlepaddle_gpu-xxx.whl
```

Install cinn only:

```
pip install python/dist/cinn_gpu-xxx.whl
```

Then you can import paddle in a Python environment and check whether a Paddle build with CINN is installed.

```
import paddle
paddle.is_compiled_with_cinn()
```

### Concepts

There are two levels of APIs in CINN: the higher level is HLIR and the lower level is CINN IR. Each level has its own core concepts.

In HLIR

- `frontend::Program`, the program that helps to define a machine learning computation.
- `hlir::framework::Tensor`, a multi-dimensional array that helps to manage a memory buffer.
- `hlir::framework::Program`, the final executable program in runtime. It holds many basic executable elements.
- `hlir::framework::Graph`, the graph that represents the structure of a model. Each node in the graph represents an operator (conv2d, relu, mul, etc.).
- `hlir::framework::GraphCompiler`, the compiler that transforms the graph representation(hlir::framework::Graph) of a model into an executable program(hlir::framework::Program).

In CINN IR

- `Compute`, the method to define a computation,
- `Lower`, the method to lower a computation to the corresponding IR,
- `LoweredFunc`, the function defined in CINN IR,
- `Var`, a scalar variable,
- `Expr`, an expression that represents any CINN IR node (no specified Statement node).

## License

CINN is licensed under the [Apache 2.0 license](LICENSE).

## Acknowledgement

CINN learned a lot from the following projects:

- [Halide](https://github.com/halide/Halide): Referenced the design of most IR nodes,
- [TVM](https://github.com/apache/tvm): We learned many ideas including the semantics of some schedule primitives, TOPI, NNVM, and so on,
- [tiramisu](https://github.com/Tiramisu-Compiler): The isl usage, polyhedral compilation, schedule primitive implementation, and so on,
- [tensorflow/xla](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/compiler/xla): Referenced the semantics of the primitive operations.
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/dialect/operator/ir/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# TODO(Aurelius84): new_ir_compiler depends on pd_op_dialect and could
# TODO(Aurelius84): pir_compiler depends on pd_op_dialect and could
# not be found under CINN_ONLY mode
if(NOT CINN_ONLY)
set(CINN_DIALECT_BINARY_DIR
Expand Down
6 changes: 3 additions & 3 deletions paddle/cinn/hlir/dialect/operator/ir/attribute_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/cinn/hlir/framework/new_ir/utils.h"
#include "paddle/cinn/hlir/framework/op.h"
#include "paddle/cinn/hlir/framework/pir/utils.h"
#include "paddle/pir/core/attribute_base.h"
#include "paddle/pir/core/operation.h"

Expand Down Expand Up @@ -51,7 +51,7 @@ struct GroupInfo {
private:
void Initialize() {
op_pattern_kind = hlir::framework::OpPatternKind::kElementWise;
fn_name = hlir::framework::newir::CompatibleInfo::GroupOpsName(ops);
fn_name = hlir::framework::pir::CompatibleInfo::GroupOpsName(ops);
}
};

Expand All @@ -78,7 +78,7 @@ struct GroupInfoAttributeStorage : public pir::AttributeStorage {
};

struct JITInfoAttributeStorage : public pir::AttributeStorage {
using ParamKey = cinn::hlir::framework::newir::CUDAJITInfo;
using ParamKey = cinn::hlir::framework::pir::CUDAJITInfo;
explicit JITInfoAttributeStorage(const ParamKey& key) : data_(key) {}

static JITInfoAttributeStorage* Construct(const ParamKey& key) {
Expand Down
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/dialect/operator/ir/op_attribute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const GroupInfo &GroupInfoAttribute::data() const {
return storage()->GetAsKey();
}

const cinn::hlir::framework::newir::CUDAJITInfo &CUDAJITInfoAttribute::data()
const cinn::hlir::framework::pir::CUDAJITInfo &CUDAJITInfoAttribute::data()
const {
return storage()->GetAsKey();
}
Expand Down
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/dialect/operator/ir/op_attribute.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class CUDAJITInfoAttribute : public pir::Attribute {
return storage() < right.storage();
}

const cinn::hlir::framework::newir::CUDAJITInfo& data() const;
const cinn::hlir::framework::pir::CUDAJITInfo& data() const;
};

} // namespace dialect
Expand Down
12 changes: 12 additions & 0 deletions paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,19 @@ if(NOT CINN_ONLY)
SRCS
group_with_group_merge_pass.cc
op_with_group_merge_pass.cc
cinn_group_lowering_pass.cc
tensor_node.cc
DEPS
pd_op_dialect
pir_compiler
cinn_runtime_dialect)

cinn_cc_library(
pd_to_cinn_pass
SRCS
pd_to_cinn_pass.cc
DEPS
drr
cinn_op_dialect
pd_op_dialect)
endif()
Loading

0 comments on commit 9f95328

Please sign in to comment.