Remove ActorMsg::user_data
liujuncheng committed Jan 16, 2023
2 parents f939cae + 1e1bddf commit 0559f15
Showing 95 changed files with 2,914 additions and 805 deletions.
2 changes: 2 additions & 0 deletions cmake/cuda.cmake
@@ -40,6 +40,7 @@ if(BUILD_CUDA)
   if(OF_CUDA_LINK_DYNAMIC_LIBRARY)
     list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublas)
     list(APPEND VENDOR_CUDA_LIBRARIES CUDA::curand)
+    list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cusolver)
     list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cufft)
     if(CUDA_VERSION VERSION_GREATER_EQUAL "10.1")
       list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublasLt)
@@ -53,6 +54,7 @@ if(BUILD_CUDA)
     list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublas_static)
     list(APPEND VENDOR_CUDA_LIBRARIES CUDA::curand_static)
    list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cufft_static)
+    list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cusolver_static)
     if(CUDA_VERSION VERSION_GREATER_EQUAL "10.1")
       list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublasLt_static)
     endif()
4 changes: 2 additions & 2 deletions cmake/oneflow.cmake
@@ -190,7 +190,7 @@ generate_functional_api_and_pybind11_cpp(FUNCTIONAL_GENERATED_SRCS FUNCTIONAL_GE
                                          FUNCTIONAL_PYBIND11_SRCS ${PROJECT_SOURCE_DIR})
 oneflow_add_library(of_functional_obj STATIC ${FUNCTIONAL_GENERATED_SRCS}
                     ${FUNCTIONAL_GENERATED_HRCS})
-target_link_libraries(of_functional_obj LLVMSupportWithHeader glog::glog)
+target_link_libraries(of_functional_obj LLVMSupportWithHeader glog::glog fmt)
 add_dependencies(of_functional_obj prepare_oneflow_third_party)

 if(BUILD_PYTHON)
@@ -207,7 +207,7 @@ if(BUILD_PYTHON)
     of_functional_tensor_obj STATIC ${FUNCTIONAL_TENSOR_GENERATED_SRCS}
     ${FUNCTIONAL_TENSOR_GENERATED_HRCS} ${FUNCTIONAL_OPS_GENERATED_SRCS}
     ${FUNCTIONAL_OPS_GENERATED_HRCS})
-  target_link_libraries(of_functional_tensor_obj LLVMSupportWithHeader glog::glog)
+  target_link_libraries(of_functional_tensor_obj LLVMSupportWithHeader glog::glog fmt)
   add_dependencies(of_functional_tensor_obj prepare_oneflow_third_party)
   target_include_directories(of_functional_tensor_obj PRIVATE ${Python_INCLUDE_DIRS}
                                                               ${Python_NumPy_INCLUDE_DIRS})
2 changes: 1 addition & 1 deletion cmake/op_schema.cmake
@@ -87,5 +87,5 @@ set_source_files_properties(${GENERATED_OP_SCHEMA_H} ${GENERATED_OP_SCHEMA_CPP}
                             TRUE)

 oneflow_add_library(of_op_schema OBJECT ${GENERATED_OP_SCHEMA_H} ${GENERATED_OP_SCHEMA_CPP})
-target_link_libraries(of_op_schema LLVMSupportWithHeader glog::glog)
+target_link_libraries(of_op_schema LLVMSupportWithHeader glog::glog fmt)
 add_dependencies(of_op_schema prepare_oneflow_third_party)
4 changes: 2 additions & 2 deletions cmake/third_party/trt_flash_attention.cmake
@@ -5,10 +5,10 @@ find_package(Threads)
 set(TRT_FLASH_ATTENTION_PROJECT trt_flash_attention)

 set(TRT_FLASH_ATTENTION_URL
-    https://github.com/Oneflow-Inc/trt_flash_attention/archive/dac29803b711535ed11fcd6ca4a0acfb0f2f8d12.zip
+    https://github.com/Oneflow-Inc/trt_flash_attention/archive/d8b74631eb811c95a0d20f247238db6e91acafe3.zip
 )
 use_mirror(VARIABLE TRT_FLASH_ATTENTION_URL URL ${TRT_FLASH_ATTENTION_URL})
-set(TRT_FLASH_ATTENTION_MD5 85d2bcb87f21a58cd8c4dbfa4ae8f2a8)
+set(TRT_FLASH_ATTENTION_MD5 9e0e822ce1450e11515533fbe32e58a9)

 set(TRT_FLASH_ATTENTION_INSTALL_DIR ${THIRD_PARTY_DIR}/trt_flash_attention)
 set(TRT_FLASH_ATTENTION_INCLUDE_DIR ${TRT_FLASH_ATTENTION_INSTALL_DIR}/include CACHE PATH "" FORCE)
1 change: 1 addition & 0 deletions docs/source/linalg.rst
@@ -20,3 +20,4 @@ Matrix Properties
     diagonal
     inv
     cross
+    det
1 change: 1 addition & 0 deletions docs/source/oneflow.rst
@@ -317,6 +317,7 @@ Reduction Ops
     min
     mean
     median
+    mode
     prod
     nansum
     std
1 change: 1 addition & 0 deletions docs/source/tensor.rst
@@ -285,6 +285,7 @@ Tensor class reference
     Tensor.min
     Tensor.minimum
     Tensor.mish
+    Tensor.mode
     Tensor.mul
     Tensor.mul_
     Tensor.nansum
3 changes: 3 additions & 0 deletions external/fmt/CMakeLists.txt
@@ -6,6 +6,9 @@ FetchContent_Declare(fmt URL ${FMT_URL} URL_HASH MD5=${FMT_MD5})

 FetchContent_MakeAvailable(fmt)

+# Clang doesn't support __float128 when compiling CUDA
+target_compile_definitions(fmt PUBLIC FMT_USE_FLOAT128=0)
+
 install(
   TARGETS fmt
   EXPORT oneflow
2 changes: 2 additions & 0 deletions oneflow/api/python/env/env.cpp
@@ -23,6 +23,7 @@ limitations under the License.
 #include "oneflow/core/vm/virtual_machine.h"
 #include "oneflow/core/framework/shut_down_util.h"
 #include "oneflow/core/device/cuda_util.h"
+#include "oneflow/core/common/mem_util.h"

 #ifdef WITH_CUDA
 #include <cuda.h>
@@ -94,6 +95,7 @@ ONEFLOW_API_PYBIND11_MODULE("", m) {
   m.def("SetCudaDeviceIndex", &SetCudaDeviceIndex);
   m.def("CudaSynchronize", &CudaSynchronize);
   m.def("GetCUDAMemoryUsed", &GetCUDAMemoryUsed);
+  m.def("GetCPUMemoryUsed", &GetCPUMemoryUsed);
   m.def(
       "_get_device_properties",
       [](int device) -> cudaDeviceProp* { return GetDeviceProperties(device); },
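A minimal sketch of how the new binding is reached from Python; the oneflow._oneflow_internal module path is an assumption based on OneFlow's usual pybind11 layout, not something stated in this diff:

import oneflow as flow

# GetCPUMemoryUsed() first synchronizes the local rank's VM, then reports the
# process resident set size via ProcessMemUsage (see mem_util.cpp below).
rss = flow._oneflow_internal.GetCPUMemoryUsed()
print(f"CPU memory used (RSS): {rss}")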
25 changes: 25 additions & 0 deletions oneflow/api/python/framework/tensor.cpp
@@ -241,6 +241,28 @@ static PyObject* PyTensorObject_is_pinned(PyObject* self, PyObject* unused) {
   END_HANDLE_ERRORS
 }

+static PyObject* PyTensorObject_offload(PyObject* self, PyObject* unused) {
+  HANDLE_ERRORS
+  const auto& t = PyTensor_Unpack(self);
+  CHECK_JUST(t->offload());
+  Py_RETURN_NONE;
+  END_HANDLE_ERRORS
+}
+
+static PyObject* PyTensorObject_load(PyObject* self, PyObject* unused) {
+  HANDLE_ERRORS
+  const auto& t = PyTensor_Unpack(self);
+  CHECK_JUST(t->load());
+  Py_RETURN_NONE;
+  END_HANDLE_ERRORS
+}
+
+static PyObject* PyTensorObject_is_offloaded(PyObject* self, PyObject* unused) {
+  HANDLE_ERRORS
+  return functional::CastToPyObject(CHECK_JUST(PyTensor_Unpack(self)->is_offloaded()));
+  END_HANDLE_ERRORS
+}
+
 static PyObject* PyTensorObject_is_floating_point(PyObject* self, PyObject* unused) {
   HANDLE_ERRORS
   if (PyTensor_Unpack(self)->dtype()->is_floating_point()) {
@@ -509,6 +531,9 @@ static PyMethodDef PyTensorObject_methods[] = {
     {"contiguous_", PyTensorObject_contiguous_, METH_NOARGS, NULL},
     {"pin_memory", PyTensorObject_pin_memory, METH_NOARGS, NULL},
     {"is_pinned", PyTensorObject_is_pinned, METH_NOARGS, NULL},
+    {"offload", PyTensorObject_offload, METH_NOARGS, NULL},
+    {"load", PyTensorObject_load, METH_NOARGS, NULL},
+    {"is_offloaded", PyTensorObject_is_offloaded, METH_NOARGS, NULL},
     {"is_floating_point", PyTensorObject_is_floating_point, METH_NOARGS, NULL},
     {"requires_grad_", (PyCFunction)PyTensorObject_requires_grad_, METH_VARARGS | METH_KEYWORDS,
      NULL},
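A hypothetical usage sketch for the offload API registered above; the semantics (release device storage, restore it on demand) are inferred from the method names and are not documented in this diff:

import oneflow as flow

t = flow.randn(1024, 1024, device="cuda")
t.offload()       # presumably frees device memory while keeping the data recoverable
assert t.is_offloaded()
t.load()          # presumably materializes the data back on the tensor's device
assert not t.is_offloaded()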
2 changes: 2 additions & 0 deletions oneflow/api/python/framework/tensor_functions.cpp
@@ -318,6 +318,7 @@ DIRECT_PASS_FUNC(PyTensorObject_unsqueeze, functional::unsqueeze)
 DIRECT_PASS_FUNC(PyTensorObject_max, functional::max)
 DIRECT_PASS_FUNC(PyTensorObject_min, functional::min)
 DIRECT_PASS_FUNC(PyTensorObject_median, functional::median)
+DIRECT_PASS_FUNC(PyTensorObject_mode, functional::mode)
 DIRECT_PASS_FUNC(PyTensorObject_pow, functional::pow)
 DIRECT_PASS_FUNC(PyTensorObject_chunk, functional::chunk)
 DIRECT_PASS_FUNC(PyTensorObject_split, functional::split)
@@ -1005,6 +1006,7 @@ PyMethodDef PyTensorObject_extra_methods[] = {
     {"max", (PyCFunction)PyTensorObject_max, METH_VARARGS | METH_KEYWORDS, NULL},
     {"min", (PyCFunction)PyTensorObject_min, METH_VARARGS | METH_KEYWORDS, NULL},
     {"median", (PyCFunction)PyTensorObject_median, METH_VARARGS | METH_KEYWORDS, NULL},
+    {"mode", (PyCFunction)PyTensorObject_mode, METH_VARARGS | METH_KEYWORDS, NULL},
     {"pow", (PyCFunction)PyTensorObject_pow, METH_VARARGS | METH_KEYWORDS, NULL},
     {"chunk", (PyCFunction)PyTensorObject_chunk, METH_VARARGS | METH_KEYWORDS, NULL},
     {"split", (PyCFunction)PyTensorObject_split, METH_VARARGS | METH_KEYWORDS, NULL},
63 changes: 63 additions & 0 deletions oneflow/core/autograd/gradient_funcs/det.cpp
@@ -0,0 +1,63 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/common/container_util.h"
#include "oneflow/core/functional/functional_api.yaml.h"

namespace oneflow {
namespace one {

struct DetCaptureState : public AutoGradCaptureState {
bool requires_grad = false;
size_t input_index = 0;
size_t output_index = 0;
};

class Det : public OpExprGradFunction<DetCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
Maybe<void> Capture(DetCaptureState* ctx, const TensorTuple& inputs, const TensorTuple& outputs,
const AttrMap& attrs) const override {
ctx->requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
if (ctx->requires_grad) {
ctx->input_index = ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));
ctx->output_index = ctx->SaveTensorForBackward(JUST(VectorAt(outputs, 0)));
}
return Maybe<void>::Ok();
}
Maybe<void> Apply(const DetCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
if (ctx->requires_grad) {
const auto& output = JUST(VectorAt(ctx->SavedTensors(), ctx->output_index));
const auto& input = JUST(VectorAt(ctx->SavedTensors(), ctx->input_index));
const auto& dy = JUST(VectorAt(out_grads, 0));
const auto& dy_unsqueeze = JUST(functional::UnsqueezeMultiple(dy, {-2, -1}, dy->ndim() + 2));
const auto& output_unsqueeze =
JUST(functional::UnsqueezeMultiple(output, {-2, -1}, output->ndim() + 2));
JUST(VectorAt(*in_grads, 0)) = JUST(functional::Transpose2dim(
JUST(functional::Mul(
dy_unsqueeze, JUST(functional::Mul(JUST(functional::Inv(input)), output_unsqueeze)))),
-2, -1));
}
return Maybe<void>::Ok();
}
};

REGISTER_OP_EXPR_GRAD_FUNCTION("det", Det);

} // namespace one
} // namespace oneflow
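As a cross-check on the Apply above: the backward implements the standard determinant gradient. With $y = \det(A)$ and incoming gradient $\bar{y}$,

$$\frac{\partial \mathcal{L}}{\partial A} = \bar{y}\,\det(A)\,A^{-\top},$$

which matches the code: functional::Inv(input) is $A^{-1}$, output_unsqueeze broadcasts $\det(A)$ over the trailing two dimensions, dy_unsqueeze does the same for $\bar{y}$, and the final Transpose2dim(..., -2, -1) supplies the transpose.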
62 changes: 62 additions & 0 deletions oneflow/core/autograd/gradient_funcs/mode.cpp
@@ -0,0 +1,62 @@
/*
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/framework/attr_map.h"
#include "oneflow/core/framework/op_expr_grad_function.h"
#include "oneflow/core/framework/op_builder.h"
#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/functional/sequence_function.h"
#include "oneflow/core/common/container_util.h"

namespace oneflow {
namespace one {

struct ModeCaptureState : public AutoGradCaptureState {
bool requires_grad = false;
};

class Mode : public OpExprGradFunction<ModeCaptureState> {
public:
Maybe<void> Init(const OpExpr& op) override { return Maybe<void>::Ok(); }
Maybe<void> Capture(ModeCaptureState* ctx, const TensorTuple& inputs, const TensorTuple& outputs,
const AttrMap& attrs) const override {
ctx->requires_grad = JUST(VectorAt(inputs, 0))->requires_grad();
if (ctx->requires_grad) {
ctx->SaveTensorForBackward(JUST(VectorAt(inputs, 0)));
ctx->SaveTensorForBackward(JUST(VectorAt(outputs, 1)));
}
return Maybe<void>::Ok();
}
Maybe<void> Apply(const ModeCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
if (ctx->requires_grad) {
in_grads->resize(1);
const auto& input = JUST(VectorAt(ctx->SavedTensors(), 0));
const auto& indices = JUST(functional::Unsqueeze(JUST(VectorAt(ctx->SavedTensors(), 1)), -1));
const auto& dout = JUST(functional::Unsqueeze(JUST(VectorAt(out_grads, 0)), -1));
JUST(VectorAt(*in_grads, 0)) = JUST(functional::DimScatterUpdate(
JUST(functional::Constant(*(input->shape()), Scalar(0), *dout->dtype(),
JUST(dout->device()))),
-1, indices, dout, /*inplace*/ false));
}
return Maybe<void>::Ok();
}
};

REGISTER_OP_EXPR_GRAD_FUNCTION("mode", Mode);

} // namespace one
} // namespace oneflow
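A small Python sketch of what this backward does; it is hypothetical and assumes flow.mode mirrors torch.mode, returning (values, indices) over a dimension, which is consistent with the two outputs captured above:

import oneflow as flow

x = flow.tensor([[1.0, 2.0, 2.0], [3.0, 3.0, 1.0]], requires_grad=True)
values, indices = flow.mode(x, dim=-1)  # values: [2., 3.]; tie-breaking of indices is implementation-defined
values.sum().backward()
# x.grad is zero except for a 1 scattered at each row's mode index --
# exactly the DimScatterUpdate of the unsqueezed out_grad in Apply above.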
14 changes: 14 additions & 0 deletions oneflow/core/common/mem_util.cpp
@@ -14,6 +14,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 #include "oneflow/core/common/mem_util.h"
+#include "oneflow/core/vm/vm_util.h"
+#include "oneflow/core/vm/virtual_machine.h"

 #include <unistd.h>
 #include <sys/sysinfo.h>
@@ -30,6 +32,11 @@ struct ProcStat {
   long rss = 0;
 };

+Maybe<void> CPUSynchronize() {
+  if (Singleton<VirtualMachine>::Get() != nullptr) { return vm::CurrentRankSync(); }
+  return Maybe<void>::Ok();
+}
+
 }  // namespace

 // Reference: https://stackoverflow.com/questions/669438/how-to-get-memory-usage-at-runtime-using-c
@@ -59,4 +66,11 @@ void ProcessMemUsage(double* vm_usage, double* resident_set) {
 #endif  // __linux__
 }

+Maybe<double> GetCPUMemoryUsed() {
+  JUST(CPUSynchronize());
+  double vm_ = 0, rss_ = 0;
+  ProcessMemUsage(&vm_, &rss_);
+  return rss_;
+}
+
 }  // namespace oneflow
2 changes: 2 additions & 0 deletions oneflow/core/common/mem_util.h
@@ -21,9 +21,11 @@ limitations under the License.
 #include <string>

 #include "oneflow/core/common/util.h"
+#include "oneflow/core/common/maybe.h"

 namespace oneflow {
 void ProcessMemUsage(double* vm_usage, double* resident_set);
+Maybe<double> GetCPUMemoryUsed();
 }  // namespace oneflow

 #define LOG_MEM(...) \
1 change: 1 addition & 0 deletions oneflow/core/common/small_vector.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef ONEFLOW_CORE_COMMON_SMALL_VECTOR_H_
 #define ONEFLOW_CORE_COMMON_SMALL_VECTOR_H_

+#include <glog/logging.h>
 #include "llvm/ADT/SmallVector.h"

 namespace oneflow {
33 changes: 21 additions & 12 deletions oneflow/core/common/stride.cpp
@@ -21,20 +21,24 @@ limitations under the License.

 namespace oneflow {

+Stride::Stride(const ShapeView& shape) {
+  const int64_t ndim = shape.NumAxes();
+  resize(ndim);
+  if (ndim > 0 && shape.elem_cnt() > 0) {
+    std::exclusive_scan(shape.rbegin(), shape.rend(), rbegin(), (int64_t)1, std::multiplies<>{});
+  } else if (ndim > 0 && shape.elem_cnt() == 0) {
+    // 0-size shape
+    small_vector<int64_t, kMaxNumDims> tmp_shape(ndim);
+    for (int64_t i = 0; i < ndim; ++i) { tmp_shape[i] = shape.At(i) > 0 ? shape.At(i) : 1; }
+    std::exclusive_scan(tmp_shape.rbegin(), tmp_shape.rend(), rbegin(), (int64_t)1,
+                        std::multiplies<>{});
+  }
+}
+
 Stride::Stride(const Shape& shape) {
   if (shape.is_initialized()) {
-    const int64_t ndim = shape.NumAxes();
-    resize(shape.NumAxes());
-    if (ndim > 0 && shape.elem_cnt() > 0) {
-      std::exclusive_scan(shape.dim_vec().rbegin(), shape.dim_vec().rend(), rbegin(), (int64_t)1,
-                          std::multiplies<>{});
-    } else if (ndim > 0 && shape.elem_cnt() == 0) {
-      // 0-size shape
-      small_vector<int64_t, kMaxNumDims> tmp_shape(ndim);
-      for (int64_t i = 0; i < ndim; ++i) { tmp_shape[i] = shape.At(i) > 0 ? shape.At(i) : 1; }
-      std::exclusive_scan(tmp_shape.rbegin(), tmp_shape.rend(), rbegin(), (int64_t)1,
-                          std::multiplies<>{});
-    }
+    ShapeView shape_view(shape);
+    new (this) Stride(shape_view);
   }
 }

@@ -65,4 +69,9 @@ void Stride::ToProto(Int64ListProto* ret) const {
   *(ret->mutable_dim()) = PbRf<int64_t>(begin(), end());
 }

+std::ostream& operator<<(std::ostream& out, const Stride& stride) {
+  out << stride.ToString();
+  return out;
+}
+
 }  // namespace oneflow
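A worked example (not from the diff) of the exclusive right-to-left product scan that both constructors now share via the new ShapeView overload, including the 0-size branch:

def strides_of(shape):
    # Mirrors Stride::Stride(const ShapeView&): exclusive product scan from the right.
    acc, out = 1, []
    for d in reversed(shape):
        out.append(acc)
        acc *= max(d, 1)  # 0-size dims are clamped to 1, matching the tmp_shape branch
    return tuple(reversed(out))

assert strides_of((2, 3, 4)) == (12, 4, 1)
assert strides_of((2, 0, 4)) == (4, 4, 1)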