Skip to content

Commit

Permalink
Fix stack getter segfault caused by is_shutting_down and gil (#9681)
Browse files Browse the repository at this point in the history
Fixes https://github.com/Oneflow-Inc/OneTeam/issues/1861 ,在
TryCloseDefaultSession 之前设置 is_shutting_down 为
true(#9502 这个 PR 在
TryCloseDefaultSession 里引入了 Sync 操作),并把 python c 对象的指针保存在 PyFrame
对象里,GetCurrentFrame 里不再需要重复获取,修复因为没有拿到 gil 锁导致的 segfault

Signed-off-by: daquexian <daquexian566@gmail.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
  • Loading branch information
daquexian and mergify[bot] authored Jan 5, 2023
1 parent e721797 commit 19bb0da
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 16 deletions.
38 changes: 23 additions & 15 deletions oneflow/extension/stack/python/stack_getter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,35 @@ namespace py = pybind11;
namespace oneflow {

namespace {
std::string RawPyUnicodeToStdString(const PyObject* py_str) {
std::string PyUnicodeToStdString(const PyObject* py_str) {
return PyBytes_AsString(PyUnicode_AsEncodedString(const_cast<PyObject*>(py_str), "utf-8", "~E~"));
}
static constexpr auto* PyUnicodeToStdString =
DECORATE(&RawPyUnicodeToStdString, ThreadLocalCachedCopiable);
} // namespace

class PyFrame final
: public Frame,
public intrusive::EnableObjectPool<PyFrame, intrusive::kThreadUnsafeAndDisableDestruct> {
public:
PyFrame() : EnableObjectPool(), lineno(0) {}
void __Init__(PyCodeObject* code, int lineno, intrusive::shared_ptr<PyFrame> back) {
this->filename = PyUnicodeToStdString(code->co_filename);
this->funcname = PyUnicodeToStdString(code->co_name);
this->lineno = lineno;
PyFrame()
: EnableObjectPool(),
filename(nullptr),
funcname(nullptr),
cpython_frame(nullptr),
lineno(0) {}
void __Init__(PyFrameObject* frame, intrusive::shared_ptr<PyFrame> back) {
// There is no need to increase the reference count of these cpython objects
// because they must be alive during the lifetime of `PyFrame`.
this->filename = frame->f_code->co_filename;
this->funcname = frame->f_code->co_name;
this->cpython_frame = frame;
this->back = std::move(back);
}
OF_DISALLOW_COPY_AND_MOVE(PyFrame);
~PyFrame() = default;

std::string filename;
std::string funcname;
PyObject* filename;
PyObject* funcname;
PyFrameObject* cpython_frame;
int lineno;
intrusive::shared_ptr<PyFrame> back;
};
Expand All @@ -67,16 +73,17 @@ class PyStackGetter final : public ForeignStackGetter {
auto* frame = PyEval_GetFrame();
// Get the first frame. It assumes `import oneflow` is called in global scope,
while (frame->f_back != nullptr) { frame = frame->f_back; }
current_frame_ = object_pool_.make_shared(frame->f_code, 0, nullptr);
current_frame_ = object_pool_.make_shared(frame, nullptr);
}
// indended to be called in main thread.
intrusive::shared_ptr<Frame> GetCurrentFrame() const override {
if (IsShuttingDown() || !current_frame_) { return nullptr; }
// See `RecordAndEvalFrame` for documentation.
current_frame_->lineno = PyFrame_GetLineNumber(PyEval_GetFrame());
current_frame_->lineno = PyFrame_GetLineNumber(current_frame_->cpython_frame);
return intrusive::shared_ptr<Frame>(current_frame_.get());
}

// bad path, performance is not a concern.
std::string GetFormattedStack(intrusive::shared_ptr<Frame> frame) const override {
if (frame == nullptr) { return " <unknown>\n"; }
std::string buffer;
Expand All @@ -86,7 +93,7 @@ class PyStackGetter final : public ForeignStackGetter {
const auto& lineno = py_frame->lineno;
const std::string line_text = [&]() -> std::string {
std::string line_text;
std::ifstream ifs(py_frame->filename);
std::ifstream ifs(PyUnicodeToStdString(py_frame->filename));
if (!ifs.is_open()) { return "<unknown>"; }
for (int j = 0; j < lineno; ++j) { std::getline(ifs, line_text); }
line_text.erase(line_text.find_last_not_of(' ') + 1); // suffixing spaces
Expand All @@ -95,7 +102,8 @@ class PyStackGetter final : public ForeignStackGetter {
}();
// immitate python's stack trace format
fmt::format_to(std::back_inserter(buffer), " File \"{}\", line {}, in {}\n {}\n",
py_frame->filename, lineno, py_frame->funcname, line_text);
PyUnicodeToStdString(py_frame->filename), lineno,
PyUnicodeToStdString(py_frame->funcname), line_text);
py_frame = py_frame->back.get();
}
return buffer;
Expand Down Expand Up @@ -134,7 +142,7 @@ class PyStackGetter final : public ForeignStackGetter {

void PushFrame(PyFrameObject* frame) {
if (auto* f = frame->f_back) { current_frame_->lineno = PyFrame_GetLineNumber(f); }
current_frame_ = object_pool_.make_shared(frame->f_code, 0, current_frame_);
current_frame_ = object_pool_.make_shared(frame, current_frame_);
}
void PopFrame() {
CHECK_NOTNULL(current_frame_);
Expand Down
2 changes: 1 addition & 1 deletion python/oneflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,8 @@ def is_normal_exit(self):


def atexit_hook(hook):
oneflow.framework.session_context.TryCloseDefaultSession()
__oneflow_global_unique_env.switch_to_shutting_down(hook.is_normal_exit())
oneflow.framework.session_context.TryCloseDefaultSession()


atexit.register(atexit_hook, hook)
Expand Down

0 comments on commit 19bb0da

Please sign in to comment.