Skip to content

Commit 07fd809

Browse files
cjolivier01 authored and lanking520 committed
fix crash when profiler not enabled (apache#10306)
* fix crash when profiler not enabled * fix * Update graph_executor.cc * Update graph_executor.cc * use nosetests to try and prevent the hang * shutdown after GPU pass * remove temp * remove temp
1 parent 886d47b commit 07fd809

File tree

4 files changed

+29
-24
lines changed

4 files changed

+29
-24
lines changed

src/engine/threaded_engine.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,8 @@ class ThreadedEngine : public Engine {
398398
}
399399

400400
int bulk_size() const override {
401-
return profiler::Profiler::Get()->AggregateRunning() ? 0 : BulkStatusStore::Get()->bulk_size;
401+
const profiler::Profiler *prof = profiler::Profiler::Get();
402+
return (prof && prof->AggregateRunning()) ? 0 : BulkStatusStore::Get()->bulk_size;
402403
}
403404

404405
int set_bulk_size(int bulk_size) override {

src/executor/graph_executor.cc

+3-1
Original file line numberDiff line numberDiff line change
@@ -1348,8 +1348,9 @@ void GraphExecutor::InitOpSegs() {
13481348
// Generate segments based on the graph structure
13491349
bool prefer_bulk_exec_inference = dmlc::GetEnv("MXNET_EXEC_BULK_EXEC_INFERENCE", true);
13501350
// Whether to perform bulk exec for training
1351+
const profiler::Profiler *prof = profiler::Profiler::Get();
13511352
bool prefer_bulk_exec = dmlc::GetEnv("MXNET_EXEC_BULK_EXEC_TRAIN", 1)
1352-
&& !profiler::Profiler::Get()->AggregateEnabled();
1353+
&& (!prof || !prof->AggregateEnabled());
13531354

13541355
bool is_training = num_forward_nodes_ != total_num_nodes;
13551356

@@ -1362,6 +1363,7 @@ void GraphExecutor::InitOpSegs() {
13621363
}
13631364
}
13641365

1366+
13651367
void GraphExecutor::BulkTrainingOpSegs(size_t total_num_nodes) {
13661368
// The maximum number of node in a segment executed in bulk
13671369
size_t num_nodes_threshold = dmlc::GetEnv("MXNET_EXEC_BULK_EXEC_MAX_NODE_TRAIN", 15);

tests/python/gpu/test_operator_gpu.py

+22-21
Original file line numberDiff line numberDiff line change
@@ -904,81 +904,81 @@ def test_1d_pooling(pool_type):
904904
kernel = (4,)
905905
pad = (2,)
906906
stride = (2,)
907-
907+
908908
ctx_list = []
909909
sym_list = []
910-
910+
911911
pooling_convention = 'valid'
912-
912+
913913
ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
914914
sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
915915
pooling_convention=pooling_convention, global_pool=True, name='pool'))
916-
916+
917917
ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
918918
sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type,
919919
pooling_convention=pooling_convention, global_pool=True, name='pool'))
920-
920+
921921
ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
922922
sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
923923
pooling_convention=pooling_convention, global_pool=True, cudnn_off=False, name='pool'))
924-
924+
925925
ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
926926
sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type,
927927
pooling_convention=pooling_convention, global_pool=True, cudnn_off=False, name='pool'))
928-
928+
929929
ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
930930
sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
931931
pooling_convention=pooling_convention, global_pool=True, cudnn_off=True, name='pool'))
932-
932+
933933
ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
934934
sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type,
935935
pooling_convention=pooling_convention, global_pool=True, cudnn_off=True, name='pool'))
936-
936+
937937
check_consistency(sym_list, ctx_list)
938-
938+
939939
def test_2d_pooling(pool_type):
940940
data = (2, 3, 20, 20)
941941
kernel = (4, 4)
942942
pad = (2, 2)
943943
stride = (2, 2)
944-
944+
945945
ctx_list = []
946946
sym_list = []
947-
947+
948948
pooling_convention = 'valid'
949-
949+
950950
ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
951951
sym_list.append(mx.sym.Pooling_v1(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
952952
pooling_convention=pooling_convention, global_pool=True, name='pool'))
953-
953+
954954
ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
955955
sym_list.append(mx.sym.Pooling_v1(kernel=kernel, pool_type=pool_type,
956956
pooling_convention=pooling_convention, global_pool=True, name='pool'))
957-
957+
958958
ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
959959
sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
960960
pooling_convention=pooling_convention, global_pool=True, name='pool'))
961-
961+
962962
ctx_list.append({'ctx': mx.cpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
963963
sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type,
964964
pooling_convention=pooling_convention, global_pool=True, name='pool'))
965-
965+
966966
ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
967967
sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
968968
pooling_convention=pooling_convention, global_pool=True, cudnn_off=False, name='pool'))
969-
969+
970970
ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
971971
sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type,
972972
pooling_convention=pooling_convention, global_pool=True, cudnn_off=False, name='pool'))
973-
973+
974974
ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
975975
sym_list.append(mx.sym.Pooling(kernel=kernel, pad=pad, stride=stride, pool_type=pool_type,
976976
pooling_convention=pooling_convention, global_pool=True, cudnn_off=True, name='pool'))
977-
977+
978978
ctx_list.append({'ctx': mx.gpu(0), 'pool_data': data, 'type_dict': {'pool_data': np.float32}})
979979
sym_list.append(mx.sym.Pooling(kernel=kernel, pool_type=pool_type,
980980
pooling_convention=pooling_convention, global_pool=True, cudnn_off=True, name='pool'))
981-
981+
982982
check_consistency(sym_list, ctx_list)
983983

984984
test_1d_pooling('max')
@@ -1784,3 +1784,4 @@ def test_kernel_error_checking():
17841784
if __name__ == '__main__':
17851785
import nose
17861786
nose.runmodule()
1787+

tests/python/gpu/test_tvm_bridge.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,5 @@ def check(target, dtype):
6262

6363

6464
if __name__ == "__main__":
65-
test_tvm_bridge()
65+
import nose
66+
nose.runmodule()

0 commit comments

Comments
 (0)