diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index 693c1cd47b0de0..d61380608c01b8 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -195,7 +195,7 @@ class Allocator : public phi::Allocator { protected: virtual phi::Allocation* AllocateImpl(size_t size) = 0; - virtual void FreeImpl(phi::Allocation* allocation); + TEST_API virtual void FreeImpl(phi::Allocation* allocation); virtual uint64_t ReleaseImpl(const platform::Place& place UNUSED) { return 0; } diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h index f80fcac1b2a38a..984b7197c51997 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.h +++ b/paddle/fluid/memory/allocation/allocator_facade.h @@ -64,7 +64,7 @@ class AllocatorFacade { std::shared_ptr AllocShared(const platform::Place& place, size_t size); // Allocate a unique allocation. - AllocationPtr Alloc(const platform::Place& place, size_t size); + TEST_API AllocationPtr Alloc(const platform::Place& place, size_t size); // Release unused memory pool. uint64_t Release(const platform::Place& place); diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h index 7e3a18955ac67e..47f8ed80bb2e53 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/best_fit_allocator.h @@ -105,13 +105,13 @@ class BestFitAllocation : public Allocation { // the prev-chunk and the next-chunk when possible. 
class BestFitAllocator : public Allocator { public: - explicit BestFitAllocator(phi::Allocation* allocation); + TEST_API explicit BestFitAllocator(phi::Allocation* allocation); void* BasePtr() const { return allocation_->ptr(); } const platform::Place& Place() const { return allocation_->place(); } - size_t NumFreeChunks() const; + TEST_API size_t NumFreeChunks() const; bool IsAllocThreadSafe() const override { return true; } diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h index 08a005dc793597..f15f3e1006848f 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.h +++ b/paddle/fluid/memory/allocation/buffered_allocator.h @@ -32,7 +32,7 @@ namespace allocation { // underlying_allocator_ class BufferedAllocator : public Allocator { public: - explicit BufferedAllocator(std::shared_ptr allocator); + TEST_API explicit BufferedAllocator(std::shared_ptr allocator); ~BufferedAllocator(); @@ -42,7 +42,7 @@ class BufferedAllocator : public Allocator { inline void ClearCache() { FreeCache(-1UL); } private: - void FreeCache(size_t size); + TEST_API void FreeCache(size_t size); protected: void FreeImpl(phi::Allocation *allocation) override; diff --git a/paddle/fluid/memory/allocation/cpu_allocator.h b/paddle/fluid/memory/allocation/cpu_allocator.h index 30e367f6e7f7ce..5f5420281f0ee7 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.h +++ b/paddle/fluid/memory/allocation/cpu_allocator.h @@ -34,11 +34,11 @@ namespace allocation { class CPUAllocator : public Allocator { public: constexpr static size_t kAlignment = 4096UL; - bool IsAllocThreadSafe() const override; + TEST_API bool IsAllocThreadSafe() const override; protected: - void FreeImpl(phi::Allocation* allocation) override; - phi::Allocation* AllocateImpl(size_t size) override; + TEST_API void FreeImpl(phi::Allocation* allocation) override; + TEST_API phi::Allocation* AllocateImpl(size_t size) override; }; } // namespace allocation } // 
namespace memory diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h index 3d6500d0f56426..7a16287c75cb30 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h @@ -35,9 +35,9 @@ class NaiveBestFitAllocator : public Allocator { bool IsAllocThreadSafe() const override { return true; } protected: - phi::Allocation *AllocateImpl(size_t size) override; - void FreeImpl(phi::Allocation *allocation) override; - uint64_t ReleaseImpl(const platform::Place &place) override; + TEST_API phi::Allocation *AllocateImpl(size_t size) override; + TEST_API void FreeImpl(phi::Allocation *allocation) override; + TEST_API uint64_t ReleaseImpl(const platform::Place &place) override; private: platform::Place place_; diff --git a/paddle/fluid/memory/stats.h b/paddle/fluid/memory/stats.h index b6d722b62a4b03..1416ec1e034724 100644 --- a/paddle/fluid/memory/stats.h +++ b/paddle/fluid/memory/stats.h @@ -121,17 +121,21 @@ class Stat : public StatBase { // performance than the macro function xxx_MEMORY_STAT_CURRENT_VALUE, // xxx_MEMORY_STAT_PEAK_VALUE, and xxx_MEMORY_STAT_UPDATE. Try to use the macro // functions where ultra-low performance overhead is required. 
-int64_t DeviceMemoryStatCurrentValue(const std::string& stat_type, int dev_id); -int64_t DeviceMemoryStatPeakValue(const std::string& stat_type, int dev_id); -void DeviceMemoryStatUpdate(const std::string& stat_type, - int dev_id, - int64_t increment); - -int64_t HostMemoryStatCurrentValue(const std::string& stat_type, int dev_id); -int64_t HostMemoryStatPeakValue(const std::string& stat_type, int dev_id); -void HostMemoryStatUpdate(const std::string& stat_type, - int dev_id, - int64_t increment); +TEST_API int64_t DeviceMemoryStatCurrentValue(const std::string& stat_type, + int dev_id); +TEST_API int64_t DeviceMemoryStatPeakValue(const std::string& stat_type, + int dev_id); +TEST_API void DeviceMemoryStatUpdate(const std::string& stat_type, + int dev_id, + int64_t increment); + +TEST_API int64_t HostMemoryStatCurrentValue(const std::string& stat_type, + int dev_id); +TEST_API int64_t HostMemoryStatPeakValue(const std::string& stat_type, + int dev_id); +TEST_API void HostMemoryStatUpdate(const std::string& stat_type, + int dev_id, + int64_t increment); void LogDeviceMemoryStats(const platform::Place& place, const std::string& op_name); diff --git a/test/cpp/fluid/memory/CMakeLists.txt b/test/cpp/fluid/memory/CMakeLists.txt index 5bb36f73982287..c4bf57aa7bae7c 100644 --- a/test/cpp/fluid/memory/CMakeLists.txt +++ b/test/cpp/fluid/memory/CMakeLists.txt @@ -1,17 +1,9 @@ -cc_test( - memory_stats_test - SRCS memory_stats_test.cc - DEPS) -cc_test( - stats_test - SRCS stats_test.cc - DEPS) - -cc_test( - naive_best_fit_allocator_test - SRCS naive_best_fit_allocator_test.cc - DEPS allocator) -cc_test( +paddle_test(memory_stats_test SRCS memory_stats_test.cc) +paddle_test(stats_test SRCS stats_test.cc) + +paddle_test(naive_best_fit_allocator_test SRCS naive_best_fit_allocator_test.cc + DEPS allocator) +nv_test( buffered_allocator_test SRCS buffered_allocator_test.cc DEPS allocator) @@ -40,18 +32,15 @@ elseif(WITH_ROCM) SRCS best_fit_allocator_test.cc 
best_fit_allocator_test.cu DEPS allocator) else() - cc_test( - best_fit_allocator_test - SRCS best_fit_allocator_test.cc - DEPS allocator) + paddle_test(best_fit_allocator_test SRCS best_fit_allocator_test.cc) endif() -cc_test( +nv_test( test_aligned_allocator SRCS test_aligned_allocator.cc DEPS allocator) -cc_test( +nv_test( retry_allocator_test SRCS retry_allocator_test.cc DEPS allocator) @@ -60,12 +49,10 @@ if(TEST retry_allocator_test) "RUN_TYPE=EXCLUSIVE") endif() -cc_test( - allocator_facade_abs_flags_test - SRCS allocator_facade_abs_flags_test.cc - DEPS allocator) +paddle_test(allocator_facade_abs_flags_test SRCS + allocator_facade_abs_flags_test.cc) -cc_test( +nv_test( allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator) @@ -131,28 +118,36 @@ if(WITH_GPU AND WITH_TESTING) FLAGS_use_stream_safe_cuda_allocator=true;") endif() -cc_test( +paddle_test( auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS allocator) -cc_test( +nv_test( auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS allocator) if(NOT WIN32) - cc_test( + paddle_test( mmap_allocator_test SRCS mmap_allocator_test.cc DEPS allocator) endif() -cc_test( +paddle_test( system_allocator_test SRCS system_allocator_test.cc DEPS allocator) -cc_test( +paddle_test( buddy_allocator_test SRCS buddy_allocator_test.cc DEPS allocator) + +if(WITH_ONNXRUNTIME AND WIN32) + # Copy onnxruntime for some C++ tests on Windows. These tests are built + # only in CI, so we assume the generator on Windows is Ninja. + copy_onnx(memory_stats_test) + copy_onnx(stats_test) + copy_onnx(naive_best_fit_allocator_test) +endif()