From 5c838298c8fd56d15ffb716905df818120034888 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sat, 14 Sep 2024 00:57:05 +0200 Subject: [PATCH 1/2] Remove legacy CUDA modules for HCAL local reconstruction --- CUDADataFormats/HcalRecHitSoA/BuildFile.xml | 7 - .../interface/RecHitCollection.h | 38 - CUDADataFormats/HcalRecHitSoA/src/classes.h | 3 - .../HcalRecHitSoA/src/classes_def.xml | 13 - .../interface/HcalCombinedRecordsGPU.h | 19 - .../DataRecord/src/HcalCombinedRecordsGPU.cc | 5 - CondFormats/HcalObjects/BuildFile.xml | 6 - .../interface/HcalChannelQualityGPU.h | 37 - .../HcalConvertedEffectivePedestalWidthsGPU.h | 12 - .../HcalConvertedEffectivePedestalsGPU.h | 14 - .../HcalConvertedPedestalWidthsGPU.h | 43 - .../interface/HcalConvertedPedestalsGPU.h | 42 - .../HcalObjects/interface/HcalGainWidthsGPU.h | 40 - .../HcalObjects/interface/HcalGainsGPU.h | 37 - .../HcalObjects/interface/HcalLUTCorrsGPU.h | 36 - .../interface/HcalPedestalWidthsGPU.h | 71 - .../HcalObjects/interface/HcalPedestalsGPU.h | 46 - .../HcalObjects/interface/HcalQIECodersGPU.h | 41 - .../HcalObjects/interface/HcalQIETypesGPU.h | 36 - .../HcalObjects/interface/HcalRecoParamsGPU.h | 40 - .../HcalObjects/interface/HcalRespCorrsGPU.h | 36 - .../interface/HcalSiPMCharacteristicsGPU.h | 43 - .../interface/HcalSiPMParametersGPU.h | 42 - .../HcalObjects/interface/HcalTimeCorrsGPU.h | 36 - .../HcalObjects/src/HcalChannelQualityGPU.cc | 39 - ...HcalConvertedEffectivePedestalWidthsGPU.cc | 4 - .../src/HcalConvertedEffectivePedestalsGPU.cc | 4 - .../src/HcalConvertedPedestalWidthsGPU.cc | 150 -- .../src/HcalConvertedPedestalsGPU.cc | 122 -- .../HcalObjects/src/HcalGainWidthsGPU.cc | 54 - CondFormats/HcalObjects/src/HcalGainsGPU.cc | 46 - .../HcalObjects/src/HcalLUTCorrsGPU.cc | 38 - .../HcalObjects/src/HcalPedestalWidthsGPU.cc | 121 -- .../HcalObjects/src/HcalPedestalsGPU.cc | 65 - .../HcalObjects/src/HcalQIECodersGPU.cc | 51 - .../HcalObjects/src/HcalQIETypesGPU.cc | 38 - .../HcalObjects/src/HcalRecoParamsGPU.cc | 44 - .../HcalObjects/src/HcalRespCorrsGPU.cc | 38 - .../src/HcalSiPMCharacteristicsGPU.cc | 63 - .../HcalObjects/src/HcalSiPMParametersGPU.cc | 61 - .../HcalObjects/src/HcalTimeCorrsGPU.cc | 38 - .../python/hcalGlobalReco_cff.py | 21 +- .../Configuration/python/hcalLocalReco_cff.py | 42 +- RecoLocalCalo/HcalRecAlgos/BuildFile.xml | 3 - .../interface/HcalMahiPulseOffsetsGPU.h | 35 - .../HcalRecoParamsWithPulseShapesGPU.h | 54 - .../src/HcalMahiPulseOffsetsGPU.cc | 34 - .../src/HcalRecoParamsWithPulseShapesGPU.cc | 222 --- RecoLocalCalo/HcalRecProducers/BuildFile.xml | 9 - .../HcalRecProducers/bin/BuildFile.xml | 6 - .../bin/makeHcalRecHitGpuValidationPlots.cpp | 283 ---- .../python/hbheRecHitProducerGPUTask_cff.py | 67 - .../HcalRecProducers/src/DeclsForKernels.h | 107 -- .../src/HBHERecHitProducerGPU.cc | 269 ---- .../HcalRecProducers/src/HCALGPUAnalyzer.cc | 307 ---- .../src/HcalCPURecHitsProducer.cc | 132 -- .../src/HcalESProducersGPUDefs.cc | 127 -- .../src/HcalMahiPulseOffsetsGPUESProducer.cc | 59 - .../HcalRecProducers/src/KernelHelpers.h | 220 --- RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu | 1272 ----------------- .../HcalRecProducers/src/SimpleAlgoGPU.h | 19 - .../test/make_GPUvsCPU_HCAL_plots.py | 28 - .../test/make_GPUvsCPU_HCAL_rechits.py | 149 -- 63 files changed, 14 insertions(+), 5170 deletions(-) delete mode 100644 CUDADataFormats/HcalRecHitSoA/BuildFile.xml delete mode 100644 CUDADataFormats/HcalRecHitSoA/interface/RecHitCollection.h delete mode 100644 CUDADataFormats/HcalRecHitSoA/src/classes.h delete mode 100644 CUDADataFormats/HcalRecHitSoA/src/classes_def.xml delete mode 100644 CondFormats/DataRecord/interface/HcalCombinedRecordsGPU.h delete mode 100644 CondFormats/DataRecord/src/HcalCombinedRecordsGPU.cc delete mode 100644 CondFormats/HcalObjects/interface/HcalChannelQualityGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalWidthsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalConvertedPedestalWidthsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalConvertedPedestalsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalGainWidthsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalGainsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalLUTCorrsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalPedestalWidthsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalPedestalsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalQIECodersGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalQIETypesGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalRecoParamsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalRespCorrsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalSiPMParametersGPU.h delete mode 100644 CondFormats/HcalObjects/interface/HcalTimeCorrsGPU.h delete mode 100644 CondFormats/HcalObjects/src/HcalChannelQualityGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalConvertedEffectivePedestalWidthsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalConvertedEffectivePedestalsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalConvertedPedestalWidthsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalConvertedPedestalsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalGainWidthsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalGainsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalLUTCorrsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalPedestalWidthsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalPedestalsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalQIECodersGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalQIETypesGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalRecoParamsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalRespCorrsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalSiPMCharacteristicsGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalSiPMParametersGPU.cc delete mode 100644 CondFormats/HcalObjects/src/HcalTimeCorrsGPU.cc delete mode 100644 RecoLocalCalo/HcalRecAlgos/interface/HcalMahiPulseOffsetsGPU.h delete mode 100644 RecoLocalCalo/HcalRecAlgos/interface/HcalRecoParamsWithPulseShapesGPU.h delete mode 100644 RecoLocalCalo/HcalRecAlgos/src/HcalMahiPulseOffsetsGPU.cc delete mode 100644 RecoLocalCalo/HcalRecAlgos/src/HcalRecoParamsWithPulseShapesGPU.cc delete mode 100644 RecoLocalCalo/HcalRecProducers/bin/BuildFile.xml delete mode 100644 RecoLocalCalo/HcalRecProducers/bin/makeHcalRecHitGpuValidationPlots.cpp delete mode 100644 RecoLocalCalo/HcalRecProducers/python/hbheRecHitProducerGPUTask_cff.py delete mode 100644 RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h delete mode 100644 RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc delete mode 100644 RecoLocalCalo/HcalRecProducers/src/HCALGPUAnalyzer.cc delete mode 100644 RecoLocalCalo/HcalRecProducers/src/HcalCPURecHitsProducer.cc delete mode 100644 RecoLocalCalo/HcalRecProducers/src/HcalESProducersGPUDefs.cc delete mode 100644 RecoLocalCalo/HcalRecProducers/src/HcalMahiPulseOffsetsGPUESProducer.cc delete mode 100644 RecoLocalCalo/HcalRecProducers/src/KernelHelpers.h delete mode 100644 RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu delete mode 100644 RecoLocalCalo/HcalRecProducers/src/SimpleAlgoGPU.h delete mode 100644 RecoLocalCalo/HcalRecProducers/test/make_GPUvsCPU_HCAL_plots.py delete mode 100644 RecoLocalCalo/HcalRecProducers/test/make_GPUvsCPU_HCAL_rechits.py diff --git a/CUDADataFormats/HcalRecHitSoA/BuildFile.xml b/CUDADataFormats/HcalRecHitSoA/BuildFile.xml deleted file mode 100644 index 245701de5fdb0..0000000000000 --- a/CUDADataFormats/HcalRecHitSoA/BuildFile.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/CUDADataFormats/HcalRecHitSoA/interface/RecHitCollection.h b/CUDADataFormats/HcalRecHitSoA/interface/RecHitCollection.h deleted file mode 100644 index 424b2c0813b4c..0000000000000 --- a/CUDADataFormats/HcalRecHitSoA/interface/RecHitCollection.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef CUDADataFormats_HcalRecHitCollectionSoA_interface_RecHitCollection_h -#define CUDADataFormats_HcalRecHitCollectionSoA_interface_RecHitCollection_h - -#include - -#include "CUDADataFormats/CaloCommon/interface/Common.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" - -namespace hcal { - - template - struct RecHitCollection : public ::calo::common::AddSize { - RecHitCollection() = default; - RecHitCollection(const RecHitCollection&) = default; - RecHitCollection& operator=(const RecHitCollection&) = default; - - RecHitCollection(RecHitCollection&&) = default; - RecHitCollection& operator=(RecHitCollection&&) = default; - - typename StoragePolicy::template StorageSelector::type energy; - typename StoragePolicy::template StorageSelector::type chi2; - typename StoragePolicy::template StorageSelector::type energyM0; - typename StoragePolicy::template StorageSelector::type timeM0; - typename StoragePolicy::template StorageSelector::type did; - - template - typename std::enable_if::value, void>::type resize(size_t size) { - energy.resize(size); - chi2.resize(size); - energyM0.resize(size); - timeM0.resize(size); - did.resize(size); - } - }; - -} // namespace hcal - -#endif // RecoLocalCalo_HcalRecAlgos_interface_RecHitCollection_h diff --git a/CUDADataFormats/HcalRecHitSoA/src/classes.h b/CUDADataFormats/HcalRecHitSoA/src/classes.h deleted file mode 100644 index a13782165c413..0000000000000 --- a/CUDADataFormats/HcalRecHitSoA/src/classes.h +++ /dev/null @@ -1,3 +0,0 @@ -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/HcalRecHitSoA/interface/RecHitCollection.h" -#include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/HcalRecHitSoA/src/classes_def.xml b/CUDADataFormats/HcalRecHitSoA/src/classes_def.xml deleted file mode 100644 index 2ae114c84b945..0000000000000 --- a/CUDADataFormats/HcalRecHitSoA/src/classes_def.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - diff --git a/CondFormats/DataRecord/interface/HcalCombinedRecordsGPU.h b/CondFormats/DataRecord/interface/HcalCombinedRecordsGPU.h deleted file mode 100644 index c802c599ed4c5..0000000000000 --- a/CondFormats/DataRecord/interface/HcalCombinedRecordsGPU.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef CondFormats_DataRecord_interface_HcalCombinedRecordsGPU_h -#define CondFormats_DataRecord_interface_HcalCombinedRecordsGPU_h - -#include "CondFormats/DataRecord/interface/HcalPedestalWidthsRcd.h" -#include "CondFormats/DataRecord/interface/HcalPedestalsRcd.h" -#include "CondFormats/DataRecord/interface/HcalQIEDataRcd.h" -#include "CondFormats/DataRecord/interface/HcalQIETypesRcd.h" -#include "FWCore/Framework/interface/DependentRecordImplementation.h" - -template -class HcalCombinedRecord : public edm::eventsetup::DependentRecordImplementation, - edm::mpl::Vector> {}; - -using HcalConvertedPedestalsRcd = HcalCombinedRecord; - -using HcalConvertedPedestalWidthsRcd = - HcalCombinedRecord; - -#endif // CondFormats_DataRecord_interface_HcalCombinedRecordsGPU_h diff --git a/CondFormats/DataRecord/src/HcalCombinedRecordsGPU.cc b/CondFormats/DataRecord/src/HcalCombinedRecordsGPU.cc deleted file mode 100644 index 6da5efbef4f20..0000000000000 --- a/CondFormats/DataRecord/src/HcalCombinedRecordsGPU.cc +++ /dev/null @@ -1,5 +0,0 @@ -#include "CondFormats/DataRecord/interface/HcalCombinedRecordsGPU.h" -#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" - -EVENTSETUP_RECORD_REG(HcalConvertedPedestalsRcd); -EVENTSETUP_RECORD_REG(HcalConvertedPedestalWidthsRcd); diff --git a/CondFormats/HcalObjects/BuildFile.xml b/CondFormats/HcalObjects/BuildFile.xml index 34f054dc82699..f3e01e6d7499c 100644 --- a/CondFormats/HcalObjects/BuildFile.xml +++ b/CondFormats/HcalObjects/BuildFile.xml @@ -10,12 +10,6 @@ - - - - - - diff --git a/CondFormats/HcalObjects/interface/HcalChannelQualityGPU.h b/CondFormats/HcalObjects/interface/HcalChannelQualityGPU.h deleted file mode 100644 index 4abdcc3e52c82..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalChannelQualityGPU.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalChannelQualityGPU_h -#define CondFormats_HcalObjects_interface_HcalChannelQualityGPU_h - -#include "CondFormats/HcalObjects/interface/HcalChannelQuality.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalChannelQualityGPU { -public: - struct Product { - edm::propagate_const_array> status; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalChannelQualityGPU(HcalChannelQuality const &); - - // will trigger deallocation of Product thru ~Product - ~HcalChannelQualityGPU() = default; - - // get device pointers - Product const &getProduct(cudaStream_t) const; - -private: - uint64_t totalChannels_; - std::vector> status_; - - cms::cuda::ESProduct product_; -#endif // __CUDACC__ -}; - -#endif // RecoLocalCalo_HcalRecAlgos_interface_HcalChannelQualityGPU_h diff --git a/CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalWidthsGPU.h b/CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalWidthsGPU.h deleted file mode 100644 index b2232d5d647a2..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalWidthsGPU.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalConvertedEffectivePedestalWidthsGPU_h -#define CondFormats_HcalObjects_interface_HcalConvertedEffectivePedestalWidthsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalConvertedPedestalWidthsGPU.h" - -// similar to converted effective pedestals -class HcalConvertedEffectivePedestalWidthsGPU final : public HcalConvertedPedestalWidthsGPU { -public: - using HcalConvertedPedestalWidthsGPU::HcalConvertedPedestalWidthsGPU; -}; - -#endif // RecoLocalCalo_HcalRecAlgos_interface_HcalConvertedEffectivePedestalWidthsGPU_h diff --git a/CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalsGPU.h b/CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalsGPU.h deleted file mode 100644 index 311485ac7275a..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalsGPU.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalConvertedEffectivePedestalsGPU_h -#define CondFormats_HcalObjects_interface_HcalConvertedEffectivePedestalsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalConvertedPedestalsGPU.h" - -// Separate access to effective and regular pedestals -// No need to transfer/rearrange effective or vice versa if they are not going -// to be used -class HcalConvertedEffectivePedestalsGPU final : public HcalConvertedPedestalsGPU { -public: - using HcalConvertedPedestalsGPU::HcalConvertedPedestalsGPU; -}; - -#endif // RecoLocalCalo_HcalRecAlgos_interface_HcalConvertedEffectivePedestalsGPU_h diff --git a/CondFormats/HcalObjects/interface/HcalConvertedPedestalWidthsGPU.h b/CondFormats/HcalObjects/interface/HcalConvertedPedestalWidthsGPU.h deleted file mode 100644 index bf212265fe5ae..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalConvertedPedestalWidthsGPU.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalConvertedPedestalWidthsGPU_h -#define CondFormats_HcalObjects_interface_HcalConvertedPedestalWidthsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalPedestalWidths.h" -#include "CondFormats/HcalObjects/interface/HcalPedestals.h" -#include "CondFormats/HcalObjects/interface/HcalQIEData.h" -#include "CondFormats/HcalObjects/interface/HcalQIETypes.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalConvertedPedestalWidthsGPU { -public: - struct Product { - edm::propagate_const_array> values; - }; - -#ifndef __CUDACC__ - // order matters! - HcalConvertedPedestalWidthsGPU(HcalPedestals const&, - HcalPedestalWidths const&, - HcalQIEData const&, - HcalQIETypes const&); - - // will trigger deallocation of Product thru ~Product - ~HcalConvertedPedestalWidthsGPU() = default; - - // get device pointers - Product const& getProduct(cudaStream_t) const; - -private: - uint64_t totalChannels_; - std::vector> values_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalConvertedPedestalsGPU.h b/CondFormats/HcalObjects/interface/HcalConvertedPedestalsGPU.h deleted file mode 100644 index 84824fbeb1d2a..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalConvertedPedestalsGPU.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalConvertedPedestalsGPU_h -#define CondFormats_HcalObjects_interface_HcalConvertedPedestalsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalPedestals.h" -#include "CondFormats/HcalObjects/interface/HcalQIEData.h" -#include "CondFormats/HcalObjects/interface/HcalQIETypes.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalConvertedPedestalsGPU { -public: - struct Product { - edm::propagate_const_array> values; - }; - -#ifndef __CUDACC__ - // order matters! - HcalConvertedPedestalsGPU(HcalPedestals const&, HcalQIEData const&, HcalQIETypes const&); - - // will trigger deallocation of Product thru ~Product - ~HcalConvertedPedestalsGPU() = default; - - // get device pointers - Product const& getProduct(cudaStream_t) const; - - uint32_t offsetForHashes() const { return offsetForHashes_; } - -protected: - uint64_t totalChannels_; - uint32_t offsetForHashes_; - std::vector> values_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalGainWidthsGPU.h b/CondFormats/HcalObjects/interface/HcalGainWidthsGPU.h deleted file mode 100644 index 4851239f171df..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalGainWidthsGPU.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalGainWidthsGPU_h -#define CondFormats_HcalObjects_interface_HcalGainWidthsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalGainWidths.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalGainWidthsGPU { -public: - struct Product { - edm::propagate_const_array> value0; - edm::propagate_const_array> value1; - edm::propagate_const_array> value2; - edm::propagate_const_array> value3; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalGainWidthsGPU(HcalGainWidths const &); - - // will trigger deallocation of Product thru ~Product - ~HcalGainWidthsGPU() = default; - - // get device pointers - Product const &getProduct(cudaStream_t) const; - -private: - uint64_t totalChannels_; - std::vector> value0_, value1_, value2_, value3_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalGainsGPU.h b/CondFormats/HcalObjects/interface/HcalGainsGPU.h deleted file mode 100644 index 030146ffc7cdf..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalGainsGPU.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalGainsGPU_h -#define CondFormats_HcalObjects_interface_HcalGainsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalGains.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalGainsGPU { -public: - struct Product { - edm::propagate_const_array> values; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalGainsGPU(HcalGains const&); - - // will trigger deallocation of Product thru ~Product - ~HcalGainsGPU() = default; - - // get device pointers - Product const& getProduct(cudaStream_t) const; - -private: - uint64_t totalChannels_; - std::vector> values_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalLUTCorrsGPU.h b/CondFormats/HcalObjects/interface/HcalLUTCorrsGPU.h deleted file mode 100644 index 699dfb9706147..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalLUTCorrsGPU.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalLUTCorrsGPU_h -#define CondFormats_HcalObjects_interface_HcalLUTCorrsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalLUTCorrs.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalLUTCorrsGPU { -public: - struct Product { - edm::propagate_const_array> value; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalLUTCorrsGPU(HcalLUTCorrs const&); - - // will trigger deallocation of Product thru ~Product - ~HcalLUTCorrsGPU() = default; - - // get device pointers - Product const& getProduct(cudaStream_t) const; - -private: - std::vector> value_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalPedestalWidthsGPU.h b/CondFormats/HcalObjects/interface/HcalPedestalWidthsGPU.h deleted file mode 100644 index b39ed2545c5e2..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalPedestalWidthsGPU.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalPedestalWidthsGPU_h -#define CondFormats_HcalObjects_interface_HcalPedestalWidthsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalPedestalWidths.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalPedestalWidthsGPU { -public: - struct Product { - edm::propagate_const_array> sigma00; - edm::propagate_const_array> sigma01; - edm::propagate_const_array> sigma02; - edm::propagate_const_array> sigma03; - edm::propagate_const_array> sigma10; - edm::propagate_const_array> sigma11; - edm::propagate_const_array> sigma12; - edm::propagate_const_array> sigma13; - edm::propagate_const_array> sigma20; - edm::propagate_const_array> sigma21; - edm::propagate_const_array> sigma22; - edm::propagate_const_array> sigma23; - edm::propagate_const_array> sigma30; - edm::propagate_const_array> sigma31; - edm::propagate_const_array> sigma32; - edm::propagate_const_array> sigma33; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalPedestalWidthsGPU(HcalPedestalWidths const&); - - // will trigger deallocation of Product thru ~Product - ~HcalPedestalWidthsGPU() = default; - - // get device pointers - Product const& getProduct(cudaStream_t) const; - - // as in cpu version - bool unitIsADC() const { return unitIsADC_; } - -private: - bool unitIsADC_; - uint64_t totalChannels_; - std::vector> sigma00_; - std::vector> sigma01_; - std::vector> sigma02_; - std::vector> sigma03_; - std::vector> sigma10_; - std::vector> sigma11_; - std::vector> sigma12_; - std::vector> sigma13_; - std::vector> sigma20_; - std::vector> sigma21_; - std::vector> sigma22_; - std::vector> sigma23_; - std::vector> sigma30_; - std::vector> sigma31_; - std::vector> sigma32_; - std::vector> sigma33_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalPedestalsGPU.h b/CondFormats/HcalObjects/interface/HcalPedestalsGPU.h deleted file mode 100644 index 957f6f88ef2fa..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalPedestalsGPU.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalPedestalsGPU_h -#define CondFormats_HcalObjects_interface_HcalPedestalsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalPedestals.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalPedestalsGPU { -public: - struct Product { - edm::propagate_const_array> values; - edm::propagate_const_array> widths; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalPedestalsGPU(HcalPedestals const &); - - // will trigger deallocation of Product thru ~Product - ~HcalPedestalsGPU() = default; - - // get device pointers - Product const &getProduct(cudaStream_t) const; - - // as in cpu version - bool unitIsADC() const { return unitIsADC_; } - - uint32_t offsetForHashes() const { return offsetForHashes_; } - -private: - bool unitIsADC_; - uint64_t totalChannels_; - uint32_t offsetForHashes_; - std::vector> values_; - std::vector> widths_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalQIECodersGPU.h b/CondFormats/HcalObjects/interface/HcalQIECodersGPU.h deleted file mode 100644 index 0ed389f9c9cca..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalQIECodersGPU.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalQIECodersGPU_h -#define CondFormats_HcalObjects_interface_HcalQIECodersGPU_h - -#include "CondFormats/HcalObjects/interface/HcalQIEData.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalQIECodersGPU { -public: - static constexpr uint32_t numValuesPerChannel = 16; - - struct Product { - edm::propagate_const_array> offsets; - edm::propagate_const_array> slopes; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalQIECodersGPU(HcalQIEData const &); - - // will trigger deallocation of Product thru ~Product - ~HcalQIECodersGPU() = default; - - // get device pointers - Product const &getProduct(cudaStream_t) const; - -private: - uint64_t totalChannels_; - std::vector> offsets_; - std::vector> slopes_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalQIETypesGPU.h b/CondFormats/HcalObjects/interface/HcalQIETypesGPU.h deleted file mode 100644 index cdb49764c9145..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalQIETypesGPU.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalQIETypesGPU_h -#define CondFormats_HcalObjects_interface_HcalQIETypesGPU_h - -#include "CondFormats/HcalObjects/interface/HcalQIETypes.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalQIETypesGPU { -public: - struct Product { - edm::propagate_const_array> values; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalQIETypesGPU(HcalQIETypes const&); - - // will trigger deallocation of Product thru ~Product - ~HcalQIETypesGPU() = default; - - // get device pointers - Product const& getProduct(cudaStream_t) const; - -private: - std::vector> values_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalRecoParamsGPU.h b/CondFormats/HcalObjects/interface/HcalRecoParamsGPU.h deleted file mode 100644 index f85d26a431ebb..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalRecoParamsGPU.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalRecoParamsGPU_h -#define CondFormats_HcalObjects_interface_HcalRecoParamsGPU_h - -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalRecoParams; - -class HcalRecoParamsGPU { -public: - struct Product { - edm::propagate_const_array> param1; - edm::propagate_const_array> param2; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalRecoParamsGPU(HcalRecoParams const&); - - // will trigger deallocation of Product thru ~Product - ~HcalRecoParamsGPU() = default; - - // get device pointers - Product const& getProduct(cudaStream_t) const; - -private: - uint64_t totalChannels_; // hb + he - std::vector> param1_; - std::vector> param2_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalRespCorrsGPU.h b/CondFormats/HcalObjects/interface/HcalRespCorrsGPU.h deleted file mode 100644 index 6bc570ab52575..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalRespCorrsGPU.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalRespCorrsGPU_h -#define CondFormats_HcalObjects_interface_HcalRespCorrsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalRespCorrs.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalRespCorrsGPU { -public: - struct Product { - edm::propagate_const_array> values; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalRespCorrsGPU(HcalRespCorrs const&); - - // will trigger deallocation of Product thru ~Product - ~HcalRespCorrsGPU() = default; - - // get device pointers - Product const& getProduct(cudaStream_t) const; - -private: - std::vector> values_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsGPU.h b/CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsGPU.h deleted file mode 100644 index 9615d8011a256..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsGPU.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalSiPMCharacteristicsGPU_h -#define CondFormats_HcalObjects_interface_HcalSiPMCharacteristicsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristics.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalSiPMCharacteristicsGPU { -public: - struct Product { - edm::propagate_const_array> pixels; - edm::propagate_const_array> parLin1; - edm::propagate_const_array> parLin2; - edm::propagate_const_array> parLin3; - edm::propagate_const_array> crossTalk; - edm::propagate_const_array> auxi1; - edm::propagate_const_array> auxi2; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalSiPMCharacteristicsGPU(HcalSiPMCharacteristics const &); - - // will trigger deallocation of Product thru ~Product - ~HcalSiPMCharacteristicsGPU() = default; - - // get device pointers - Product const &getProduct(cudaStream_t) const; - -private: - std::vector> pixels_, auxi1_; - std::vector> parLin1_, parLin2_, parLin3_, crossTalk_, auxi2_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/interface/HcalSiPMParametersGPU.h b/CondFormats/HcalObjects/interface/HcalSiPMParametersGPU.h deleted file mode 100644 index 10a69f70defa2..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalSiPMParametersGPU.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalSiPMParametersGPU_h -#define CondFormats_HcalObjects_interface_HcalSiPMParametersGPU_h - -#include "CondFormats/HcalObjects/interface/HcalSiPMParameters.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalSiPMParametersGPU { -public: - struct Product { - edm::propagate_const_array> type; - edm::propagate_const_array> auxi1; - edm::propagate_const_array> fcByPE; - edm::propagate_const_array> darkCurrent; - edm::propagate_const_array> auxi2; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalSiPMParametersGPU(HcalSiPMParameters const &); - - // will trigger deallocation of Product thru ~Product - ~HcalSiPMParametersGPU() = default; - - // get device pointers - Product const &getProduct(cudaStream_t) const; - -private: - uint64_t totalChannels_; - std::vector> type_, auxi1_; - std::vector> fcByPE_, darkCurrent_, auxi2_; - - cms::cuda::ESProduct product_; -#endif // __CUDACC__ -}; - -#endif // RecoLocalCalo_HcalRecAlgos_interface_HcalSiPMParametersGPU_h diff --git a/CondFormats/HcalObjects/interface/HcalTimeCorrsGPU.h b/CondFormats/HcalObjects/interface/HcalTimeCorrsGPU.h deleted file mode 100644 index bac5be06198d4..0000000000000 --- a/CondFormats/HcalObjects/interface/HcalTimeCorrsGPU.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef CondFormats_HcalObjects_interface_HcalTimeCorrsGPU_h -#define CondFormats_HcalObjects_interface_HcalTimeCorrsGPU_h - -#include "CondFormats/HcalObjects/interface/HcalTimeCorrs.h" -#include "FWCore/Utilities/interface/propagate_const_array.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalTimeCorrsGPU { -public: - struct Product { - edm::propagate_const_array> value; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalTimeCorrsGPU(HcalTimeCorrs const&); - - // will trigger deallocation of Product thru ~Product - ~HcalTimeCorrsGPU() = default; - - // get device pointers - Product const& getProduct(cudaStream_t) const; - -private: - std::vector> value_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/CondFormats/HcalObjects/src/HcalChannelQualityGPU.cc b/CondFormats/HcalObjects/src/HcalChannelQualityGPU.cc deleted file mode 100644 index 103dcdc86c26a..0000000000000 --- a/CondFormats/HcalObjects/src/HcalChannelQualityGPU.cc +++ /dev/null @@ -1,39 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalChannelQuality.h" -#include "CondFormats/HcalObjects/interface/HcalChannelQualityGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalChannelQualityGPU::HcalChannelQualityGPU(HcalChannelQuality const& quality) - : totalChannels_{quality.getAllContainers()[0].second.size() + quality.getAllContainers()[1].second.size()}, - status_(totalChannels_) { - auto const containers = quality.getAllContainers(); - - // fill in eb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - status_[i] = barrelValues[i].getValue(); - } - - // fill in ee - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - status_[i + offset] = endcapValues[i].getValue(); - } -} - -HcalChannelQualityGPU::Product const& HcalChannelQualityGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalChannelQualityGPU::Product& product, cudaStream_t stream) { - // allocate - product.status = cms::cuda::make_device_unique(status_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.status, status_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalChannelQualityGPU); diff --git a/CondFormats/HcalObjects/src/HcalConvertedEffectivePedestalWidthsGPU.cc b/CondFormats/HcalObjects/src/HcalConvertedEffectivePedestalWidthsGPU.cc deleted file mode 100644 index dfc9a9e099ea1..0000000000000 --- a/CondFormats/HcalObjects/src/HcalConvertedEffectivePedestalWidthsGPU.cc +++ /dev/null @@ -1,4 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalWidthsGPU.h" - -#include "FWCore/Utilities/interface/typelookup.h" -TYPELOOKUP_DATA_REG(HcalConvertedEffectivePedestalWidthsGPU); diff --git a/CondFormats/HcalObjects/src/HcalConvertedEffectivePedestalsGPU.cc b/CondFormats/HcalObjects/src/HcalConvertedEffectivePedestalsGPU.cc deleted file mode 100644 index 92cb76edc6d6f..0000000000000 --- a/CondFormats/HcalObjects/src/HcalConvertedEffectivePedestalsGPU.cc +++ /dev/null @@ -1,4 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalsGPU.h" - -#include "FWCore/Utilities/interface/typelookup.h" -TYPELOOKUP_DATA_REG(HcalConvertedEffectivePedestalsGPU); diff --git a/CondFormats/HcalObjects/src/HcalConvertedPedestalWidthsGPU.cc b/CondFormats/HcalObjects/src/HcalConvertedPedestalWidthsGPU.cc deleted file mode 100644 index c03bef2e3439b..0000000000000 --- a/CondFormats/HcalObjects/src/HcalConvertedPedestalWidthsGPU.cc +++ /dev/null @@ -1,150 +0,0 @@ -#include - -#include "CondFormats/HcalObjects/interface/HcalConvertedPedestalWidthsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -namespace { - float convert( - float const value, float const width, int const i, HcalQIECoder const& coder, HcalQIEShape const& shape) { - float const y = value; - float const x = width; - unsigned const x1 = static_cast(std::floor(y)); - unsigned const x2 = static_cast(std::floor(y + 1.)); - unsigned iun = static_cast(i); - float const y1 = coder.charge(shape, x1, iun); - float const y2 = coder.charge(shape, x2, iun); - return (y2 - y1) * x; - } -} // namespace - -// FIXME: add proper getters to conditions -HcalConvertedPedestalWidthsGPU::HcalConvertedPedestalWidthsGPU(HcalPedestals const& pedestals, - HcalPedestalWidths const& pedestalWidths, - HcalQIEData const& qieData, - HcalQIETypes const& qieTypes) - : totalChannels_{pedestals.getAllContainers()[0].second.size() + pedestals.getAllContainers()[1].second.size()}, - values_(totalChannels_ * 4) { -#ifdef HCAL_MAHI_CPUDEBUG - std::cout << "hello from converted pedestal widths" << std::endl; - std::cout << "pedestals HB values = " << pedestals.getAllContainers()[0].second.size() - << " HE values = " << pedestals.getAllContainers()[1].second.size() << std::endl; - std::cout << "qiedata HB values = " << qieData.getAllContainers()[0].second.size() - << " HE values = " << qieData.getAllContainers()[1].second.size() << std::endl; -#endif - - // retrieve all collections - auto const pedestalsAll = pedestals.getAllContainers(); - auto const pedestalWidthsAll = pedestalWidths.getAllContainers(); - auto const qieDataAll = qieData.getAllContainers(); - auto const qieTypesAll = qieTypes.getAllContainers(); - - // have to convert to fc if stored in adc - auto const unitIsADC = pedestals.isADC(); - - // fill in barrel - auto const& pedestalBarrelValues = pedestalsAll[0].second; - auto const& pedestalWidthBarrelValues = pedestalWidthsAll[0].second; - auto const& qieDataBarrelValues = qieDataAll[0].second; - auto const& qieTypesBarrelValues = qieTypesAll[0].second; - -#ifdef HCAL_MAHI_CPUDEBUG - assert(pedestalWidthBarrelValues.size() == pedestalBarrelValues.size()); - assert(pedestalBarrelValues.size() == qieDataBarrelValues.size()); - assert(pedestalBarrelValues.size() == qieTypesBarrelValues.size()); -#endif - - for (uint64_t i = 0; i < pedestalBarrelValues.size(); ++i) { - auto const& qieCoder = qieDataBarrelValues[i]; - auto const qieType = qieTypesBarrelValues[i].getValue() > 1 ? 1 : 0; - auto const& qieShape = qieData.getShape(qieType); - - values_[i * 4] = - unitIsADC - ? convert( - pedestalBarrelValues[i].getValue(0), pedestalWidthBarrelValues[i].getWidth(0), 0, qieCoder, qieShape) - : pedestalWidthBarrelValues[i].getWidth(0); - values_[i * 4 + 1] = - unitIsADC - ? convert( - pedestalBarrelValues[i].getValue(1), pedestalWidthBarrelValues[i].getWidth(1), 1, qieCoder, qieShape) - : pedestalWidthBarrelValues[i].getWidth(1); - values_[i * 4 + 2] = - unitIsADC - ? convert( - pedestalBarrelValues[i].getValue(2), pedestalWidthBarrelValues[i].getWidth(2), 2, qieCoder, qieShape) - : pedestalWidthBarrelValues[i].getWidth(2); - values_[i * 4 + 3] = - unitIsADC - ? convert( - pedestalBarrelValues[i].getValue(3), pedestalWidthBarrelValues[i].getWidth(3), 3, qieCoder, qieShape) - : pedestalWidthBarrelValues[i].getWidth(3); - } - - // fill in endcap - auto const& pedestalEndcapValues = pedestalsAll[1].second; - auto const& pedestalWidthEndcapValues = pedestalWidthsAll[1].second; - auto const& qieDataEndcapValues = qieDataAll[1].second; - auto const& qieTypesEndcapValues = qieTypesAll[1].second; - -#ifdef HCAL_MAHI_CPUDEBUG - assert(pedestalWidthEndcapValues.size() == pedestalEndcapValues.size()); - assert(pedestalEndcapValues.size() == qieDataEndcapValues.size()); - assert(pedestalEndcapValues.size() == qieTypesEndcapValues.size()); -#endif - - auto const offset = pedestalWidthBarrelValues.size(); - for (uint64_t i = 0; i < pedestalEndcapValues.size(); ++i) { - auto const& qieCoder = qieDataEndcapValues[i]; - auto const qieType = qieTypesEndcapValues[i].getValue() > 1 ? 1 : 0; - auto const& qieShape = qieData.getShape(qieType); - auto const off = offset + i; - - values_[off * 4] = - unitIsADC - ? convert( - pedestalEndcapValues[i].getValue(0), pedestalWidthEndcapValues[i].getWidth(0), 0, qieCoder, qieShape) - : pedestalWidthEndcapValues[i].getWidth(0); - values_[off * 4 + 1] = - unitIsADC - ? convert( - pedestalEndcapValues[i].getValue(1), pedestalWidthEndcapValues[i].getWidth(1), 1, qieCoder, qieShape) - : pedestalWidthEndcapValues[i].getWidth(1); - values_[off * 4 + 2] = - unitIsADC - ? convert( - pedestalEndcapValues[i].getValue(2), pedestalWidthEndcapValues[i].getWidth(2), 2, qieCoder, qieShape) - : pedestalWidthEndcapValues[i].getWidth(2); - values_[off * 4 + 3] = - unitIsADC - ? convert( - pedestalEndcapValues[i].getValue(3), pedestalWidthEndcapValues[i].getWidth(3), 3, qieCoder, qieShape) - : pedestalWidthEndcapValues[i].getWidth(3); - -#ifdef HCAL_MAHI_CPUDEBUG - if (pedestalEndcapValues[i].rawId() == DETID_TO_DEBUG) { - for (int i = 0; i < 4; i++) - printf("pedestalWidth(%d) = %f original pedestalWidth(%d) = %f\n", - i, - values_[off * 4 + i], - i, - pedestalWidthEndcapValues[i].getWidth(3)); - } -#endif - } -} - -HcalConvertedPedestalWidthsGPU::Product const& HcalConvertedPedestalWidthsGPU::getProduct(cudaStream_t stream) const { - auto const& product = product_.dataForCurrentDeviceAsync( - stream, [this](HcalConvertedPedestalWidthsGPU::Product& product, cudaStream_t stream) { - // allocate - product.values = cms::cuda::make_device_unique(values_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.values, values_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalConvertedPedestalWidthsGPU); diff --git a/CondFormats/HcalObjects/src/HcalConvertedPedestalsGPU.cc b/CondFormats/HcalObjects/src/HcalConvertedPedestalsGPU.cc deleted file mode 100644 index 5d44f1fd6bd2e..0000000000000 --- a/CondFormats/HcalObjects/src/HcalConvertedPedestalsGPU.cc +++ /dev/null @@ -1,122 +0,0 @@ -#include - -#include "CondFormats/HcalObjects/interface/HcalConvertedPedestalsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -namespace { - float convert(float const x, int const i, HcalQIECoder const& coder, HcalQIEShape const& shape) { - int const x1 = static_cast(std::floor(x)); - int const x2 = static_cast(std::floor(x + 1)); - float const y2 = coder.charge(shape, x2, i); - float const y1 = coder.charge(shape, x1, i); - return (y2 - y1) * (x - x1) + y1; - } -} // namespace - -// FIXME: add proper getters to conditions -HcalConvertedPedestalsGPU::HcalConvertedPedestalsGPU(HcalPedestals const& pedestals, - HcalQIEData const& qieData, - HcalQIETypes const& qieTypes) - : totalChannels_{pedestals.getAllContainers()[0].second.size() + pedestals.getAllContainers()[1].second.size()}, - offsetForHashes_{static_cast(pedestals.getAllContainers()[0].second.size())}, - values_(totalChannels_ * 4) { -#ifdef HCAL_MAHI_CPUDEBUG - std::cout << "hello from converted pedestals" << std::endl; - std::cout << "pedestals HB values = " << pedestals.getAllContainers()[0].second.size() - << " HE values = " << pedestals.getAllContainers()[1].second.size() << std::endl; - std::cout << "qiedata HB values = " << qieData.getAllContainers()[0].second.size() - << " HE values = " << qieData.getAllContainers()[1].second.size() << std::endl; -#endif - - // retrieve all collections - auto const pedestalsAll = pedestals.getAllContainers(); - auto const qieDataAll = qieData.getAllContainers(); - auto const qieTypesAll = qieTypes.getAllContainers(); - - // have to convert to fc if stored in adc - auto const unitIsADC = pedestals.isADC(); - - // fill in barrel - auto const& pedestalBarrelValues = pedestalsAll[0].second; - auto const& qieDataBarrelValues = qieDataAll[0].second; - auto const& qieTypesBarrelValues = qieTypesAll[0].second; - -#ifdef HCAL_MAHI_CPUDEBUG - assert(pedestalBarrelValues.size() == qieDataBarrelValues.size()); - assert(pedestalBarrelValues.size() == qieTypesBarrelValues.size()); -#endif - - for (uint64_t i = 0; i < pedestalBarrelValues.size(); ++i) { - auto const& qieCoder = qieDataBarrelValues[i]; - auto const qieType = qieTypesBarrelValues[i].getValue() > 1 ? 1 : 0; - auto const& qieShape = qieData.getShape(qieType); - - values_[i * 4] = unitIsADC ? convert(pedestalBarrelValues[i].getValue(0), 0, qieCoder, qieShape) - : pedestalBarrelValues[i].getValue(0); - values_[i * 4 + 1] = unitIsADC ? convert(pedestalBarrelValues[i].getValue(1), 1, qieCoder, qieShape) - : pedestalBarrelValues[i].getValue(1); - values_[i * 4 + 2] = unitIsADC ? convert(pedestalBarrelValues[i].getValue(2), 2, qieCoder, qieShape) - : pedestalBarrelValues[i].getValue(2); - values_[i * 4 + 3] = unitIsADC ? convert(pedestalBarrelValues[i].getValue(3), 3, qieCoder, qieShape) - : pedestalBarrelValues[i].getValue(3); - } - - // fill in endcap - auto const& pedestalEndcapValues = pedestalsAll[1].second; - auto const& qieDataEndcapValues = qieDataAll[1].second; - auto const& qieTypesEndcapValues = qieTypesAll[1].second; - -#ifdef HCAL_MAHI_CPUDEBUG - assert(pedestalEndcapValues.size() == qieDataEndcapValues.size()); - assert(pedestalEndcapValues.size() == qieTypesEndcapValues.size()); -#endif - - auto const offset = pedestalBarrelValues.size(); - for (uint64_t i = 0; i < pedestalEndcapValues.size(); ++i) { - auto const& qieCoder = qieDataEndcapValues[i]; - auto const qieType = qieTypesEndcapValues[i].getValue() > 1 ? 1 : 0; - auto const& qieShape = qieData.getShape(qieType); - auto const off = offset + i; - - values_[off * 4] = unitIsADC ? convert(pedestalEndcapValues[i].getValue(0), 0, qieCoder, qieShape) - : pedestalEndcapValues[i].getValue(0); - values_[off * 4 + 1] = unitIsADC ? convert(pedestalEndcapValues[i].getValue(1), 1, qieCoder, qieShape) - : pedestalEndcapValues[i].getValue(1); - values_[off * 4 + 2] = unitIsADC ? convert(pedestalEndcapValues[i].getValue(2), 2, qieCoder, qieShape) - : pedestalEndcapValues[i].getValue(2); - values_[off * 4 + 3] = unitIsADC ? convert(pedestalEndcapValues[i].getValue(3), 3, qieCoder, qieShape) - : pedestalEndcapValues[i].getValue(3); - -#ifdef HCAL_MAHI_CPUDEBUG - if (pedestalEndcapValues[i].rawId() == DETID_TO_DEBUG) { - printf("qietype = %d\n", qieType); - printf("ped0 = %f ped1 = %f ped2 = %f ped3 = %f\n", - pedestalEndcapValues[i].getValue(0), - pedestalEndcapValues[i].getValue(1), - pedestalEndcapValues[i].getValue(2), - pedestalEndcapValues[i].getValue(3)); - printf("converted: ped0 = %f ped1 = %f ped2 = %f ped3 = %f\n", - values_[off * 4], - values_[off * 4 + 1], - values_[off * 4 + 2], - values_[off * 4 + 3]); - } -#endif - } -} - -HcalConvertedPedestalsGPU::Product const& HcalConvertedPedestalsGPU::getProduct(cudaStream_t stream) const { - auto const& product = product_.dataForCurrentDeviceAsync( - stream, [this](HcalConvertedPedestalsGPU::Product& product, cudaStream_t stream) { - // allocate - product.values = cms::cuda::make_device_unique(values_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.values, values_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalConvertedPedestalsGPU); diff --git a/CondFormats/HcalObjects/src/HcalGainWidthsGPU.cc b/CondFormats/HcalObjects/src/HcalGainWidthsGPU.cc deleted file mode 100644 index fc86ce24b8e2c..0000000000000 --- a/CondFormats/HcalObjects/src/HcalGainWidthsGPU.cc +++ /dev/null @@ -1,54 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalGainWidths.h" -#include "CondFormats/HcalObjects/interface/HcalGainWidthsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalGainWidthsGPU::HcalGainWidthsGPU(HcalGainWidths const& gains) - : totalChannels_{gains.getAllContainers()[0].second.size() + gains.getAllContainers()[1].second.size()}, - value0_(totalChannels_), - value1_(totalChannels_), - value2_(totalChannels_), - value3_(totalChannels_) { - auto const gainContainers = gains.getAllContainers(); - - // fill in eb - auto const& barrelValues = gainContainers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - value0_[i] = barrelValues[i].getValue(0); - value1_[i] = barrelValues[i].getValue(1); - value2_[i] = barrelValues[i].getValue(2); - value3_[i] = barrelValues[i].getValue(3); - } - - // fill in ee - auto const& endcapValues = gainContainers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - value0_[i + offset] = endcapValues[i].getValue(0); - value1_[i + offset] = endcapValues[i].getValue(1); - value2_[i + offset] = endcapValues[i].getValue(2); - value3_[i + offset] = endcapValues[i].getValue(3); - } -} - -HcalGainWidthsGPU::Product const& HcalGainWidthsGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalGainWidthsGPU::Product& product, cudaStream_t stream) { - // allocate - product.value0 = cms::cuda::make_device_unique(value0_.size(), stream); - product.value1 = cms::cuda::make_device_unique(value1_.size(), stream); - product.value2 = cms::cuda::make_device_unique(value2_.size(), stream); - product.value3 = cms::cuda::make_device_unique(value3_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.value0, value0_, stream); - cms::cuda::copyAsync(product.value1, value1_, stream); - cms::cuda::copyAsync(product.value2, value2_, stream); - cms::cuda::copyAsync(product.value3, value3_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalGainWidthsGPU); diff --git a/CondFormats/HcalObjects/src/HcalGainsGPU.cc b/CondFormats/HcalObjects/src/HcalGainsGPU.cc deleted file mode 100644 index 27f7d548aa7b2..0000000000000 --- a/CondFormats/HcalObjects/src/HcalGainsGPU.cc +++ /dev/null @@ -1,46 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalGains.h" -#include "CondFormats/HcalObjects/interface/HcalGainsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalGainsGPU::HcalGainsGPU(HcalGains const& gains) - : totalChannels_{gains.getAllContainers()[0].second.size() + gains.getAllContainers()[1].second.size()}, - values_(totalChannels_ * 4) { - auto const gainContainers = gains.getAllContainers(); - - // fill in eb - auto const& barrelValues = gainContainers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - values_[i * 4] = barrelValues[i].getValue(0); - values_[i * 4 + 1] = barrelValues[i].getValue(1); - values_[i * 4 + 2] = barrelValues[i].getValue(2); - values_[i * 4 + 3] = barrelValues[i].getValue(3); - } - - // fill in ee - auto const& endcapValues = gainContainers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - auto const off = offset + i; - values_[off * 4] = endcapValues[i].getValue(0); - values_[off * 4 + 1] = endcapValues[i].getValue(1); - values_[off * 4 + 2] = endcapValues[i].getValue(2); - values_[off * 4 + 3] = endcapValues[i].getValue(3); - } -} - -HcalGainsGPU::Product const& HcalGainsGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalGainsGPU::Product& product, cudaStream_t stream) { - // allocate - product.values = cms::cuda::make_device_unique(values_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.values, values_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalGainsGPU); diff --git a/CondFormats/HcalObjects/src/HcalLUTCorrsGPU.cc b/CondFormats/HcalObjects/src/HcalLUTCorrsGPU.cc deleted file mode 100644 index 889125e92783b..0000000000000 --- a/CondFormats/HcalObjects/src/HcalLUTCorrsGPU.cc +++ /dev/null @@ -1,38 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalLUTCorrs.h" -#include "CondFormats/HcalObjects/interface/HcalLUTCorrsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalLUTCorrsGPU::HcalLUTCorrsGPU(HcalLUTCorrs const& lutcorrs) - : value_(lutcorrs.getAllContainers()[0].second.size() + lutcorrs.getAllContainers()[1].second.size()) { - auto const containers = lutcorrs.getAllContainers(); - - // fill in eb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - value_[i] = barrelValues[i].getValue(); - } - - // fill in ee - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - value_[i + offset] = endcapValues[i].getValue(); - } -} - -HcalLUTCorrsGPU::Product const& HcalLUTCorrsGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalLUTCorrsGPU::Product& product, cudaStream_t stream) { - // allocate - product.value = cms::cuda::make_device_unique(value_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.value, value_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalLUTCorrsGPU); diff --git a/CondFormats/HcalObjects/src/HcalPedestalWidthsGPU.cc b/CondFormats/HcalObjects/src/HcalPedestalWidthsGPU.cc deleted file mode 100644 index 5e006aba764f8..0000000000000 --- a/CondFormats/HcalObjects/src/HcalPedestalWidthsGPU.cc +++ /dev/null @@ -1,121 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalPedestalWidths.h" -#include "CondFormats/HcalObjects/interface/HcalPedestalWidthsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalPedestalWidthsGPU::HcalPedestalWidthsGPU(HcalPedestalWidths const& pedestals) - : unitIsADC_{pedestals.isADC()}, - totalChannels_{pedestals.getAllContainers()[0].second.size() + pedestals.getAllContainers()[1].second.size()}, - sigma00_(totalChannels_), - sigma01_(totalChannels_), - sigma02_(totalChannels_), - sigma03_(totalChannels_), - sigma10_(totalChannels_), - sigma11_(totalChannels_), - sigma12_(totalChannels_), - sigma13_(totalChannels_), - sigma20_(totalChannels_), - sigma21_(totalChannels_), - sigma22_(totalChannels_), - sigma23_(totalChannels_), - sigma30_(totalChannels_), - sigma31_(totalChannels_), - sigma32_(totalChannels_), - sigma33_(totalChannels_) { - auto const containers = pedestals.getAllContainers(); - - // fill in hb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - sigma00_[i] = *(barrelValues[i].getValues() /* + 0 */); - sigma01_[i] = *(barrelValues[i].getValues() + 1); - sigma02_[i] = *(barrelValues[i].getValues() + 2); - sigma03_[i] = *(barrelValues[i].getValues() + 3); - sigma10_[i] = *(barrelValues[i].getValues() + 3); - sigma11_[i] = *(barrelValues[i].getValues() + 5); - sigma12_[i] = *(barrelValues[i].getValues() + 6); - sigma13_[i] = *(barrelValues[i].getValues() + 7); - sigma20_[i] = *(barrelValues[i].getValues() + 8); - sigma21_[i] = *(barrelValues[i].getValues() + 9); - sigma22_[i] = *(barrelValues[i].getValues() + 10); - sigma23_[i] = *(barrelValues[i].getValues() + 11); - sigma30_[i] = *(barrelValues[i].getValues() + 12); - sigma31_[i] = *(barrelValues[i].getValues() + 13); - sigma32_[i] = *(barrelValues[i].getValues() + 14); - sigma33_[i] = *(barrelValues[i].getValues() + 15); - } - - // fill in he - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - sigma00_[i + offset] = *(endcapValues[i].getValues() /* + 0 */); - sigma01_[i + offset] = *(endcapValues[i].getValues() + 1); - sigma02_[i + offset] = *(endcapValues[i].getValues() + 2); - sigma03_[i + offset] = *(endcapValues[i].getValues() + 3); - sigma10_[i + offset] = *(endcapValues[i].getValues() + 3); - sigma11_[i + offset] = *(endcapValues[i].getValues() + 5); - sigma12_[i + offset] = *(endcapValues[i].getValues() + 6); - sigma13_[i + offset] = *(endcapValues[i].getValues() + 7); - sigma20_[i + offset] = *(endcapValues[i].getValues() + 8); - sigma21_[i + offset] = *(endcapValues[i].getValues() + 9); - sigma22_[i + offset] = *(endcapValues[i].getValues() + 10); - sigma23_[i + offset] = *(endcapValues[i].getValues() + 11); - sigma30_[i + offset] = *(endcapValues[i].getValues() + 12); - sigma31_[i + offset] = *(endcapValues[i].getValues() + 13); - sigma32_[i + offset] = *(endcapValues[i].getValues() + 14); - sigma33_[i + offset] = *(endcapValues[i].getValues() + 15); - } -} - -HcalPedestalWidthsGPU::Product const& HcalPedestalWidthsGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalPedestalWidthsGPU::Product& product, cudaStream_t stream) { - // allocate - product.sigma00 = cms::cuda::make_device_unique(sigma00_.size(), stream); - product.sigma01 = cms::cuda::make_device_unique(sigma01_.size(), stream); - product.sigma02 = cms::cuda::make_device_unique(sigma02_.size(), stream); - product.sigma03 = cms::cuda::make_device_unique(sigma03_.size(), stream); - - product.sigma10 = cms::cuda::make_device_unique(sigma10_.size(), stream); - product.sigma11 = cms::cuda::make_device_unique(sigma11_.size(), stream); - product.sigma12 = cms::cuda::make_device_unique(sigma12_.size(), stream); - product.sigma13 = cms::cuda::make_device_unique(sigma13_.size(), stream); - - product.sigma20 = cms::cuda::make_device_unique(sigma20_.size(), stream); - product.sigma21 = cms::cuda::make_device_unique(sigma21_.size(), stream); - product.sigma22 = cms::cuda::make_device_unique(sigma22_.size(), stream); - product.sigma23 = cms::cuda::make_device_unique(sigma23_.size(), stream); - - product.sigma30 = cms::cuda::make_device_unique(sigma30_.size(), stream); - product.sigma31 = cms::cuda::make_device_unique(sigma31_.size(), stream); - product.sigma32 = cms::cuda::make_device_unique(sigma32_.size(), stream); - product.sigma33 = cms::cuda::make_device_unique(sigma33_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.sigma00, sigma00_, stream); - cms::cuda::copyAsync(product.sigma01, sigma01_, stream); - cms::cuda::copyAsync(product.sigma02, sigma02_, stream); - cms::cuda::copyAsync(product.sigma03, sigma03_, stream); - - cms::cuda::copyAsync(product.sigma10, sigma10_, stream); - cms::cuda::copyAsync(product.sigma11, sigma11_, stream); - cms::cuda::copyAsync(product.sigma12, sigma12_, stream); - cms::cuda::copyAsync(product.sigma13, sigma13_, stream); - - cms::cuda::copyAsync(product.sigma20, sigma20_, stream); - cms::cuda::copyAsync(product.sigma21, sigma21_, stream); - cms::cuda::copyAsync(product.sigma22, sigma22_, stream); - cms::cuda::copyAsync(product.sigma23, sigma23_, stream); - - cms::cuda::copyAsync(product.sigma30, sigma30_, stream); - cms::cuda::copyAsync(product.sigma31, sigma31_, stream); - cms::cuda::copyAsync(product.sigma32, sigma32_, stream); - cms::cuda::copyAsync(product.sigma33, sigma33_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalPedestalWidthsGPU); diff --git a/CondFormats/HcalObjects/src/HcalPedestalsGPU.cc b/CondFormats/HcalObjects/src/HcalPedestalsGPU.cc deleted file mode 100644 index 57088a4f39621..0000000000000 --- a/CondFormats/HcalObjects/src/HcalPedestalsGPU.cc +++ /dev/null @@ -1,65 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalPedestals.h" -#include "CondFormats/HcalObjects/interface/HcalPedestalsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalPedestalsGPU::HcalPedestalsGPU(HcalPedestals const& pedestals) - : unitIsADC_{pedestals.isADC()}, - totalChannels_{pedestals.getAllContainers()[0].second.size() + pedestals.getAllContainers()[1].second.size()}, - offsetForHashes_{static_cast(pedestals.getAllContainers()[0].second.size())}, - values_(totalChannels_ * 4), - widths_(totalChannels_ * 4) { -#ifdef HCAL_MAHI_CPUDEBUG - std::cout << "unitIsADC = " << unitIsADC_ << std::endl; -#endif - - auto const containers = pedestals.getAllContainers(); - - // fill in eb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - values_[i * 4] = barrelValues[i].getValue(0); - values_[i * 4 + 1] = barrelValues[i].getValue(1); - values_[i * 4 + 2] = barrelValues[i].getValue(2); - values_[i * 4 + 3] = barrelValues[i].getValue(3); - - widths_[i * 4] = barrelValues[i].getWidth(0); - widths_[i * 4 + 1] = barrelValues[i].getWidth(1); - widths_[i * 4 + 2] = barrelValues[i].getWidth(2); - widths_[i * 4 + 3] = barrelValues[i].getWidth(3); - } - - // fill in ee - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - auto const off = offset + i; - values_[off * 4] = endcapValues[i].getValue(0); - values_[off * 4 + 1] = endcapValues[i].getValue(1); - values_[off * 4 + 2] = endcapValues[i].getValue(2); - values_[off * 4 + 3] = endcapValues[i].getValue(3); - - widths_[off * 4] = endcapValues[i].getWidth(0); - widths_[off * 4 + 1] = endcapValues[i].getWidth(1); - widths_[off * 4 + 2] = endcapValues[i].getWidth(2); - widths_[off * 4 + 3] = endcapValues[i].getWidth(3); - } -} - -HcalPedestalsGPU::Product const& HcalPedestalsGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalPedestalsGPU::Product& product, cudaStream_t stream) { - // allocate - product.values = cms::cuda::make_device_unique(values_.size(), stream); - product.widths = cms::cuda::make_device_unique(widths_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.values, values_, stream); - cms::cuda::copyAsync(product.widths, widths_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalPedestalsGPU); diff --git a/CondFormats/HcalObjects/src/HcalQIECodersGPU.cc b/CondFormats/HcalObjects/src/HcalQIECodersGPU.cc deleted file mode 100644 index 2b9d9d4821e64..0000000000000 --- a/CondFormats/HcalObjects/src/HcalQIECodersGPU.cc +++ /dev/null @@ -1,51 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalQIECodersGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -HcalQIECodersGPU::HcalQIECodersGPU(HcalQIEData const& qiedata) - : totalChannels_{qiedata.getAllContainers()[0].second.size() + qiedata.getAllContainers()[1].second.size()}, - offsets_(totalChannels_ * numValuesPerChannel), - slopes_(totalChannels_ * numValuesPerChannel) { - auto const containers = qiedata.getAllContainers(); - - // fill in hb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - for (uint32_t k = 0; k < 4; k++) - for (uint32_t l = 0; l < 4; l++) { - auto const linear = k * 4 + l; - offsets_[i * numValuesPerChannel + linear] = barrelValues[i].offset(k, l); - slopes_[i * numValuesPerChannel + linear] = barrelValues[i].slope(k, l); - } - } - - // fill in he - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - auto const off = (i + offset) * numValuesPerChannel; - for (uint32_t k = 0; k < 4; k++) - for (uint32_t l = 0; l < 4; l++) { - auto const linear = k * 4u + l; - offsets_[off + linear] = endcapValues[i].offset(k, l); - slopes_[off + linear] = endcapValues[i].slope(k, l); - } - } -} - -HcalQIECodersGPU::Product const& HcalQIECodersGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalQIECodersGPU::Product& product, cudaStream_t stream) { - // allocate - product.offsets = cms::cuda::make_device_unique(offsets_.size(), stream); - product.slopes = cms::cuda::make_device_unique(slopes_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.offsets, offsets_, stream); - cms::cuda::copyAsync(product.slopes, slopes_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalQIECodersGPU); diff --git a/CondFormats/HcalObjects/src/HcalQIETypesGPU.cc b/CondFormats/HcalObjects/src/HcalQIETypesGPU.cc deleted file mode 100644 index 77a7bf81c33f0..0000000000000 --- a/CondFormats/HcalObjects/src/HcalQIETypesGPU.cc +++ /dev/null @@ -1,38 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalQIETypes.h" -#include "CondFormats/HcalObjects/interface/HcalQIETypesGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalQIETypesGPU::HcalQIETypesGPU(HcalQIETypes const& parameters) - : values_(parameters.getAllContainers()[0].second.size() + parameters.getAllContainers()[1].second.size()) { - auto const containers = parameters.getAllContainers(); - - // fill in eb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - values_[i] = barrelValues[i].getValue(); - } - - // fill in ee - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - values_[i + offset] = endcapValues[i].getValue(); - } -} - -HcalQIETypesGPU::Product const& HcalQIETypesGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalQIETypesGPU::Product& product, cudaStream_t stream) { - // allocate - product.values = cms::cuda::make_device_unique(values_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.values, values_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalQIETypesGPU); diff --git a/CondFormats/HcalObjects/src/HcalRecoParamsGPU.cc b/CondFormats/HcalObjects/src/HcalRecoParamsGPU.cc deleted file mode 100644 index 195028acf5746..0000000000000 --- a/CondFormats/HcalObjects/src/HcalRecoParamsGPU.cc +++ /dev/null @@ -1,44 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalRecoParams.h" -#include "CondFormats/HcalObjects/interface/HcalRecoParamsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalRecoParamsGPU::HcalRecoParamsGPU(HcalRecoParams const& recoParams) - : totalChannels_{recoParams.getAllContainers()[0].second.size() + recoParams.getAllContainers()[1].second.size()}, - param1_(totalChannels_), - param2_(totalChannels_) { - auto const& containers = recoParams.getAllContainers(); - - // fill in eb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - param1_[i] = barrelValues[i].param1(); - param2_[i] = barrelValues[i].param2(); - } - - // fill in ee - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - param1_[i + offset] = endcapValues[i].param1(); - param2_[i + offset] = endcapValues[i].param2(); - } -} - -HcalRecoParamsGPU::Product const& HcalRecoParamsGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalRecoParamsGPU::Product& product, cudaStream_t stream) { - // allocate - product.param1 = cms::cuda::make_device_unique(param1_.size(), stream); - product.param2 = cms::cuda::make_device_unique(param2_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.param1, param1_, stream); - cms::cuda::copyAsync(product.param2, param2_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalRecoParamsGPU); diff --git a/CondFormats/HcalObjects/src/HcalRespCorrsGPU.cc b/CondFormats/HcalObjects/src/HcalRespCorrsGPU.cc deleted file mode 100644 index f688191352353..0000000000000 --- a/CondFormats/HcalObjects/src/HcalRespCorrsGPU.cc +++ /dev/null @@ -1,38 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalRespCorrs.h" -#include "CondFormats/HcalObjects/interface/HcalRespCorrsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalRespCorrsGPU::HcalRespCorrsGPU(HcalRespCorrs const& respcorrs) - : values_(respcorrs.getAllContainers()[0].second.size() + respcorrs.getAllContainers()[1].second.size()) { - auto const containers = respcorrs.getAllContainers(); - - // fill in eb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - values_[i] = barrelValues[i].getValue(); - } - - // fill in ee - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - values_[i + offset] = endcapValues[i].getValue(); - } -} - -HcalRespCorrsGPU::Product const& HcalRespCorrsGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalRespCorrsGPU::Product& product, cudaStream_t stream) { - // allocate - product.values = cms::cuda::make_device_unique(values_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.values, values_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalRespCorrsGPU); diff --git a/CondFormats/HcalObjects/src/HcalSiPMCharacteristicsGPU.cc b/CondFormats/HcalObjects/src/HcalSiPMCharacteristicsGPU.cc deleted file mode 100644 index 059eb8617c37d..0000000000000 --- a/CondFormats/HcalObjects/src/HcalSiPMCharacteristicsGPU.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristics.h" -#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsGPU.h" -#include "FWCore/Utilities/interface/Exception.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -HcalSiPMCharacteristicsGPU::HcalSiPMCharacteristicsGPU(HcalSiPMCharacteristics const& parameters) - : pixels_(parameters.getTypes()), - auxi1_(parameters.getTypes()), - parLin1_(parameters.getTypes()), - parLin2_(parameters.getTypes()), - parLin3_(parameters.getTypes()), - crossTalk_(parameters.getTypes()), - auxi2_(parameters.getTypes()) { - for (uint32_t i = 0; i < parameters.getTypes(); i++) { - auto const type = parameters.getType(i); -#ifdef HCAL_MAHI_CPUDEBUG - printf("index = %u type = %d\n", i, type); -#endif - - // for now... - if (static_cast(type) != i + 1) - throw cms::Exception("HcalSiPMCharacteristics") - << "Wrong assumption for HcalSiPMcharacteristics type values, " - << "should be type value <- type index + 1" << std::endl - << "Observed type value = " << type << " and index = " << i << std::endl; - - pixels_[i] = parameters.getPixels(type); - auxi1_[i] = parameters.getAuxi1(type); - parLin1_[i] = parameters.getNonLinearities(type)[0]; - parLin2_[i] = parameters.getNonLinearities(type)[1]; - parLin3_[i] = parameters.getNonLinearities(type)[2]; - crossTalk_[i] = parameters.getCrossTalk(type); - auxi2_[i] = parameters.getAuxi2(type); - } -} - -HcalSiPMCharacteristicsGPU::Product const& HcalSiPMCharacteristicsGPU::getProduct(cudaStream_t stream) const { - auto const& product = product_.dataForCurrentDeviceAsync( - stream, [this](HcalSiPMCharacteristicsGPU::Product& product, cudaStream_t stream) { - // allocate - product.pixels = cms::cuda::make_device_unique(pixels_.size(), stream); - product.auxi1 = cms::cuda::make_device_unique(auxi1_.size(), stream); - product.parLin1 = cms::cuda::make_device_unique(parLin1_.size(), stream); - product.parLin2 = cms::cuda::make_device_unique(parLin2_.size(), stream); - product.parLin3 = cms::cuda::make_device_unique(parLin3_.size(), stream); - product.crossTalk = cms::cuda::make_device_unique(crossTalk_.size(), stream); - product.auxi2 = cms::cuda::make_device_unique(auxi2_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.pixels, pixels_, stream); - cms::cuda::copyAsync(product.auxi1, auxi1_, stream); - cms::cuda::copyAsync(product.parLin1, parLin1_, stream); - cms::cuda::copyAsync(product.parLin2, parLin2_, stream); - cms::cuda::copyAsync(product.parLin3, parLin3_, stream); - cms::cuda::copyAsync(product.crossTalk, crossTalk_, stream); - cms::cuda::copyAsync(product.auxi2, auxi2_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalSiPMCharacteristicsGPU); diff --git a/CondFormats/HcalObjects/src/HcalSiPMParametersGPU.cc b/CondFormats/HcalObjects/src/HcalSiPMParametersGPU.cc deleted file mode 100644 index 88120e02e54b5..0000000000000 --- a/CondFormats/HcalObjects/src/HcalSiPMParametersGPU.cc +++ /dev/null @@ -1,61 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalSiPMParameters.h" -#include "CondFormats/HcalObjects/interface/HcalSiPMParametersGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -HcalSiPMParametersGPU::HcalSiPMParametersGPU(HcalSiPMParameters const& parameters) - : totalChannels_{parameters.getAllContainers()[0].second.size() + parameters.getAllContainers()[1].second.size()}, - type_(totalChannels_), - auxi1_(totalChannels_), - fcByPE_(totalChannels_), - darkCurrent_(totalChannels_), - auxi2_(totalChannels_) { - auto const containers = parameters.getAllContainers(); - - // fill in eb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - auto const& item = barrelValues[i]; - type_[i] = item.getType(); - auxi1_[i] = item.getauxi1(); - fcByPE_[i] = item.getFCByPE(); - darkCurrent_[i] = item.getDarkCurrent(); - auxi2_[i] = item.getauxi2(); - } - - // fill in ee - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - auto const off = offset + i; - auto const& item = endcapValues[i]; - type_[off] = item.getType(); - auxi1_[off] = item.getauxi1(); - fcByPE_[off] = item.getFCByPE(); - darkCurrent_[off] = item.getDarkCurrent(); - auxi2_[off] = item.getauxi2(); - } -} - -HcalSiPMParametersGPU::Product const& HcalSiPMParametersGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalSiPMParametersGPU::Product& product, cudaStream_t stream) { - // allocate - product.type = cms::cuda::make_device_unique(type_.size(), stream); - product.auxi1 = cms::cuda::make_device_unique(auxi1_.size(), stream); - product.fcByPE = cms::cuda::make_device_unique(fcByPE_.size(), stream); - product.darkCurrent = cms::cuda::make_device_unique(darkCurrent_.size(), stream); - product.auxi2 = cms::cuda::make_device_unique(auxi2_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.type, type_, stream); - cms::cuda::copyAsync(product.auxi1, auxi1_, stream); - cms::cuda::copyAsync(product.fcByPE, fcByPE_, stream); - cms::cuda::copyAsync(product.darkCurrent, darkCurrent_, stream); - cms::cuda::copyAsync(product.auxi2, auxi2_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalSiPMParametersGPU); diff --git a/CondFormats/HcalObjects/src/HcalTimeCorrsGPU.cc b/CondFormats/HcalObjects/src/HcalTimeCorrsGPU.cc deleted file mode 100644 index 02ce05132479c..0000000000000 --- a/CondFormats/HcalObjects/src/HcalTimeCorrsGPU.cc +++ /dev/null @@ -1,38 +0,0 @@ -#include "CondFormats/HcalObjects/interface/HcalTimeCorrs.h" -#include "CondFormats/HcalObjects/interface/HcalTimeCorrsGPU.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" - -// FIXME: add proper getters to conditions -HcalTimeCorrsGPU::HcalTimeCorrsGPU(HcalTimeCorrs const& timecorrs) - : value_(timecorrs.getAllContainers()[0].second.size() + timecorrs.getAllContainers()[1].second.size()) { - auto const containers = timecorrs.getAllContainers(); - - // fill in eb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - value_[i] = barrelValues[i].getValue(); - } - - // fill in ee - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - value_[i + offset] = endcapValues[i].getValue(); - } -} - -HcalTimeCorrsGPU::Product const& HcalTimeCorrsGPU::getProduct(cudaStream_t stream) const { - auto const& product = - product_.dataForCurrentDeviceAsync(stream, [this](HcalTimeCorrsGPU::Product& product, cudaStream_t stream) { - // allocate - product.value = cms::cuda::make_device_unique(value_.size(), stream); - - // transfer - cms::cuda::copyAsync(product.value, value_, stream); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalTimeCorrsGPU); diff --git a/RecoLocalCalo/Configuration/python/hcalGlobalReco_cff.py b/RecoLocalCalo/Configuration/python/hcalGlobalReco_cff.py index 078e8f2804f60..c26e9f93cc7f0 100644 --- a/RecoLocalCalo/Configuration/python/hcalGlobalReco_cff.py +++ b/RecoLocalCalo/Configuration/python/hcalGlobalReco_cff.py @@ -1,14 +1,10 @@ import FWCore.ParameterSet.Config as cms -from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA #--- for Run 1 and Run 2 from RecoLocalCalo.HcalRecProducers.HBHEIsolatedNoiseReflagger_cfi import hbhereco as _phase0_hbhereco -hbhereco = SwitchProducerCUDA( - cpu = _phase0_hbhereco.clone() -) +hbhereco = _phase0_hbhereco.clone() hbherecoLegacy = _phase0_hbhereco.clone() - hcalGlobalRecoTask = cms.Task(hbhereco) hcalGlobalRecoSequence = cms.Sequence(hcalGlobalRecoTask) @@ -22,26 +18,17 @@ from Configuration.Eras.Modifier_run3_HB_cff import run3_HB from RecoLocalCalo.HcalRecProducers.HBHEPhase1Reconstructor_cfi import hbheprereco as _phase1_hbheprereco -run3_HB.toReplaceWith(hbhereco.cpu, _phase1_hbheprereco) +run3_HB.toReplaceWith(hbhereco, _phase1_hbheprereco) run3_HB.toReplaceWith(hcalOnlyGlobalRecoTask, cms.Task(hbhereco)) run3_HB.toReplaceWith(hbherecoLegacy, _phase1_hbheprereco) run3_HB.toReplaceWith(hcalOnlyLegacyGlobalRecoTask, cms.Task(hbherecoLegacy)) - #--- for Run 3 on GPU -from Configuration.ProcessModifiers.gpu_cff import gpu from Configuration.ProcessModifiers.alpaka_cff import alpaka -from RecoLocalCalo.HcalRecProducers.hcalCPURecHitsProducer_cfi import hcalCPURecHitsProducer as _hbherecoFromCUDA -(run3_HB & gpu).toModify(hbhereco, - cuda = _hbherecoFromCUDA.clone( - produceSoA = False - ) -) - from RecoLocalCalo.HcalRecProducers.hcalRecHitSoAToLegacy_cfi import hcalRecHitSoAToLegacy -(alpaka & run3_HB).toModify(hbhereco, - cpu = hcalRecHitSoAToLegacy.clone( +(alpaka & run3_HB).toReplaceWith(hbhereco, + hcalRecHitSoAToLegacy.clone( src = ("hbheRecHitProducerPortable","") ) ) diff --git a/RecoLocalCalo/Configuration/python/hcalLocalReco_cff.py b/RecoLocalCalo/Configuration/python/hcalLocalReco_cff.py index 1b797e466f76e..2d0043f28e160 100644 --- a/RecoLocalCalo/Configuration/python/hcalLocalReco_cff.py +++ b/RecoLocalCalo/Configuration/python/hcalLocalReco_cff.py @@ -1,18 +1,15 @@ import FWCore.ParameterSet.Config as cms -from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA from RecoLocalCalo.HcalRecAlgos.hcalRecAlgoESProd_cfi import * from RecoLocalCalo.HcalRecAlgos.hcalChannelPropertiesESProd_cfi import * hcalOOTPileupESProducer = cms.ESProducer('OOTPileupDBCompatibilityESProducer') from RecoLocalCalo.HcalRecProducers.HBHEPhase1Reconstructor_cfi import hbheprereco as _phase1_hbheprereco -hbheprereco = SwitchProducerCUDA( - cpu = _phase1_hbheprereco.clone( - processQIE11 = False, - tsFromDB = True, - pulseShapeParametersQIE8 = dict( - TrianglePeakTS = 4, - ) +hbheprereco = _phase1_hbheprereco.clone( + processQIE11 = False, + tsFromDB = True, + pulseShapeParametersQIE8 = dict( + TrianglePeakTS = 4, ) ) @@ -37,9 +34,7 @@ run2_HF_2017.toReplaceWith(hcalLocalRecoTask, _phase1_hcalLocalRecoTask) run2_HF_2017.toReplaceWith(hfreco, _phase1_hfreco) from Configuration.Eras.Modifier_run2_HCAL_2017_cff import run2_HCAL_2017 -run2_HCAL_2017.toModify(hbheprereco, - cpu = _phase1_hbheprereco.clone() -) +run2_HCAL_2017.toReplaceWith(hbheprereco, _phase1_hbheprereco.clone()) _plan1_hcalLocalRecoTask = _phase1_hcalLocalRecoTask.copy() _plan1_hcalLocalRecoTask.add(hbheplan1) @@ -53,7 +48,7 @@ run2_HECollapse_2018.toReplaceWith(hcalLocalRecoTask, _collapse_hcalLocalRecoTask) #--- Legacy HCAL Only Task -hbheprerecoLegacy = hbheprereco.cpu.clone() +hbheprerecoLegacy = hbheprereco.clone() hcalOnlyLegacyLocalRecoTask = hcalLocalRecoTask.copyAndExclude([zdcreco,hbheprereco]) hcalOnlyLegacyLocalRecoTask.add(hbheprerecoLegacy) @@ -67,14 +62,6 @@ from Configuration.Eras.Modifier_run3_common_cff import run3_common run3_common.toReplaceWith(hcalLocalRecoTask, _run3_hcalLocalRecoTask) -#--- for Run 3 on GPU -from Configuration.ProcessModifiers.gpu_cff import gpu - -from RecoLocalCalo.HcalRecProducers.hbheRecHitProducerGPUTask_cff import * -_run3_hcalLocalRecoGPUTask = hcalLocalRecoTask.copy() -_run3_hcalLocalRecoGPUTask.add(hbheRecHitProducerGPUTask) -gpu.toReplaceWith(hcalLocalRecoTask, _run3_hcalLocalRecoGPUTask) - #--- for alpaka from Configuration.ProcessModifiers.alpaka_cff import alpaka from RecoLocalCalo.HcalRecProducers.hbheRecHitProducerPortableTask_cff import * @@ -87,19 +74,10 @@ #--- HCAL-only workflow for Run 2 on GPU from Configuration.Eras.Modifier_run3_HB_cff import run3_HB -from RecoLocalCalo.HcalRecProducers.hcalCPURecHitsProducer_cfi import hcalCPURecHitsProducer as _hbheprerecoFromCUDA -(gpu & ~run3_HB).toModify(hbheprereco, - cuda = _hbheprerecoFromCUDA.clone( - produceSoA = False - ) -) -#--- HCAL-only workflow for Run 2 on GPU -from RecoLocalCalo.HcalRecProducers.hcalRecHitSoAToLegacy_cfi import hcalRecHitSoAToLegacy -(alpaka & ~run3_HB).toModify(hbheprereco, - cpu = hcalRecHitSoAToLegacy.clone() -) +from RecoLocalCalo.HcalRecProducers.hcalRecHitSoAToLegacy_cfi import hcalRecHitSoAToLegacy +(alpaka & ~run3_HB).toReplaceWith(hbheprereco, hcalRecHitSoAToLegacy.clone()) #--- for FastSim _fastSim_hcalLocalRecoTask = hcalLocalRecoTask.copyAndExclude([zdcreco,zdcrecoRun3]) from Configuration.Eras.Modifier_fastSim_cff import fastSim -fastSim.toReplaceWith( hcalLocalRecoTask, _fastSim_hcalLocalRecoTask ) +fastSim.toReplaceWith(hcalLocalRecoTask, _fastSim_hcalLocalRecoTask) diff --git a/RecoLocalCalo/HcalRecAlgos/BuildFile.xml b/RecoLocalCalo/HcalRecAlgos/BuildFile.xml index 85da8e5e3980f..172c6920b8c31 100644 --- a/RecoLocalCalo/HcalRecAlgos/BuildFile.xml +++ b/RecoLocalCalo/HcalRecAlgos/BuildFile.xml @@ -1,6 +1,5 @@ - @@ -14,8 +13,6 @@ - - diff --git a/RecoLocalCalo/HcalRecAlgos/interface/HcalMahiPulseOffsetsGPU.h b/RecoLocalCalo/HcalRecAlgos/interface/HcalMahiPulseOffsetsGPU.h deleted file mode 100644 index 9be49a8f7bd2d..0000000000000 --- a/RecoLocalCalo/HcalRecAlgos/interface/HcalMahiPulseOffsetsGPU.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef RecoLocalCalo_HcalRecAlgos_interface_HcalMahiPulseOffsetsGPU_h -#define RecoLocalCalo_HcalRecAlgos_interface_HcalMahiPulseOffsetsGPU_h - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalMahiPulseOffsetsGPU { -public: - struct Product { - ~Product(); - int* values; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalMahiPulseOffsetsGPU(std::vector const& values); - - // will trigger deallocation of Product thru ~Product - ~HcalMahiPulseOffsetsGPU() = default; - - std::vector> const& getValues() const { return values_; } - - // get device pointers - Product const& getProduct(cudaStream_t) const; - -private: - std::vector> values_; - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/RecoLocalCalo/HcalRecAlgos/interface/HcalRecoParamsWithPulseShapesGPU.h b/RecoLocalCalo/HcalRecAlgos/interface/HcalRecoParamsWithPulseShapesGPU.h deleted file mode 100644 index 965fb873bcf88..0000000000000 --- a/RecoLocalCalo/HcalRecAlgos/interface/HcalRecoParamsWithPulseShapesGPU.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef RecoLocalCalo_HcalRecAlgos_interface_HcalRecoParamsWithPulseShapesGPU_h -#define RecoLocalCalo_HcalRecAlgos_interface_HcalRecoParamsWithPulseShapesGPU_h - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#endif - -class HcalRecoParams; - -// -// TODO: HcalPulseShapes will need to be used via ESSource -// This is a workaround: precompute/store/transfer what's needed only -// -class HcalRecoParamsWithPulseShapesGPU { -public: - struct Product { - ~Product(); - uint32_t *param1 = nullptr, *param2 = nullptr; - uint32_t *ids = nullptr; - - // These guys come directly from PulseShapeFunctor class - float *acc25nsVec = nullptr, *diff25nsItvlVec = nullptr, *accVarLenIdxMinusOneVec = nullptr, - *diffVarItvlIdxMinusOneVec = nullptr, *accVarLenIdxZEROVec = nullptr, *diffVarItvlIdxZEROVec = nullptr; - }; - -#ifndef __CUDACC__ - // rearrange reco params - HcalRecoParamsWithPulseShapesGPU(HcalRecoParams const &); - - // will trigger deallocation of Product thru ~Product - ~HcalRecoParamsWithPulseShapesGPU() = default; - - // get device pointers - Product const &getProduct(cudaStream_t) const; - -private: - uint64_t totalChannels_; // hb + he - std::vector> param1_; - std::vector> param2_; - std::vector> ids_; - - std::vector> acc25nsVec_; // 256 - std::vector> diff25nsItvlVec_; // 256 - std::vector> accVarLenIdxMinusOneVec_; // 25 - std::vector> diffVarItvlIdxMinusOneVec_; // 25 - std::vector> accVarLenIdxZEROVec_; // 25 - std::vector> diffVarItvlIdxZEROVec_; // 25 - - cms::cuda::ESProduct product_; -#endif -}; - -#endif diff --git a/RecoLocalCalo/HcalRecAlgos/src/HcalMahiPulseOffsetsGPU.cc b/RecoLocalCalo/HcalRecAlgos/src/HcalMahiPulseOffsetsGPU.cc deleted file mode 100644 index d36d00600f7a0..0000000000000 --- a/RecoLocalCalo/HcalRecAlgos/src/HcalMahiPulseOffsetsGPU.cc +++ /dev/null @@ -1,34 +0,0 @@ -#include "RecoLocalCalo/HcalRecAlgos/interface/HcalMahiPulseOffsetsGPU.h" - -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -// FIXME: add proper getters to conditions -HcalMahiPulseOffsetsGPU::HcalMahiPulseOffsetsGPU(std::vector const& values) { - values_.resize(values.size()); - std::copy(values.begin(), values.end(), values_.begin()); -} - -HcalMahiPulseOffsetsGPU::Product::~Product() { - // deallocation - cudaCheck(cudaFree(values)); -} - -HcalMahiPulseOffsetsGPU::Product const& HcalMahiPulseOffsetsGPU::getProduct(cudaStream_t cudaStream) const { - auto const& product = product_.dataForCurrentDeviceAsync( - cudaStream, [this](HcalMahiPulseOffsetsGPU::Product& product, cudaStream_t cudaStream) { - // malloc - cudaCheck(cudaMalloc((void**)&product.values, this->values_.size() * sizeof(int))); - - // transfer - cudaCheck(cudaMemcpyAsync(product.values, - this->values_.data(), - this->values_.size() * sizeof(int), - cudaMemcpyHostToDevice, - cudaStream)); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalMahiPulseOffsetsGPU); diff --git a/RecoLocalCalo/HcalRecAlgos/src/HcalRecoParamsWithPulseShapesGPU.cc b/RecoLocalCalo/HcalRecAlgos/src/HcalRecoParamsWithPulseShapesGPU.cc deleted file mode 100644 index b42621b98908e..0000000000000 --- a/RecoLocalCalo/HcalRecAlgos/src/HcalRecoParamsWithPulseShapesGPU.cc +++ /dev/null @@ -1,222 +0,0 @@ -#include "RecoLocalCalo/HcalRecAlgos/interface/HcalRecoParamsWithPulseShapesGPU.h" - -#include "CondFormats/HcalObjects/interface/HcalRecoParams.h" -#include "CalibCalorimetry/HcalAlgos/interface/HcalPulseShapes.h" -#include "RecoLocalCalo/HcalRecAlgos/interface/PulseShapeFunctor.h" - -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -#include - -// FIXME: add proper getters to conditions -HcalRecoParamsWithPulseShapesGPU::HcalRecoParamsWithPulseShapesGPU(HcalRecoParams const& recoParams) - : totalChannels_{recoParams.getAllContainers()[0].second.size() + recoParams.getAllContainers()[1].second.size()}, - param1_(totalChannels_), - param2_(totalChannels_), - ids_(totalChannels_) { -#ifdef HCAL_MAHI_CPUDEBUG - printf("hello from a reco params with pulse shapes\n"); -#endif - - auto const containers = recoParams.getAllContainers(); - - HcalPulseShapes pulseShapes; - std::unordered_map idCache; - - // fill in eb - auto const& barrelValues = containers[0].second; - for (uint64_t i = 0; i < barrelValues.size(); ++i) { - param1_[i] = barrelValues[i].param1(); - param2_[i] = barrelValues[i].param2(); - - auto const pulseShapeId = barrelValues[i].pulseShapeID(); - // FIXME: 0 throws upon look up to HcalPulseShapes - // although comments state that 0 is reserved, - // HcalPulseShapes::getShape throws on 0! - if (pulseShapeId == 0) { - ids_[i] = 0; - continue; - } - if (auto const iter = idCache.find(pulseShapeId); iter == idCache.end()) { - // new guy - auto const newId = idCache.size(); - idCache[pulseShapeId] = newId; - // this will be the id - ids_[i] = newId; - - // resize value arrays - acc25nsVec_.resize(acc25nsVec_.size() + hcal::constants::maxPSshapeBin); - diff25nsItvlVec_.resize(diff25nsItvlVec_.size() + hcal::constants::maxPSshapeBin); - accVarLenIdxMinusOneVec_.resize(accVarLenIdxMinusOneVec_.size() + hcal::constants::nsPerBX); - diffVarItvlIdxMinusOneVec_.resize(diffVarItvlIdxMinusOneVec_.size() + hcal::constants::nsPerBX); - accVarLenIdxZEROVec_.resize(accVarLenIdxZEROVec_.size() + hcal::constants::nsPerBX); - diffVarItvlIdxZEROVec_.resize(diffVarItvlIdxZEROVec_.size() + hcal::constants::nsPerBX); - - // precompute and get values from the functor - auto const& pulseShape = pulseShapes.getShape(pulseShapeId); - FitterFuncs::PulseShapeFunctor functor{pulseShape, false, false, false, 1, 0, 0, hcal::constants::maxSamples}; - auto const offset256 = newId * hcal::constants::maxPSshapeBin; - auto const offset25 = newId * hcal::constants::nsPerBX; - auto const numShapes = newId; - for (int i = 0; i < hcal::constants::maxPSshapeBin; i++) { - acc25nsVec_[offset256 * numShapes + i] = functor.acc25nsVec()[i]; - diff25nsItvlVec_[offset256 * numShapes + i] = functor.diff25nsItvlVec()[i]; - } - - for (int i = 0; i < hcal::constants::nsPerBX; i++) { - accVarLenIdxMinusOneVec_[offset25 * numShapes + i] = functor.accVarLenIdxMinusOneVec()[i]; - diffVarItvlIdxMinusOneVec_[offset25 * numShapes + i] = functor.diffVarItvlIdxMinusOneVec()[i]; - accVarLenIdxZEROVec_[offset25 * numShapes + i] = functor.accVarLenIdxZEROVec()[i]; - diffVarItvlIdxZEROVec_[offset25 * numShapes + i] = functor.diffVarItvlIdxZEROVec()[i]; - } - } else { - // already recorded this pulse shape, just set id - ids_[i] = iter->second; - } -#ifdef HCAL_MAHI_CPUDEBUG - if (barrelValues[i].rawId() == DETID_TO_DEBUG) { - printf("recoShapeId = %u myid = %u\n", pulseShapeId, ids_[i]); - } -#endif - } - - // fill in ee - auto const& endcapValues = containers[1].second; - auto const offset = barrelValues.size(); - for (uint64_t i = 0; i < endcapValues.size(); ++i) { - param1_[i + offset] = endcapValues[i].param1(); - param2_[i + offset] = endcapValues[i].param2(); - - auto const pulseShapeId = endcapValues[i].pulseShapeID(); - // FIXME: 0 throws upon look up to HcalPulseShapes - // although comments state that 0 is reserved, - // HcalPulseShapes::getShape throws on 0! - if (pulseShapeId == 0) { - ids_[i + offset] = 0; - continue; - } - if (auto const iter = idCache.find(pulseShapeId); iter == idCache.end()) { - // new guy - auto const newId = idCache.size(); - idCache[pulseShapeId] = newId; - // this will be the id - ids_[i + offset] = newId; - - // resize value arrays - acc25nsVec_.resize(acc25nsVec_.size() + hcal::constants::maxPSshapeBin); - diff25nsItvlVec_.resize(diff25nsItvlVec_.size() + hcal::constants::maxPSshapeBin); - accVarLenIdxMinusOneVec_.resize(accVarLenIdxMinusOneVec_.size() + hcal::constants::nsPerBX); - diffVarItvlIdxMinusOneVec_.resize(diffVarItvlIdxMinusOneVec_.size() + hcal::constants::nsPerBX); - accVarLenIdxZEROVec_.resize(accVarLenIdxZEROVec_.size() + hcal::constants::nsPerBX); - diffVarItvlIdxZEROVec_.resize(diffVarItvlIdxZEROVec_.size() + hcal::constants::nsPerBX); - - // precompute and get values from the functor - auto const& pulseShape = pulseShapes.getShape(pulseShapeId); - FitterFuncs::PulseShapeFunctor functor{pulseShape, false, false, false, 1, 0, 0, hcal::constants::maxSamples}; - auto const offset256 = newId * hcal::constants::maxPSshapeBin; - auto const offset25 = newId * hcal::constants::nsPerBX; - auto const numShapes = newId; - for (int i = 0; i < hcal::constants::maxPSshapeBin; i++) { - acc25nsVec_[offset256 * numShapes + i] = functor.acc25nsVec()[i]; - diff25nsItvlVec_[offset256 * numShapes + i] = functor.diff25nsItvlVec()[i]; - } - - for (int i = 0; i < hcal::constants::nsPerBX; i++) { - accVarLenIdxMinusOneVec_[offset25 * numShapes + i] = functor.accVarLenIdxMinusOneVec()[i]; - diffVarItvlIdxMinusOneVec_[offset25 * numShapes + i] = functor.diffVarItvlIdxMinusOneVec()[i]; - accVarLenIdxZEROVec_[offset25 * numShapes + i] = functor.accVarLenIdxZEROVec()[i]; - diffVarItvlIdxZEROVec_[offset25 * numShapes + i] = functor.diffVarItvlIdxZEROVec()[i]; - } - } else { - // already recorded this pulse shape, just set id - ids_[i + offset] = iter->second; - } - } - -#ifdef HCAL_MAHI_CPUDEBUG - for (auto const& p : idCache) - printf("recoPulseShapeId = %u id = %u\n", p.first, p.second); -#endif -} - -HcalRecoParamsWithPulseShapesGPU::Product::~Product() { - // deallocation - cudaCheck(cudaFree(param1)); - cudaCheck(cudaFree(param2)); - cudaCheck(cudaFree(ids)); - cudaCheck(cudaFree(acc25nsVec)); - cudaCheck(cudaFree(diff25nsItvlVec)); - cudaCheck(cudaFree(accVarLenIdxMinusOneVec)); - cudaCheck(cudaFree(diffVarItvlIdxMinusOneVec)); - cudaCheck(cudaFree(accVarLenIdxZEROVec)); - cudaCheck(cudaFree(diffVarItvlIdxZEROVec)); -} - -HcalRecoParamsWithPulseShapesGPU::Product const& HcalRecoParamsWithPulseShapesGPU::getProduct( - cudaStream_t cudaStream) const { - auto const& product = product_.dataForCurrentDeviceAsync( - cudaStream, [this](HcalRecoParamsWithPulseShapesGPU::Product& product, cudaStream_t cudaStream) { - // malloc - cudaCheck(cudaMalloc((void**)&product.param1, this->param1_.size() * sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**)&product.param2, this->param2_.size() * sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**)&product.ids, this->ids_.size() * sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**)&product.acc25nsVec, this->acc25nsVec_.size() * sizeof(float))); - cudaCheck(cudaMalloc((void**)&product.diff25nsItvlVec, this->diff25nsItvlVec_.size() * sizeof(float))); - cudaCheck(cudaMalloc((void**)&product.accVarLenIdxMinusOneVec, - this->accVarLenIdxMinusOneVec_.size() * sizeof(float))); - cudaCheck(cudaMalloc((void**)&product.diffVarItvlIdxMinusOneVec, - this->diffVarItvlIdxMinusOneVec_.size() * sizeof(float))); - cudaCheck(cudaMalloc((void**)&product.accVarLenIdxZEROVec, this->accVarLenIdxZEROVec_.size() * sizeof(float))); - cudaCheck( - cudaMalloc((void**)&product.diffVarItvlIdxZEROVec, this->diffVarItvlIdxZEROVec_.size() * sizeof(float))); - - // transfer - cudaCheck(cudaMemcpyAsync(product.param1, - this->param1_.data(), - this->param1_.size() * sizeof(uint32_t), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck(cudaMemcpyAsync(product.param2, - this->param2_.data(), - this->param2_.size() * sizeof(uint32_t), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck(cudaMemcpyAsync( - product.ids, this->ids_.data(), this->ids_.size() * sizeof(uint32_t), cudaMemcpyHostToDevice, cudaStream)); - cudaCheck(cudaMemcpyAsync(product.acc25nsVec, - this->acc25nsVec_.data(), - this->acc25nsVec_.size() * sizeof(float), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck(cudaMemcpyAsync(product.diff25nsItvlVec, - this->diff25nsItvlVec_.data(), - this->diff25nsItvlVec_.size() * sizeof(float), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck(cudaMemcpyAsync(product.accVarLenIdxMinusOneVec, - this->accVarLenIdxMinusOneVec_.data(), - this->accVarLenIdxMinusOneVec_.size() * sizeof(float), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck(cudaMemcpyAsync(product.diffVarItvlIdxMinusOneVec, - this->diffVarItvlIdxMinusOneVec_.data(), - this->diffVarItvlIdxMinusOneVec_.size() * sizeof(float), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck(cudaMemcpyAsync(product.accVarLenIdxZEROVec, - this->accVarLenIdxZEROVec_.data(), - this->accVarLenIdxZEROVec_.size() * sizeof(float), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck(cudaMemcpyAsync(product.diffVarItvlIdxZEROVec, - this->diffVarItvlIdxZEROVec_.data(), - this->diffVarItvlIdxZEROVec_.size() * sizeof(float), - cudaMemcpyHostToDevice, - cudaStream)); - }); - - return product; -} - -TYPELOOKUP_DATA_REG(HcalRecoParamsWithPulseShapesGPU); diff --git a/RecoLocalCalo/HcalRecProducers/BuildFile.xml b/RecoLocalCalo/HcalRecProducers/BuildFile.xml index c493d2a6f72af..6a33febef85e0 100644 --- a/RecoLocalCalo/HcalRecProducers/BuildFile.xml +++ b/RecoLocalCalo/HcalRecProducers/BuildFile.xml @@ -1,7 +1,4 @@ - - - @@ -14,12 +11,6 @@ - - - - - - diff --git a/RecoLocalCalo/HcalRecProducers/bin/BuildFile.xml b/RecoLocalCalo/HcalRecProducers/bin/BuildFile.xml deleted file mode 100644 index 2b1079ddb6874..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/bin/BuildFile.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/RecoLocalCalo/HcalRecProducers/bin/makeHcalRecHitGpuValidationPlots.cpp b/RecoLocalCalo/HcalRecProducers/bin/makeHcalRecHitGpuValidationPlots.cpp deleted file mode 100644 index 866ebbb304cd3..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/bin/makeHcalRecHitGpuValidationPlots.cpp +++ /dev/null @@ -1,283 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "DataFormats/HcalRecHit/interface/HcalRecHitCollections.h" -#include "DataFormats/Common/interface/Wrapper.h" -//#include "CUDADataFormats/HcalRecHitSoA/interface/RecHitCollection.h" - -#define CREATE_HIST_1D(varname, nbins, first, last) auto varname = new TH1D(#varname, #varname, nbins, first, last) - -#define CREATE_HIST_2D(varname, nbins, first, last) \ - auto varname = new TH2D(#varname, #varname, nbins, first, last, nbins, first, last) - -int main(int argc, char* argv[]) { - if (argc < 3) { - std::cout << "run with: ./ \n"; - exit(0); - } - - std::string inFileName{argv[1]}; - std::string outFileName{argv[2]}; - - // branches to use - edm::Wrapper* wcpu = nullptr; - edm::Wrapper* wgpu = nullptr; - // edm::Wrapper>> *wgpu=nullptr; - - // prep output - TFile rfout{outFileName.c_str(), "recreate"}; - - CREATE_HIST_1D(hEnergyM0HBGPU, 1000, 0, 100); - CREATE_HIST_1D(hEnergyM0HEGPU, 1000, 0, 100); - CREATE_HIST_1D(hEnergyM0HBCPU, 1000, 0, 100); - CREATE_HIST_1D(hEnergyM0HECPU, 1000, 0, 100); - - CREATE_HIST_1D(hEnergyHBGPU, 1000, 0, 100); - CREATE_HIST_1D(hEnergyHBCPU, 1000, 0, 100); - CREATE_HIST_1D(hEnergyHEGPU, 1000, 0, 100); - CREATE_HIST_1D(hEnergyHECPU, 1000, 0, 100); - - CREATE_HIST_1D(hChi2HBGPU, 1000, 0, 100); - CREATE_HIST_1D(hChi2HBCPU, 1000, 0, 100); - CREATE_HIST_1D(hChi2HEGPU, 1000, 0, 100); - CREATE_HIST_1D(hChi2HECPU, 1000, 0, 100); - - CREATE_HIST_2D(hEnergyHBGPUvsCPU, 1000, 0, 100); - CREATE_HIST_2D(hEnergyHEGPUvsCPU, 1000, 0, 100); - CREATE_HIST_2D(hChi2HBGPUvsCPU, 1000, 0, 100); - CREATE_HIST_2D(hChi2HEGPUvsCPU, 1000, 0, 100); - - CREATE_HIST_2D(hEnergyM0HBGPUvsCPU, 1000, 0, 100); - CREATE_HIST_2D(hEnergyM0HEGPUvsCPU, 1000, 0, 100); - - // prep input - TFile rfin{inFileName.c_str()}; - TTree* rt = (TTree*)rfin.Get("Events"); - rt->SetBranchAddress("HBHERecHitsSorted_hcalCPURecHitsProducer_recHitsLegacyHBHE_RECO.", &wgpu); - // rt->SetBranchAddress("hcalCUDAHostAllocatorAliashcalcommonVecStoragePolicyhcalRecHitCollection_hcalCPURecHitsProducer_recHitsM0LabelOut_RECO.", &wgpu); - rt->SetBranchAddress("HBHERecHitsSorted_hbheprereco__RECO.", &wcpu); - - // accumulate - auto const nentries = rt->GetEntries(); - std::cout << ">>> nentries = " << nentries << std::endl; - for (int ie = 0; ie < nentries; ++ie) { - rt->GetEntry(ie); - - auto const& gpuProduct = wgpu->bareProduct(); - auto const& cpuProduct = wcpu->bareProduct(); - - auto const ncpu = cpuProduct.size(); - auto const ngpu = gpuProduct.size(); - // auto const ngpu = gpuProduct.energy.size(); - - if (ngpu != ncpu) { - std::cerr << "*** mismatch in number of rec hits for event " << ie << std::endl - << ">>> ngpu = " << ngpu << std::endl - << ">>> ncpu = " << ncpu << std::endl; - } - - for (uint32_t ich = 0; ich < ncpu; ich++) { - auto const& cpurh = cpuProduct[ich]; - auto const& did = cpurh.id(); - auto iter2gpu = gpuProduct.find(did); - // auto iter2idgpu = std::find( - // gpuProduct.did.begin(), gpuProduct.did.end(), did.rawId()); - - if (iter2gpu == gpuProduct.end()) { - std::cerr << "missing " << did << std::endl; - continue; - } - - assert(iter2gpu->id().rawId() == did.rawId()); - - auto const gpu_energy_m0 = iter2gpu->eraw(); - auto const cpu_energy_m0 = cpurh.eraw(); - auto const gpu_energy = iter2gpu->energy(); - auto const cpu_energy = cpurh.energy(); - auto const gpu_chi2 = iter2gpu->chi2(); - auto const cpu_chi2 = cpurh.chi2(); - - if (did.subdetId() == HcalBarrel) { - hEnergyM0HBGPU->Fill(gpu_energy_m0); - hEnergyM0HBCPU->Fill(cpu_energy_m0); - hEnergyM0HBGPUvsCPU->Fill(cpu_energy_m0, gpu_energy_m0); - - hEnergyHBGPU->Fill(gpu_energy); - hEnergyHBCPU->Fill(cpu_energy); - hEnergyHBGPUvsCPU->Fill(cpu_energy, gpu_energy); - hChi2HBGPU->Fill(gpu_chi2); - hChi2HBCPU->Fill(cpu_chi2); - hChi2HBGPUvsCPU->Fill(cpu_chi2, gpu_chi2); - } else if (did.subdetId() == HcalEndcap) { - hEnergyM0HEGPU->Fill(gpu_energy_m0); - hEnergyM0HECPU->Fill(cpu_energy_m0); - hEnergyM0HEGPUvsCPU->Fill(cpu_energy_m0, gpu_energy_m0); - - hEnergyHEGPU->Fill(gpu_energy); - hEnergyHECPU->Fill(cpu_energy); - hEnergyHEGPUvsCPU->Fill(cpu_energy, gpu_energy); - - hChi2HEGPU->Fill(gpu_chi2); - hChi2HECPU->Fill(cpu_chi2); - hChi2HEGPUvsCPU->Fill(cpu_chi2, gpu_chi2); - } - } - } - - { - TCanvas c{"plots", "plots", 4200, 6200}; - c.Divide(4, 3); - c.cd(1); - { - gPad->SetLogy(); - hEnergyM0HBCPU->SetLineColor(kBlack); - hEnergyM0HBCPU->SetLineWidth(1.); - hEnergyM0HBCPU->Draw(""); - hEnergyM0HBGPU->SetLineColor(kBlue); - hEnergyM0HBGPU->SetLineWidth(1.); - hEnergyM0HBGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hEnergyM0HBGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(2); - { - gPad->SetLogz(); - hEnergyM0HBGPUvsCPU->GetXaxis()->SetTitle("cpu"); - hEnergyM0HBGPUvsCPU->GetYaxis()->SetTitle("gpu"); - hEnergyM0HBGPUvsCPU->Draw("colz"); - } - c.cd(3); - { - gPad->SetLogy(); - hEnergyM0HECPU->SetLineColor(kBlack); - hEnergyM0HECPU->SetLineWidth(1.); - hEnergyM0HECPU->Draw(""); - hEnergyM0HEGPU->SetLineColor(kBlue); - hEnergyM0HEGPU->SetLineWidth(1.); - hEnergyM0HEGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hEnergyM0HEGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(4); - { - gPad->SetLogz(); - hEnergyM0HEGPUvsCPU->GetXaxis()->SetTitle("cpu"); - hEnergyM0HEGPUvsCPU->GetYaxis()->SetTitle("gpu"); - hEnergyM0HEGPUvsCPU->Draw("colz"); - } - c.cd(5); - { - gPad->SetLogy(); - hEnergyHBCPU->SetLineColor(kBlack); - hEnergyHBCPU->SetLineWidth(1.); - hEnergyHBCPU->Draw(""); - hEnergyHBGPU->SetLineColor(kBlue); - hEnergyHBGPU->SetLineWidth(1.); - hEnergyHBGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hEnergyHBGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(6); - { - gPad->SetLogz(); - hEnergyHBGPUvsCPU->GetXaxis()->SetTitle("cpu"); - hEnergyHBGPUvsCPU->GetYaxis()->SetTitle("gpu"); - hEnergyHBGPUvsCPU->Draw("colz"); - } - c.cd(7); - { - gPad->SetLogy(); - hEnergyHECPU->SetLineColor(kBlack); - hEnergyHECPU->SetLineWidth(1.); - hEnergyHECPU->Draw(""); - hEnergyHEGPU->SetLineColor(kBlue); - hEnergyHEGPU->SetLineWidth(1.); - hEnergyHEGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hEnergyHEGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(8); - { - gPad->SetLogz(); - hEnergyHEGPUvsCPU->GetXaxis()->SetTitle("cpu"); - hEnergyHEGPUvsCPU->GetYaxis()->SetTitle("gpu"); - hEnergyHEGPUvsCPU->Draw("colz"); - } - c.cd(9); - { - gPad->SetLogy(); - hChi2HBCPU->SetLineColor(kBlack); - hChi2HBCPU->SetLineWidth(1.); - hChi2HBCPU->Draw(""); - hChi2HBGPU->SetLineColor(kBlue); - hChi2HBGPU->SetLineWidth(1.); - hChi2HBGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hChi2HBGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(10); - { - gPad->SetLogz(); - hChi2HBGPUvsCPU->GetXaxis()->SetTitle("cpu"); - hChi2HBGPUvsCPU->GetYaxis()->SetTitle("gpu"); - hChi2HBGPUvsCPU->Draw("colz"); - } - c.cd(11); - { - gPad->SetLogy(); - hChi2HECPU->SetLineColor(kBlack); - hChi2HECPU->SetLineWidth(1.); - hChi2HECPU->Draw(""); - hChi2HEGPU->SetLineColor(kBlue); - hChi2HEGPU->SetLineWidth(1.); - hChi2HEGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hChi2HEGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(12); - { - gPad->SetLogz(); - hChi2HEGPUvsCPU->GetXaxis()->SetTitle("cpu"); - hChi2HEGPUvsCPU->GetYaxis()->SetTitle("gpu"); - hChi2HEGPUvsCPU->Draw("colz"); - } - c.SaveAs("plots.pdf"); - } - - rfin.Close(); - rfout.Write(); - rfout.Close(); -} diff --git a/RecoLocalCalo/HcalRecProducers/python/hbheRecHitProducerGPUTask_cff.py b/RecoLocalCalo/HcalRecProducers/python/hbheRecHitProducerGPUTask_cff.py deleted file mode 100644 index ab2d6c96d0a79..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/python/hbheRecHitProducerGPUTask_cff.py +++ /dev/null @@ -1,67 +0,0 @@ -import FWCore.ParameterSet.Config as cms - -# Run 3 HCAL workflow on GPU - -# EventSetup modules used by HBHERecHitProducerGPU -from RecoLocalCalo.HcalRecProducers.hcalGainsGPUESProducer_cfi import hcalGainsGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalGainWidthsGPUESProducer_cfi import hcalGainWidthsGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalLUTCorrsGPUESProducer_cfi import hcalLUTCorrsGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalConvertedPedestalsGPUESProducer_cfi import hcalConvertedPedestalsGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalConvertedEffectivePedestalsGPUESProducer_cfi import hcalConvertedEffectivePedestalsGPUESProducer -hcalConvertedEffectivePedestalsGPUESProducer.label0 = "withTopoEff" - -from RecoLocalCalo.HcalRecProducers.hcalConvertedPedestalWidthsGPUESProducer_cfi import hcalConvertedPedestalWidthsGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalConvertedEffectivePedestalWidthsGPUESProducer_cfi import hcalConvertedEffectivePedestalWidthsGPUESProducer -hcalConvertedEffectivePedestalWidthsGPUESProducer.label0 = "withTopoEff" -hcalConvertedEffectivePedestalWidthsGPUESProducer.label1 = "withTopoEff" - -from RecoLocalCalo.HcalRecProducers.hcalChannelQualityGPUESProducer_cfi import hcalChannelQualityGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalQIECodersGPUESProducer_cfi import hcalQIECodersGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalRecoParamsWithPulseShapesGPUESProducer_cfi import hcalRecoParamsWithPulseShapesGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalRespCorrsGPUESProducer_cfi import hcalRespCorrsGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalTimeCorrsGPUESProducer_cfi import hcalTimeCorrsGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalQIETypesGPUESProducer_cfi import hcalQIETypesGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalSiPMParametersGPUESProducer_cfi import hcalSiPMParametersGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalSiPMCharacteristicsGPUESProducer_cfi import hcalSiPMCharacteristicsGPUESProducer -from RecoLocalCalo.HcalRecProducers.hcalMahiPulseOffsetsGPUESProducer_cfi import hcalMahiPulseOffsetsGPUESProducer - -# convert the HBHE digis into SoA format, and copy them from CPU to GPU -from EventFilter.HcalRawToDigi.hcalDigisProducerGPU_cfi import hcalDigisProducerGPU as _hcalDigisProducerGPU -hcalDigisGPU = _hcalDigisProducerGPU.clone( - digisLabelF01HE = "", - digisLabelF5HB = "", - digisLabelF3HB = "" -) - -# run the HCAL local reconstruction (MAHI) on GPU -from RecoLocalCalo.HcalRecProducers.hbheRecHitProducerGPU_cfi import hbheRecHitProducerGPU as _hbheRecHitProducerGPU -hbheRecHitProducerGPU = _hbheRecHitProducerGPU.clone( - digisLabelF01HE = "hcalDigisGPU", - digisLabelF5HB = "hcalDigisGPU", - digisLabelF3HB = "hcalDigisGPU", - recHitsLabelM0HBHE = "" -) - -# Tasks and Sequences -hbheRecHitProducerGPUTask = cms.Task( - hcalGainsGPUESProducer, - hcalGainWidthsGPUESProducer, - hcalLUTCorrsGPUESProducer, - hcalConvertedPedestalsGPUESProducer, - hcalConvertedEffectivePedestalsGPUESProducer, - hcalConvertedPedestalWidthsGPUESProducer, - hcalConvertedEffectivePedestalWidthsGPUESProducer, - hcalChannelQualityGPUESProducer, - hcalQIECodersGPUESProducer, - hcalRecoParamsWithPulseShapesGPUESProducer, - hcalRespCorrsGPUESProducer, - hcalTimeCorrsGPUESProducer, - hcalQIETypesGPUESProducer, - hcalSiPMParametersGPUESProducer, - hcalSiPMCharacteristicsGPUESProducer, - hcalMahiPulseOffsetsGPUESProducer, - hcalDigisGPU, - hbheRecHitProducerGPU -) - -hbheRecHitProducerGPUSequence = cms.Sequence(hbheRecHitProducerGPUTask) diff --git a/RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h b/RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h deleted file mode 100644 index 1b8f611aed2e3..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/src/DeclsForKernels.h +++ /dev/null @@ -1,107 +0,0 @@ -#ifndef RecoLocalCalo_HcalRecProducers_src_DeclsForKernels_h -#define RecoLocalCalo_HcalRecProducers_src_DeclsForKernels_h - -#include -#include - -#include "CondFormats/HcalObjects/interface/HcalChannelStatus.h" -#include "CUDADataFormats/HcalDigi/interface/DigiCollection.h" -#include "CUDADataFormats/HcalRecHitSoA/interface/RecHitCollection.h" -#include "CalibCalorimetry/HcalAlgos/interface/HcalTimeSlew.h" -#include "CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalWidthsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalGainWidthsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalGainsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalLUTCorrsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalQIECodersGPU.h" -#include "CondFormats/HcalObjects/interface/HcalQIETypesGPU.h" -#include "CondFormats/HcalObjects/interface/HcalRecoParamsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalRespCorrsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalSiPMParametersGPU.h" -#include "CondFormats/HcalObjects/interface/HcalTimeCorrsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalChannelQualityGPU.h" -#include "Geometry/CaloTopology/interface/HcalTopology.h" -#include "Geometry/HcalCommonData/interface/HcalDDDRecConstants.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "RecoLocalCalo/HcalRecAlgos/interface/HcalMahiPulseOffsetsGPU.h" -#include "RecoLocalCalo/HcalRecAlgos/interface/HcalRecoParamsWithPulseShapesGPU.h" - -namespace hcal { - namespace reconstruction { - - struct ConditionsProducts { - HcalGainWidthsGPU::Product const& gainWidths; - HcalGainsGPU::Product const& gains; - HcalLUTCorrsGPU::Product const& lutCorrs; - HcalConvertedPedestalWidthsGPU::Product const& pedestalWidths; - HcalConvertedEffectivePedestalWidthsGPU::Product const& effectivePedestalWidths; - HcalConvertedPedestalsGPU::Product const& pedestals; - HcalQIECodersGPU::Product const& qieCoders; - HcalChannelQualityGPU::Product const& channelQuality; - HcalRecoParamsWithPulseShapesGPU::Product const& recoParams; - HcalRespCorrsGPU::Product const& respCorrs; - HcalTimeCorrsGPU::Product const& timeCorrs; - HcalQIETypesGPU::Product const& qieTypes; - HcalSiPMParametersGPU::Product const& sipmParameters; - HcalSiPMCharacteristicsGPU::Product const& sipmCharacteristics; - HcalConvertedPedestalsGPU::Product const* convertedEffectivePedestals; - HcalTopology const* topology; - HcalDDDRecConstants const* recConstants; - uint32_t offsetForHashes; - HcalMahiPulseOffsetsGPU::Product const& pulseOffsets; - std::vector> const& pulseOffsetsHost; - }; - - struct ConfigParameters { - uint32_t maxTimeSamples; - uint32_t kprep1dChannelsPerBlock; - int sipmQTSShift; - int sipmQNTStoSum; - int firstSampleShift; - bool useEffectivePedestals; - - float meanTime; - float timeSigmaSiPM, timeSigmaHPD; - float ts4Thresh; - - std::array kernelMinimizeThreads; - - // FIXME: - // - add "getters" to HcalTimeSlew calib formats - // - add ES Producer to consume what is produced above not to replicate. - // which ones to use is hardcoded, therefore no need to send those to the device - bool applyTimeSlew; - float tzeroTimeSlew, slopeTimeSlew, tmaxTimeSlew; - }; - - struct OutputDataGPU { - RecHitCollection<::calo::common::DevStoragePolicy> recHits; - - void allocate(ConfigParameters const& config, uint32_t size, cudaStream_t cudaStream) { - recHits.energy = cms::cuda::make_device_unique(size, cudaStream); - recHits.chi2 = cms::cuda::make_device_unique(size, cudaStream); - recHits.energyM0 = cms::cuda::make_device_unique(size, cudaStream); - recHits.timeM0 = cms::cuda::make_device_unique(size, cudaStream); - recHits.did = cms::cuda::make_device_unique(size, cudaStream); - } - }; - - struct ScratchDataGPU { - cms::cuda::device::unique_ptr amplitudes, noiseTerms, electronicNoiseTerms, pulseMatrices, - pulseMatricesM, pulseMatricesP; - cms::cuda::device::unique_ptr soiSamples; - }; - - struct InputDataGPU { - DigiCollection const& f01HEDigis; - DigiCollection const& f5HBDigis; - DigiCollection const& f3HBDigis; - }; - - } // namespace reconstruction -} // namespace hcal - -#endif // RecoLocalCalo_HcalRecProducers_src_DeclsForKernels_h diff --git a/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc b/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc deleted file mode 100644 index 2fcce2827488e..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/src/HBHERecHitProducerGPU.cc +++ /dev/null @@ -1,269 +0,0 @@ -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" - -#include "SimpleAlgoGPU.h" - -#include "CondFormats/DataRecord/interface/HcalCombinedRecordsGPU.h" -#include "CondFormats/DataRecord/interface/HcalGainWidthsRcd.h" -#include "CondFormats/DataRecord/interface/HcalGainsRcd.h" -#include "CondFormats/DataRecord/interface/HcalLUTCorrsRcd.h" -#include "CondFormats/DataRecord/interface/HcalQIEDataRcd.h" -#include "CondFormats/DataRecord/interface/HcalQIETypesRcd.h" -#include "CondFormats/DataRecord/interface/HcalRecoParamsRcd.h" -#include "CondFormats/DataRecord/interface/HcalRespCorrsRcd.h" -#include "CondFormats/DataRecord/interface/HcalSiPMCharacteristicsRcd.h" -#include "CondFormats/DataRecord/interface/HcalSiPMParametersRcd.h" -#include "CondFormats/DataRecord/interface/HcalTimeCorrsRcd.h" -#include "CondFormats/DataRecord/interface/HcalChannelQualityRcd.h" - -class HBHERecHitProducerGPU : public edm::stream::EDProducer { -public: - explicit HBHERecHitProducerGPU(edm::ParameterSet const&); - ~HBHERecHitProducerGPU() override; - static void fillDescriptions(edm::ConfigurationDescriptions&); - -private: - void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; - void produce(edm::Event&, edm::EventSetup const&) override; - - using IProductTypef01 = cms::cuda::Product>; - edm::EDGetTokenT digisTokenF01HE_; - - using IProductTypef5 = cms::cuda::Product>; - edm::EDGetTokenT digisTokenF5HB_; - - using IProductTypef3 = cms::cuda::Product>; - edm::EDGetTokenT digisTokenF3HB_; - - using RecHitType = hcal::RecHitCollection; - using OProductType = cms::cuda::Product; - edm::EDPutTokenT rechitsM0Token_; - - const edm::ESGetToken recoParamsToken_; - const edm::ESGetToken gainWidthsToken_; - const edm::ESGetToken gainsToken_; - const edm::ESGetToken lutCorrsToken_; - const edm::ESGetToken pedestalWidthsToken_; - const edm::ESGetToken - effectivePedestalWidthsToken_; - const edm::ESGetToken pedestalsToken_; - edm::ESGetToken effectivePedestalsToken_; - const edm::ESGetToken qieCodersToken_; - const edm::ESGetToken respCorrsToken_; - const edm::ESGetToken timeCorrsToken_; - const edm::ESGetToken qieTypesToken_; - const edm::ESGetToken topologyToken_; - const edm::ESGetToken recConstantsToken_; - const edm::ESGetToken sipmParametersToken_; - const edm::ESGetToken sipmCharacteristicsToken_; - const edm::ESGetToken chQualProductToken_; - const edm::ESGetToken pulseOffsetsToken_; - - hcal::reconstruction::ConfigParameters configParameters_; - hcal::reconstruction::OutputDataGPU outputGPU_; - cms::cuda::ContextState cudaState_; -}; - -HBHERecHitProducerGPU::HBHERecHitProducerGPU(edm::ParameterSet const& ps) - : digisTokenF01HE_{consumes(ps.getParameter("digisLabelF01HE"))}, - digisTokenF5HB_{consumes(ps.getParameter("digisLabelF5HB"))}, - digisTokenF3HB_{consumes(ps.getParameter("digisLabelF3HB"))}, - rechitsM0Token_{produces(ps.getParameter("recHitsLabelM0HBHE"))}, - recoParamsToken_{esConsumes()}, - gainWidthsToken_{esConsumes()}, - gainsToken_{esConsumes()}, - lutCorrsToken_{esConsumes()}, - pedestalWidthsToken_{esConsumes()}, - effectivePedestalWidthsToken_{esConsumes()}, - pedestalsToken_{esConsumes()}, - qieCodersToken_{esConsumes()}, - respCorrsToken_{esConsumes()}, - timeCorrsToken_{esConsumes()}, - qieTypesToken_{esConsumes()}, - topologyToken_{esConsumes()}, - recConstantsToken_{esConsumes()}, - sipmParametersToken_{esConsumes()}, - sipmCharacteristicsToken_{esConsumes()}, - chQualProductToken_{esConsumes()}, - pulseOffsetsToken_{esConsumes()} { - configParameters_.maxTimeSamples = ps.getParameter("maxTimeSamples"); - configParameters_.kprep1dChannelsPerBlock = ps.getParameter("kprep1dChannelsPerBlock"); - configParameters_.sipmQTSShift = ps.getParameter("sipmQTSShift"); - configParameters_.sipmQNTStoSum = ps.getParameter("sipmQNTStoSum"); - configParameters_.firstSampleShift = ps.getParameter("firstSampleShift"); - configParameters_.useEffectivePedestals = ps.getParameter("useEffectivePedestals"); - if (configParameters_.useEffectivePedestals) { - effectivePedestalsToken_ = esConsumes(); - } - - configParameters_.meanTime = ps.getParameter("meanTime"); - configParameters_.timeSigmaSiPM = ps.getParameter("timeSigmaSiPM"); - configParameters_.timeSigmaHPD = ps.getParameter("timeSigmaHPD"); - configParameters_.ts4Thresh = ps.getParameter("ts4Thresh"); - - configParameters_.applyTimeSlew = ps.getParameter("applyTimeSlew"); - auto const tzeroValues = ps.getParameter>("tzeroTimeSlewParameters"); - auto const slopeValues = ps.getParameter>("slopeTimeSlewParameters"); - auto const tmaxValues = ps.getParameter>("tmaxTimeSlewParameters"); - - configParameters_.tzeroTimeSlew = tzeroValues[HcalTimeSlew::Medium]; - configParameters_.slopeTimeSlew = slopeValues[HcalTimeSlew::Medium]; - configParameters_.tmaxTimeSlew = tmaxValues[HcalTimeSlew::Medium]; - - auto threadsMinimize = ps.getParameter>("kernelMinimizeThreads"); - configParameters_.kernelMinimizeThreads[0] = threadsMinimize[0]; - configParameters_.kernelMinimizeThreads[1] = threadsMinimize[1]; - configParameters_.kernelMinimizeThreads[2] = threadsMinimize[2]; -} - -HBHERecHitProducerGPU::~HBHERecHitProducerGPU() {} - -void HBHERecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& cdesc) { - edm::ParameterSetDescription desc; - desc.add("maxTimeSamples", 10); - desc.add("kprep1dChannelsPerBlock", 32); - desc.add("digisLabelF01HE", edm::InputTag{"hcalRawToDigiGPU", "f01HEDigisGPU"}); - desc.add("digisLabelF5HB", edm::InputTag{"hcalRawToDigiGPU", "f5HBDigisGPU"}); - desc.add("digisLabelF3HB", edm::InputTag{"hcalRawToDigiGPU", "f3HBDigisGPU"}); - desc.add("recHitsLabelM0HBHE", "recHitsM0HBHE"); - desc.add("sipmQTSShift", 0); - desc.add("sipmQNTStoSum", 3); - desc.add("firstSampleShift", 0); - desc.add("useEffectivePedestals", true); - - desc.add("meanTime", 0.f); - desc.add("timeSigmaSiPM", 2.5f); - desc.add("timeSigmaHPD", 5.0f); - desc.add("ts4Thresh", 0.0); - - desc.add("applyTimeSlew", true); - desc.add>("tzeroTimeSlewParameters", {23.960177, 11.977461, 9.109694}); - desc.add>("slopeTimeSlewParameters", {-3.178648, -1.5610227, -1.075824}); - desc.add>("tmaxTimeSlewParameters", {16.00, 10.00, 6.25}); - desc.add>("kernelMinimizeThreads", {16, 1, 1}); - - cdesc.addWithDefaultLabel(desc); -} - -void HBHERecHitProducerGPU::acquire(edm::Event const& event, - edm::EventSetup const& setup, - edm::WaitingTaskWithArenaHolder holder) { -#ifdef HCAL_MAHI_CPUDEBUG - auto start = std::chrono::high_resolution_clock::now(); -#endif - - // input + raii - auto const& f01HEProduct = event.get(digisTokenF01HE_); - auto const& f5HBProduct = event.get(digisTokenF5HB_); - auto const& f3HBProduct = event.get(digisTokenF3HB_); - cms::cuda::ScopedContextAcquire ctx{f01HEProduct, std::move(holder), cudaState_}; - auto const& f01HEDigis = ctx.get(f01HEProduct); - auto const& f5HBDigis = ctx.get(f5HBProduct); - auto const& f3HBDigis = ctx.get(f3HBProduct); - auto const totalChannels = f01HEDigis.size + f5HBDigis.size + f3HBDigis.size; - - hcal::reconstruction::InputDataGPU inputGPU{f01HEDigis, f5HBDigis, f3HBDigis}; - - // conditions - auto const& recoParamsProduct = setup.getData(recoParamsToken_).getProduct(ctx.stream()); - - auto const& gainWidthsProduct = setup.getData(gainWidthsToken_).getProduct(ctx.stream()); - - auto const& gainsProduct = setup.getData(gainsToken_).getProduct(ctx.stream()); - - auto const& lutCorrsProduct = setup.getData(lutCorrsToken_).getProduct(ctx.stream()); - - // use only 1 depending on useEffectivePedestals - auto const& pedestalWidthsProduct = setup.getData(pedestalWidthsToken_).getProduct(ctx.stream()); - auto const& effectivePedestalWidthsProduct = setup.getData(effectivePedestalWidthsToken_).getProduct(ctx.stream()); - - auto const& pedestals = setup.getData(pedestalsToken_); - auto const& pedestalsProduct = pedestals.getProduct(ctx.stream()); - - edm::ESHandle effectivePedestalsHandle; - if (configParameters_.useEffectivePedestals) - effectivePedestalsHandle = setup.getHandle(effectivePedestalsToken_); - auto const* effectivePedestalsProduct = - configParameters_.useEffectivePedestals ? &effectivePedestalsHandle->getProduct(ctx.stream()) : nullptr; - - auto const& qieCodersProduct = setup.getData(qieCodersToken_).getProduct(ctx.stream()); - - auto const& respCorrsProduct = setup.getData(respCorrsToken_).getProduct(ctx.stream()); - - auto const& timeCorrsProduct = setup.getData(timeCorrsToken_).getProduct(ctx.stream()); - - auto const& qieTypesProduct = setup.getData(qieTypesToken_).getProduct(ctx.stream()); - - HcalTopology const& topology = setup.getData(topologyToken_); - HcalDDDRecConstants const& recConstants = setup.getData(recConstantsToken_); - - auto const& sipmParametersProduct = setup.getData(sipmParametersToken_).getProduct(ctx.stream()); - - auto const& sipmCharacteristicsProduct = setup.getData(sipmCharacteristicsToken_).getProduct(ctx.stream()); - - auto const& chQualProduct = setup.getData(chQualProductToken_).getProduct(ctx.stream()); - - auto const& pulseOffsets = setup.getData(pulseOffsetsToken_); - auto const& pulseOffsetsProduct = pulseOffsets.getProduct(ctx.stream()); - - // bundle up conditions - hcal::reconstruction::ConditionsProducts conditions{gainWidthsProduct, - gainsProduct, - lutCorrsProduct, - pedestalWidthsProduct, - effectivePedestalWidthsProduct, - pedestalsProduct, - qieCodersProduct, - chQualProduct, - recoParamsProduct, - respCorrsProduct, - timeCorrsProduct, - qieTypesProduct, - sipmParametersProduct, - sipmCharacteristicsProduct, - effectivePedestalsProduct, - &topology, - &recConstants, - pedestals.offsetForHashes(), - pulseOffsetsProduct, - pulseOffsets.getValues()}; - - // scratch mem on device - hcal::reconstruction::ScratchDataGPU scratchGPU = { - cms::cuda::make_device_unique(totalChannels * configParameters_.maxTimeSamples, ctx.stream()), - cms::cuda::make_device_unique(totalChannels * configParameters_.maxTimeSamples, ctx.stream()), - cms::cuda::make_device_unique(totalChannels * configParameters_.maxTimeSamples, ctx.stream()), - cms::cuda::make_device_unique( - totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()), - cms::cuda::make_device_unique( - totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()), - cms::cuda::make_device_unique( - totalChannels * configParameters_.maxTimeSamples * configParameters_.maxTimeSamples, ctx.stream()), - cms::cuda::make_device_unique(totalChannels, ctx.stream()), - }; - - // output dev mem - outputGPU_.allocate(configParameters_, totalChannels, ctx.stream()); - - hcal::reconstruction::entryPoint(inputGPU, outputGPU_, conditions, scratchGPU, configParameters_, ctx.stream()); - -#ifdef HCAL_MAHI_CPUDEBUG - auto end = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end - start).count(); - std::cout << "acquire duration = " << duration << std::endl; -#endif -} - -void HBHERecHitProducerGPU::produce(edm::Event& event, edm::EventSetup const& setup) { - cms::cuda::ScopedContextProduce ctx{cudaState_}; - ctx.emplace(event, rechitsM0Token_, std::move(outputGPU_.recHits)); -} - -DEFINE_FWK_MODULE(HBHERecHitProducerGPU); diff --git a/RecoLocalCalo/HcalRecProducers/src/HCALGPUAnalyzer.cc b/RecoLocalCalo/HcalRecProducers/src/HCALGPUAnalyzer.cc deleted file mode 100644 index ba3c9de696c47..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/src/HCALGPUAnalyzer.cc +++ /dev/null @@ -1,307 +0,0 @@ -// -*- C++ -*- -// -// Package: ComparisonPlots/HCALGPUAnalyzer -// Class: HCALGPUAnalyzer -// -/**\class HCALGPUAnalyzer HCALGPUAnalyzer.cc ComparisonPlots/HCALGPUAnalyzer/plugins/HCALGPUAnalyzer.cc - - Description: [one line class summary] - - Implementation: - [Notes on implementation] -*/ -// -// Original Author: Mariarosaria D'Alfonso -// Created: Mon, 17 Dec 2018 16:22:58 GMT -// -// - -// system include files -#include -#include -#include -#include -using namespace std; - -// user include files -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/Framework/interface/one/EDAnalyzer.h" - -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/MakerMacros.h" - -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "CommonTools/UtilAlgos/interface/TFileService.h" - -#include "DataFormats/HcalRecHit/interface/HBHERecHit.h" -#include "DataFormats/HcalRecHit/interface/HcalRecHitCollections.h" -#include "DataFormats/HcalDetId/interface/HcalDetId.h" - -#include "SimDataFormats/CaloHit/interface/PCaloHit.h" -#include "SimDataFormats/CaloHit/interface/PCaloHitContainer.h" - -#include "SimCalorimetry/HcalSimAlgos/interface/HcalSimParameterMap.h" - -#include "TH2F.h" - -// -// class declaration -// - -class HCALGPUAnalyzer : public edm::one::EDAnalyzer { -public: - explicit HCALGPUAnalyzer(const edm::ParameterSet &); - ~HCALGPUAnalyzer() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); - -private: - void beginJob() override; - void analyze(const edm::Event &, const edm::EventSetup &) override; - void endJob() override; - - // ----------member data --------------------------- - // void ClearVariables(); - - // some variables for storing information - double Method0Energy, Method0EnergyGPU; - double RecHitEnergy, RecHitEnergyGPU; - double RecHitTime, RecHitTimeGPU; - double iEta, iEtaGPU; - double iPhi, iPhiGPU; - int depth, depthGPU; - - TH2F *hEnergy_2dMahi; - TH2F *hEnergy_2dM0; - TH2F *hTime_2dMahi; - - TH2F *Unmatched; - TH2F *Matched; - TH1F *hEnergy_cpu; - TH1F *hEnergy_gpu; - TH1F *hEnergy_cpugpu; - TH1F *hEnergy_cpugpu_rel; - TH1F *hEnergyM0_cpu; - TH1F *hEnergyM0_gpu; - TH1F *hTime_cpu; - TH1F *hTime_gpu; - - // create the output file - edm::Service FileService; - // create the token to retrieve hit information - edm::EDGetTokenT hRhToken; - edm::EDGetTokenT hRhTokenGPU; -}; - -// -// constants, enums and typedefs -// - -// -// static data member definitions -// - -// -// constructors and destructor -// -HCALGPUAnalyzer::HCALGPUAnalyzer(const edm::ParameterSet &iConfig) { - usesResource("TFileService"); - - hRhToken = consumes(iConfig.getUntrackedParameter("HBHERecHits", "hbheprereco")); - hRhTokenGPU = consumes( - iConfig.getUntrackedParameter("HBHERecHits", "hcalCPURecHitsProducer:recHitsLegacyHBHE")); - - // - - hEnergy_2dM0 = FileService->make("hEnergy_2dM0", "hEnergy_2dM0", 1000, 0., 100., 1000, 0., 100.); - hEnergy_2dM0->GetXaxis()->SetTitle("Cpu M0 Energy"); - hEnergy_2dM0->GetYaxis()->SetTitle("GPU M0 Energy"); - - hEnergy_2dMahi = FileService->make("hEnergy_2dMahi", "hEnergy_2dMahi", 1000, 0., 100., 1000, 0., 100.); - hEnergy_2dMahi->GetXaxis()->SetTitle("CPU Energy"); - hEnergy_2dMahi->GetYaxis()->SetTitle("GPU Energy"); - - hTime_2dMahi = FileService->make("hTime_2dMahi", "hTime_2dMahi", 250, -12.5, 12.5, 250, -12.5, 12.5); - hTime_2dMahi->GetXaxis()->SetTitle("Mahi Time CPU"); - hTime_2dMahi->GetYaxis()->SetTitle("Mahi Time GPU"); - - // - - hEnergyM0_cpu = FileService->make("hEnergyM0_cpu", "hEnergyM0_cpu", 100, 0., 100.); - hEnergyM0_cpu->GetXaxis()->SetTitle("CPU Energy"); - - hEnergy_cpu = FileService->make("hEnergy_cpu", "hEnergy_cpu", 50, 0., 50.); - hEnergy_cpu->GetXaxis()->SetTitle("CPU Energy"); - - hEnergy_gpu = FileService->make("hEnergy_gpu", "hEnergy_gpu", 50, 0., 50.); - hEnergy_gpu->GetXaxis()->SetTitle("GPU Energy"); - - // - - hEnergy_cpugpu = FileService->make("hEnergy_cpugpu", "hEnergy_cpugpu", 500, -2.5, 2.5); - hEnergy_cpugpu->GetXaxis()->SetTitle("GPU Energy - CPU Energy [GeV]"); - hEnergy_cpugpu->GetYaxis()->SetTitle("# RecHits"); - - hEnergy_cpugpu_rel = - FileService->make("hEnergy_cpugpu_rel", "hEnergy_cpugpu_rel ( E > 0.005 GeV)", 500, -2.5, 2.5); - hEnergy_cpugpu_rel->GetXaxis()->SetTitle("(GPU Energy - CPU Energy) / CPU energy"); - hEnergy_cpugpu_rel->GetYaxis()->SetTitle("# RecHits"); - - // - - hTime_cpu = FileService->make("hTime_cpu", "hTime_cpu", 50, -25., 25.); - hTime_cpu->GetXaxis()->SetTitle("CPU Time"); - - hTime_gpu = FileService->make("hTime_gpu", "hTime_gpu", 50, -25., 25.); - hTime_gpu->GetXaxis()->SetTitle("GPU Time"); - - Unmatched = FileService->make("Unmatched", "Unmatched (eta,phi)", 100, -50., 50., 85, 0., 85.); - Matched = FileService->make("Matched", "Matched (eta,phi)", 100, -50., 50., 85, 0., 85.); - - //now do what ever initialization is needed -} - -// -// member functions -// - -// ------------ method called for each event ------------ -void HCALGPUAnalyzer::analyze(const edm::Event &iEvent, const edm::EventSetup &iSetup) { - using namespace edm; - - // Read events - Handle hRecHits; - iEvent.getByToken(hRhToken, hRecHits); - - Handle hRecHitsGPU; - iEvent.getByToken(hRhTokenGPU, hRecHitsGPU); - - // Loop over all rechits in one event - for (int i = 0; i < (int)hRecHits->size(); i++) { - // get ID information for the reconstructed hit - HcalDetId detID_rh = (*hRecHits)[i].id().rawId(); - - // ID information can get us detector coordinates - depth = (*hRecHits)[i].id().depth(); - iEta = detID_rh.ieta(); - iPhi = detID_rh.iphi(); - - // get some variables - Method0Energy = (*hRecHits)[i].eraw(); - RecHitEnergy = (*hRecHits)[i].energy(); - RecHitTime = (*hRecHits)[i].time(); - - hEnergy_cpu->Fill(RecHitEnergy); - hTime_cpu->Fill(RecHitTime); - - /* - cout << "Run " << i << ": "; - cout << "Method0Energy: " << Method0Energy; - cout << "RecHitEnergy: " << RecHitEnergy; - cout << "depth: " << depth; - cout << "iEta: " << iEta; - cout << "iPhi: " << iPhi; - cout << "RecHitTime" << RecHitTime; - */ - } - - for (int i = 0; i < (int)hRecHitsGPU->size(); i++) { - // get ID information for the reconstructed hit - HcalDetId detID_rh = (*hRecHitsGPU)[i].id().rawId(); - - // ID information can get us detector coordinates - depthGPU = (*hRecHitsGPU)[i].id().depth(); - iEtaGPU = detID_rh.ieta(); - iPhiGPU = detID_rh.iphi(); - - // get some variables - Method0EnergyGPU = (*hRecHitsGPU)[i].eraw(); - RecHitEnergyGPU = (*hRecHitsGPU)[i].energy(); - RecHitTimeGPU = (*hRecHitsGPU)[i].time(); - - hEnergy_gpu->Fill(RecHitEnergyGPU); - hTime_gpu->Fill(RecHitTimeGPU); - - /* - cout << "Run " << i << ": "; - cout << "Method0Energy: " << Method0EnergyGPU; - cout << "RecHitEnergy: " << RecHitEnergyGPU; - cout << "depth: " << depthGPU; - cout << "iEta: " << iEtaGPU; - cout << "iPhi: " << iPhiGPU; - cout << "RecHitTime" << RecHitTimeGPU; - */ - } - - // Loop over all rechits in one event - for (int i = 0; i < (int)hRecHits->size(); i++) { - HcalDetId detID_rh = (*hRecHits)[i].id().rawId(); - - bool unmatched = true; - // cout << "--------------------------------------------------------" << endl; - - for (int j = 0; j < (int)hRecHitsGPU->size(); j++) { - HcalDetId detID_gpu = (*hRecHitsGPU)[j].id().rawId(); - - if ((detID_rh == detID_gpu)) { - /* - cout << "Mtime(cpu)" << (*hRecHits)[i].time() << endl; - cout << " Mtime(gpu)" << (*hRecHitsGPU)[j].time() << endl; - - cout << "M0E(cpu)" << (*hRecHits)[i].eraw() << endl; - cout << " M0E(gpu)" << (*hRecHitsGPU)[j].eraw() << endl; - */ - - auto relValue = ((*hRecHitsGPU)[j].energy() - (*hRecHits)[i].energy()) / (*hRecHits)[i].energy(); - - hEnergy_2dM0->Fill((*hRecHits)[i].eraw(), (*hRecHitsGPU)[j].eraw()); - hEnergy_2dMahi->Fill((*hRecHits)[i].energy(), (*hRecHitsGPU)[j].energy()); - hEnergy_cpugpu->Fill((*hRecHitsGPU)[j].energy() - (*hRecHits)[i].energy()); - if ((*hRecHits)[i].energy() > 0.005) - hEnergy_cpugpu_rel->Fill(relValue); - hTime_2dMahi->Fill((*hRecHits)[i].time(), (*hRecHitsGPU)[j].time()); - - /* - if((relValue < - 0.9) and ((*hRecHits)[i].energy()>0.005)) { - cout << "----------------------------------"<< endl; - cout << " detID = " << detID_rh.rawId() << endl; - cout << "ME(cpu)" << (*hRecHits)[i].energy() << endl; - cout << " ME(gpu)" << (*hRecHitsGPU)[j].energy() << endl; - } - */ - - Matched->Fill(detID_rh.ieta(), detID_rh.iphi()); - - unmatched = false; - } - } - - /// - - if (unmatched) { - Unmatched->Fill(detID_rh.ieta(), detID_rh.iphi()); - // cout << " recHit not matched =" << detID_rh << " E(raw)=" << (*hRecHits)[i].eraw() << " E=" << (*hRecHits)[i].energy() << endl; - } - } -} - -// ------------ method called once each job just before starting event loop ------------ -void HCALGPUAnalyzer::beginJob() {} - -// ------------ method called once each job just after ending the event loop ------------ -void HCALGPUAnalyzer::endJob() {} - -// ------------ method fills 'descriptions' with the allowed parameters for the module ------------ -void HCALGPUAnalyzer::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { - //The following says we do not know what parameters are allowed so do no validation - // Please change this to state exactly what you do use, even if it is no parameters - edm::ParameterSetDescription desc; - desc.setUnknown(); - descriptions.addDefault(desc); -} - -//define this as a plug-in -DEFINE_FWK_MODULE(HCALGPUAnalyzer); diff --git a/RecoLocalCalo/HcalRecProducers/src/HcalCPURecHitsProducer.cc b/RecoLocalCalo/HcalRecProducers/src/HcalCPURecHitsProducer.cc deleted file mode 100644 index ceb8f4b08f849..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/src/HcalCPURecHitsProducer.cc +++ /dev/null @@ -1,132 +0,0 @@ -#include -#include - -#include "CUDADataFormats/HcalRecHitSoA/interface/RecHitCollection.h" -#include "DataFormats/HcalRecHit/interface/HcalRecHitCollections.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -class HcalCPURecHitsProducer : public edm::stream::EDProducer { -public: - explicit HcalCPURecHitsProducer(edm::ParameterSet const& ps); - ~HcalCPURecHitsProducer() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions&); - -private: - void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; - void produce(edm::Event&, edm::EventSetup const&) override; - -private: - const bool produceSoA_; - const bool produceLegacy_; - - using IProductType = cms::cuda::Product>; - const edm::EDGetTokenT recHitsM0TokenIn_; - - using OProductType = hcal::RecHitCollection>; - const edm::EDPutTokenT recHitsM0TokenOut_; - const edm::EDPutTokenT recHitsLegacyTokenOut_; - - // to pass from acquire to produce - OProductType tmpRecHits_; -}; - -void HcalCPURecHitsProducer::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { - edm::ParameterSetDescription desc; - - desc.add("recHitsM0LabelIn", edm::InputTag{"hbheRecHitProducerGPU"}); - desc.add("recHitsM0LabelOut", ""); - desc.add("recHitsLegacyLabelOut", ""); - desc.add("produceSoA", true); - desc.add("produceLegacy", true); - - confDesc.addWithDefaultLabel(desc); -} - -HcalCPURecHitsProducer::HcalCPURecHitsProducer(const edm::ParameterSet& ps) - : produceSoA_{ps.getParameter("produceSoA")}, - produceLegacy_{ps.getParameter("produceLegacy")}, - recHitsM0TokenIn_{consumes(ps.getParameter("recHitsM0LabelIn"))}, - recHitsM0TokenOut_{produceSoA_ ? produces(ps.getParameter("recHitsM0LabelOut")) - : edm::EDPutTokenT{}}, // empty token if disabled - recHitsLegacyTokenOut_{produceLegacy_ - ? produces(ps.getParameter("recHitsLegacyLabelOut")) - : edm::EDPutTokenT{}} // empty token if disabled -{} - -void HcalCPURecHitsProducer::acquire(edm::Event const& event, - edm::EventSetup const& setup, - edm::WaitingTaskWithArenaHolder taskHolder) { - // retrieve data/ctx - auto const& recHitsProduct = event.get(recHitsM0TokenIn_); - cms::cuda::ScopedContextAcquire ctx{recHitsProduct, std::move(taskHolder)}; - auto const& recHits = ctx.get(recHitsProduct); - - // resize tmp buffers - tmpRecHits_.resize(recHits.size); - -#ifdef HCAL_MAHI_CPUDEBUG - std::cout << "num rec Hits = " << recHits.size << std::endl; -#endif - - // do not try to copy the rechits if they are empty - if (recHits.size == 0) { - return; - } - - auto lambdaToTransfer = [&ctx](auto& dest, auto* src) { - using vector_type = typename std::remove_reference::type; - using src_data_type = typename std::remove_pointer::type; - using type = typename vector_type::value_type; - static_assert(std::is_same::value && "Dest and Src data types do not match"); - cudaCheck(cudaMemcpyAsync(dest.data(), src, dest.size() * sizeof(type), cudaMemcpyDeviceToHost, ctx.stream())); - }; - - lambdaToTransfer(tmpRecHits_.energy, recHits.energy.get()); - lambdaToTransfer(tmpRecHits_.chi2, recHits.chi2.get()); - lambdaToTransfer(tmpRecHits_.energyM0, recHits.energyM0.get()); - lambdaToTransfer(tmpRecHits_.timeM0, recHits.timeM0.get()); - lambdaToTransfer(tmpRecHits_.did, recHits.did.get()); -} - -void HcalCPURecHitsProducer::produce(edm::Event& event, edm::EventSetup const& setup) { - if (produceLegacy_) { - // populate the legacy collection - auto recHitsLegacy = std::make_unique(); - // did not set size with ctor as there is no setter for did - recHitsLegacy->reserve(tmpRecHits_.did.size()); - for (uint32_t i = 0; i < tmpRecHits_.did.size(); i++) { - // skip bad channels - if (tmpRecHits_.chi2[i] < 0) - continue; - - // build a legacy rechit with the computed detid and MAHI energy - recHitsLegacy->emplace_back(HcalDetId{tmpRecHits_.did[i]}, - tmpRecHits_.energy[i], - 0 // timeRising - ); - // update the legacy rechit with the Chi2 and M0 values - recHitsLegacy->back().setChiSquared(tmpRecHits_.chi2[i]); - recHitsLegacy->back().setRawEnergy(tmpRecHits_.energyM0[i]); - } - - // put the legacy collection - event.put(recHitsLegacyTokenOut_, std::move(recHitsLegacy)); - } - - if (produceSoA_) { - // put the SoA collection - event.emplace(recHitsM0TokenOut_, std::move(tmpRecHits_)); - } - // clear the temporary collection for the next event - tmpRecHits_.resize(0); -} - -DEFINE_FWK_MODULE(HcalCPURecHitsProducer); diff --git a/RecoLocalCalo/HcalRecProducers/src/HcalESProducersGPUDefs.cc b/RecoLocalCalo/HcalRecProducers/src/HcalESProducersGPUDefs.cc deleted file mode 100644 index d28840f860f6b..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/src/HcalESProducersGPUDefs.cc +++ /dev/null @@ -1,127 +0,0 @@ -#include "CondFormats/DataRecord/interface/HcalCombinedRecordsGPU.h" -#include "CondFormats/DataRecord/interface/HcalGainWidthsRcd.h" -#include "CondFormats/DataRecord/interface/HcalGainsRcd.h" -#include "CondFormats/DataRecord/interface/HcalLUTCorrsRcd.h" -#include "CondFormats/DataRecord/interface/HcalPedestalWidthsRcd.h" -#include "CondFormats/DataRecord/interface/HcalPedestalsRcd.h" -#include "CondFormats/DataRecord/interface/HcalQIEDataRcd.h" -#include "CondFormats/DataRecord/interface/HcalQIETypesRcd.h" -#include "CondFormats/DataRecord/interface/HcalRecoParamsRcd.h" -#include "CondFormats/DataRecord/interface/HcalRespCorrsRcd.h" -#include "CondFormats/DataRecord/interface/HcalSiPMCharacteristicsRcd.h" -#include "CondFormats/DataRecord/interface/HcalSiPMParametersRcd.h" -#include "CondFormats/DataRecord/interface/HcalTimeCorrsRcd.h" -#include "CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalWidthsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalConvertedEffectivePedestalsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalConvertedPedestalWidthsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalConvertedPedestalsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalGainWidths.h" -#include "CondFormats/HcalObjects/interface/HcalGainWidthsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalGains.h" -#include "CondFormats/HcalObjects/interface/HcalGainsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalLUTCorrs.h" -#include "CondFormats/HcalObjects/interface/HcalLUTCorrsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalPedestalWidths.h" -#include "CondFormats/HcalObjects/interface/HcalPedestalWidthsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalPedestals.h" -#include "CondFormats/HcalObjects/interface/HcalPedestalsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalQIECodersGPU.h" -#include "CondFormats/HcalObjects/interface/HcalQIEData.h" -#include "CondFormats/HcalObjects/interface/HcalQIETypes.h" -#include "CondFormats/HcalObjects/interface/HcalQIETypesGPU.h" -#include "CondFormats/HcalObjects/interface/HcalRecoParams.h" -#include "CondFormats/HcalObjects/interface/HcalRecoParamsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalRespCorrs.h" -#include "CondFormats/HcalObjects/interface/HcalRespCorrsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalChannelQuality.h" -#include "CondFormats/HcalObjects/interface/HcalChannelQualityGPU.h" -#include "CondFormats/DataRecord/interface/HcalChannelQualityRcd.h" -#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristics.h" -#include "CondFormats/HcalObjects/interface/HcalSiPMCharacteristicsGPU.h" -#include "CondFormats/HcalObjects/interface/HcalSiPMParameters.h" -#include "CondFormats/HcalObjects/interface/HcalSiPMParametersGPU.h" -#include "CondFormats/HcalObjects/interface/HcalTimeCorrs.h" -#include "CondFormats/HcalObjects/interface/HcalTimeCorrsGPU.h" -#include "HeterogeneousCore/CUDACore/interface/ConvertingESProducerT.h" -#include "HeterogeneousCore/CUDACore/interface/ConvertingESProducerWithDependenciesT.h" -#include "RecoLocalCalo/HcalRecAlgos/interface/HcalRecoParamsWithPulseShapesGPU.h" - -using HcalRecoParamsGPUESProducer = ConvertingESProducerT; - -using HcalRecoParamsWithPulseShapesGPUESProducer = - ConvertingESProducerT; - -using HcalPedestalsGPUESProducer = ConvertingESProducerT; - -using HcalGainsGPUESProducer = ConvertingESProducerT; - -using HcalLUTCorrsGPUESProducer = ConvertingESProducerT; - -using HcalRespCorrsGPUESProducer = ConvertingESProducerT; - -using HcalTimeCorrsGPUESProducer = ConvertingESProducerT; - -using HcalPedestalWidthsGPUESProducer = - ConvertingESProducerT; - -using HcalGainWidthsGPUESProducer = ConvertingESProducerT; - -using HcalQIECodersGPUESProducer = ConvertingESProducerT; - -using HcalChannelQualityGPUESProducer = - ConvertingESProducerT; - -using HcalQIETypesGPUESProducer = ConvertingESProducerT; - -using HcalSiPMParametersGPUESProducer = - ConvertingESProducerT; - -using HcalSiPMCharacteristicsGPUESProducer = - ConvertingESProducerT; - -using HcalConvertedPedestalsGPUESProducer = ConvertingESProducerWithDependenciesT; - -using HcalConvertedEffectivePedestalsGPUESProducer = - ConvertingESProducerWithDependenciesT; - -using HcalConvertedPedestalWidthsGPUESProducer = ConvertingESProducerWithDependenciesT; - -using HcalConvertedEffectivePedestalWidthsGPUESProducer = - ConvertingESProducerWithDependenciesT; - -DEFINE_FWK_EVENTSETUP_MODULE(HcalRecoParamsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalRecoParamsWithPulseShapesGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalPedestalsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalGainsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalLUTCorrsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalRespCorrsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalTimeCorrsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalPedestalWidthsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalGainWidthsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalQIECodersGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalChannelQualityGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalQIETypesGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalSiPMParametersGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalSiPMCharacteristicsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalConvertedPedestalsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalConvertedEffectivePedestalsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalConvertedPedestalWidthsGPUESProducer); -DEFINE_FWK_EVENTSETUP_MODULE(HcalConvertedEffectivePedestalWidthsGPUESProducer); diff --git a/RecoLocalCalo/HcalRecProducers/src/HcalMahiPulseOffsetsGPUESProducer.cc b/RecoLocalCalo/HcalRecProducers/src/HcalMahiPulseOffsetsGPUESProducer.cc deleted file mode 100644 index 1cbc3286d6742..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/src/HcalMahiPulseOffsetsGPUESProducer.cc +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include -#include -#include - -#include "FWCore/Framework/interface/ESProducer.h" -#include "FWCore/Framework/interface/ESProductHost.h" -#include "FWCore/Framework/interface/ESTransientHandle.h" -#include "FWCore/Framework/interface/EventSetupRecordIntervalFinder.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/ModuleFactory.h" -#include "FWCore/Framework/interface/SourceFactory.h" -#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/Utilities/interface/ReusableObjectHolder.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" -#include "RecoLocalCalo/HcalRecAlgos/interface/HcalMahiPulseOffsetsGPU.h" - -class HcalMahiPulseOffsetsGPUESProducer : public edm::ESProducer, public edm::EventSetupRecordIntervalFinder { -public: - HcalMahiPulseOffsetsGPUESProducer(edm::ParameterSet const&); - ~HcalMahiPulseOffsetsGPUESProducer() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions&); - std::unique_ptr produce(JobConfigurationGPURecord const&); - -protected: - void setIntervalFor(const edm::eventsetup::EventSetupRecordKey&, - const edm::IOVSyncValue&, - edm::ValidityInterval&) override; - -private: - std::vector pulseOffsets_; -}; - -HcalMahiPulseOffsetsGPUESProducer::HcalMahiPulseOffsetsGPUESProducer(edm::ParameterSet const& pset) - : pulseOffsets_(pset.getParameter>("pulseOffsets")) { - setWhatProduced(this); - findingRecord(); -} - -void HcalMahiPulseOffsetsGPUESProducer::setIntervalFor(const edm::eventsetup::EventSetupRecordKey& iKey, - const edm::IOVSyncValue& iTime, - edm::ValidityInterval& oInterval) { - oInterval = edm::ValidityInterval(edm::IOVSyncValue::beginOfTime(), edm::IOVSyncValue::endOfTime()); -} - -void HcalMahiPulseOffsetsGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& desc) { - edm::ParameterSetDescription d; - d.add>("pulseOffsets", {-3, -2, -1, 0, 1, 2, 3, 4}); - desc.addWithDefaultLabel(d); -} - -std::unique_ptr HcalMahiPulseOffsetsGPUESProducer::produce(JobConfigurationGPURecord const&) { - return std::make_unique(pulseOffsets_); -} - -DEFINE_FWK_EVENTSETUP_SOURCE(HcalMahiPulseOffsetsGPUESProducer); diff --git a/RecoLocalCalo/HcalRecProducers/src/KernelHelpers.h b/RecoLocalCalo/HcalRecProducers/src/KernelHelpers.h deleted file mode 100644 index ade221b2c4870..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/src/KernelHelpers.h +++ /dev/null @@ -1,220 +0,0 @@ -#ifndef RecoLocalCalo_HcalRecProducers_src_KernelHelpers_h -#define RecoLocalCalo_HcalRecProducers_src_KernelHelpers_h - -#include "RecoLocalCalo/HcalRecAlgos/interface/HcalConstants.h" - -#include "DeclsForKernels.h" - -namespace hcal { - namespace reconstruction { - - // this is from HcalTimeSlew. - // HcalTimeSlew are values that come in from ESProducer that takes them - // from a python config. see DeclsForKernels for more explanation - __forceinline__ __device__ float compute_time_slew_delay(float const fC, - float const tzero, - float const slope, - float const tmax) { - auto const rawDelay = tzero + slope * std::log(fC); - return rawDelay < 0 ? 0 : (rawDelay > tmax ? tmax : rawDelay); - } - - // HcalQIEShapes are hardcoded in HcalQIEData.cc basically - // + some logic to generate 128 and 256 value arrays... - __constant__ float const qie8shape[129] = { - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, - 18, 20, 22, 24, 26, 28, 31, 34, 37, 40, 44, 48, 52, 57, 62, 57, 62, - 67, 72, 77, 82, 87, 92, 97, 102, 107, 112, 117, 122, 127, 132, 142, 152, 162, - 172, 182, 192, 202, 217, 232, 247, 262, 282, 302, 322, 347, 372, 347, 372, 397, 422, - 447, 472, 497, 522, 547, 572, 597, 622, 647, 672, 697, 722, 772, 822, 872, 922, 972, - 1022, 1072, 1147, 1222, 1297, 1372, 1472, 1572, 1672, 1797, 1922, 1797, 1922, 2047, 2172, 2297, 2422, - 2547, 2672, 2797, 2922, 3047, 3172, 3297, 3422, 3547, 3672, 3922, 4172, 4422, 4672, 4922, 5172, 5422, - 5797, 6172, 6547, 6922, 7422, 7922, 8422, 9047, 9672, 10297}; - - __constant__ float const qie11shape[257] = { - -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, - 11.5, 12.5, 13.5, 14.5, 15.5, 17.5, 19.5, 21.5, 23.5, 25.5, 27.5, 29.5, - 31.5, 33.5, 35.5, 37.5, 39.5, 41.5, 43.5, 45.5, 47.5, 49.5, 51.5, 53.5, - 55.5, 59.5, 63.5, 67.5, 71.5, 75.5, 79.5, 83.5, 87.5, 91.5, 95.5, 99.5, - 103.5, 107.5, 111.5, 115.5, 119.5, 123.5, 127.5, 131.5, 135.5, 139.5, 147.5, 155.5, - 163.5, 171.5, 179.5, 187.5, 171.5, 179.5, 187.5, 195.5, 203.5, 211.5, 219.5, 227.5, - 235.5, 243.5, 251.5, 259.5, 267.5, 275.5, 283.5, 291.5, 299.5, 315.5, 331.5, 347.5, - 363.5, 379.5, 395.5, 411.5, 427.5, 443.5, 459.5, 475.5, 491.5, 507.5, 523.5, 539.5, - 555.5, 571.5, 587.5, 603.5, 619.5, 651.5, 683.5, 715.5, 747.5, 779.5, 811.5, 843.5, - 875.5, 907.5, 939.5, 971.5, 1003.5, 1035.5, 1067.5, 1099.5, 1131.5, 1163.5, 1195.5, 1227.5, - 1259.5, 1291.5, 1355.5, 1419.5, 1483.5, 1547.5, 1611.5, 1675.5, 1547.5, 1611.5, 1675.5, 1739.5, - 1803.5, 1867.5, 1931.5, 1995.5, 2059.5, 2123.5, 2187.5, 2251.5, 2315.5, 2379.5, 2443.5, 2507.5, - 2571.5, 2699.5, 2827.5, 2955.5, 3083.5, 3211.5, 3339.5, 3467.5, 3595.5, 3723.5, 3851.5, 3979.5, - 4107.5, 4235.5, 4363.5, 4491.5, 4619.5, 4747.5, 4875.5, 5003.5, 5131.5, 5387.5, 5643.5, 5899.5, - 6155.5, 6411.5, 6667.5, 6923.5, 7179.5, 7435.5, 7691.5, 7947.5, 8203.5, 8459.5, 8715.5, 8971.5, - 9227.5, 9483.5, 9739.5, 9995.5, 10251.5, 10507.5, 11019.5, 11531.5, 12043.5, 12555.5, 13067.5, 13579.5, - 12555.5, 13067.5, 13579.5, 14091.5, 14603.5, 15115.5, 15627.5, 16139.5, 16651.5, 17163.5, 17675.5, 18187.5, - 18699.5, 19211.5, 19723.5, 20235.5, 20747.5, 21771.5, 22795.5, 23819.5, 24843.5, 25867.5, 26891.5, 27915.5, - 28939.5, 29963.5, 30987.5, 32011.5, 33035.5, 34059.5, 35083.5, 36107.5, 37131.5, 38155.5, 39179.5, 40203.5, - 41227.5, 43275.5, 45323.5, 47371.5, 49419.5, 51467.5, 53515.5, 55563.5, 57611.5, 59659.5, 61707.5, 63755.5, - 65803.5, 67851.5, 69899.5, 71947.5, 73995.5, 76043.5, 78091.5, 80139.5, 82187.5, 84235.5, 88331.5, 92427.5, - 96523.5, 100620, 104716, 108812, 112908}; - - // Conditions are transferred once per IOV - // Access is performed based on the det id which is converted to a linear index - // 2 funcs below are taken from HcalTopology (reimplemented here). - // Inputs are constants that are also taken from HcalTopology - // but passed to the kernel as arguments using the HclaTopology itself - constexpr int32_t IPHI_MAX = 72; - - __forceinline__ __device__ uint32_t did2linearIndexHB( - uint32_t const didraw, int const maxDepthHB, int const firstHBRing, int const lastHBRing, int const nEtaHB) { - HcalDetId did{didraw}; - uint32_t const value = (did.depth() - 1) + maxDepthHB * (did.iphi() - 1); - return did.ieta() > 0 ? value + maxDepthHB * hcal::reconstruction::IPHI_MAX * (did.ieta() - firstHBRing) - : value + maxDepthHB * hcal::reconstruction::IPHI_MAX * (did.ieta() + lastHBRing + nEtaHB); - } - - __forceinline__ __device__ uint32_t did2linearIndexHE(uint32_t const didraw, - int const maxDepthHE, - int const maxPhiHE, - int const firstHERing, - int const lastHERing, - int const nEtaHE) { - HcalDetId did{didraw}; - uint32_t const value = (did.depth() - 1) + maxDepthHE * (did.iphi() - 1); - return did.ieta() > 0 ? value + maxDepthHE * maxPhiHE * (did.ieta() - firstHERing) - : value + maxDepthHE * maxPhiHE * (did.ieta() + lastHERing + nEtaHE); - } - - __forceinline__ __device__ uint32_t get_qiecoder_index(uint32_t const capid, uint32_t const range) { - return capid * 4 + range; - } - - __forceinline__ __device__ float compute_reco_correction_factor(float const par1, - float const par2, - float const par3, - float const x) { - return par3 * x * x + par2 * x + par1; - } - - // compute the charge using the adc, qie type and the appropriate qie shape array - __forceinline__ __device__ float compute_coder_charge( - int const qieType, uint8_t const adc, uint8_t const capid, float const* qieOffsets, float const* qieSlopes) { - auto const range = qieType == 0 ? (adc >> 5) & 0x3 : (adc >> 6) & 0x3; - auto const* qieShapeToUse = qieType == 0 ? qie8shape : qie11shape; - auto const nbins = qieType == 0 ? 32 : 64; - auto const center = adc % nbins == nbins - 1 ? 0.5 * (3 * qieShapeToUse[adc] - qieShapeToUse[adc - 1]) - : 0.5 * (qieShapeToUse[adc] + qieShapeToUse[adc + 1]); - auto const index = get_qiecoder_index(capid, range); - return (center - qieOffsets[index]) / qieSlopes[index]; - } - - // this is from - // https://github.com/cms-sw/cmssw/blob/master/RecoLocalCalo/HcalRecProducers/src/HBHEPhase1Reconstructor.cc#L140 - - __forceinline__ __device__ float compute_diff_charge_gain(int const qieType, - uint8_t adc, - uint8_t const capid, - float const* qieOffsets, - float const* qieSlopes, - bool const isqie11) { - constexpr uint32_t mantissaMaskQIE8 = 0x1fu; - constexpr uint32_t mantissaMaskQIE11 = 0x3f; - auto const mantissaMask = isqie11 ? mantissaMaskQIE11 : mantissaMaskQIE8; - auto const q = compute_coder_charge(qieType, adc, capid, qieOffsets, qieSlopes); - auto const mantissa = adc & mantissaMask; - - if (mantissa == 0u || mantissa == mantissaMask - 1u) - return compute_coder_charge(qieType, adc + 1u, capid, qieOffsets, qieSlopes) - q; - else if (mantissa == 1u || mantissa == mantissaMask) - return q - compute_coder_charge(qieType, adc - 1u, capid, qieOffsets, qieSlopes); - else { - auto const qup = compute_coder_charge(qieType, adc + 1u, capid, qieOffsets, qieSlopes); - auto const qdown = compute_coder_charge(qieType, adc - 1u, capid, qieOffsets, qieSlopes); - auto const upgain = qup - q; - auto const downgain = q - qdown; - auto const averagegain = (qup - qdown) / 2.f; - if (std::abs(upgain - downgain) < 0.01f * averagegain) - return averagegain; - else { - auto const q2up = compute_coder_charge(qieType, adc + 2u, capid, qieOffsets, qieSlopes); - auto const q2down = compute_coder_charge(qieType, adc - 2u, capid, qieOffsets, qieSlopes); - auto const upgain2 = q2up - qup; - auto const downgain2 = qdown - q2down; - if (std::abs(upgain2 - upgain) < std::abs(downgain2 - downgain)) - return upgain; - else - return downgain; - } - } - } - - // TODO: remove what's not needed - // originally from from RecoLocalCalo/HcalRecAlgos/src/PulseShapeFunctor.cc - __forceinline__ __device__ float compute_pulse_shape_value(float const pulse_time, - int const sample, - int const shift, - float const* acc25nsVec, - float const* diff25nsItvlVec, - float const* accVarLenIdxMinusOneVec, - float const* diffVarItvlIdxMinusOneVec, - float const* accVarLenIdxZeroVec, - float const* diffVarItvlIdxZeroVec) { - // constants - constexpr float slew = 0.f; - constexpr auto ns_per_bx = hcal::constants::nsPerBX; - - // FIXME: clean up all the rounding... this is coming from original cpu version - float const i_start_float = -hcal::constants::iniTimeShift - pulse_time - slew > 0.f - ? 0.f - : std::abs(-hcal::constants::iniTimeShift - pulse_time - slew) + 1.f; - int i_start = static_cast(i_start_float); - float offset_start = static_cast(i_start) - hcal::constants::iniTimeShift - pulse_time - slew; - // FIXME: do we need a check for nan??? -#ifdef HCAL_MAHI_GPUDEBUG - if (shift == 0) - printf("i_start_float = %f i_start = %d offset_start = %f\n", i_start_float, i_start, offset_start); -#endif - - // boundary - if (offset_start == 1.0f) { - offset_start = 0.f; - i_start -= 1; - } - -#ifdef HCAL_MAHI_GPUDEBUG - if (shift == 0) - printf("i_start_float = %f i_start = %d offset_start = %f\n", i_start_float, i_start, offset_start); -#endif - - int const bin_start = static_cast(offset_start); - auto const bin_start_up = static_cast(bin_start) + 0.5f; - int const bin_0_start = offset_start < bin_start_up ? bin_start - 1 : bin_start; - int const its_start = i_start / ns_per_bx; - int const distTo25ns_start = hcal::constants::nsPerBX - 1 - i_start % ns_per_bx; - auto const factor = offset_start - static_cast(bin_0_start) - 0.5; - -#ifdef HCAL_MAHI_GPUDEBUG - if (shift == 0) { - printf("bin_start = %d bin_0_start = %d its_start = %d distTo25ns_start = %d factor = %f\n", - bin_start, - bin_0_start, - its_start, - distTo25ns_start, - factor); - } -#endif - - auto const sample_over10ts = sample + shift; - float value = 0.0f; - if (sample_over10ts == its_start) { - value = bin_0_start == -1 - ? accVarLenIdxMinusOneVec[distTo25ns_start] + factor * diffVarItvlIdxMinusOneVec[distTo25ns_start] - : accVarLenIdxZeroVec[distTo25ns_start] + factor * diffVarItvlIdxZeroVec[distTo25ns_start]; - } else if (sample_over10ts > its_start) { - int const bin_idx = distTo25ns_start + 1 + (sample_over10ts - its_start - 1) * ns_per_bx + bin_0_start; - value = acc25nsVec[bin_idx] + factor * diff25nsItvlVec[bin_idx]; - } - return value; - } - - } // namespace reconstruction -} // namespace hcal - -#endif // RecoLocalCalo_HcalRecProducers_src_KernelHelpers_h diff --git a/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu b/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu deleted file mode 100644 index 759dee43e72ba..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/src/MahiGPU.cu +++ /dev/null @@ -1,1272 +0,0 @@ -#include - -#include "DataFormats/CaloRecHit/interface/MultifitComputations.h" -// needed to compile with USER_CXXFLAGS="-DCOMPUTE_TDC_TIME" -#include "DataFormats/HcalRecHit/interface/HcalSpecialTimes.h" -#include "FWCore/Utilities/interface/CMSUnrollLoop.h" - -// TODO reuse some of the HCAL constats from -//#include "RecoLocalCalo/HcalRecAlgos/interface/HcalConstants.h" - -#include "SimpleAlgoGPU.h" -#include "KernelHelpers.h" - -#ifdef HCAL_MAHI_GPUDEBUG -#define DETID_TO_DEBUG 1125647428 -#endif - -namespace hcal { - namespace mahi { - - // TODO: provide constants from configuration - // from RecoLocalCalo/HcalRecProducers/python/HBHEMahiParameters_cfi.py - constexpr int nMaxItersMin = 50; - constexpr int nMaxItersNNLS = 500; - constexpr double nnlsThresh = 1e-11; - constexpr float deltaChi2Threashold = 1e-3; - - // from RecoLocalCalo/HcalRecProducers/src/HBHEPhase1Reconstructor.cc - __forceinline__ __device__ float get_raw_charge(double const charge, - double const pedestal, - float const* shrChargeMinusPedestal, - float const* parLin1Values, - float const* parLin2Values, - float const* parLin3Values, - int32_t const nsamplesForCompute, - int32_t const soi, - int const sipmQTSShift, - int const sipmQNTStoSum, - int const sipmType, - float const fcByPE, - bool const isqie11) { - float rawCharge; - - if (!isqie11) - rawCharge = charge; - else { - auto const parLin1 = parLin1Values[sipmType - 1]; - auto const parLin2 = parLin2Values[sipmType - 1]; - auto const parLin3 = parLin3Values[sipmType - 1]; - - int const first = std::max(soi + sipmQTSShift, 0); - int const last = std::min(soi + sipmQNTStoSum, nsamplesForCompute); - float sipmq = 0.0f; - for (auto ts = first; ts < last; ts++) - sipmq += shrChargeMinusPedestal[threadIdx.y * nsamplesForCompute + ts]; - auto const effectivePixelsFired = sipmq / fcByPE; - auto const factor = - hcal::reconstruction::compute_reco_correction_factor(parLin1, parLin2, parLin3, effectivePixelsFired); - rawCharge = (charge - pedestal) * factor + pedestal; - -#ifdef HCAL_MAHI_GPUDEBUG - printf("first = %d last = %d sipmQ = %f factor = %f rawCharge = %f\n", first, last, sipmq, factor, rawCharge); -#endif - } - return rawCharge; - } - - // Assume: same number of samples for HB and HE - // TODO: add/validate restrict (will increase #registers in use by the kernel) - __global__ void kernel_prep1d_sameNumberOfSamples(float* amplitudes, - float* noiseTerms, - float* electronicNoiseTerms, - float* outputEnergy, - float* outputChi2, - uint16_t const* dataf01HE, - uint16_t const* dataf5HB, - uint16_t const* dataf3HB, - uint32_t const* idsf01HE, - uint32_t const* idsf5HB, - uint32_t const* idsf3HB, - uint32_t const stridef01HE, - uint32_t const stridef5HB, - uint32_t const stridef3HB, - uint32_t const nchannelsf01HE, - uint32_t const nchannelsf5HB, - uint8_t const* npresamplesf5HB, - int8_t* soiSamples, - float* method0Energy, - float* method0Time, - uint32_t* outputdid, - uint32_t const nchannels, - uint32_t const* qualityStatus, - uint32_t const* recoParam1Values, - uint32_t const* recoParam2Values, - float const* qieCoderOffsets, - float const* qieCoderSlopes, - int const* qieTypes, - float const* pedestalWidths, - float const* effectivePedestalWidths, - float const* pedestals, - float const* effectivePedestals, - bool const useEffectivePedestals, - int const* sipmTypeValues, - float const* fcByPEValues, - float const* parLin1Values, - float const* parLin2Values, - float const* parLin3Values, - float const* gainValues, - float const* respCorrectionValues, - int const maxDepthHB, - int const maxDepthHE, - int const maxPhiHE, - int const firstHBRing, - int const lastHBRing, - int const firstHERing, - int const lastHERing, - int const nEtaHB, - int const nEtaHE, - int const sipmQTSShift, - int const sipmQNTStoSum, - int const firstSampleShift, - uint32_t const offsetForHashes, - float const ts4Thresh, - int const startingSample) { - // indices + runtime constants - auto const sample = threadIdx.x + startingSample; - auto const sampleWithinWindow = threadIdx.x; - int32_t const nsamplesForCompute = blockDim.x; - auto const lch = threadIdx.y; - auto const gch = lch + blockDim.y * blockIdx.x; - auto const nchannels_per_block = blockDim.y; - auto const linearThPerBlock = threadIdx.x + threadIdx.y * blockDim.x; - - // remove - if (gch >= nchannels) - return; - - // initialize all output buffers - if (sampleWithinWindow == 0) { - outputdid[gch] = 0; - method0Energy[gch] = 0; - method0Time[gch] = 0; - outputEnergy[gch] = 0; - outputChi2[gch] = 0; - soiSamples[gch] = -1; - } - -#ifdef HCAL_MAHI_GPUDEBUG -#ifdef HCAL_MAHI_GPUDEBUG_SINGLECHANNEL - if (gch > 0) - return; -#endif -#endif - - // configure shared mem - extern __shared__ char smem[]; - float* shrEnergyM0PerTS = reinterpret_cast(smem); - float* shrChargeMinusPedestal = shrEnergyM0PerTS + nsamplesForCompute * nchannels_per_block; - float* shrMethod0EnergyAccum = shrChargeMinusPedestal + nsamplesForCompute * nchannels_per_block; - float* shrEnergyM0TotalAccum = shrMethod0EnergyAccum + nchannels_per_block; - unsigned long long int* shrMethod0EnergySamplePair = - reinterpret_cast(shrEnergyM0TotalAccum + nchannels_per_block); - if (sampleWithinWindow == 0) { - shrMethod0EnergyAccum[lch] = 0; - shrMethod0EnergySamplePair[lch] = __float_as_uint(std::numeric_limits::min()); - shrEnergyM0TotalAccum[lch] = 0; - } - - // offset output - auto* amplitudesForChannel = amplitudes + nsamplesForCompute * gch; - auto* noiseTermsForChannel = noiseTerms + nsamplesForCompute * gch; - auto* electronicNoiseTermsForChannel = electronicNoiseTerms + nsamplesForCompute * gch; - auto const nchannelsf015 = nchannelsf01HE + nchannelsf5HB; - - // get event input quantities - auto const stride = gch < nchannelsf01HE ? stridef01HE : (gch < nchannelsf015 ? stridef5HB : stridef3HB); - auto const nsamples = gch < nchannelsf01HE ? compute_nsamples(stride) - : (gch < nchannelsf015 ? compute_nsamples(stride) - : compute_nsamples(stride)); - -#ifdef HCAL_MAHI_GPUDEBUG - assert(nsamples == nsamplesForCompute || nsamples - startingSample == nsamplesForCompute); -#endif - - auto const id = gch < nchannelsf01HE - ? idsf01HE[gch] - : (gch < nchannelsf015 ? idsf5HB[gch - nchannelsf01HE] : idsf3HB[gch - nchannelsf015]); - auto const did = HcalDetId{id}; - - auto const adc = - gch < nchannelsf01HE - ? adc_for_sample(dataf01HE + stride * gch, sample) - : (gch < nchannelsf015 ? adc_for_sample(dataf5HB + stride * (gch - nchannelsf01HE), sample) - : adc_for_sample(dataf3HB + stride * (gch - nchannelsf015), sample)); - auto const capid = - gch < nchannelsf01HE - ? capid_for_sample(dataf01HE + stride * gch, sample) - : (gch < nchannelsf015 ? capid_for_sample(dataf5HB + stride * (gch - nchannelsf01HE), sample) - : capid_for_sample(dataf3HB + stride * (gch - nchannelsf015), sample)); - -#ifdef HCAL_MAHI_GPUDEBUG -#ifdef HCAL_MAHI_GPUDEBUG_FILTERDETID - if (id != DETID_TO_DEBUG) - return; -#endif -#endif - - // compute hash for this did - auto const hashedId = - did.subdetId() == HcalBarrel - ? hcal::reconstruction::did2linearIndexHB(id, maxDepthHB, firstHBRing, lastHBRing, nEtaHB) - : hcal::reconstruction::did2linearIndexHE(id, maxDepthHE, maxPhiHE, firstHERing, lastHERing, nEtaHE) + - offsetForHashes; - - // conditions based on the hash - // FIXME: remove hardcoded values - auto const qieType = qieTypes[hashedId] > 0 ? 1 : 0; // 2 types at this point - auto const* qieOffsets = qieCoderOffsets + hashedId * HcalQIECodersGPU::numValuesPerChannel; - auto const* qieSlopes = qieCoderSlopes + hashedId * HcalQIECodersGPU::numValuesPerChannel; - auto const* pedestalsForChannel = pedestals + hashedId * 4; - auto const* pedestalWidthsForChannel = useEffectivePedestals && (gch < nchannelsf01HE || gch >= nchannelsf015) - ? effectivePedestalWidths + hashedId * 4 - : pedestalWidths + hashedId * 4; - - auto const* gains = gainValues + hashedId * 4; - auto const gain = gains[capid]; - auto const gain0 = gains[0]; - auto const respCorrection = respCorrectionValues[hashedId]; - auto const pedestal = pedestalsForChannel[capid]; - auto const pedestalWidth = pedestalWidthsForChannel[capid]; - // if needed, only use effective pedestals for f01 - auto const pedestalToUseForMethod0 = useEffectivePedestals && (gch < nchannelsf01HE || gch >= nchannelsf015) - ? effectivePedestals[hashedId * 4 + capid] - : pedestal; - auto const sipmType = sipmTypeValues[hashedId]; - auto const fcByPE = fcByPEValues[hashedId]; - auto const recoParam1 = recoParam1Values[hashedId]; - auto const recoParam2 = recoParam2Values[hashedId]; - -#ifdef HCAL_MAHI_GPUDEBUG - printf("qieType = %d qieOffset0 = %f qieOffset1 = %f qieSlope0 = %f qieSlope1 = %f\n", - qieType, - qieOffsets[0], - qieOffsets[1], - qieSlopes[0], - qieSlopes[1]); -#endif - - // compute charge - auto const charge = hcal::reconstruction::compute_coder_charge(qieType, adc, capid, qieOffsets, qieSlopes); - - shrChargeMinusPedestal[linearThPerBlock] = charge - pedestal; - if (gch < nchannelsf01HE) { - // NOTE: assume that soi is high only for a single guy! - // which must be the case. cpu version does not check for that - // if that is not the case, we will see that with cuda mmecheck - auto const soibit = soibit_for_sample(dataf01HE + stride * gch, sample); - if (soibit == 1) - soiSamples[gch] = sampleWithinWindow; - } else if (gch >= nchannelsf015) { - auto const soibit = soibit_for_sample(dataf3HB + stride * (gch - nchannelsf015), sample); - if (soibit == 1) - soiSamples[gch] = sampleWithinWindow; - } - __syncthreads(); - int32_t const soi = gch < nchannelsf01HE - ? soiSamples[gch] - : (gch < nchannelsf015 ? npresamplesf5HB[gch - nchannelsf01HE] : soiSamples[gch]); - - bool badSOI = (soi < 0 or soi >= nsamplesForCompute); - if (badSOI and sampleWithinWindow == 0) { -#ifdef GPU_DEBUG - printf("Found HBHE channel %d with invalid SOI %d\n", gch, soi); -#endif - // mark the channel as bad - outputChi2[gch] = -9999.f; - } - - //int32_t const soi = gch >= nchannelsf01HE - // ? npresamplesf5HB[gch - nchannelsf01HE] - // : soiSamples[gch]; - // this is here just to make things uniform... - if (gch >= nchannelsf01HE && gch < nchannelsf015 && sampleWithinWindow == 0) - soiSamples[gch] = npresamplesf5HB[gch - nchannelsf01HE]; - - // - // compute various quantities (raw charge and tdc stuff) - // NOTE: this branch will be divergent only for a single warp that - // sits on the boundary when flavor 01 channels end and flavor 5 start - // - float const rawCharge = get_raw_charge(charge, - pedestal, - shrChargeMinusPedestal, - parLin1Values, - parLin2Values, - parLin3Values, - nsamplesForCompute, - soi, - sipmQTSShift, - sipmQNTStoSum, - sipmType, - fcByPE, - gch < nchannelsf01HE || gch >= nchannelsf015); - - auto const dfc = hcal::reconstruction::compute_diff_charge_gain( - qieType, adc, capid, qieOffsets, qieSlopes, gch < nchannelsf01HE || gch >= nchannelsf015); - -#ifdef COMPUTE_TDC_TIME - float tdcTime; - if (gch >= nchannelsf01HE && gch < nchannelsf015) { - tdcTime = HcalSpecialTimes::UNKNOWN_T_NOTDC; - } else { - if (gch < nchannelsf01HE) - tdcTime = HcalSpecialTimes::getTDCTime(tdc_for_sample(dataf01HE + stride * gch, sample)); - else if (gch >= nchannelsf015) - tdcTime = - HcalSpecialTimes::getTDCTime(tdc_for_sample(dataf3HB + stride * (gch - nchannelsf015), sample)); - } -#endif // COMPUTE_TDC_TIME - - // compute method 0 quantities - // TODO: need to apply containment - // TODO: need to apply time slew - // TODO: for < run 3, apply HBM legacy energy correction - auto const nsamplesToAdd = recoParam1 < 10 ? recoParam2 : (recoParam1 >> 14) & 0xF; - auto const startSampleTmp = soi + firstSampleShift; - auto const startSample = startSampleTmp < 0 ? 0 : startSampleTmp; - auto const endSample = - startSample + nsamplesToAdd < nsamplesForCompute ? startSample + nsamplesToAdd : nsamplesForCompute; - // NOTE: gain is a small number < 10^-3, multiply it last - auto const energym0_per_ts = gain * ((rawCharge - pedestalToUseForMethod0) * respCorrection); - auto const energym0_per_ts_gain0 = gain0 * ((rawCharge - pedestalToUseForMethod0) * respCorrection); - // store to shared mem - shrEnergyM0PerTS[lch * nsamplesForCompute + sampleWithinWindow] = energym0_per_ts; - atomicAdd(&shrEnergyM0TotalAccum[lch], energym0_per_ts_gain0); - -#ifdef HCAL_MAHI_GPUDEBUG - printf( - "id = %u sample = %d gch = %d hashedId = %u adc = %u capid = %u\n" - " charge = %f rawCharge = %f dfc = %f pedestal = %f\n" - " gain = %f respCorrection = %f energym0_per_ts = %f\n", - id, - sample, - gch, - hashedId, - adc, - capid, - charge, - rawCharge, - dfc, - pedestalToUseForMethod0, - gain, - respCorrection, - energym0_per_ts); - printf( - "startSample = %d endSample = %d param1 = %u param2 = %u\n", startSample, endSample, recoParam1, recoParam2); -#endif - - if (sampleWithinWindow >= startSample && sampleWithinWindow < endSample) { - atomicAdd(&shrMethod0EnergyAccum[lch], energym0_per_ts); - // pack sample, energy as 64 bit value - unsigned long long int old = shrMethod0EnergySamplePair[lch], assumed; - unsigned long long int val = - (static_cast(sampleWithinWindow) << 32) + __float_as_uint(energym0_per_ts); - do { - assumed = old; - // decode energy, sample values - //int const current_sample = (assumed >> 32) & 0xffffffff; - float const current_energy = __uint_as_float(assumed & 0xffffffff); - if (energym0_per_ts > current_energy) - old = atomicCAS(&shrMethod0EnergySamplePair[lch], assumed, val); - else - break; - } while (assumed != old); - } - __syncthreads(); - - // NOTE: must take soi, as values for that thread are used... - // NOTE: does not run if soi is bad, because it does not match any sampleWithinWindow - if (sampleWithinWindow == soi) { - auto const method0_energy = shrMethod0EnergyAccum[lch]; - auto const val = shrMethod0EnergySamplePair[lch]; - int const max_sample = (val >> 32) & 0xffffffff; - float const max_energy = __uint_as_float(val & 0xffffffff); - float const max_energy_1 = - max_sample < nsamplesForCompute - 1 ? shrEnergyM0PerTS[lch * nsamplesForCompute + max_sample + 1] : 0.f; - float const position = nsamplesToAdd < nsamplesForCompute ? max_sample - soi : max_sample; - auto const sum = max_energy + max_energy_1; - // FIXME: for full comparison with cpu method 0 timing, - // need to correct by slew - // requires an accumulator -> more shared mem -> omit here unless - // really needed - float const time = - max_energy > 0.f && max_energy_1 > 0.f ? 25.f * (position + max_energy_1 / sum) : 25.f * position; - - // store method0 quantities to global mem - outputdid[gch] = id; - method0Energy[gch] = method0_energy; - method0Time[gch] = time; - -#ifdef HCAL_MAHI_GPUDEBUG - printf("tsTOT = %f tstrig = %f ts4Thresh = %f\n", shrEnergyM0TotalAccum[lch], energym0_per_ts_gain0, ts4Thresh); -#endif - - // Channel quality check - // https://github.com/cms-sw/cmssw/blob/master/RecoLocalCalo/HcalRecAlgos/plugins/HcalChannelPropertiesEP.cc#L107-L109 - // https://github.com/cms-sw/cmssw/blob/6d2f66057131baacc2fcbdd203588c41c885b42c/CondCore/HcalPlugins/plugins/HcalChannelQuality_PayloadInspector.cc#L30 - // const bool taggedBadByDb = severity.dropChannel(digistatus->getValue()); - // do not run MAHI if taggedBadByDb = true - - auto const digiStatus_ = qualityStatus[hashedId]; - const bool taggedBadByDb = (digiStatus_ / 32770); - - if (taggedBadByDb) - outputChi2[gch] = -9999.f; - - // check as in cpu version if mahi is not needed - // FIXME: KNOWN ISSUE: observed a problem when rawCharge and pedestal - // are basically equal and generate -0.00000... - // needs to be treated properly - if (!(shrEnergyM0TotalAccum[lch] > 0 && energym0_per_ts_gain0 > ts4Thresh)) { - // do not need to run mahi minimization - //outputEnergy[gch] = 0; energy already inited to 0 - outputChi2[gch] = -9999.f; - } - -#ifdef HCAL_MAHI_GPUDEBUG - printf("method0_energy = %f max_sample = %d max_energy = %f time = %f\n", - method0_energy, - max_sample, - max_energy, - time); -#endif - } - - // - // preparations for mahi fit - // - auto const amplitude = rawCharge - pedestalToUseForMethod0; - auto const noiseADC = (1. / std::sqrt(12)) * dfc; - auto const noisePhotoSq = amplitude > pedestalWidth ? (amplitude * fcByPE) : 0.f; - auto const noiseTerm = noiseADC * noiseADC + noisePhotoSq + pedestalWidth * pedestalWidth; - -#ifdef HCAL_MAHI_GPUDEBUG - printf( - "charge(%d) = %f pedestal(%d) = %f dfc(%d) = %f pedestalWidth(%d) = %f noiseADC(%d) = %f noisPhoto(%d) = " - "%f\n", - sample, - rawCharge, - sample, - pedestalToUseForMethod0, - sample, - dfc, - sample, - pedestalWidth, - sample, - noiseADC, - sample, - noisePhotoSq); -#endif - - // store to global memory - amplitudesForChannel[sampleWithinWindow] = amplitude; - noiseTermsForChannel[sampleWithinWindow] = noiseTerm; - electronicNoiseTermsForChannel[sampleWithinWindow] = pedestalWidth; - } - - // TODO: need to add an array of offsets for pulses (a la activeBXs...) - // Assume for now 8 pulses - __global__ void kernel_prep_pulseMatrices_sameNumberOfSamples(float* pulseMatrices, - float* pulseMatricesM, - float* pulseMatricesP, - int const* pulseOffsets, - float const* amplitudes, - uint32_t const* idsf01HE, - uint32_t const* idsf5HB, - uint32_t const* idsf3HB, - uint32_t const nchannelsf01HE, - uint32_t const nchannelsf5HB, - uint32_t const nchannelsTotal, - int8_t const* soiSamples, - uint32_t const* recoPulseShapeIds, - float const* acc25nsVecValues, - float const* diff25nsItvlVecValues, - float const* accVarLenIdxMinusOneVecValues, - float const* diffVarItvlIdxMinusOneVecValues, - float const* accVarLenIdxZeroVecValues, - float const* diffVarItvlIdxZeroVecValues, - float const meanTime, - float const timeSigmaSiPM, - float const timeSigmaHPD, - int const maxDepthHB, - int const maxDepthHE, - int const maxPhiHE, - int const firstHBRing, - int const lastHBRing, - int const firstHERing, - int const lastHERing, - int const nEtaHB, - int const nEtaHE, - uint32_t const offsetForHashes, - bool const applyTimeSlew, - float const tzeroTimeSlew, - float const slopeTimeSlew, - float const tmaxTimeSlew) { - // indices - auto const ipulse = threadIdx.y; - auto const npulses = blockDim.y; - auto const sample = threadIdx.x; - auto const nsamples = blockDim.x; - auto const lch = threadIdx.z; - auto const gch = lch + blockIdx.x * blockDim.z; - auto const nchannelsf015 = nchannelsf01HE + nchannelsf5HB; - - if (gch >= nchannelsTotal) - return; - - // conditions - auto const id = gch < nchannelsf01HE - ? idsf01HE[gch] - : (gch < nchannelsf015 ? idsf5HB[gch - nchannelsf01HE] : idsf3HB[gch - nchannelsf015]); - //auto const id = gch >= nchannelsf01HE - // ? idsf5HB[gch - nchannelsf01HE] - // : idsf01HE[gch]; - auto const deltaT = gch >= nchannelsf01HE && gch < nchannelsf015 ? timeSigmaHPD : timeSigmaSiPM; - auto const did = DetId{id}; - auto const hashedId = - did.subdetId() == HcalBarrel - ? hcal::reconstruction::did2linearIndexHB(id, maxDepthHB, firstHBRing, lastHBRing, nEtaHB) - : hcal::reconstruction::did2linearIndexHE(id, maxDepthHE, maxPhiHE, firstHERing, lastHERing, nEtaHE) + - offsetForHashes; - auto const recoPulseShapeId = recoPulseShapeIds[hashedId]; - auto const* acc25nsVec = acc25nsVecValues + recoPulseShapeId * hcal::constants::maxPSshapeBin; - auto const* diff25nsItvlVec = diff25nsItvlVecValues + recoPulseShapeId * hcal::constants::maxPSshapeBin; - auto const* accVarLenIdxMinusOneVec = accVarLenIdxMinusOneVecValues + recoPulseShapeId * hcal::constants::nsPerBX; - auto const* diffVarItvlIdxMinusOneVec = - diffVarItvlIdxMinusOneVecValues + recoPulseShapeId * hcal::constants::nsPerBX; - auto const* accVarLenIdxZeroVec = accVarLenIdxZeroVecValues + recoPulseShapeId * hcal::constants::nsPerBX; - auto const* diffVarItvlIdxZeroVec = diffVarItvlIdxZeroVecValues + recoPulseShapeId * hcal::constants::nsPerBX; - - // offset output arrays - auto* pulseMatrix = pulseMatrices + nsamples * npulses * gch; - auto* pulseMatrixM = pulseMatricesM + nsamples * npulses * gch; - auto* pulseMatrixP = pulseMatricesP + nsamples * npulses * gch; - - // amplitude per ipulse - int const soi = soiSamples[gch]; - int const pulseOffset = pulseOffsets[ipulse]; - auto const amplitude = amplitudes[gch * nsamples + pulseOffset + soi]; - -#ifdef HCAL_MAHI_GPUDEBUG -#ifdef HCAL_MAHI_GPUDEBUG_FILTERDETID - if (id != DETID_TO_DEBUG) - return; -#endif -#endif - -#ifdef HCAL_MAHI_GPUDEBUG - if (sample == 0 && ipulse == 0) { - for (int i = 0; i < 8; i++) - printf("amplitude(%d) = %f\n", i, amplitudes[gch * nsamples + i]); - printf("acc25nsVec and diff25nsItvlVec for recoPulseShapeId = %u\n", recoPulseShapeId); - for (int i = 0; i < 256; i++) { - printf("acc25nsVec(%d) = %f diff25nsItvlVec(%d) = %f\n", i, acc25nsVec[i], i, diff25nsItvlVec[i]); - } - printf("accVarLenIdxZEROVec and accVarLenIdxMinusOneVec\n"); - for (int i = 0; i < 25; i++) { - printf("accVarLenIdxZEROVec(%d) = %f accVarLenIdxMinusOneVec(%d) = %f\n", - i, - accVarLenIdxZeroVec[i], - i, - accVarLenIdxMinusOneVec[i]); - } - printf("diffVarItvlIdxZEROVec and diffVarItvlIdxMinusOneVec\n"); - for (int i = 0; i < 25; i++) { - printf("diffVarItvlIdxZEROVec(%d) = %f diffVarItvlIdxMinusOneVec(%d) = %f\n", - i, - diffVarItvlIdxZeroVec[i], - i, - diffVarItvlIdxMinusOneVec[i]); - } - } -#endif - - auto t0 = meanTime; - if (applyTimeSlew) { - if (amplitude <= 1.0f) - t0 += hcal::reconstruction::compute_time_slew_delay(1.0, tzeroTimeSlew, slopeTimeSlew, tmaxTimeSlew); - else - t0 += hcal::reconstruction::compute_time_slew_delay(amplitude, tzeroTimeSlew, slopeTimeSlew, tmaxTimeSlew); - } - auto const t0m = -deltaT + t0; - auto const t0p = deltaT + t0; - -#ifdef HCAL_MAHI_GPUDEBUG - if (sample == 0 && ipulse == 0) { - printf("time values: %f %f %f\n", t0, t0m, t0p); - } - - if (sample == 0 && ipulse == 0) { - for (int i = 0; i < hcal::constants::maxSamples; i++) { - auto const value = hcal::reconstruction::compute_pulse_shape_value(t0, - i, - 0, - acc25nsVec, - diff25nsItvlVec, - accVarLenIdxMinusOneVec, - diffVarItvlIdxMinusOneVec, - accVarLenIdxZeroVec, - diffVarItvlIdxZeroVec); - printf("pulse(%d) = %f\n", i, value); - } - printf("\n"); - for (int i = 0; i < hcal::constants::maxSamples; i++) { - auto const value = hcal::reconstruction::compute_pulse_shape_value(t0p, - i, - 0, - acc25nsVec, - diff25nsItvlVec, - accVarLenIdxMinusOneVec, - diffVarItvlIdxMinusOneVec, - accVarLenIdxZeroVec, - diffVarItvlIdxZeroVec); - printf("pulseP(%d) = %f\n", i, value); - } - printf("\n"); - for (int i = 0; i < hcal::constants::maxSamples; i++) { - auto const value = hcal::reconstruction::compute_pulse_shape_value(t0m, - i, - 0, - acc25nsVec, - diff25nsItvlVec, - accVarLenIdxMinusOneVec, - diffVarItvlIdxMinusOneVec, - accVarLenIdxZeroVec, - diffVarItvlIdxZeroVec); - printf("pulseM(%d) = %f\n", i, value); - } - } -#endif - - // FIXME: shift should be treated properly, - // here assume 8 time slices and 8 samples - auto const shift = 4 - soi; // as in cpu version! - - // auto const offset = ipulse - soi; - // auto const idx = sample - offset; - int32_t const idx = sample - pulseOffset; - auto const value = idx >= 0 && idx < nsamples - ? hcal::reconstruction::compute_pulse_shape_value(t0, - idx, - shift, - acc25nsVec, - diff25nsItvlVec, - accVarLenIdxMinusOneVec, - diffVarItvlIdxMinusOneVec, - accVarLenIdxZeroVec, - diffVarItvlIdxZeroVec) - : 0; - auto const value_t0m = idx >= 0 && idx < nsamples - ? hcal::reconstruction::compute_pulse_shape_value(t0m, - idx, - shift, - acc25nsVec, - diff25nsItvlVec, - accVarLenIdxMinusOneVec, - diffVarItvlIdxMinusOneVec, - accVarLenIdxZeroVec, - diffVarItvlIdxZeroVec) - : 0; - auto const value_t0p = idx >= 0 && idx < nsamples - ? hcal::reconstruction::compute_pulse_shape_value(t0p, - idx, - shift, - acc25nsVec, - diff25nsItvlVec, - accVarLenIdxMinusOneVec, - diffVarItvlIdxMinusOneVec, - accVarLenIdxZeroVec, - diffVarItvlIdxZeroVec) - : 0; - - // store to global - if (amplitude > 0.f) { - pulseMatrix[ipulse * nsamples + sample] = value; - pulseMatrixM[ipulse * nsamples + sample] = value_t0m; - pulseMatrixP[ipulse * nsamples + sample] = value_t0p; - } else { - pulseMatrix[ipulse * nsamples + sample] = 0.f; - pulseMatrixM[ipulse * nsamples + sample] = 0.f; - pulseMatrixP[ipulse * nsamples + sample] = 0.f; - } - } - - template - __forceinline__ __device__ void update_covariance( - calo::multifit::ColumnVector const& resultAmplitudesVector, - calo::multifit::MapSymM& covarianceMatrix, - Eigen::Map> const& pulseMatrix, - Eigen::Map> const& pulseMatrixM, - Eigen::Map> const& pulseMatrixP) { - CMS_UNROLL_LOOP - for (int ipulse = 0; ipulse < NPULSES; ipulse++) { - auto const resultAmplitude = resultAmplitudesVector(ipulse); - if (resultAmplitude == 0) - continue; - -#ifdef HCAL_MAHI_GPUDEBUG - printf("pulse cov array for ibx = %d\n", ipulse); -#endif - - // preload a column - float pmcol[NSAMPLES], pmpcol[NSAMPLES], pmmcol[NSAMPLES]; - CMS_UNROLL_LOOP - for (int counter = 0; counter < NSAMPLES; counter++) { - pmcol[counter] = __ldg(&pulseMatrix.coeffRef(counter, ipulse)); - pmpcol[counter] = __ldg(&pulseMatrixP.coeffRef(counter, ipulse)); - pmmcol[counter] = __ldg(&pulseMatrixM.coeffRef(counter, ipulse)); - } - - auto const ampl2 = resultAmplitude * resultAmplitude; - CMS_UNROLL_LOOP - for (int col = 0; col < NSAMPLES; col++) { - auto const valueP_col = pmpcol[col]; - auto const valueM_col = pmmcol[col]; - auto const value_col = pmcol[col]; - auto const tmppcol = valueP_col - value_col; - auto const tmpmcol = valueM_col - value_col; - - // diagonal - auto tmp_value = 0.5 * (tmppcol * tmppcol + tmpmcol * tmpmcol); - covarianceMatrix(col, col) += ampl2 * tmp_value; - - // FIXME: understand if this actually gets unrolled - CMS_UNROLL_LOOP - for (int row = col + 1; row < NSAMPLES; row++) { - float const valueP_row = pmpcol[row]; //pulseMatrixP(j, ipulseReal); - float const value_row = pmcol[row]; //pulseMatrix(j, ipulseReal); - float const valueM_row = pmmcol[row]; //pulseMatrixM(j, ipulseReal); - - float tmpprow = valueP_row - value_row; - float tmpmrow = valueM_row - value_row; - - auto const covValue = 0.5 * (tmppcol * tmpprow + tmpmcol * tmpmrow); - - covarianceMatrix(row, col) += ampl2 * covValue; - } - } - } - } - - template - __global__ void kernel_minimize(float* outputEnergy, - float* outputChi2, - float const* __restrict__ inputAmplitudes, - float const* __restrict__ pulseMatrices, - float const* __restrict__ pulseMatricesM, - float const* __restrict__ pulseMatricesP, - int const* __restrict__ pulseOffsetValues, - float const* __restrict__ noiseTerms, - float const* __restrict__ electronicNoiseTerms, - int8_t const* __restrict__ soiSamples, - float const* __restrict__ noiseCorrelationValues, - float const* __restrict__ pedestalWidths, - float const* __restrict__ effectivePedestalWidths, - bool const useEffectivePedestals, - uint32_t const* __restrict__ idsf01HE, - uint32_t const* __restrict__ idsf5HB, - uint32_t const* __restrict__ idsf3HB, - float const* __restrict__ gainValues, - float const* __restrict__ respCorrectionValues, - uint32_t const nchannelsf01HE, - uint32_t const nchannelsf5HB, - uint32_t const nchannelsTotal, - uint32_t const offsetForHashes, - int const maxDepthHB, - int const maxDepthHE, - int const maxPhiHE, - int const firstHBRing, - int const lastHBRing, - int const firstHERing, - int const lastHERing, - int const nEtaHB, - int const nEtaHE) { - // can be relaxed if needed - minor updates are needed in that case! - static_assert(NPULSES == NSAMPLES); - - // indices - auto const gch = threadIdx.x + blockIdx.x * blockDim.x; - auto const nchannelsf015 = nchannelsf01HE + nchannelsf5HB; - if (gch >= nchannelsTotal) - return; - - // if chi2 is set to -9999 do not run minimization - if (outputChi2[gch] == -9999.f) - return; - - // configure shared mem - extern __shared__ char shrmem[]; - float* shrMatrixLFnnlsStorage = - reinterpret_cast(shrmem) + calo::multifit::MapSymM::total * threadIdx.x; - float* shrAtAStorage = reinterpret_cast(shrmem) + - calo::multifit::MapSymM::total * (threadIdx.x + blockDim.x); - - // conditions for pedestal widths - auto const id = gch < nchannelsf01HE - ? idsf01HE[gch] - : (gch < nchannelsf015 ? idsf5HB[gch - nchannelsf01HE] : idsf3HB[gch - nchannelsf015]); - auto const did = DetId{id}; - auto const hashedId = - did.subdetId() == HcalBarrel - ? hcal::reconstruction::did2linearIndexHB(id, maxDepthHB, firstHBRing, lastHBRing, nEtaHB) - : hcal::reconstruction::did2linearIndexHE(id, maxDepthHE, maxPhiHE, firstHERing, lastHERing, nEtaHE) + - offsetForHashes; - - auto const* pedestalWidthsForChannel = useEffectivePedestals && (gch < nchannelsf01HE || gch >= nchannelsf015) - ? effectivePedestalWidths + hashedId * 4 - : pedestalWidths + hashedId * 4; - auto const averagePedestalWidth2 = 0.25 * (pedestalWidthsForChannel[0] * pedestalWidthsForChannel[0] + - pedestalWidthsForChannel[1] * pedestalWidthsForChannel[1] + - pedestalWidthsForChannel[2] * pedestalWidthsForChannel[2] + - pedestalWidthsForChannel[3] * pedestalWidthsForChannel[3]); - - auto const* gains = gainValues + hashedId * 4; - // FIXME on cpu ts 0 capid was used - does it make any difference - auto const gain = gains[0]; - auto const respCorrection = respCorrectionValues[hashedId]; - - auto const noisecorr = noiseCorrelationValues[hashedId]; - -#ifdef HCAL_MAHI_GPUDEBUG -#ifdef HCAL_MAHI_GPUDEBUG_FILTERDETID - if (id != DETID_TO_DEBUG) - return; -#endif -#endif - - /* - // TODO: provide this properly - int const soi = soiSamples[gch]; - */ - calo::multifit::ColumnVector pulseOffsets; - CMS_UNROLL_LOOP - for (int i = 0; i < NPULSES; ++i) - pulseOffsets(i) = i; - // pulseOffsets(i) = pulseOffsetValues[i] - pulseOffsetValues[0]; - - // output amplitudes/weights - calo::multifit::ColumnVector resultAmplitudesVector = calo::multifit::ColumnVector::Zero(); - - // map views - Eigen::Map> inputAmplitudesView{inputAmplitudes + gch * NSAMPLES}; - Eigen::Map> noiseTermsView{noiseTerms + gch * NSAMPLES}; - Eigen::Map> noiseElectronicView{electronicNoiseTerms + - gch * NSAMPLES}; - Eigen::Map> glbPulseMatrixMView{pulseMatricesM + - gch * NSAMPLES * NPULSES}; - Eigen::Map> glbPulseMatrixPView{pulseMatricesP + - gch * NSAMPLES * NPULSES}; - Eigen::Map> glbPulseMatrixView{pulseMatrices + - gch * NSAMPLES * NPULSES}; - -#ifdef HCAL_MAHI_GPUDEBUG - for (int i = 0; i < NSAMPLES; i++) - printf("inputValues(%d) = %f noiseTerms(%d) = %f\n", i, inputAmplitudesView(i), i, noiseTermsView(i)); - for (int i = 0; i < NSAMPLES; i++) { - for (int j = 0; j < NPULSES; j++) - printf("%f ", glbPulseMatrixView(i, j)); - printf("\n"); - } - printf("\n"); - for (int i = 0; i < NSAMPLES; i++) { - for (int j = 0; j < NPULSES; j++) - printf("%f ", glbPulseMatrixMView(i, j)); - printf("\n"); - } - printf("\n"); - for (int i = 0; i < NSAMPLES; i++) { - for (int j = 0; j < NPULSES; j++) - printf("%f ", glbPulseMatrixPView(i, j)); - printf("\n"); - } -#endif - - int npassive = 0; - float chi2 = 0, previous_chi2 = 0.f, chi2_2itersback = 0.f; - for (int iter = 1; iter < nMaxItersMin; iter++) { - //float covarianceMatrixStorage[MapSymM::total]; - // NOTE: only works when NSAMPLES == NPULSES - // if does not hold -> slightly rearrange shared mem to still reuse - // shared memory - float* covarianceMatrixStorage = shrMatrixLFnnlsStorage; - calo::multifit::MapSymM covarianceMatrix{covarianceMatrixStorage}; - CMS_UNROLL_LOOP - for (int counter = 0; counter < calo::multifit::MapSymM::total; counter++) - covarianceMatrixStorage[counter] = (noisecorr != 0.f) ? 0.f : averagePedestalWidth2; - CMS_UNROLL_LOOP - for (unsigned int counter = 0; counter < calo::multifit::MapSymM::stride; counter++) { - covarianceMatrix(counter, counter) += noiseTermsView.coeffRef(counter); - if (counter != 0) - covarianceMatrix(counter, counter - 1) += noisecorr * __ldg(&noiseElectronicView.coeffRef(counter - 1)) * - __ldg(&noiseElectronicView.coeffRef(counter)); - } - - // update covariance matrix - update_covariance( - resultAmplitudesVector, covarianceMatrix, glbPulseMatrixView, glbPulseMatrixMView, glbPulseMatrixPView); - -#ifdef HCAL_MAHI_GPUDEBUG - printf("covariance matrix\n"); - for (int i = 0; i < 8; i++) { - for (int j = 0; j < 8; j++) - printf("%f ", covarianceMatrix(i, j)); - printf("\n"); - } -#endif - - // compute Cholesky Decomposition L matrix - //matrixDecomposition.compute(covarianceMatrix); - //auto const& matrixL = matrixDecomposition.matrixL(); - float matrixLStorage[calo::multifit::MapSymM::total]; - calo::multifit::MapSymM matrixL{matrixLStorage}; - calo::multifit::compute_decomposition_unrolled(matrixL, covarianceMatrix); - - // - // replace eigen - // - //auto const& A = matrixDecomposition - // .matrixL() - // .solve(pulseMatrixView); - calo::multifit::ColMajorMatrix A; - calo::multifit::solve_forward_subst_matrix(A, glbPulseMatrixView, matrixL); - - // - // remove eigen - // - //auto const& b = matrixL - // .solve(inputAmplitudesView); - // - float reg_b[NSAMPLES]; - calo::multifit::solve_forward_subst_vector(reg_b, inputAmplitudesView, matrixL); - - // TODO: we do not really need to change these matrcies - // will be fixed in the optimized version - //ColMajorMatrix AtA = A.transpose() * A; - //ColumnVector Atb = A.transpose() * b; - //ColMajorMatrix AtA; - //float AtAStorage[MapSymM::total]; - calo::multifit::MapSymM AtA{shrAtAStorage}; - calo::multifit::ColumnVector Atb; - CMS_UNROLL_LOOP - for (int icol = 0; icol < NPULSES; icol++) { - float reg_ai[NSAMPLES]; - - // load column icol - CMS_UNROLL_LOOP - for (int counter = 0; counter < NSAMPLES; counter++) - reg_ai[counter] = A(counter, icol); - - // compute diagonal - float sum = 0.f; - CMS_UNROLL_LOOP - for (int counter = 0; counter < NSAMPLES; counter++) - sum += reg_ai[counter] * reg_ai[counter]; - - // store - AtA(icol, icol) = sum; - - // go thru the other columns - CMS_UNROLL_LOOP - for (int j = icol + 1; j < NPULSES; j++) { - // load column j - float reg_aj[NSAMPLES]; - CMS_UNROLL_LOOP - for (int counter = 0; counter < NSAMPLES; counter++) - reg_aj[counter] = A(counter, j); - - // accum - float sum = 0.f; - CMS_UNROLL_LOOP - for (int counter = 0; counter < NSAMPLES; counter++) - sum += reg_aj[counter] * reg_ai[counter]; - - // store - //AtA(icol, j) = sum; - AtA(j, icol) = sum; - } - - // Atb accum - float sum_atb = 0; - CMS_UNROLL_LOOP - for (int counter = 0; counter < NSAMPLES; counter++) - sum_atb += reg_ai[counter] * reg_b[counter]; - - // store atb - Atb(icol) = sum_atb; - } - -#ifdef HCAL_MAHI_GPUDEBUG - printf("AtA\n"); - for (int i = 0; i < 8; i++) { - for (int j = 0; j < 8; j++) - printf("%f ", AtA(i, j)); - printf("\n"); - } - printf("Atb\n"); - for (int i = 0; i < 8; i++) - printf("%f ", Atb(i)); - printf("\n"); - printf("result Amplitudes before nnls\n"); - for (int i = 0; i < 8; i++) - printf("%f ", resultAmplitudesVector(i)); - printf("\n"); -#endif - - // for fnnls - calo::multifit::MapSymM matrixLForFnnls{shrMatrixLFnnlsStorage}; - - // run fast nnls - calo::multifit::fnnls( - AtA, Atb, resultAmplitudesVector, npassive, pulseOffsets, matrixLForFnnls, nnlsThresh, nMaxItersNNLS, 10, 10); - -#ifdef HCAL_MAHI_GPUDEBUG - printf("result Amplitudes\n"); - for (int i = 0; i < 8; i++) - printf("resultAmplitudes(%d) = %f\n", i, resultAmplitudesVector(i)); -#endif - - calo::multifit::calculateChiSq(matrixL, glbPulseMatrixView, resultAmplitudesVector, inputAmplitudesView, chi2); - - auto const deltaChi2 = std::abs(chi2 - previous_chi2); - if (chi2 == chi2_2itersback && chi2 < previous_chi2) - break; - - // update - chi2_2itersback = previous_chi2; - previous_chi2 = chi2; - - // exit condition - if (deltaChi2 < deltaChi2Threashold) - break; - } - -#ifdef HCAL_MAHI_GPUDEBUG - for (int i = 0; i < NPULSES; i++) - printf("pulseOffsets(%d) = %d outputAmplitudes(%d) = %f\n", i, pulseOffsets(i), i, resultAmplitudesVector(i)); - printf("chi2 = %f\n", chi2); -#endif - - outputChi2[gch] = chi2; - auto const idx_for_energy = std::abs(pulseOffsetValues[0]); - outputEnergy[gch] = (gain * resultAmplitudesVector(idx_for_energy)) * respCorrection; - /* - CMS_UNROLL_LOOP - for (int i=0; i(inputGPU.f01HEDigis.stride); - auto const f5nsamples = compute_nsamples(inputGPU.f5HBDigis.stride); - auto const f3nsamples = compute_nsamples(inputGPU.f3HBDigis.stride); - int constexpr windowSize = 8; - int const startingSample = f01nsamples - windowSize; - assert(startingSample == 0 || startingSample == 2); - if (inputGPU.f01HEDigis.stride > 0 && inputGPU.f5HBDigis.stride > 0) - assert(f01nsamples == f5nsamples); - if (inputGPU.f01HEDigis.stride > 0 && inputGPU.f3HBDigis.stride > 0) - assert(f01nsamples == f3nsamples); - - dim3 threadsPerBlock{windowSize, configParameters.kprep1dChannelsPerBlock}; - int blocks = static_cast(threadsPerBlock.y) > totalChannels - ? 1 - : (totalChannels + threadsPerBlock.y - 1) / threadsPerBlock.y; - int nbytesShared = - ((2 * windowSize + 2) * sizeof(float) + sizeof(uint64_t)) * configParameters.kprep1dChannelsPerBlock; - hcal::mahi::kernel_prep1d_sameNumberOfSamples<<>>( - scratch.amplitudes.get(), - scratch.noiseTerms.get(), - scratch.electronicNoiseTerms.get(), - outputGPU.recHits.energy.get(), - outputGPU.recHits.chi2.get(), - inputGPU.f01HEDigis.data.get(), - inputGPU.f5HBDigis.data.get(), - inputGPU.f3HBDigis.data.get(), - inputGPU.f01HEDigis.ids.get(), - inputGPU.f5HBDigis.ids.get(), - inputGPU.f3HBDigis.ids.get(), - inputGPU.f01HEDigis.stride, - inputGPU.f5HBDigis.stride, - inputGPU.f3HBDigis.stride, - inputGPU.f01HEDigis.size, - inputGPU.f5HBDigis.size, - inputGPU.f5HBDigis.npresamples.get(), - scratch.soiSamples.get(), - outputGPU.recHits.energyM0.get(), - outputGPU.recHits.timeM0.get(), - outputGPU.recHits.did.get(), - totalChannels, - conditions.channelQuality.status, - conditions.recoParams.param1, - conditions.recoParams.param2, - conditions.qieCoders.offsets, - conditions.qieCoders.slopes, - conditions.qieTypes.values, - conditions.pedestalWidths.values, - conditions.effectivePedestalWidths.values, - conditions.pedestals.values, - conditions.convertedEffectivePedestals ? conditions.convertedEffectivePedestals->values - : conditions.pedestals.values, - configParameters.useEffectivePedestals, - conditions.sipmParameters.type, - conditions.sipmParameters.fcByPE, - conditions.sipmCharacteristics.parLin1, - conditions.sipmCharacteristics.parLin2, - conditions.sipmCharacteristics.parLin3, - conditions.gains.values, - conditions.respCorrs.values, - conditions.topology->maxDepthHB(), - conditions.topology->maxDepthHE(), - conditions.recConstants->getNPhi(1) > hcal::reconstruction::IPHI_MAX ? conditions.recConstants->getNPhi(1) - : hcal::reconstruction::IPHI_MAX, - conditions.topology->firstHBRing(), - conditions.topology->lastHBRing(), - conditions.topology->firstHERing(), - conditions.topology->lastHERing(), - conditions.recConstants->getEtaRange(0).second - conditions.recConstants->getEtaRange(0).first + 1, - conditions.topology->firstHERing() > conditions.topology->lastHERing() - ? 0 - : (conditions.topology->lastHERing() - conditions.topology->firstHERing() + 1), - configParameters.sipmQTSShift, - configParameters.sipmQNTStoSum, - configParameters.firstSampleShift, - conditions.offsetForHashes, - configParameters.ts4Thresh, - startingSample); - cudaCheck(cudaGetLastError()); - - // 1024 is the max threads per block for gtx1080 - // FIXME: take this from cuda service or something like that - uint32_t const channelsPerBlock = 1024 / (windowSize * conditions.pulseOffsetsHost.size()); - dim3 threadsPerBlock2{windowSize, static_cast(conditions.pulseOffsetsHost.size()), channelsPerBlock}; - int blocks2 = - threadsPerBlock2.z > totalChannels ? 1 : (totalChannels + threadsPerBlock2.z - 1) / threadsPerBlock2.z; - -#ifdef HCAL_MAHI_CPUDEBUG - std::cout << "threads: " << threadsPerBlock2.x << " " << threadsPerBlock2.y << " " << threadsPerBlock2.z - << std::endl; - std::cout << "blocks: " << blocks2 << std::endl; -#endif - - hcal::mahi::kernel_prep_pulseMatrices_sameNumberOfSamples<<>>( - scratch.pulseMatrices.get(), - scratch.pulseMatricesM.get(), - scratch.pulseMatricesP.get(), - conditions.pulseOffsets.values, - scratch.amplitudes.get(), - inputGPU.f01HEDigis.ids.get(), - inputGPU.f5HBDigis.ids.get(), - inputGPU.f3HBDigis.ids.get(), - inputGPU.f01HEDigis.size, - inputGPU.f5HBDigis.size, - totalChannels, - scratch.soiSamples.get(), - conditions.recoParams.ids, - conditions.recoParams.acc25nsVec, - conditions.recoParams.diff25nsItvlVec, - conditions.recoParams.accVarLenIdxMinusOneVec, - conditions.recoParams.diffVarItvlIdxMinusOneVec, - conditions.recoParams.accVarLenIdxZEROVec, - conditions.recoParams.diffVarItvlIdxZEROVec, - configParameters.meanTime, - configParameters.timeSigmaSiPM, - configParameters.timeSigmaHPD, - conditions.topology->maxDepthHB(), - conditions.topology->maxDepthHE(), - conditions.recConstants->getNPhi(1) > hcal::reconstruction::IPHI_MAX ? conditions.recConstants->getNPhi(1) - : hcal::reconstruction::IPHI_MAX, - conditions.topology->firstHBRing(), - conditions.topology->lastHBRing(), - conditions.topology->firstHERing(), - conditions.topology->lastHERing(), - conditions.recConstants->getEtaRange(0).second - conditions.recConstants->getEtaRange(0).first + 1, - conditions.topology->firstHERing() > conditions.topology->lastHERing() - ? 0 - : (conditions.topology->lastHERing() - conditions.topology->firstHERing() + 1), - conditions.offsetForHashes, - configParameters.applyTimeSlew, - configParameters.tzeroTimeSlew, - configParameters.slopeTimeSlew, - configParameters.tmaxTimeSlew); - cudaCheck(cudaGetLastError()); - - // number of samples is checked in above assert - if (conditions.pulseOffsetsHost.size() == 8u) { - // FIXME: provide constants from configuration - uint32_t threadsPerBlock = configParameters.kernelMinimizeThreads[0]; - uint32_t blocks = threadsPerBlock > totalChannels ? 1 : (totalChannels + threadsPerBlock - 1) / threadsPerBlock; - auto const nbytesShared = 2 * threadsPerBlock * calo::multifit::MapSymM::total * sizeof(float); - hcal::mahi::kernel_minimize<8, 8><<>>( - outputGPU.recHits.energy.get(), - outputGPU.recHits.chi2.get(), - scratch.amplitudes.get(), - scratch.pulseMatrices.get(), - scratch.pulseMatricesM.get(), - scratch.pulseMatricesP.get(), - conditions.pulseOffsets.values, - scratch.noiseTerms.get(), - scratch.electronicNoiseTerms.get(), - scratch.soiSamples.get(), - conditions.sipmParameters.auxi2, - conditions.pedestalWidths.values, - conditions.effectivePedestalWidths.values, - configParameters.useEffectivePedestals, - inputGPU.f01HEDigis.ids.get(), - inputGPU.f5HBDigis.ids.get(), - inputGPU.f3HBDigis.ids.get(), - conditions.gains.values, - conditions.respCorrs.values, - inputGPU.f01HEDigis.size, - inputGPU.f5HBDigis.size, - totalChannels, - conditions.offsetForHashes, - conditions.topology->maxDepthHB(), - conditions.topology->maxDepthHE(), - conditions.recConstants->getNPhi(1) > hcal::reconstruction::IPHI_MAX ? conditions.recConstants->getNPhi(1) - : hcal::reconstruction::IPHI_MAX, - conditions.topology->firstHBRing(), - conditions.topology->lastHBRing(), - conditions.topology->firstHERing(), - conditions.topology->lastHERing(), - conditions.recConstants->getEtaRange(0).second - conditions.recConstants->getEtaRange(0).first + 1, - conditions.topology->firstHERing() > conditions.topology->lastHERing() - ? 0 - : (conditions.topology->lastHERing() - conditions.topology->firstHERing() + 1)); - } else { - throw cms::Exception("Invalid MahiGPU configuration") - << "Currently support only 8 pulses and 8 time samples and provided: " << f01nsamples << " samples and " - << conditions.pulseOffsetsHost.size() << " pulses" << std::endl; - } - } - - } // namespace reconstruction -} // namespace hcal diff --git a/RecoLocalCalo/HcalRecProducers/src/SimpleAlgoGPU.h b/RecoLocalCalo/HcalRecProducers/src/SimpleAlgoGPU.h deleted file mode 100644 index c0bb499b517a7..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/src/SimpleAlgoGPU.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef RecoLocalCalo_HcalRecProducers_src_SimpleAlgoGPU_h -#define RecoLocalCalo_HcalRecProducers_src_SimpleAlgoGPU_h - -#include "DeclsForKernels.h" - -namespace hcal { - namespace reconstruction { - - void entryPoint(InputDataGPU const&, - OutputDataGPU&, - ConditionsProducts const&, - ScratchDataGPU&, - ConfigParameters const&, - cudaStream_t); - - } -} // namespace hcal - -#endif // RecoLocalCalo_HcalRecProducers_src_SimpleAlgoGPU_h diff --git a/RecoLocalCalo/HcalRecProducers/test/make_GPUvsCPU_HCAL_plots.py b/RecoLocalCalo/HcalRecProducers/test/make_GPUvsCPU_HCAL_plots.py deleted file mode 100644 index 2b97efc2f2d8c..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/test/make_GPUvsCPU_HCAL_plots.py +++ /dev/null @@ -1,28 +0,0 @@ -import FWCore.ParameterSet.Config as cms - -process = cms.Process("PLOT") - -process.load("FWCore.MessageService.MessageLogger_cfi") -process.options = cms.untracked.PSet( - wantSummary = cms.untracked.bool(False) -) - -process.load('Configuration.StandardSequences.GeometryRecoDB_cff') -process.load("Configuration.StandardSequences.FrontierConditions_GlobalTag_cff") -from Configuration.AlCa.GlobalTag import GlobalTag -process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:run2_hlt_relval', '') - -process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(-1) ) -process.MessageLogger.cerr.FwkReport.reportEvery = 500 - -process.source = cms.Source("PoolSource", - fileNames = cms.untracked.vstring('file:GPUvsCPU_HCAL_rechits.root') -) - -process.comparisonPlots = cms.EDAnalyzer('HCALGPUAnalyzer') - -process.TFileService = cms.Service('TFileService', - fileName = cms.string('GPUvsCPU_HCAL_plots.root') -) - -process.path = cms.Path(process.comparisonPlots) diff --git a/RecoLocalCalo/HcalRecProducers/test/make_GPUvsCPU_HCAL_rechits.py b/RecoLocalCalo/HcalRecProducers/test/make_GPUvsCPU_HCAL_rechits.py deleted file mode 100644 index 84fb7a98132e2..0000000000000 --- a/RecoLocalCalo/HcalRecProducers/test/make_GPUvsCPU_HCAL_rechits.py +++ /dev/null @@ -1,149 +0,0 @@ -import FWCore.ParameterSet.Config as cms - -from Configuration.StandardSequences.Eras import eras - -process = cms.Process('RECOgpu', eras.Run2_2018) - -# import of standard configurations -process.load('Configuration.StandardSequences.Services_cff') -process.load('FWCore.MessageService.MessageLogger_cfi') -process.load('HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi') - -process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff') -from Configuration.AlCa.GlobalTag import GlobalTag -process.GlobalTag = GlobalTag(process.GlobalTag, 'auto:run2_hlt_relval', '') - -process.maxEvents = cms.untracked.PSet( - input = cms.untracked.int32(1000) -) - -#----------------------------------------- -# INPUT -#----------------------------------------- - -process.source = cms.Source("PoolSource", - fileNames = cms.untracked.vstring('/store/data/Run2018D/EphemeralHLTPhysics1/RAW/v1/000/323/775/00000/A27DFA33-8FCB-BE42-A2D2-1A396EEE2B6E.root') -) - -process.hltGetRaw = cms.EDAnalyzer( "HLTGetRaw", - RawDataCollection = cms.InputTag( "rawDataCollector" ) -) - -process.input = cms.Path( process.hltGetRaw ) - -#----------------------------------------- -# CMSSW/Hcal non-DQM Related Module import -#----------------------------------------- - -process.load('Configuration.StandardSequences.GeometryRecoDB_cff') -process.load("RecoLocalCalo.Configuration.hcalLocalReco_cff") -process.load("EventFilter.HcalRawToDigi.HcalRawToDigi_cfi") -process.load("RecoLuminosity.LumiProducer.bunchSpacingProducer_cfi") - -process.hcalDigis.InputLabel = cms.InputTag("rawDataCollector") - -#----------------------------------------- -# CMSSW/Hcal GPU related files -#----------------------------------------- - -process.load("RecoLocalCalo.HcalRecProducers.hbheRecHitProducerGPUTask_cff") -process.load("RecoLocalCalo.HcalRecProducers.hcalCPURecHitsProducer_cfi") -process.hcalCPURecHitsProducer.recHitsM0LabelIn = cms.InputTag("hbheRecHitProducerGPU","") -process.hcalCPURecHitsProducer.recHitsM0LabelOut = cms.string("") - -#----------------------------------------- -# Temporary customization (things not implemented on the GPU) -#----------------------------------------- - -## the one below is taken directly from the DB, regard M0 -#process.hbheprereco.algorithm.correctForPhaseContainment = cms.bool(False) - -## do always 8 pulse -process.hbheprereco.cpu.algorithm.chiSqSwitch = cms.double(-1) - -## to match hard coded setting (will be fixed on CPU) -process.hbheprereco.cpu.algorithm.nMaxItersMin = cms.int32(50) - -#----------------------------------------- -# Final Custmization for Run3 -#----------------------------------------- - -# we will not run arrival Time at HLT -process.hbheprereco.cpu.algorithm.calculateArrivalTime = cms.bool(False) - -## we do not need this -process.hbheprereco.cpu.algorithm.applyLegacyHBMCorrection = cms.bool(False) - -# we only run Mahi at HLT -process.hbheprereco.cpu.algorithm.useM3 = cms.bool(False) - -# we will not have the HPD noise flags in Run3, as will be all siPM -process.hbheprereco.cpu.setLegacyFlagsQIE8 = cms.bool(False) -process.hbheprereco.cpu.setNegativeFlagsQIE8 = cms.bool(False) -process.hbheprereco.cpu.setNoiseFlagsQIE8 = cms.bool(False) -process.hbheprereco.cpu.setPulseShapeFlagsQIE8 = cms.bool(False) - -# for testing M0 only -##process.hbheprereco.cpu.algorithm.useMahi = cms.bool(False) - -#----------------------------------------- -# OUTPUT -#----------------------------------------- - -#process.out = cms.OutputModule("AsciiOutputModule", -# outputCommands = cms.untracked.vstring( -# 'keep *_*_*_*', -# ), -# verbosity = cms.untracked.uint32(0) -#) - -process.out = cms.OutputModule("PoolOutputModule", - fileName = cms.untracked.string("GPUvsCPU_HCAL_rechits.root") -) - -#--------------- - -process.finalize = cms.EndPath(process.out) - -process.bunchSpacing = cms.Path( - process.bunchSpacingProducer -) - -#----------------------------------------- -# gpu test -#----------------------------------------- - -process.digiPathCPU = cms.Path( - process.hcalDigis -) - -process.recoPathCPU = cms.Path( - process.hbheprereco -) - -#--------------- - -## hcalCPUDigisProducer <-- this convert the GPU digi on cpu (for dqm) -process.recoPathGPU = cms.Path( - process.hbheRecHitProducerGPUSequence - * process.hcalCPURecHitsProducer -) - -#--------------- - -process.schedule = cms.Schedule( - process.input, - process.digiPathCPU, - process.recoPathCPU, - process.recoPathGPU, - process.finalize -) - -process.options = cms.untracked.PSet( - numberOfThreads = cms.untracked.uint32(8), - numberOfStreams = cms.untracked.uint32(8), - SkipEvent = cms.untracked.vstring('ProductNotFound'), - wantSummary = cms.untracked.bool(True) -) - -process.MessageLogger.cerr.FwkReport.reportEvery = 100 From 317f4c265a4b67a55d4bb714a1ea211f63e5b044 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sat, 14 Sep 2024 00:05:48 +0200 Subject: [PATCH 2/2] Remove legacy CUDA modules for HCAL unpacker The outcome of the discussion with the HCAL DPG is that - for Run 3 we should use the legacy cpu unpacker, and the alpaka version of the legacy-to-SoA converter; - for Phase 2 the decision can be revisited once the final raw and digi formats are defined. --- CUDADataFormats/HcalDigi/BuildFile.xml | 8 - .../HcalDigi/interface/DigiCollection.h | 160 ----- CUDADataFormats/HcalDigi/src/classes.h | 3 - CUDADataFormats/HcalDigi/src/classes_def.xml | 36 -- EventFilter/HcalRawToDigi/bin/BuildFile.xml | 7 - .../makeHcalRaw2DigiGpuValidationPlots.cpp | 386 ------------ .../HcalRawToDigi/plugins/BuildFile.xml | 10 - .../HcalRawToDigi/plugins/DeclsForKernels.h | 86 --- .../HcalRawToDigi/plugins/DecodeGPU.cu | 593 ------------------ EventFilter/HcalRawToDigi/plugins/DecodeGPU.h | 23 - .../plugins/ElectronicsMappingGPU.cc | 63 -- .../plugins/ElectronicsMappingGPU.h | 48 -- .../plugins/HcalCPUDigisProducer.cc | 117 ---- .../plugins/HcalDigisProducerGPU.cc | 235 ------- .../plugins/HcalESProducerGPUDefs.cc | 10 - .../HcalRawToDigi/plugins/HcalRawToDigiGPU.cc | 195 ------ 16 files changed, 1980 deletions(-) delete mode 100644 CUDADataFormats/HcalDigi/BuildFile.xml delete mode 100644 CUDADataFormats/HcalDigi/interface/DigiCollection.h delete mode 100644 CUDADataFormats/HcalDigi/src/classes.h delete mode 100644 CUDADataFormats/HcalDigi/src/classes_def.xml delete mode 100644 EventFilter/HcalRawToDigi/bin/BuildFile.xml delete mode 100644 EventFilter/HcalRawToDigi/bin/makeHcalRaw2DigiGpuValidationPlots.cpp delete mode 100644 EventFilter/HcalRawToDigi/plugins/DeclsForKernels.h delete mode 100644 EventFilter/HcalRawToDigi/plugins/DecodeGPU.cu delete mode 100644 EventFilter/HcalRawToDigi/plugins/DecodeGPU.h delete mode 100644 EventFilter/HcalRawToDigi/plugins/ElectronicsMappingGPU.cc delete mode 100644 EventFilter/HcalRawToDigi/plugins/ElectronicsMappingGPU.h delete mode 100644 EventFilter/HcalRawToDigi/plugins/HcalCPUDigisProducer.cc delete mode 100644 EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc delete mode 100644 EventFilter/HcalRawToDigi/plugins/HcalESProducerGPUDefs.cc delete mode 100644 EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc diff --git a/CUDADataFormats/HcalDigi/BuildFile.xml b/CUDADataFormats/HcalDigi/BuildFile.xml deleted file mode 100644 index 2e8ab2fe58fcf..0000000000000 --- a/CUDADataFormats/HcalDigi/BuildFile.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - diff --git a/CUDADataFormats/HcalDigi/interface/DigiCollection.h b/CUDADataFormats/HcalDigi/interface/DigiCollection.h deleted file mode 100644 index e2f4bf0848e94..0000000000000 --- a/CUDADataFormats/HcalDigi/interface/DigiCollection.h +++ /dev/null @@ -1,160 +0,0 @@ -#ifndef CUDADataFormats_HcalDigi_interface_DigiCollection_h -#define CUDADataFormats_HcalDigi_interface_DigiCollection_h - -#include "CUDADataFormats/CaloCommon/interface/Common.h" - -namespace hcal { - - // FLAVOR_HE_QIE11 = 1; Phase1 upgrade - struct Flavor1 { - static constexpr int WORDS_PER_SAMPLE = 1; - static constexpr int SAMPLES_PER_WORD = 1; - static constexpr int HEADER_WORDS = 1; - - static constexpr uint8_t adc(uint16_t const* const sample_start) { return (*sample_start & 0xff); } - static constexpr uint8_t tdc(uint16_t const* const sample_start) { return (*sample_start >> 8) & 0x3f; } - static constexpr uint8_t soibit(uint16_t const* const sample_start) { return (*sample_start >> 14) & 0x1; } - }; - - // FLAVOR_HB_QIE11 = 3; Phase1 upgrade - struct Flavor3 { - static constexpr int WORDS_PER_SAMPLE = 1; - static constexpr int SAMPLES_PER_WORD = 1; - static constexpr int HEADER_WORDS = 1; - - static constexpr uint8_t adc(uint16_t const* const sample_start) { return (*sample_start & 0xff); } - static constexpr uint8_t tdc(uint16_t const* const sample_start) { return ((*sample_start >> 8) & 0x3); } - static constexpr uint8_t soibit(uint16_t const* const sample_start) { return ((*sample_start >> 14) & 0x1); } - static constexpr uint8_t capid(uint16_t const* const sample_start) { return ((*sample_start >> 10) & 0x3); } - }; - - // FLAVOR_HB_QIE10 = 5; Phase0 - struct Flavor5 { - static constexpr float WORDS_PER_SAMPLE = 0.5; - static constexpr int SAMPLES_PER_WORD = 2; - static constexpr int HEADER_WORDS = 1; - - static constexpr uint8_t adc(uint16_t const* const sample_start, uint8_t const shifter) { - return ((*sample_start >> shifter * 8) & 0x7f); - } - }; - - template - constexpr uint8_t capid_for_sample(uint16_t const* const dfstart, uint32_t const sample) { - auto const capid_first = (*dfstart >> 8) & 0x3; - return (capid_first + sample) & 0x3; // same as % 4 - } - - template <> - constexpr uint8_t capid_for_sample(uint16_t const* const dfstart, uint32_t const sample) { - return Flavor3::capid(dfstart + Flavor3::HEADER_WORDS + sample * Flavor3::WORDS_PER_SAMPLE); - } - - template - constexpr uint8_t soibit_for_sample(uint16_t const* const dfstart, uint32_t const sample) { - return Flavor::soibit(dfstart + Flavor::HEADER_WORDS + sample * Flavor::WORDS_PER_SAMPLE); - } - - template - constexpr uint8_t adc_for_sample(uint16_t const* const dfstart, uint32_t const sample) { - return Flavor::adc(dfstart + Flavor::HEADER_WORDS + sample * Flavor::WORDS_PER_SAMPLE); - } - - template - constexpr uint8_t tdc_for_sample(uint16_t const* const dfstart, uint32_t const sample) { - return Flavor::tdc(dfstart + Flavor::HEADER_WORDS + sample * Flavor::WORDS_PER_SAMPLE); - } - - template <> - constexpr uint8_t adc_for_sample(uint16_t const* const dfstart, uint32_t const sample) { - // avoid using WORDS_PER_SAMPLE and simply shift - return Flavor5::adc(dfstart + Flavor5::HEADER_WORDS + (sample >> 1), sample % 2); - } - - template - constexpr uint32_t compute_stride(uint32_t const nsamples) { - return static_cast(nsamples * Flavor::WORDS_PER_SAMPLE) + Flavor::HEADER_WORDS; - } - - template - constexpr uint32_t compute_nsamples(uint32_t const nwords) { - if constexpr (Flavor::SAMPLES_PER_WORD >= 1) - return (nwords - Flavor::HEADER_WORDS) * Flavor::SAMPLES_PER_WORD; - else - return (nwords - Flavor::HEADER_WORDS) / Flavor::WORDS_PER_SAMPLE; - } - - // - template - struct DigiCollectionBase : public ::calo::common::AddSize { - DigiCollectionBase() = default; - DigiCollectionBase(DigiCollectionBase const&) = default; - DigiCollectionBase& operator=(DigiCollectionBase const&) = default; - - DigiCollectionBase(DigiCollectionBase&&) = default; - DigiCollectionBase& operator=(DigiCollectionBase&&) = default; - - template - typename std::enable_if::value, void>::type resize(std::size_t size) { - ids.resize(size); - data.resize(size * stride); - } - - template - typename std::enable_if::value, void>::type reserve(std::size_t size) { - ids.reserve(size); - data.reserve(size * stride); - } - - template - typename std::enable_if::value, void>::type clear() { - ids.clear(); - data.clear(); - } - - typename StoragePolicy::template StorageSelector::type ids; - typename StoragePolicy::template StorageSelector::type data; - uint32_t stride{0}; - }; - - template - struct DigiCollection : public DigiCollectionBase { - using DigiCollectionBase::DigiCollectionBase; - }; - - // NOTE: base ctors will not be available - template - struct DigiCollection : public DigiCollectionBase { - DigiCollection() = default; - - DigiCollection(DigiCollection const&) = default; - DigiCollection& operator=(DigiCollection const&) = default; - - DigiCollection(DigiCollection&&) = default; - DigiCollection& operator=(DigiCollection&&) = default; - - template - typename std::enable_if::value, void>::type resize(std::size_t size) { - DigiCollectionBase::resize(size); - npresamples.resize(size); - } - - template - typename std::enable_if::value, void>::type reserve(std::size_t size) { - DigiCollectionBase::reserve(size); - npresamples.reserve(size); - } - - template - typename std::enable_if::value, void>::type clear() { - DigiCollectionBase::clear(); - npresamples.clear(); - } - - // add npresamples member - typename StoragePolicy::template StorageSelector::type npresamples; - }; - -} // namespace hcal - -#endif // CUDADataFormats_HcalDigi_interface_DigiCollection_h diff --git a/CUDADataFormats/HcalDigi/src/classes.h b/CUDADataFormats/HcalDigi/src/classes.h deleted file mode 100644 index 8c4a20318928e..0000000000000 --- a/CUDADataFormats/HcalDigi/src/classes.h +++ /dev/null @@ -1,3 +0,0 @@ -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/HcalDigi/interface/DigiCollection.h" -#include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/HcalDigi/src/classes_def.xml b/CUDADataFormats/HcalDigi/src/classes_def.xml deleted file mode 100644 index 7bb2d7f39c63a..0000000000000 --- a/CUDADataFormats/HcalDigi/src/classes_def.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/EventFilter/HcalRawToDigi/bin/BuildFile.xml b/EventFilter/HcalRawToDigi/bin/BuildFile.xml deleted file mode 100644 index c074e16ef4eab..0000000000000 --- a/EventFilter/HcalRawToDigi/bin/BuildFile.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/EventFilter/HcalRawToDigi/bin/makeHcalRaw2DigiGpuValidationPlots.cpp b/EventFilter/HcalRawToDigi/bin/makeHcalRaw2DigiGpuValidationPlots.cpp deleted file mode 100644 index 039c38dd9df16..0000000000000 --- a/EventFilter/HcalRawToDigi/bin/makeHcalRaw2DigiGpuValidationPlots.cpp +++ /dev/null @@ -1,386 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "CUDADataFormats/HcalDigi/interface/DigiCollection.h" -#include "DataFormats/Common/interface/Wrapper.h" -#include "DataFormats/HcalDigi/interface/HcalDigiCollections.h" - -#define CREATE_HIST_1D(varname, nbins, first, last) auto varname = new TH1D(#varname, #varname, nbins, first, last) - -#define CREATE_HIST_2D(varname, nbins, first, last) \ - auto varname = new TH2D(#varname, #varname, nbins, first, last, nbins, first, last) - -QIE11DigiCollection filterQIE11(QIE11DigiCollection const& coll) { - QIE11DigiCollection out; - out.reserve(coll.size()); - - for (uint32_t i = 0; i < coll.size(); i++) { - auto const df = coll[i]; - auto const id = HcalDetId{df.id()}; - if (id.subdetId() != HcalEndcap) - continue; - - out.push_back(QIE11DataFrame{df}); - } - - return out; -} - -int main(int argc, char* argv[]) { - if (argc < 3) { - std::cout << "run with: ./ \n"; - exit(0); - } - - auto filterf01HE = [](QIE11DigiCollection const& coll) { - QIE11DigiCollection out{coll.samples(), coll.subdetId()}; - out.reserve(coll.size()); - - for (uint32_t i = 0; i < coll.size(); i++) { - auto const df = QIE11DataFrame{coll[i]}; - auto const id = HcalDetId{df.id()}; - if ((df.flavor() == 0 or df.flavor() == 1) and id.subdetId() == HcalEndcap) - out.push_back(df); - } - - return out; - }; - - auto filterf3HB = [](QIE11DigiCollection const& coll) { - QIE11DigiCollection out{coll.samples(), coll.subdetId()}; - out.reserve(coll.size()); - - for (uint32_t i = 0; i < coll.size(); i++) { - auto const df = QIE11DataFrame{coll[i]}; - auto const did = HcalDetId{df.id()}; - if (df.flavor() == 3 and did.subdetId() == HcalBarrel) - out.push_back(df); - } - - return out; - }; - - // branches to use - using Collectionf01 = - hcal::DigiCollection>; - using Collectionf5 = - hcal::DigiCollection>; - using Collectionf3 = - hcal::DigiCollection>; - edm::Wrapper* wgpuf01he = nullptr; - edm::Wrapper* wgpuf5hb = nullptr; - edm::Wrapper* wgpuf3hb = nullptr; - edm::Wrapper* wcpuf01he = nullptr; - edm::Wrapper* wcpuf5hb = nullptr; - - std::string inFileName{argv[1]}; - std::string outFileName{argv[2]}; - - // prep output - TFile rfout{outFileName.c_str(), "recreate"}; - - CREATE_HIST_1D(hADCf01HEGPU, 256, 0, 256); - CREATE_HIST_1D(hADCf01HECPU, 256, 0, 256); - CREATE_HIST_1D(hADCf5HBGPU, 128, 0, 128); - CREATE_HIST_1D(hADCf5HBCPU, 128, 0, 128); - CREATE_HIST_1D(hADCf3HBGPU, 256, 0, 256); - CREATE_HIST_1D(hADCf3HBCPU, 256, 0, 256); - CREATE_HIST_1D(hTDCf01HEGPU, 64, 0, 64); - CREATE_HIST_1D(hTDCf01HECPU, 64, 0, 64); - - CREATE_HIST_2D(hADCf01HEGPUvsCPU, 256, 0, 256); - CREATE_HIST_2D(hADCf3HBGPUvsCPU, 256, 0, 256); - CREATE_HIST_2D(hADCf5HBGPUvsCPU, 128, 0, 128); - CREATE_HIST_2D(hTDCf01HEGPUvsCPU, 64, 0, 64); - CREATE_HIST_2D(hTDCf3HBGPUvsCPU, 4, 0, 4); - - // prep input - TFile rfin{inFileName.c_str()}; - TTree* rt = (TTree*)rfin.Get("Events"); - rt->SetBranchAddress("QIE11DataFrameHcalDataFrameContainer_hcalDigis__RECO.", &wcpuf01he); - rt->SetBranchAddress("HBHEDataFramesSorted_hcalDigis__RECO.", &wcpuf5hb); - rt->SetBranchAddress( - "hcalFlavor5calocommonCUDAHostAllocatorAliascalocommonVecStoragePolicyhcalDigiCollection_hcalCPUDigisProducer_" - "f5HBDigis_RECO.", - &wgpuf5hb); - rt->SetBranchAddress( - "hcalFlavor1calocommonCUDAHostAllocatorAliascalocommonVecStoragePolicyhcalDigiCollection_hcalCPUDigisProducer_" - "f01HEDigis_RECO.", - &wgpuf01he); - rt->SetBranchAddress( - "hcalFlavor3calocommonCUDAHostAllocatorAliascalocommonVecStoragePolicyhcalDigiCollection_hcalCPUDigisProducer_" - "f3HBDigis_RECO.", - &wgpuf3hb); - - // accumulate - auto const nentries = rt->GetEntries(); - std::cout << ">>> nentries = " << nentries << std::endl; - for (int ie = 0; ie < nentries; ++ie) { - rt->GetEntry(ie); - - auto const& f01HEProduct = wgpuf01he->bareProduct(); - auto const& f5HBProduct = wgpuf5hb->bareProduct(); - auto const& f3HBProduct = wgpuf3hb->bareProduct(); - auto const& qie11Product = wcpuf01he->bareProduct(); - auto const qie11Filteredf01 = filterf01HE(qie11Product); - auto const qie11Filteredf3 = filterf3HB(qie11Product); - auto const& qie8Product = wcpuf5hb->bareProduct(); - - auto const ngpuf01he = f01HEProduct.ids.size(); - auto const ngpuf5hb = f5HBProduct.ids.size(); - auto const ngpuf3hb = f3HBProduct.ids.size(); - auto const ncpuf01he = qie11Filteredf01.size(); - auto const ncpuf5hb = qie8Product.size(); - auto const ncpuf3hb = qie11Filteredf3.size(); - - /* - printf("ngpuf01he = %u nqie11 = %u ncpuf01he = %u ngpuf5hb = %u ncpuf5hb = %u\n", - f01HEProduct.size(), qie11Product.size(), qie11Filtered.size(), - f5HBProduct.size(), - static_cast(qie8Product.size())); - */ - - if (ngpuf01he != ncpuf01he) { - std::cerr << "*** mismatch in number of flavor 01 digis for event " << ie << std::endl - << ">>> ngpuf01he = " << ngpuf01he << std::endl - << ">>> ncpuf01he = " << ncpuf01he << std::endl; - } - - { - auto const& idsgpu = f01HEProduct.ids; - auto const& datagpu = f01HEProduct.data; - - for (uint32_t ich = 0; ich < ncpuf01he; ich++) { - auto const cpudf = QIE11DataFrame{qie11Filteredf01[ich]}; - auto const cpuid = cpudf.id(); - auto iter2idgpu = std::find(idsgpu.begin(), idsgpu.end(), cpuid); - - if (iter2idgpu == idsgpu.end()) { - std::cerr << "missing " << HcalDetId{cpuid} << std::endl; - continue; - } - - // FIXME: cna fail... - assert(*iter2idgpu == cpuid); - - auto const ptrdiff = iter2idgpu - idsgpu.begin(); - auto const nsamples_gpu = hcal::compute_nsamples(f01HEProduct.stride); - auto const nsamples_cpu = qie11Filteredf01.samples(); - assert(static_cast(nsamples_cpu) == nsamples_gpu); - - uint32_t ichgpu = ptrdiff; - uint32_t offset = ichgpu * f01HEProduct.stride; - uint16_t const* df_start = datagpu.data() + offset; - for (uint32_t sample = 0u; sample < nsamples_gpu; sample++) { - auto const cpuadc = cpudf[sample].adc(); - auto const gpuadc = hcal::adc_for_sample(df_start, sample); - auto const cputdc = cpudf[sample].tdc(); - auto const gputdc = hcal::tdc_for_sample(df_start, sample); - auto const cpucapid = cpudf[sample].capid(); - auto const gpucapid = hcal::capid_for_sample(df_start, sample); - - hADCf01HEGPU->Fill(gpuadc); - hADCf01HECPU->Fill(cpuadc); - hTDCf01HEGPU->Fill(gputdc); - hTDCf01HECPU->Fill(cputdc); - hADCf01HEGPUvsCPU->Fill(cpuadc, gpuadc); - hTDCf01HEGPUvsCPU->Fill(cputdc, gputdc); - - // At RAW Decoding level there must not be any mistmatches - // in the adc values at all! - assert(static_cast(cpuadc) == gpuadc); - assert(static_cast(cputdc) == gputdc); - assert(static_cast(cpucapid) == gpucapid); - } - } - } - - if (ngpuf3hb != ncpuf3hb) { - std::cerr << "*** mismatch in number of flavor 3 digis for event " << ie << std::endl - << ">>> ngpuf01he = " << ngpuf3hb << std::endl - << ">>> ncpuf01he = " << ncpuf3hb << std::endl; - } - - { - auto const& idsgpu = f3HBProduct.ids; - auto const& datagpu = f3HBProduct.data; - - for (uint32_t ich = 0; ich < ncpuf3hb; ich++) { - auto const cpudf = QIE11DataFrame{qie11Filteredf3[ich]}; - auto const cpuid = cpudf.id(); - auto iter2idgpu = std::find(idsgpu.begin(), idsgpu.end(), cpuid); - - if (iter2idgpu == idsgpu.end()) { - std::cerr << "missing " << HcalDetId{cpuid} << std::endl; - continue; - } - - // FIXME: cna fail... - assert(*iter2idgpu == cpuid); - - auto const ptrdiff = iter2idgpu - idsgpu.begin(); - auto const nsamples_gpu = hcal::compute_nsamples(f3HBProduct.stride); - auto const nsamples_cpu = qie11Filteredf3.samples(); - assert(static_cast(nsamples_cpu) == nsamples_gpu); - - uint32_t ichgpu = ptrdiff; - uint32_t offset = ichgpu * f3HBProduct.stride; - uint16_t const* df_start = datagpu.data() + offset; - for (uint32_t sample = 0u; sample < nsamples_gpu; sample++) { - auto const cpuadc = cpudf[sample].adc(); - auto const gpuadc = hcal::adc_for_sample(df_start, sample); - auto const cputdc = cpudf[sample].tdc(); - auto const gputdc = hcal::tdc_for_sample(df_start, sample); - - hADCf3HBGPU->Fill(gpuadc); - hADCf3HBCPU->Fill(cpuadc); - hADCf3HBGPUvsCPU->Fill(cpuadc, gpuadc); - hTDCf3HBGPUvsCPU->Fill(cputdc, gputdc); - - // At RAW Decoding level there must not be any mistmatches - // in the adc values at all! - assert(static_cast(cpuadc) == gpuadc); - assert(static_cast(cputdc) == gputdc); - } - } - } - - if (ngpuf5hb != ncpuf5hb) { - std::cerr << "*** mismatch in number of flavor 5 digis for event " << ie << std::endl - << ">>> ngpuf5hb = " << ngpuf5hb << std::endl - << ">>> ncpuf5hb = " << ncpuf5hb << std::endl; - } - - { - auto const& idsgpu = f5HBProduct.ids; - auto const& datagpu = f5HBProduct.data; - for (uint32_t i = 0; i < ncpuf5hb; i++) { - auto const cpudf = qie8Product[i]; - auto const cpuid = cpudf.id().rawId(); - auto iter2idgpu = std::find(idsgpu.begin(), idsgpu.end(), cpuid); - if (iter2idgpu == idsgpu.end()) { - std::cerr << "missing " << HcalDetId{cpuid} << std::endl; - continue; - } - - assert(*iter2idgpu == cpuid); - - auto const ptrdiff = iter2idgpu - idsgpu.begin(); - auto const nsamples_gpu = hcal::compute_nsamples(f5HBProduct.stride); - auto const nsamples_cpu = qie8Product[0].size(); - assert(static_cast(nsamples_cpu) == nsamples_gpu); - - uint32_t offset = ptrdiff * f5HBProduct.stride; - uint16_t const* df_start = datagpu.data() + offset; - for (uint32_t sample = 0u; sample < nsamples_gpu; sample++) { - auto const cpuadc = cpudf.sample(sample).adc(); - auto const gpuadc = hcal::adc_for_sample(df_start, sample); - auto const cpucapid = cpudf.sample(sample).capid(); - auto const gpucapid = hcal::capid_for_sample(df_start, sample); - - hADCf5HBGPU->Fill(gpuadc); - hADCf5HBCPU->Fill(cpuadc); - hADCf5HBGPUvsCPU->Fill(cpuadc, gpuadc); - - // the must for us at RAW Decoding stage - assert(static_cast(cpuadc) == gpuadc); - assert(static_cast(cpucapid) == gpucapid); - } - } - } - } - - { - TCanvas c{"plots", "plots", 4200, 6200}; - c.Divide(3, 3); - c.cd(1); - { - gPad->SetLogy(); - hADCf01HECPU->SetLineColor(kBlack); - hADCf01HECPU->SetLineWidth(1.); - hADCf01HECPU->Draw(""); - hADCf01HEGPU->SetLineColor(kBlue); - hADCf01HEGPU->SetLineWidth(1.); - hADCf01HEGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hADCf01HEGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(2); - { - gPad->SetLogy(); - hADCf5HBCPU->SetLineColor(kBlack); - hADCf5HBCPU->SetLineWidth(1.); - hADCf5HBCPU->Draw(""); - hADCf5HBGPU->SetLineColor(kBlue); - hADCf5HBGPU->SetLineWidth(1.); - hADCf5HBGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hADCf5HBGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(3); - { - gPad->SetLogy(); - hADCf3HBCPU->SetLineColor(kBlack); - hADCf3HBCPU->SetLineWidth(1.); - hADCf3HBCPU->Draw(""); - hADCf3HBGPU->SetLineColor(kBlue); - hADCf3HBGPU->SetLineWidth(1.); - hADCf3HBGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hADCf3HBGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(4); - hADCf01HEGPUvsCPU->Draw("colz"); - c.cd(5); - hADCf5HBGPUvsCPU->Draw("colz"); - c.cd(6); - hADCf3HBGPUvsCPU->Draw("colz"); - c.cd(7); - { - gPad->SetLogy(); - hTDCf01HECPU->SetLineColor(kBlack); - hTDCf01HECPU->SetLineWidth(1.); - hTDCf01HECPU->Draw(""); - hTDCf01HEGPU->SetLineColor(kBlue); - hTDCf01HEGPU->SetLineWidth(1.); - hTDCf01HEGPU->Draw("sames"); - gPad->Update(); - auto stats = (TPaveStats*)hTDCf01HEGPU->FindObject("stats"); - auto y2 = stats->GetY2NDC(); - auto y1 = stats->GetY1NDC(); - stats->SetY2NDC(y1); - stats->SetY1NDC(y1 - (y2 - y1)); - } - c.cd(8); - hTDCf01HEGPUvsCPU->Draw("colz"); - c.cd(9); - hTDCf3HBGPUvsCPU->Draw("colz"); - - c.SaveAs("plots.pdf"); - } - - rfin.Close(); - rfout.Write(); - rfout.Close(); -} diff --git a/EventFilter/HcalRawToDigi/plugins/BuildFile.xml b/EventFilter/HcalRawToDigi/plugins/BuildFile.xml index a3d283e9805b6..ad5c632ae409c 100644 --- a/EventFilter/HcalRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/HcalRawToDigi/plugins/BuildFile.xml @@ -16,16 +16,6 @@ - - - - - - - - - - diff --git a/EventFilter/HcalRawToDigi/plugins/DeclsForKernels.h b/EventFilter/HcalRawToDigi/plugins/DeclsForKernels.h deleted file mode 100644 index 9903b77efb341..0000000000000 --- a/EventFilter/HcalRawToDigi/plugins/DeclsForKernels.h +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef EventFilter_HcalRawToDigi_interface_DeclsForKernels_h -#define EventFilter_HcalRawToDigi_interface_DeclsForKernels_h - -#include - -#include "CUDADataFormats/HcalDigi/interface/DigiCollection.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -#include "ElectronicsMappingGPU.h" - -namespace hcal { - namespace raw { - - constexpr int32_t empty_event_size = 32; - constexpr uint32_t utca_nfeds_max = 50; - constexpr uint32_t nbytes_per_fed_max = 10 * 1024; - - // each collection corresponds to a particular flavor with a certain number of - // samples per digi - constexpr uint32_t numOutputCollections = 3; - constexpr uint8_t OutputF01HE = 0; - constexpr uint8_t OutputF5HB = 1; - constexpr uint8_t OutputF3HB = 2; - - struct ConfigurationParameters { - uint32_t maxChannelsF01HE; - uint32_t maxChannelsF5HB; - uint32_t maxChannelsF3HB; - uint32_t nsamplesF01HE; - uint32_t nsamplesF5HB; - uint32_t nsamplesF3HB; - }; - - struct InputDataCPU { - cms::cuda::host::unique_ptr data; - cms::cuda::host::unique_ptr offsets; - cms::cuda::host::unique_ptr feds; - }; - - struct OutputDataCPU { - cms::cuda::host::unique_ptr nchannels; - }; - - struct ScratchDataGPU { - // depends on the number of output collections - // that is a statically known predefined number - cms::cuda::device::unique_ptr pChannelsCounters; - }; - - struct OutputDataGPU { - DigiCollection digisF01HE; - DigiCollection digisF5HB; - DigiCollection digisF3HB; - - void allocate(ConfigurationParameters const &config, cudaStream_t cudaStream) { - digisF01HE.data = cms::cuda::make_device_unique( - config.maxChannelsF01HE * compute_stride(config.nsamplesF01HE), cudaStream); - digisF01HE.ids = cms::cuda::make_device_unique(config.maxChannelsF01HE, cudaStream); - - digisF5HB.data = cms::cuda::make_device_unique( - config.maxChannelsF5HB * compute_stride(config.nsamplesF5HB), cudaStream); - digisF5HB.ids = cms::cuda::make_device_unique(config.maxChannelsF5HB, cudaStream); - digisF5HB.npresamples = cms::cuda::make_device_unique(config.maxChannelsF5HB, cudaStream); - - digisF3HB.data = cms::cuda::make_device_unique( - config.maxChannelsF3HB * compute_stride(config.nsamplesF3HB), cudaStream); - digisF3HB.ids = cms::cuda::make_device_unique(config.maxChannelsF3HB, cudaStream); - } - }; - - struct InputDataGPU { - cms::cuda::device::unique_ptr data; - cms::cuda::device::unique_ptr offsets; - cms::cuda::device::unique_ptr feds; - }; - - struct ConditionsProducts { - ElectronicsMappingGPU::Product const &eMappingProduct; - }; - - } // namespace raw -} // namespace hcal - -#endif // EventFilter_HcalRawToDigi_interface_DeclsForKernels_h diff --git a/EventFilter/HcalRawToDigi/plugins/DecodeGPU.cu b/EventFilter/HcalRawToDigi/plugins/DecodeGPU.cu deleted file mode 100644 index 4f2ca85861b30..0000000000000 --- a/EventFilter/HcalRawToDigi/plugins/DecodeGPU.cu +++ /dev/null @@ -1,593 +0,0 @@ -#include "DataFormats/HcalDetId/interface/HcalElectronicsId.h" -#include "DataFormats/HcalDetId/interface/HcalSubdetector.h" -#include "DataFormats/HcalDetId/interface/HcalDetId.h" - -#include "EventFilter/HcalRawToDigi/plugins/DecodeGPU.h" - -#include -using namespace cooperative_groups; - -namespace hcal { - namespace raw { - - __forceinline__ __device__ char const* get_subdet_str(DetId const& did) { - switch (did.subdetId()) { - case HcalEmpty: - return "HcalEmpty"; - break; - case HcalBarrel: - return "HcalBarrel"; - break; - case HcalEndcap: - return "HcalEndcap"; - break; - case HcalOuter: - return "HcalOuter"; - break; - case HcalForward: - return "HcalForward"; - break; - case HcalTriggerTower: - return "HcalTriggerTower"; - break; - case HcalOther: - return "HcalOther"; - break; - default: - return "Unknown"; - break; - } - - return "Unknown"; - } - - __forceinline__ __device__ bool is_channel_header_word(uint16_t const* ptr) { - uint8_t bit = (*ptr >> 15) & 0x1; - return bit == 1; - } - - template - constexpr bool is_power_of_two(T x) { - return (x != 0) && ((x & (x - 1)) == 0); - } - - template - __global__ void kernel_rawdecode_test(unsigned char const* data, - uint32_t const* offsets, - int const* feds, - uint32_t const* eid2did, - uint32_t const* eid2tid, - uint16_t* digisF01HE, - uint32_t* idsF01HE, - uint16_t* digisF5HB, - uint32_t* idsF5HB, - uint8_t* npresamplesF5HB, - uint16_t* digisF3HB, - uint32_t* idsF3HB, - uint32_t* pChannelsCounters, - uint32_t const nsamplesF01HE, - uint32_t const nsamplesF5HB, - uint32_t const nsamplesF3HB, - uint32_t const nBytesTotal) { - // in order to properly use cooperative groups - static_assert(is_power_of_two(NTHREADS) == true && NTHREADS <= 32); - - thread_block_tile thread_group = tiled_partition(this_thread_block()); - - auto const iamc = threadIdx.x / NTHREADS; - auto const ifed = blockIdx.x; - auto const offset = offsets[ifed]; - -#ifdef HCAL_RAWDECODE_GPUDEBUG_CG - if (ifed > 0 || iamc > 0) - return; - printf("threadIdx.x = %d rank = %d iamc = %d\n", threadIdx.x, thread_group.thread_rank(), iamc); -#endif - -#ifdef HCAL_RAWDECODE_GPUDEBUG - auto const fed = feds[ifed]; - auto const size = ifed == gridDim.x - 1 ? nBytesTotal - offset : offsets[ifed + 1] - offset; - printf("ifed = %d fed = %d offset = %u size = %u\n", ifed, fed, offset, size); -#endif - - // offset to the right raw buffer - uint64_t const* buffer = reinterpret_cast(data + offset); - -#ifdef HCAL_RAWDECODE_GPUDEBUG - // - // fed header - // - auto const fed_header = buffer[0]; - uint32_t const fed_id = (fed_header >> 8) & 0xfff; - uint32_t const bx = (fed_header >> 20) & 0xfff; - uint32_t const lv1 = (fed_header >> 32) & 0xffffff; - uint8_t const trigger_type = (fed_header >> 56) & 0xf; - uint8_t const bid_fed_header = (fed_header >> 60) & 0xf; - - printf("fed = %d fed_id = %u bx = %u lv1 = %u trigger_type = %u bid = %u\n", - fed, - fed_id, - bx, - lv1, - trigger_type, - bid_fed_header); -#endif - - // amc 13 header - auto const amc13word = buffer[1]; - uint8_t const namc = (amc13word >> 52) & 0xf; - if (iamc >= namc) - return; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - uint8_t const amc13version = (amc13word >> 60) & 0xf; - uint32_t const amc13OrbitNumber = (amc13word >> 4) & 0xffffffffu; - printf("fed = %d namc = %u amc13version = %u amc13OrbitNumber = %u\n", fed, namc, amc13version, amc13OrbitNumber); -#endif - - // compute the offset int to the right buffer - uint32_t amcoffset = 0; - for (uint8_t ii = 0u; ii < iamc; ii++) { - auto const word = buffer[2 + ii]; - int const amcSize = (word >> 32) & 0xffffff; - amcoffset += amcSize; - } - - auto const word = buffer[2 + iamc]; - int const amcSize = (word >> 32) & 0xffffff; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - uint16_t const amcid = word & 0xffff; - int const slot = (word >> 16) & 0xf; - int const amcBlockNumber = (word >> 20) & 0xff; - printf("fed = %d amcid = %u slot = %d amcBlockNumber = %d\n", fed, amcid, slot, amcBlockNumber); - - bool const amcmore = ((word >> 61) & 0x1) != 0; - bool const amcSegmented = ((word >> 60) & 0x1) != 0; - bool const amcLengthOk = ((word >> 62) & 0x1) != 0; - bool const amcCROk = ((word >> 56) & 0x1) != 0; - bool const amcDataPresent = ((word >> 58) & 0x1) != 0; - bool const amcDataValid = ((word >> 56) & 0x1) != 0; - bool const amcEnabled = ((word >> 59) & 0x1) != 0; - printf( - "fed = %d amcmore = %d amcSegmented = %d, amcLengthOk = %d amcCROk = %d\n>> amcDataPresent = %d amcDataValid " - "= %d amcEnabled = %d\n", - fed, - static_cast(amcmore), - static_cast(amcSegmented), - static_cast(amcLengthOk), - static_cast(amcCROk), - static_cast(amcDataPresent), - static_cast(amcDataValid), - static_cast(amcEnabled)); -#endif - - // get to the payload - auto const* payload64 = buffer + 2 + namc + amcoffset; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - // uhtr header v1 1st 64 bits - auto const payload64_w0 = payload64[0]; -#endif - // uhtr n bytes comes from amcSize, according to the cpu version! - uint32_t const data_length64 = amcSize; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - uint16_t bcn = (payload64_w0 >> 20) & 0xfff; - uint32_t evn = (payload64_w0 >> 32) & 0xffffff; - printf("fed = %d data_length64 = %u bcn = %u evn = %u\n", fed, data_length64, bcn, evn); -#endif - - // uhtr header v1 2nd 64 bits - auto const payload64_w1 = payload64[1]; - uint8_t const uhtrcrate = payload64_w1 & 0xff; - uint8_t const uhtrslot = (payload64_w1 >> 8) & 0xf; - uint8_t const presamples = (payload64_w1 >> 12) & 0xf; - uint8_t const payloadFormat = (payload64_w1 >> 44) & 0xf; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - uint16_t const orbitN = (payload64_w1 >> 16) & 0xffff; - uint8_t const firmFlavor = (payload64_w1 >> 32) & 0xff; - uint8_t const eventType = (payload64_w1 >> 40) & 0xf; - printf( - "fed = %d crate = %u slot = %u presamples = %u\n>>> orbitN = %u firmFlavor = %u eventType = %u payloadFormat " - "= %u\n", - fed, - uhtrcrate, - uhtrslot, - presamples, - orbitN, - firmFlavor, - eventType, - payloadFormat); -#endif - - // this should be filtering out uMNio... - if (payloadFormat != 1) - return; - - // skip uhtr header words - auto const channelDataSize = data_length64 - 2; // 2 uhtr header v1 words - auto const* channelDataBuffer64Start = payload64 + 2; // 2 uhtr header v2 wds - auto const* ptr = reinterpret_cast(channelDataBuffer64Start); - auto const* end = ptr + sizeof(uint64_t) / sizeof(uint16_t) * (channelDataSize - 1); - auto const t_rank = thread_group.thread_rank(); - - // iterate through the channel data - while (ptr != end) { - // this is the starting point for this thread group for this iteration - // with respect to this pointer every thread will move forward afterwards - auto const* const start_ptr = ptr; - -#ifdef HCAL_RAWDECODE_GPUDEBUG_CG - thread_group.sync(); -#endif - - // skip to the header word of the right channel for this thread - int counter = 0; - while (counter < thread_group.thread_rank()) { - // just a check for threads that land beyond the end - if (ptr == end) - break; - - // move ptr one forward past header - if (is_channel_header_word(ptr)) - ++ptr; - else { - // go to the next channel and do not consider this guy as a channel - while (ptr != end) - if (!is_channel_header_word(ptr)) - ++ptr; - else - break; - continue; - } - - // skip - while (ptr != end) - if (!is_channel_header_word(ptr)) - ++ptr; - else - break; - counter++; - } - -#ifdef HCAL_RAWDECODE_GPUDEBUG_CG - thread_group.sync(); - printf("ptr - start_ptr = %d counter = %d rank = %d\n", static_cast(ptr - start_ptr), counter, t_rank); -#endif - - // when the end is near, channels will land outside of the [start_ptr, end) region - if (ptr != end) { - // for all of the flavors, these 2 guys have the same bit layout - uint8_t const flavor = (ptr[0] >> 12) & 0x7; - uint8_t const channelid = ptr[0] & 0xff; - auto const* const new_channel_start = ptr; - - // flavor dependent stuff - switch (flavor) { - case 0: - case 1: { - // treat eid and did - uint8_t fiber = (channelid >> 3) & 0x1f; - uint8_t fchannel = channelid & 0x7; - HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false}; - auto const did = HcalDetId{eid2did[eid.linearIndex()]}; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n", - eid.rawId(), - eid.linearIndex(), - did.rawId(), - get_subdet_str(did)); - printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n", - flavor, - uhtrcrate, - uhtrslot, - channelid, - fiber, - fchannel); -#endif - - // remove digis not for HE - if (did.subdetId() != HcalEndcap) - break; - - // count words - auto const* channel_header_word = ptr++; - while (!is_channel_header_word(ptr) && ptr != end) - ++ptr; - auto const* channel_end = ptr; // set ptr - uint32_t const nwords = channel_end - channel_header_word; - - // filter out this digi if nwords does not equal expected - auto const expected_words = compute_stride(nsamplesF01HE); - if (nwords != expected_words) - break; - - // inc the number of digis of this type - auto const pos = atomicAdd(&pChannelsCounters[OutputF01HE], 1); -#ifdef HCAL_RAWDECODE_GPUDEBUG_CG - printf("rank = %d pos = %d\n", thread_group.thread_rank(), pos); -#endif - - // store to global mem words for this digi - idsF01HE[pos] = did.rawId(); - - for (uint32_t iword = 0; iword < expected_words; iword++) - digisF01HE[pos * expected_words + iword] = channel_header_word[iword]; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("nwords = %u\n", nwords); -#endif - - break; - } - case 3: { - // treat eid and did - uint8_t fiber = (channelid >> 3) & 0x1f; - uint8_t fchannel = channelid & 0x7; - HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false}; - auto const did = HcalDetId{eid2did[eid.linearIndex()]}; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n", - eid.rawId(), - eid.linearIndex(), - did.rawId(), - get_subdet_str(did)); - printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n", - flavor, - uhtrcrate, - uhtrslot, - channelid, - fiber, - fchannel); -#endif - - // remove digis not for HE - if (did.subdetId() != HcalBarrel) - break; - - // count words - auto const* channel_header_word = ptr++; - while (!is_channel_header_word(ptr) && ptr != end) - ++ptr; - auto const* channel_end = ptr; // set ptr - uint32_t const nwords = channel_end - channel_header_word; - - // filter out this digi if nwords does not equal expected - auto const expected_words = compute_stride(nsamplesF3HB); - if (nwords != expected_words) - break; - - // inc the number of digis of this type - auto const pos = atomicAdd(&pChannelsCounters[OutputF3HB], 1); - - // store to global mem words for this digi - idsF3HB[pos] = did.rawId(); - for (uint32_t iword = 0; iword < expected_words; iword++) - digisF3HB[pos * expected_words + iword] = channel_header_word[iword]; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("nwords = %u\n", nwords); -#endif - - break; - } - case 2: { - uint8_t fiber = (channelid >> 3) & 0x1f; - uint8_t fchannel = channelid & 0x7; - HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false}; - auto const did = DetId{eid2did[eid.linearIndex()]}; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n", - eid.rawId(), - eid.linearIndex(), - did.rawId(), - get_subdet_str(did)); - printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n", - flavor, - uhtrcrate, - uhtrslot, - channelid, - fiber, - fchannel); -#endif - - break; - } - case 4: { - uint8_t link = (channelid >> 4) & 0xf; - uint8_t tower = channelid & 0xf; - HcalElectronicsId eid{uhtrcrate, uhtrslot, link, tower, true}; - auto const did = DetId{eid2tid[eid.linearIndex()]}; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n", - eid.rawId(), - eid.linearIndex(), - did.rawId(), - get_subdet_str(did)); - printf("flavor = %u crate = %u slot = %u channelid = %u link = %u tower = %u\n", - flavor, - uhtrcrate, - uhtrslot, - channelid, - link, - tower); -#endif - - break; - } - case 5: { - uint8_t fiber = (channelid >> 2) & 0x3f; - uint8_t fchannel = channelid & 0x3; - HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false}; - auto const did = HcalDetId{eid2did[eid.linearIndex()]}; - -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("erawId = %u linearIndex = %u drawid = %u subdet = %s\n", - eid.rawId(), - eid.linearIndex(), - did.rawId(), - get_subdet_str(did)); - printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n", - flavor, - uhtrcrate, - uhtrslot, - channelid, - fiber, - fchannel); -#endif - - // remove digis not for HB - if (did.subdetId() != HcalBarrel) - break; - - // count words - auto const* channel_header_word = ptr++; - while (!is_channel_header_word(ptr) && ptr != end) - ++ptr; - auto const* channel_end = ptr; // set ptr - uint32_t const nwords = channel_end - channel_header_word; - - // filter out this digi if nwords does not equal expected - auto const expected_words = compute_stride(nsamplesF5HB); - if (nwords != expected_words) - break; - - // inc the number of digis of this type - auto const pos = atomicAdd(&pChannelsCounters[OutputF5HB], 1); - -#ifdef HCAL_RAWDECODE_GPUDEBUG_CG - printf("rank = %d pos = %d\n", thread_group.thread_rank(), pos); -#endif - - // store to global mem words for this digi - idsF5HB[pos] = did.rawId(); - npresamplesF5HB[pos] = presamples; - for (uint32_t iword = 0; iword < expected_words; iword++) - digisF5HB[pos * expected_words + iword] = channel_header_word[iword]; - - break; - } - case 7: { - uint8_t const fiber = (channelid >> 2) & 0x3f; - uint8_t const fchannel = channelid & 0x3; - HcalElectronicsId eid{uhtrcrate, uhtrslot, fiber, fchannel, false}; - auto const did = DetId{eid2did[eid.linearIndex()]}; - - /* uncomment to check the linear index validity - if (eid.rawId() >= HcalElectronicsId::maxLinearIndex) { -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("*** rawid = %u has no known det id***\n", eid.rawId()); -#endif - break; - } - */ - -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("erawId = %u linearIndex = %u drawid = %u\n", eid.rawId(), eid.linearIndex(), did.rawId()); - printf("flavor = %u crate = %u slot = %u channelid = %u fiber = %u fchannel = %u\n", - flavor, - uhtrcrate, - uhtrslot, - channelid, - fiber, - fchannel); -#endif - - break; - } - default: -#ifdef HCAL_RAWDECODE_GPUDEBUG - printf("flavor = %u crate = %u slot = %u channelid = %u\n", flavor, uhtrcrate, uhtrslot, channelid); -#endif - ; - } - - // skip to the next word in case - // 1) current channel was not treated - // 2) we are in the middle of the digi words and not at the end - while (new_channel_start == ptr || !is_channel_header_word(ptr) && ptr != end) - ++ptr; - } - - // thread with rank 31 of the group will have the ptr pointing to the - // header word of the next channel or the end - int const offset_to_shuffle = ptr - start_ptr; - - // always receive from the last guy in the group - auto const offset_for_rank31 = thread_group.shfl(offset_to_shuffle, NTHREADS - 1); - -#ifdef HCAL_RAWDECODE_GPUDEBUG_CG - printf("rank = %d offset_to_shuffle = %d offset_for_rank32 = %d\n", - thread_group.thread_rank(), - offset_to_shuffle, - offset_for_rank31); -#endif - - // update the ptr for all threads of this group - // NOTE: relative to the start_ptr that is the same for all threads of - // this group - ptr = start_ptr + offset_for_rank31; - } - } - - void entryPoint(InputDataCPU const& inputCPU, - InputDataGPU& inputGPU, - OutputDataGPU& outputGPU, - ScratchDataGPU& scratchGPU, - OutputDataCPU& outputCPU, - ConditionsProducts const& conditions, - ConfigurationParameters const& config, - cudaStream_t cudaStream, - uint32_t const nfedsWithData, - uint32_t const nbytesTotal) { - // transfer - cudaCheck(cudaMemcpyAsync(inputGPU.data.get(), - inputCPU.data.get(), - nbytesTotal * sizeof(unsigned char), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck(cudaMemcpyAsync(inputGPU.offsets.get(), - inputCPU.offsets.get(), - nfedsWithData * sizeof(uint32_t), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck( - cudaMemsetAsync(scratchGPU.pChannelsCounters.get(), 0, sizeof(uint32_t) * numOutputCollections, cudaStream)); - cudaCheck(cudaMemcpyAsync( - inputGPU.feds.get(), inputCPU.feds.get(), nfedsWithData * sizeof(int), cudaMemcpyHostToDevice, cudaStream)); - - // 12 is the max number of modules per crate - kernel_rawdecode_test<32><<>>(inputGPU.data.get(), - inputGPU.offsets.get(), - inputGPU.feds.get(), - conditions.eMappingProduct.eid2did, - conditions.eMappingProduct.eid2tid, - outputGPU.digisF01HE.data.get(), - outputGPU.digisF01HE.ids.get(), - outputGPU.digisF5HB.data.get(), - outputGPU.digisF5HB.ids.get(), - outputGPU.digisF5HB.npresamples.get(), - outputGPU.digisF3HB.data.get(), - outputGPU.digisF3HB.ids.get(), - scratchGPU.pChannelsCounters.get(), - config.nsamplesF01HE, - config.nsamplesF5HB, - config.nsamplesF3HB, - nbytesTotal); - cudaCheck(cudaGetLastError()); - - cudaCheck(cudaMemcpyAsync(outputCPU.nchannels.get(), - scratchGPU.pChannelsCounters.get(), - sizeof(uint32_t) * numOutputCollections, - cudaMemcpyDeviceToHost, - cudaStream)); - } - - } // namespace raw -} // namespace hcal diff --git a/EventFilter/HcalRawToDigi/plugins/DecodeGPU.h b/EventFilter/HcalRawToDigi/plugins/DecodeGPU.h deleted file mode 100644 index 3d5e4eec32269..0000000000000 --- a/EventFilter/HcalRawToDigi/plugins/DecodeGPU.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef EventFilter_HcalRawToDigi_interface_DecodeGPU_h -#define EventFilter_HcalRawToDigi_interface_DecodeGPU_h - -#include "DeclsForKernels.h" - -namespace hcal { - namespace raw { - - void entryPoint(InputDataCPU const&, - InputDataGPU&, - OutputDataGPU&, - ScratchDataGPU&, - OutputDataCPU&, - ConditionsProducts const&, - ConfigurationParameters const&, - cudaStream_t cudaStream, - uint32_t const, - uint32_t const); - - } -} // namespace hcal - -#endif // EventFilter_HcalRawToDigi_interface_DecodeGPU_h diff --git a/EventFilter/HcalRawToDigi/plugins/ElectronicsMappingGPU.cc b/EventFilter/HcalRawToDigi/plugins/ElectronicsMappingGPU.cc deleted file mode 100644 index 6b7b89cc6ea77..0000000000000 --- a/EventFilter/HcalRawToDigi/plugins/ElectronicsMappingGPU.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include "DataFormats/HcalDetId/interface/HcalElectronicsId.h" -#include "FWCore/Utilities/interface/typelookup.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -#include "ElectronicsMappingGPU.h" - -namespace hcal { - namespace raw { - - // TODO: 0x3FFFFF * 4B ~= 16MB - // tmp solution for linear mapping of eid -> did - ElectronicsMappingGPU::ElectronicsMappingGPU(HcalElectronicsMap const& mapping) - : eid2tid_(HcalElectronicsId::maxLinearIndex, 0u), eid2did_(HcalElectronicsId::maxLinearIndex, 0u) { - auto const& eidsPrecision = mapping.allElectronicsIdPrecision(); - for (uint32_t i = 0; i < eidsPrecision.size(); ++i) { - auto const& eid = eidsPrecision[i]; - - // assign - eid2did_[eid.linearIndex()] = eid.isTriggerChainId() ? 0u : mapping.lookup(eid).rawId(); - } - - auto const& eidsTrigger = mapping.allElectronicsIdTrigger(); - for (uint32_t i = 0; i < eidsTrigger.size(); i++) { - auto const& eid = eidsTrigger[i]; - - // assign - eid2tid_[eid.linearIndex()] = eid.isTriggerChainId() ? mapping.lookupTrigger(eid).rawId() : 0u; - } - } - - ElectronicsMappingGPU::Product::~Product() { - // deallocation - cudaCheck(cudaFree(eid2did)); - cudaCheck(cudaFree(eid2tid)); - } - - ElectronicsMappingGPU::Product const& ElectronicsMappingGPU::getProduct(cudaStream_t cudaStream) const { - auto const& product = product_.dataForCurrentDeviceAsync( - cudaStream, [this](ElectronicsMappingGPU::Product& product, cudaStream_t cudaStream) { - // malloc - cudaCheck(cudaMalloc((void**)&product.eid2did, this->eid2did_.size() * sizeof(uint32_t))); - cudaCheck(cudaMalloc((void**)&product.eid2tid, this->eid2tid_.size() * sizeof(uint32_t))); - - // transfer - cudaCheck(cudaMemcpyAsync(product.eid2did, - this->eid2did_.data(), - this->eid2did_.size() * sizeof(uint32_t), - cudaMemcpyHostToDevice, - cudaStream)); - cudaCheck(cudaMemcpyAsync(product.eid2tid, - this->eid2tid_.data(), - this->eid2tid_.size() * sizeof(uint32_t), - cudaMemcpyHostToDevice, - cudaStream)); - }); - - return product; - } - - } // namespace raw -} // namespace hcal - -TYPELOOKUP_DATA_REG(hcal::raw::ElectronicsMappingGPU); diff --git a/EventFilter/HcalRawToDigi/plugins/ElectronicsMappingGPU.h b/EventFilter/HcalRawToDigi/plugins/ElectronicsMappingGPU.h deleted file mode 100644 index 0f4c12f02a92d..0000000000000 --- a/EventFilter/HcalRawToDigi/plugins/ElectronicsMappingGPU.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef EventFilter_HcalRawToDigi_plugins_ElectronicsMappingGPU_h -#define EventFilter_HcalRawToDigi_plugins_ElectronicsMappingGPU_h - -#include "CondFormats/HcalObjects/interface/HcalElectronicsMap.h" - -#ifndef __CUDACC__ -#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#endif - -namespace hcal { - namespace raw { - - class ElectronicsMappingGPU { - public: - struct Product { - ~Product(); - // trigger - uint32_t *eid2tid; - // detector - uint32_t *eid2did; - }; - -#ifndef __CUDACC__ - - // rearrange pedestals - ElectronicsMappingGPU(HcalElectronicsMap const &); - - // will call dealloation for Product thru ~Product - ~ElectronicsMappingGPU() = default; - - // get device pointers - Product const &getProduct(cudaStream_t) const; - - private: - // in the future, we need to arrange so to avoid this copy on the host - // if possible - std::vector> eid2tid_; - std::vector> eid2did_; - - cms::cuda::ESProduct product_; -#endif - }; - - } // namespace raw -} // namespace hcal - -#endif // EventFilter_HcalRawToDigi_plugins_ElectronicsMappingGPU_h diff --git a/EventFilter/HcalRawToDigi/plugins/HcalCPUDigisProducer.cc b/EventFilter/HcalRawToDigi/plugins/HcalCPUDigisProducer.cc deleted file mode 100644 index c2b67a10afaff..0000000000000 --- a/EventFilter/HcalRawToDigi/plugins/HcalCPUDigisProducer.cc +++ /dev/null @@ -1,117 +0,0 @@ -#include - -#include "CUDADataFormats/HcalDigi/interface/DigiCollection.h" -#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" -#include "DataFormats/HcalDigi/interface/HcalDigiCollections.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -class HcalCPUDigisProducer : public edm::stream::EDProducer { -public: - explicit HcalCPUDigisProducer(edm::ParameterSet const& ps); - ~HcalCPUDigisProducer() override; - static void fillDescriptions(edm::ConfigurationDescriptions&); - -private: - void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; - void produce(edm::Event&, edm::EventSetup const&) override; - -private: - using IProductTypef01 = cms::cuda::Product>; - edm::EDGetTokenT digisF01HETokenIn_; - using IProductTypef5 = cms::cuda::Product>; - edm::EDGetTokenT digisF5HBTokenIn_; - using IProductTypef3 = cms::cuda::Product>; - edm::EDGetTokenT digisF3HBTokenIn_; - - using OProductTypef01 = - hcal::DigiCollection>; - edm::EDPutTokenT digisF01HETokenOut_; - using OProductTypef5 = - hcal::DigiCollection>; - edm::EDPutTokenT digisF5HBTokenOut_; - using OProductTypef3 = - hcal::DigiCollection>; - edm::EDPutTokenT digisF3HBTokenOut_; - - // needed to pass data from acquire to produce - OProductTypef01 digisf01HE_; - OProductTypef5 digisf5HB_; - OProductTypef3 digisf3HB_; -}; - -void HcalCPUDigisProducer::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { - edm::ParameterSetDescription desc; - - desc.add("digisLabelF01HEIn", edm::InputTag{"hcalRawToDigiGPU", "f01HEDigisGPU"}); - desc.add("digisLabelF5HBIn", edm::InputTag{"hcalRawToDigiGPU", "f5HBDigisGPU"}); - desc.add("digisLabelF3HBIn", edm::InputTag{"hcalRawToDigiGPU", "f3HBDigisGPU"}); - desc.add("digisLabelF01HEOut", "f01HEDigis"); - desc.add("digisLabelF5HBOut", "f5HBDigis"); - desc.add("digisLabelF3HBOut", "f3HBDigis"); - - confDesc.addWithDefaultLabel(desc); -} - -HcalCPUDigisProducer::HcalCPUDigisProducer(const edm::ParameterSet& ps) - : digisF01HETokenIn_{consumes(ps.getParameter("digisLabelF01HEIn"))}, - digisF5HBTokenIn_{consumes(ps.getParameter("digisLabelF5HBIn"))}, - digisF3HBTokenIn_{consumes(ps.getParameter("digisLabelF3HBIn"))}, - digisF01HETokenOut_{produces(ps.getParameter("digisLabelF01HEOut"))}, - digisF5HBTokenOut_{produces(ps.getParameter("digisLabelF5HBOut"))}, - digisF3HBTokenOut_{produces(ps.getParameter("digisLabelF3HBOut"))} {} - -HcalCPUDigisProducer::~HcalCPUDigisProducer() {} - -void HcalCPUDigisProducer::acquire(edm::Event const& event, - edm::EventSetup const& setup, - edm::WaitingTaskWithArenaHolder taskHolder) { - // retrieve data/ctx - auto const& f01HEProduct = event.get(digisF01HETokenIn_); - auto const& f5HBProduct = event.get(digisF5HBTokenIn_); - auto const& f3HBProduct = event.get(digisF3HBTokenIn_); - cms::cuda::ScopedContextAcquire ctx{f01HEProduct, std::move(taskHolder)}; - auto const& f01HEDigis = ctx.get(f01HEProduct); - auto const& f5HBDigis = ctx.get(f5HBProduct); - auto const& f3HBDigis = ctx.get(f3HBProduct); - - // resize out tmp buffers - digisf01HE_.stride = f01HEDigis.stride; - digisf5HB_.stride = f5HBDigis.stride; - digisf3HB_.stride = f3HBDigis.stride; - digisf01HE_.resize(f01HEDigis.size); - digisf5HB_.resize(f5HBDigis.size); - digisf3HB_.resize(f3HBDigis.size); - - auto lambdaToTransfer = [&ctx](auto& dest, auto* src) { - using vector_type = typename std::remove_reference::type; - using type = typename vector_type::value_type; - using src_data_type = typename std::remove_pointer::type; - static_assert(std::is_same::value && "Dest and Src data types do not match"); - cudaCheck(cudaMemcpyAsync(dest.data(), src, dest.size() * sizeof(type), cudaMemcpyDeviceToHost, ctx.stream())); - }; - - lambdaToTransfer(digisf01HE_.data, f01HEDigis.data.get()); - lambdaToTransfer(digisf01HE_.ids, f01HEDigis.ids.get()); - - lambdaToTransfer(digisf5HB_.data, f5HBDigis.data.get()); - lambdaToTransfer(digisf5HB_.ids, f5HBDigis.ids.get()); - lambdaToTransfer(digisf5HB_.npresamples, f5HBDigis.npresamples.get()); - - lambdaToTransfer(digisf3HB_.data, f3HBDigis.data.get()); - lambdaToTransfer(digisf3HB_.ids, f3HBDigis.ids.get()); -} - -void HcalCPUDigisProducer::produce(edm::Event& event, edm::EventSetup const& setup) { - event.emplace(digisF01HETokenOut_, std::move(digisf01HE_)); - event.emplace(digisF5HBTokenOut_, std::move(digisf5HB_)); - event.emplace(digisF3HBTokenOut_, std::move(digisf3HB_)); -} - -DEFINE_FWK_MODULE(HcalCPUDigisProducer); diff --git a/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc b/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc deleted file mode 100644 index 80ac575ff2230..0000000000000 --- a/EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc +++ /dev/null @@ -1,235 +0,0 @@ -#include - -#include "CUDADataFormats/HcalDigi/interface/DigiCollection.h" -#include "DataFormats/HcalDigi/interface/HcalDigiCollections.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -class HcalDigisProducerGPU : public edm::stream::EDProducer { -public: - explicit HcalDigisProducerGPU(edm::ParameterSet const& ps); - ~HcalDigisProducerGPU() override = default; - static void fillDescriptions(edm::ConfigurationDescriptions&); - -private: - void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; - void produce(edm::Event&, edm::EventSetup const&) override; - -private: - // input product tokens - edm::EDGetTokenT hbheDigiToken_; - edm::EDGetTokenT qie11DigiToken_; - - // type aliases - using HostCollectionf01 = - hcal::DigiCollection>; - using DeviceCollectionf01 = hcal::DigiCollection; - using HostCollectionf5 = - hcal::DigiCollection>; - using DeviceCollectionf5 = hcal::DigiCollection; - using HostCollectionf3 = - hcal::DigiCollection>; - using DeviceCollectionf3 = hcal::DigiCollection; - - // output product tokens - using ProductTypef01 = cms::cuda::Product; - edm::EDPutTokenT digisF01HEToken_; - using ProductTypef5 = cms::cuda::Product; - edm::EDPutTokenT digisF5HBToken_; - using ProductTypef3 = cms::cuda::Product; - edm::EDPutTokenT digisF3HBToken_; - - cms::cuda::ContextState cudaState_; - - struct ConfigParameters { - uint32_t maxChannelsF01HE, maxChannelsF5HB, maxChannelsF3HB; - }; - ConfigParameters config_; - - // per event host buffers - HostCollectionf01 hf01_; - HostCollectionf5 hf5_; - HostCollectionf3 hf3_; - - // device products: product owns memory (i.e. not the module) - DeviceCollectionf01 df01_; - DeviceCollectionf5 df5_; - DeviceCollectionf3 df3_; -}; - -void HcalDigisProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { - edm::ParameterSetDescription desc; - - // FIXME - desc.add("hbheDigisLabel", edm::InputTag("hcalDigis")); - desc.add("qie11DigiLabel", edm::InputTag("hcalDigis")); - desc.add("digisLabelF01HE", std::string{"f01HEDigisGPU"}); - desc.add("digisLabelF5HB", std::string{"f5HBDigisGPU"}); - desc.add("digisLabelF3HB", std::string{"f3HBDigisGPU"}); - desc.add("maxChannelsF01HE", 10000u); - desc.add("maxChannelsF5HB", 10000u); - desc.add("maxChannelsF3HB", 10000u); - - confDesc.addWithDefaultLabel(desc); -} - -HcalDigisProducerGPU::HcalDigisProducerGPU(const edm::ParameterSet& ps) - : hbheDigiToken_{consumes(ps.getParameter("hbheDigisLabel"))}, - qie11DigiToken_{consumes(ps.getParameter("qie11DigiLabel"))}, - digisF01HEToken_{produces(ps.getParameter("digisLabelF01HE"))}, - digisF5HBToken_{produces(ps.getParameter("digisLabelF5HB"))}, - digisF3HBToken_{produces(ps.getParameter("digisLabelF3HB"))} { - config_.maxChannelsF01HE = ps.getParameter("maxChannelsF01HE"); - config_.maxChannelsF5HB = ps.getParameter("maxChannelsF5HB"); - config_.maxChannelsF3HB = ps.getParameter("maxChannelsF3HB"); - - // this is a preallocation for the max statically known number of time samples - // actual stride/nsamples will be inferred from data - hf01_.stride = hcal::compute_stride(QIE11DigiCollection::MAXSAMPLES); - hf5_.stride = hcal::compute_stride(HBHEDataFrame::MAXSAMPLES); - hf3_.stride = hcal::compute_stride(QIE11DigiCollection::MAXSAMPLES); - - // preallocate pinned host memory only if CUDA is available - edm::Service cuda; - if (cuda and cuda->enabled()) { - hf01_.reserve(config_.maxChannelsF01HE); - hf5_.reserve(config_.maxChannelsF5HB); - hf3_.reserve(config_.maxChannelsF3HB); - } -} - -void HcalDigisProducerGPU::acquire(edm::Event const& event, - edm::EventSetup const& setup, - edm::WaitingTaskWithArenaHolder holder) { - // raii - cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(holder), cudaState_}; - - // clear host buffers - hf01_.clear(); - hf5_.clear(); - hf3_.clear(); - - // event data - edm::Handle hbheDigis; - edm::Handle qie11Digis; - event.getByToken(hbheDigiToken_, hbheDigis); - event.getByToken(qie11DigiToken_, qie11Digis); - - // init f5 collection - if (not hbheDigis->empty()) { - auto const nsamples = (*hbheDigis)[0].size(); - auto const stride = hcal::compute_stride(nsamples); - hf5_.stride = stride; - - // flavor5 get device blobs - df5_.stride = stride; - df5_.data = cms::cuda::make_device_unique(config_.maxChannelsF5HB * stride, ctx.stream()); - df5_.ids = cms::cuda::make_device_unique(config_.maxChannelsF5HB, ctx.stream()); - df5_.npresamples = cms::cuda::make_device_unique(config_.maxChannelsF5HB, ctx.stream()); - } - - if (not qie11Digis->empty()) { - auto const nsamples = qie11Digis->samples(); - auto const stride01 = hcal::compute_stride(nsamples); - auto const stride3 = hcal::compute_stride(nsamples); - - hf01_.stride = stride01; - hf3_.stride = stride3; - - // flavor 0/1 get devie blobs - df01_.stride = stride01; - df01_.data = cms::cuda::make_device_unique(config_.maxChannelsF01HE * stride01, ctx.stream()); - df01_.ids = cms::cuda::make_device_unique(config_.maxChannelsF01HE, ctx.stream()); - - // flavor3 get device blobs - df3_.stride = stride3; - df3_.data = cms::cuda::make_device_unique(config_.maxChannelsF3HB * stride3, ctx.stream()); - df3_.ids = cms::cuda::make_device_unique(config_.maxChannelsF3HB, ctx.stream()); - } - - for (auto const& hbhe : *hbheDigis) { - auto const id = hbhe.id().rawId(); - auto const presamples = hbhe.presamples(); - hf5_.ids.push_back(id); - hf5_.npresamples.push_back(presamples); - auto const stride = hcal::compute_stride(hbhe.size()); - assert(stride == hf5_.stride && "strides must be the same for every single digi of the collection"); - // simple for now... - static_assert(hcal::Flavor5::HEADER_WORDS == 1); - uint16_t header_word = (1 << 15) | (0x5 << 12) | (0 << 10) | ((hbhe.sample(0).capid() & 0x3) << 8); - hf5_.data.push_back(header_word); - for (unsigned int i = 0; i < stride - hcal::Flavor5::HEADER_WORDS; i++) { - uint16_t s0 = (0 << 7) | (static_cast(hbhe.sample(2 * i).adc()) & 0x7f); - uint16_t s1 = (0 << 7) | (static_cast(hbhe.sample(2 * i + 1).adc()) & 0x7f); - uint16_t sample = (s1 << 8) | s0; - hf5_.data.push_back(sample); - } - } - - for (unsigned int i = 0; i < qie11Digis->size(); i++) { - auto const& digi = QIE11DataFrame{(*qie11Digis)[i]}; - assert(digi.samples() == qie11Digis->samples() && "collection nsamples must equal per digi samples"); - if (digi.flavor() == 0 or digi.flavor() == 1) { - if (digi.detid().subdetId() != HcalEndcap) - continue; - auto const id = digi.detid().rawId(); - hf01_.ids.push_back(id); - for (int hw = 0; hw < hcal::Flavor1::HEADER_WORDS; hw++) - hf01_.data.push_back((*qie11Digis)[i][hw]); - for (int sample = 0; sample < digi.samples(); sample++) { - hf01_.data.push_back((*qie11Digis)[i][hcal::Flavor1::HEADER_WORDS + sample]); - } - } else if (digi.flavor() == 3) { - if (digi.detid().subdetId() != HcalBarrel) - continue; - auto const id = digi.detid().rawId(); - hf3_.ids.push_back(id); - for (int hw = 0; hw < hcal::Flavor3::HEADER_WORDS; hw++) - hf3_.data.push_back((*qie11Digis)[i][hw]); - for (int sample = 0; sample < digi.samples(); sample++) { - hf3_.data.push_back((*qie11Digis)[i][hcal::Flavor3::HEADER_WORDS + sample]); - } - } - } - - auto lambdaToTransfer = [&ctx](auto* dest, auto const& src) { - if (src.empty()) - return; - using vector_type = typename std::remove_reference::type; - using type = typename vector_type::value_type; - using dest_data_type = typename std::remove_pointer::type; - static_assert(std::is_same::value && "Dest and Src data typesdo not match"); - cudaCheck(cudaMemcpyAsync(dest, src.data(), src.size() * sizeof(type), cudaMemcpyHostToDevice, ctx.stream())); - }; - - lambdaToTransfer(df01_.data.get(), hf01_.data); - lambdaToTransfer(df01_.ids.get(), hf01_.ids); - - lambdaToTransfer(df5_.data.get(), hf5_.data); - lambdaToTransfer(df5_.ids.get(), hf5_.ids); - lambdaToTransfer(df5_.npresamples.get(), hf5_.npresamples); - - lambdaToTransfer(df3_.data.get(), hf3_.data); - lambdaToTransfer(df3_.ids.get(), hf3_.ids); - - df01_.size = hf01_.ids.size(); - df5_.size = hf5_.ids.size(); - df3_.size = hf3_.ids.size(); -} - -void HcalDigisProducerGPU::produce(edm::Event& event, edm::EventSetup const& setup) { - cms::cuda::ScopedContextProduce ctx{cudaState_}; - - ctx.emplace(event, digisF01HEToken_, std::move(df01_)); - ctx.emplace(event, digisF5HBToken_, std::move(df5_)); - ctx.emplace(event, digisF3HBToken_, std::move(df3_)); -} - -DEFINE_FWK_MODULE(HcalDigisProducerGPU); diff --git a/EventFilter/HcalRawToDigi/plugins/HcalESProducerGPUDefs.cc b/EventFilter/HcalRawToDigi/plugins/HcalESProducerGPUDefs.cc deleted file mode 100644 index 749a98e990755..0000000000000 --- a/EventFilter/HcalRawToDigi/plugins/HcalESProducerGPUDefs.cc +++ /dev/null @@ -1,10 +0,0 @@ -#include "CondFormats/DataRecord/interface/HcalElectronicsMapRcd.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "HeterogeneousCore/CUDACore/interface/ConvertingESProducerT.h" - -#include "ElectronicsMappingGPU.h" - -using HcalElectronicsMappingGPUESProducer = - ConvertingESProducerT; - -DEFINE_FWK_EVENTSETUP_MODULE(HcalElectronicsMappingGPUESProducer); diff --git a/EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc b/EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc deleted file mode 100644 index 5fd50199d248b..0000000000000 --- a/EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc +++ /dev/null @@ -1,195 +0,0 @@ -#include "CUDADataFormats/Common/interface/Product.h" -#include "CondFormats/DataRecord/interface/HcalElectronicsMapRcd.h" -#include "DataFormats/FEDRawData/interface/FEDNumbering.h" -#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" - -#include "DeclsForKernels.h" -#include "DecodeGPU.h" -#include "ElectronicsMappingGPU.h" - -class HcalRawToDigiGPU : public edm::stream::EDProducer { -public: - explicit HcalRawToDigiGPU(edm::ParameterSet const& ps); - ~HcalRawToDigiGPU() override; - static void fillDescriptions(edm::ConfigurationDescriptions&); - -private: - void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; - void produce(edm::Event&, edm::EventSetup const&) override; - -private: - edm::ESGetToken eMappingToken_; - edm::EDGetTokenT rawDataToken_; - using ProductTypef01 = cms::cuda::Product>; - edm::EDPutTokenT digisF01HEToken_; - using ProductTypef5 = cms::cuda::Product>; - edm::EDPutTokenT digisF5HBToken_; - using ProductTypef3 = cms::cuda::Product>; - edm::EDPutTokenT digisF3HBToken_; - - cms::cuda::ContextState cudaState_; - - std::vector fedsToUnpack_; - - hcal::raw::ConfigurationParameters config_; - hcal::raw::OutputDataGPU outputGPU_; - hcal::raw::OutputDataCPU outputCPU_; -}; - -void HcalRawToDigiGPU::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { - edm::ParameterSetDescription desc; - - desc.add("InputLabel", edm::InputTag("rawDataCollector")); - auto nFeds = FEDNumbering::MAXHCALuTCAFEDID - FEDNumbering::MINHCALuTCAFEDID + 1; - std::vector feds(nFeds); - for (int i = 0; i < nFeds; ++i) - feds[i] = i + FEDNumbering::MINHCALuTCAFEDID; - desc.add>("FEDs", feds); - desc.add("maxChannelsF01HE", 10000u); - desc.add("maxChannelsF5HB", 10000u); - desc.add("maxChannelsF3HB", 10000u); - desc.add("nsamplesF01HE", 8); - desc.add("nsamplesF5HB", 8); - desc.add("nsamplesF3HB", 8); - desc.add("digisLabelF5HB", "f5HBDigisGPU"); - desc.add("digisLabelF01HE", "f01HEDigisGPU"); - desc.add("digisLabelF3HB", "f3HBDigisGPU"); - - std::string label = "hcalRawToDigiGPU"; - confDesc.add(label, desc); -} - -HcalRawToDigiGPU::HcalRawToDigiGPU(const edm::ParameterSet& ps) - : eMappingToken_{esConsumes()}, - rawDataToken_{consumes(ps.getParameter("InputLabel"))}, - digisF01HEToken_{produces(ps.getParameter("digisLabelF01HE"))}, - digisF5HBToken_{produces(ps.getParameter("digisLabelF5HB"))}, - digisF3HBToken_{produces(ps.getParameter("digisLabelF3HB"))}, - fedsToUnpack_{ps.getParameter>("FEDs")} { - config_.maxChannelsF01HE = ps.getParameter("maxChannelsF01HE"); - config_.maxChannelsF5HB = ps.getParameter("maxChannelsF5HB"); - config_.maxChannelsF3HB = ps.getParameter("maxChannelsF3HB"); - config_.nsamplesF01HE = ps.getParameter("nsamplesF01HE"); - config_.nsamplesF5HB = ps.getParameter("nsamplesF5HB"); - config_.nsamplesF3HB = ps.getParameter("nsamplesF3HB"); -} - -HcalRawToDigiGPU::~HcalRawToDigiGPU() {} - -void HcalRawToDigiGPU::acquire(edm::Event const& event, - edm::EventSetup const& setup, - edm::WaitingTaskWithArenaHolder holder) { - // raii - cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(holder), cudaState_}; - - // conditions - auto const& eMappingProduct = setup.getData(eMappingToken_).getProduct(ctx.stream()); - - // bundle up conditions - hcal::raw::ConditionsProducts conditions{eMappingProduct}; - - // event data - edm::Handle rawDataHandle; - event.getByToken(rawDataToken_, rawDataHandle); - - // scratch - hcal::raw::ScratchDataGPU scratchGPU = { - cms::cuda::make_device_unique(hcal::raw::numOutputCollections, ctx.stream())}; - - // input cpu data - hcal::raw::InputDataCPU inputCPU = {cms::cuda::make_host_unique( - hcal::raw::utca_nfeds_max * hcal::raw::nbytes_per_fed_max, ctx.stream()), - cms::cuda::make_host_unique(hcal::raw::utca_nfeds_max, ctx.stream()), - cms::cuda::make_host_unique(hcal::raw::utca_nfeds_max, ctx.stream())}; - - // input data gpu - hcal::raw::InputDataGPU inputGPU = { - cms::cuda::make_device_unique(hcal::raw::utca_nfeds_max * hcal::raw::nbytes_per_fed_max, - ctx.stream()), - cms::cuda::make_device_unique(hcal::raw::utca_nfeds_max, ctx.stream()), - cms::cuda::make_device_unique(hcal::raw::utca_nfeds_max, ctx.stream())}; - - // output cpu - outputCPU_ = {cms::cuda::make_host_unique(hcal::raw::numOutputCollections, ctx.stream())}; - - // output gpu - outputGPU_.allocate(config_, ctx.stream()); - - // iterate over feds - // TODO: another idea - // - loop over all feds to unpack and enqueue cuda memcpy - // - accumulate the sizes - // - after the loop launch cuda memcpy for sizes - // - enqueue the kernel - uint32_t currentCummOffset = 0; - uint32_t counter = 0; - for (auto const& fed : fedsToUnpack_) { - auto const& data = rawDataHandle->FEDData(fed); - auto const nbytes = data.size(); - - // skip empty feds - if (nbytes < hcal::raw::empty_event_size) - continue; - -#ifdef HCAL_RAWDECODE_CPUDEBUG - printf("fed = %d nbytes = %lu\n", fed, nbytes); -#endif - - // copy raw data into plain buffer - std::memcpy(inputCPU.data.get() + currentCummOffset, data.data(), nbytes); - // set the offset in bytes from the start - inputCPU.offsets[counter] = currentCummOffset; - inputCPU.feds[counter] = fed; - - // this is the current offset into the vector - currentCummOffset += nbytes; - ++counter; - } - - hcal::raw::entryPoint(inputCPU, - inputGPU, - outputGPU_, - scratchGPU, - outputCPU_, - conditions, - config_, - ctx.stream(), - counter, - currentCummOffset); -} - -void HcalRawToDigiGPU::produce(edm::Event& event, edm::EventSetup const& setup) { - cms::cuda::ScopedContextProduce ctx{cudaState_}; - -#ifdef HCAL_RAWDECODE_CPUDEBUG - printf("f01he channels = %u f5hb channesl = %u\n", - outputCPU_.nchannels[hcal::raw::OutputF01HE], - outputCPU_.nchannels[hcal::raw::OutputF5HB]); -#endif - - // FIXME: use sizes of views directly for cuda mem cpy? - auto const nchannelsF01HE = outputCPU_.nchannels[hcal::raw::OutputF01HE]; - auto const nchannelsF5HB = outputCPU_.nchannels[hcal::raw::OutputF5HB]; - auto const nchannelsF3HB = outputCPU_.nchannels[hcal::raw::OutputF3HB]; - outputGPU_.digisF01HE.size = nchannelsF01HE; - outputGPU_.digisF5HB.size = nchannelsF5HB; - outputGPU_.digisF3HB.size = nchannelsF3HB; - outputGPU_.digisF01HE.stride = hcal::compute_stride(config_.nsamplesF01HE); - outputGPU_.digisF5HB.stride = hcal::compute_stride(config_.nsamplesF5HB); - outputGPU_.digisF3HB.stride = hcal::compute_stride(config_.nsamplesF3HB); - - ctx.emplace(event, digisF01HEToken_, std::move(outputGPU_.digisF01HE)); - ctx.emplace(event, digisF5HBToken_, std::move(outputGPU_.digisF5HB)); - ctx.emplace(event, digisF3HBToken_, std::move(outputGPU_.digisF3HB)); - - // reset ptrs that are carried as members - outputCPU_.nchannels.reset(); -} - -DEFINE_FWK_MODULE(HcalRawToDigiGPU);