[Not Merge] Compare to static graph quant, just for debugging. #25010

Closed · wants to merge 18 commits
61 changes: 55 additions & 6 deletions paddle/fluid/operators/fake_quantize_op.cc
@@ -386,6 +386,48 @@ class FakeQuantOrWithDequantMovingAverageAbsMaxOp
}
};

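// Grad maker for fake_quantize_dequantize_moving_average_abs_max: routes dOut
// into the new backward op, which produces dX.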
template <typename T>
class FakeQuantOrWithDequantMovingAverageAbsMaxGradMaker
: public framework::SingleGradOpMaker<T> {
public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

protected:
void Apply(GradOpPtr<T> grad_op) const override {
grad_op->SetType("fake_quantize_dequantize_moving_average_abs_max_grad");
grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
grad_op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
grad_op->SetAttrMap(this->Attrs());
}
};

class FakeQuantOrWithDequantMovingAverageAbsMaxGradOp
: public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

void InferShape(framework::InferShapeContext* ctx) const override {
auto out_grad_name = framework::GradVarName("Out");
OP_INOUT_CHECK(ctx->HasInput(out_grad_name), "Input", out_grad_name,
"FakeQuantOrWithDequantMovingAverageAbsMaxGradOp");

auto x_grad_name = framework::GradVarName("X");
PADDLE_ENFORCE_EQ(ctx->HasOutput(x_grad_name), true,
platform::errors::PreconditionNotMet(
"FakeQuantOrWithDequantMovingAverageAbsMaxGradOp "
"doesn't have the output named %s.",
x_grad_name));
ctx->SetOutputDim(x_grad_name, ctx->GetInputDim(out_grad_name));
}

framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto input_data_type = OperatorWithKernel::IndicateVarDataType(
ctx, framework::GradVarName("Out"));
return framework::OpKernelType(input_data_type, ctx.GetPlace());
}
};

class FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker
: public framework::OpProtoAndCheckerMaker {
public:
@@ -522,15 +564,22 @@ REGISTER_OPERATOR(
REGISTER_OP_CPU_KERNEL(fake_quantize_moving_average_abs_max,
ops::FakeQuantizeMovingAverageAbsMaxKernel<CPU, float>);

REGISTER_OPERATOR(
fake_quantize_dequantize_moving_average_abs_max,
ops::FakeQuantOrWithDequantMovingAverageAbsMaxOp,
ops::FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(fake_quantize_dequantize_moving_average_abs_max,
ops::FakeQuantOrWithDequantMovingAverageAbsMaxOp,
ops::FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker,
ops::FakeQuantOrWithDequantMovingAverageAbsMaxGradMaker<
paddle::framework::OpDesc>,
ops::FakeQuantOrWithDequantMovingAverageAbsMaxGradMaker<
paddle::imperative::OpBase>);
REGISTER_OPERATOR(fake_quantize_dequantize_moving_average_abs_max_grad,
ops::FakeQuantOrWithDequantMovingAverageAbsMaxGradOp);

REGISTER_OP_CPU_KERNEL(
fake_quantize_dequantize_moving_average_abs_max,
ops::FakeQuantizeDequantizeMovingAverageAbsMaxKernel<CPU, float>);
REGISTER_OP_CPU_KERNEL(
fake_quantize_dequantize_moving_average_abs_max_grad,
ops::FakeQuantOrWithDequantMovingAverageAbsMaxGradKernel<CPU, float>);

REGISTER_OPERATOR(
fake_channel_wise_quantize_abs_max, ops::FakeChannelWiseQuantizeAbsMaxOp,
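For orientation, a minimal numpy sketch of the forward computation this new backward pairs with, assuming the usual clip-round-rescale fake quant-dequant with a moving-average abs-max scale (the helper name and the exact state update are illustrative assumptions, not code from this PR):

import numpy as np

def fake_quant_dequant_moving_avg_abs_max(x, accum, state, rate=0.9, bits=8):
    # Update the moving average of the absolute maximum (training-time state).
    accum = rate * accum + np.abs(x).max()
    state = rate * state + 1.0
    scale = accum / state
    # Quantize to the signed integer grid, then immediately dequantize.
    bin_cnt = 2 ** (bits - 1) - 1
    out = np.round(np.clip(x, -scale, scale) / scale * bin_cnt) * scale / bin_cnt
    return out, accum, state

The backward registered above takes only Out@GRAD and copies it into X@GRAD, which is why no scale inputs appear in the grad op.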
3 changes: 3 additions & 0 deletions paddle/fluid/operators/fake_quantize_op.cu
@@ -347,3 +347,6 @@ REGISTER_OP_CUDA_KERNEL(moving_average_abs_max_scale,
REGISTER_OP_CUDA_KERNEL(
fake_quantize_dequantize_moving_average_abs_max,
ops::FakeQuantizeDequantizeMovingAverageAbsMaxKernel<CUDA, float>);
REGISTER_OP_CUDA_KERNEL(
fake_quantize_dequantize_moving_average_abs_max_grad,
ops::FakeQuantOrWithDequantMovingAverageAbsMaxGradKernel<CUDA, float>);
20 changes: 20 additions & 0 deletions paddle/fluid/operators/fake_quantize_op.h
@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/fluid/platform/transform.h"

namespace paddle {
namespace operators {
@@ -277,5 +278,24 @@ class MovingAverageAbsMaxScaleKernel : public framework::OpKernel<T> {
}
};

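// Backward kernel for the fake quant-dequant op (shared by the CPU and CUDA
// registrations): rounding is treated as an identity under the
// straight-through estimator, so dX is a plain copy of dOut.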
template <typename DeviceContext, typename T>
class FakeQuantOrWithDequantMovingAverageAbsMaxGradKernel
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* d_out =
context.Input<framework::LoDTensor>(framework::GradVarName("Out"));
auto x_grad_name = framework::GradVarName("X");
auto* d_x = context.Output<framework::LoDTensor>(x_grad_name);
PADDLE_ENFORCE_NOT_NULL(d_x, platform::errors::PreconditionNotMet(
"FakeQuantOrWithDequantMovingAverageAbsMax"
"GradOp doesn't have the output named %s.",
x_grad_name));

d_x->mutable_data<T>(context.GetPlace());
framework::TensorCopy(*d_out, context.GetPlace(), d_x);
}
};

} // namespace operators
} // namespace paddle
1 change: 1 addition & 0 deletions paddle/fluid/platform/dynload/cusolver.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#include <cuda.h>
#include <cusolverDn.h>

#include <mutex> // NOLINT
3 changes: 3 additions & 0 deletions python/paddle/fluid/contrib/slim/quantization/__init__.py
@@ -26,9 +26,12 @@
from .qat2_int8_mkldnn_pass import *
from . import post_training_quantization
from .post_training_quantization import *
from . import dyquant
from .dyquant import *

__all__ = quantization_pass.__all__ + quantization_strategy.__all__
__all__ += mkldnn_post_training_strategy.__all__
__all__ += qat_int8_mkldnn_pass.__all__
__all__ += qat2_int8_mkldnn_pass.__all__
__all__ += post_training_quantization.__all__
__all__ += dyquant.__all__
25 changes: 25 additions & 0 deletions python/paddle/fluid/contrib/slim/quantization/dyquant/__init__.py
@@ -0,0 +1,25 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

from . import quant_nn
from .quant_nn import *

from . import dygraph_quantization
from .dygraph_quantization import *

__all__ = []
__all__ += quant_nn.__all__
__all__ += dygraph_quantization.__all__
112 changes: 112 additions & 0 deletions python/paddle/fluid/contrib/slim/quantization/dyquant/dygraph_quantization.py
@@ -0,0 +1,112 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import sys
import numpy as np
from paddle.fluid import dygraph
from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.log_helper import get_logger
from .quant_nn import FakeQuant
from .quant_nn import QuantizedConv2D
from .quant_nn import QuantizedLinear

__all__ = ['DygraphQuantAware']

_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')


class DygraphQuantAware(object):
def __init__(self,
weight_bits=8,
activation_bits=8,
moving_rate=0.9,
quantizable_layer_type=['Conv2D', 'Linear'],
program_translator=None):
super(DygraphQuantAware, self).__init__()
self._weight_bits = weight_bits
self._activation_bits = activation_bits
self._moving_rate = moving_rate
self._quant_layers_map = {'Conv2D': Conv2D, 'Linear': Linear}
self._translator = (program_translator if program_translator else
dygraph.ProgramTranslator())
self._quantizable_layer_type = tuple(
self._quant_layers_map[layer]
if layer in self._quant_layers_map else layer
for layer in quantizable_layer_type)
for layer in self._quantizable_layer_type:
            assert not isinstance(
                layer, str), "{} is not supported for quantization.".format(layer)

def prepare(self):
self._translator.enable_declarative = False

def quantize(self, model):
for name, layer in model.named_sublayers():
if not isinstance(layer, self._quantizable_layer_type):
continue

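            # Walk the dotted sublayer path so the quantized replacement can be
            # assigned on the layer's immediate parent.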
scopes = name.split('.')
target = scopes[-1]
obj = model
parent = model
for i in range(len(scopes) - 1):
obj = getattr(parent, scopes[i])
parent = obj

quant_layer = self._get_quantized_counterpart(layer)
setattr(obj, target, quant_layer)

def save_infer_quant_model(self,
dirname,
model,
input_shape,
input_dtype='float32',
feed=None,
fetch=None,
append_batch_size=True):
with dygraph.guard():
self._translator.enable_declarative = True
model.eval()
            raw_data = np.random.random(input_shape).astype(input_dtype)
            input_data = raw_data[np.newaxis, :] if append_batch_size else raw_data
input_var = dygraph.to_variable(input_data)
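            # Run one forward pass so the program translator records the graph
            # before it is saved below.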
out = model(input_var)

self._translator.save_inference_model(dirname, feed, fetch)

def _get_quantized_counterpart(self, layer):
quant_layers = tuple(self._quant_layers_map.values())
quantized_counterpart = tuple('Quantized' + k
for k in self._quant_layers_map.keys())

predicate = lambda value: isinstance(layer, value)
index_generator = (i for i, v in enumerate(quant_layers)
if predicate(v))

try:
index = next(index_generator)
except StopIteration:
            _logger.fatal("The layer {} is not supported for quantization.".format(
                layer.full_name()))
sys.exit(-1)

module = sys.modules[__name__]
quantized_layer = getattr(module, quantized_counterpart[index])(
layer, self._weight_bits, self._activation_bits, self._moving_rate)
return quantized_layer
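
A hypothetical end-to-end sketch of how DygraphQuantAware might be used; MyModel and the output directory are placeholders, while the calls follow the signatures above:

import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import DygraphQuantAware

quanter = DygraphQuantAware(weight_bits=8, activation_bits=8, moving_rate=0.9)
quanter.prepare()  # disable declarative translation while training

with fluid.dygraph.guard():
    model = MyModel()        # hypothetical dygraph Layer with Conv2D/Linear sublayers
    quanter.quantize(model)  # swap quantizable sublayers for fake-quant versions
    # ... ordinary dygraph training loop on the quantized model ...

# input_shape excludes the batch dimension; append_batch_size=True prepends one.
quanter.save_infer_quant_model(
    dirname='./quant_infer_model',  # placeholder output path
    model=model,
    input_shape=(3, 224, 224),
    input_dtype='float32')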