
GNNE-1714: Feature/add quant strict mode to config #1116

Merged 3 commits on Oct 31, 2023
4 changes: 4 additions & 0 deletions docs/MixQuant.md
@@ -16,11 +16,13 @@ compiler.use_ptq(ptq_options)

```python
ptq_options.quant_scheme = ""
ptq_options.quant_scheme_strict_mode = False
ptq_options.export_quant_scheme = False
ptq_options.export_weight_range_by_channel = False
```

* **quant_scheme: the path of the quantization scheme file to import**
* **quant_scheme_strict_mode: whether to strictly follow quant_scheme when quantizing**
* **export_quant_scheme: whether to export the quantization scheme file**
* **export_weight_range_by_channel: whether to export** weights quantization parameters in `bychannel` form; to preserve quantization accuracy, setting this to `True` is recommended
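
Putting the four options together, a minimal sketch of the export pass (assuming a `compiler` built with the nncase Python API as in the surrounding docs; calibration-data setup is omitted):

```python
import nncase

# First pass: compile with IR dumping on so the quantization scheme
# can be exported alongside the IR (see the options described above).
compile_options = nncase.CompileOptions()
compile_options.dump_ir = True
compiler = nncase.Compiler(compile_options)

# Export a per-channel scheme on this run; nothing is imported yet.
ptq_options = nncase.PTQTensorOptions()
ptq_options.quant_scheme = ""
ptq_options.quant_scheme_strict_mode = False
ptq_options.export_quant_scheme = True
ptq_options.export_weight_range_by_channel = True  # recommended for accuracy
compiler.use_ptq(ptq_options)
```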

@@ -36,6 +38,7 @@ compile_options.dump_ir = True

```python
ptq_options.quant_scheme = ""
ptq_options.quant_scheme_strict_mode = False
ptq_options.export_quant_scheme = True
ptq_options.export_weight_range_by_channel = True
```
@@ -108,6 +111,7 @@ ptq_options.export_weight_range_by_channel = True

```python
ptq_options.quant_scheme = "./QuantScheme.json" # path to your 'QuantScheme.json'
ptq_options.quant_scheme_strict_mode = False # whether to strictly follow quant_scheme during quantization
ptq_options.export_quant_scheme = False
ptq_options.export_weight_range_by_channel = False # value does not matter when importing a scheme
```
1 change: 1 addition & 0 deletions docs/USAGE_v2.md
@@ -228,6 +228,7 @@ The PTQTensorOptions class configures nncase PTQ options; its attributes are described below
| dump_quant_error | bool | No | Whether to generate the quantization error, False by default. Takes effect when `dump_ir=True`. |
| dump_quant_error_symmetric_for_signed | bool | No | Whether to generate the quantization error using symmetric ranges for signed values, True by default. Takes effect when `dump_ir=True`. |
| quant_scheme | string | No | Path of the quantization scheme file, "" by default. Takes effect when `dump_ir=True`. |
| quant_scheme_strict_mode | bool | No | Whether to strictly follow quant_scheme when quantizing, False by default. Takes effect when `quant_scheme` is not empty. |
| export_quant_scheme | bool | No | Whether to export the quantization scheme file, False by default. Takes effect when `dump_ir=True`. |
| export_weight_range_by_channel | bool | No | When exporting the quantization scheme file, whether to collect weight ranges per channel, False by default. Takes effect when `dump_ir=True`. |

1 change: 1 addition & 0 deletions docs/USAGE_v2_EN.md
@@ -226,6 +226,7 @@ PTQTensorOptions is used to configure PTQ options. The details of all attributes
| dump_quant_error | bool | N | Specify whether to dump the quantization error, False by default. This and the following options take effect when `dump_ir=True`. |
| dump_quant_error_symmetric_for_signed | bool | N | Specify whether to dump the quantization error using symmetric ranges for signed numbers, True by default. |
| quant_scheme | string | N | Specify the path of the quantization scheme file, "" by default. |
| quant_scheme_strict_mode | bool | N | Specify whether to strictly follow quant_scheme during quantization, False by default. Takes effect when `quant_scheme` is not empty. |
| export_quant_scheme | bool | N | Specify whether to export the quantization scheme, False by default. |
| export_weight_range_by_channel | bool | N | Specify whether to export weight ranges by channel, False by default. |
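
For illustration, a hedged sketch of wiring these options up from Python (attribute names follow the tables above; the input shape and the `RuntimeTensor.from_numpy` call are assumptions):

```python
import numpy as np
import nncase

ptq_options = nncase.PTQTensorOptions()
ptq_options.samples_count = 5

# Calibration samples; the shape is a placeholder for the model's input.
samples = [np.random.rand(1, 3, 224, 224).astype(np.float32)
           for _ in range(ptq_options.samples_count)]
ptq_options.cali_data = [nncase.RuntimeTensor.from_numpy(s) for s in samples]

# Import an existing scheme and apply it verbatim: strict mode only
# has an effect because quant_scheme is non-empty.
ptq_options.quant_scheme = "./QuantScheme.json"
ptq_options.quant_scheme_strict_mode = True
```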

1 change: 1 addition & 0 deletions examples/user_guide/k230_simulate-EN.ipynb
@@ -150,6 +150,7 @@
" # mix quantize options\n",
" # more details in docs/MixQuant.md\n",
" ptq_options.quant_scheme = \"\"\n",
" ptq_options.quant_scheme_strict_mode = False\n",
" ptq_options.export_quant_scheme = False\n",
" ptq_options.export_weight_range_by_channel = False\n",
" ############################################\n",
1 change: 1 addition & 0 deletions examples/user_guide/k230_simulate-ZH.ipynb
@@ -150,6 +150,7 @@
" # mix quantize options\n",
" # more details in docs/MixQuant.md\n",
" ptq_options.quant_scheme = \"\"\n",
" ptq_options.quant_scheme_strict_mode = False\n",
" ptq_options.export_quant_scheme = False\n",
" ptq_options.export_weight_range_by_channel = False\n",
" ############################################\n",
3 changes: 3 additions & 0 deletions python/nncase/__init__.py
@@ -66,6 +66,7 @@ class PTQTensorOptions:
input_mean: float
input_std: float
quant_scheme: str
quant_scheme_strict_mode: bool
samples_count: int
cali_data: List[RuntimeTensor]

@@ -83,6 +84,7 @@ def __init__(self) -> None:
self.input_mean: float = 0.5
self.input_std: float = 0.5
self.quant_scheme: str = ""
self.quant_scheme_strict_mode: bool = False
self.samples_count: int = 5
self.cali_data: List[RuntimeTensor] = []

@@ -244,6 +246,7 @@ def use_ptq(self, ptq_dataset_options: PTQTensorOptions) -> None:

self._quantize_options.use_mix_quant = ptq_dataset_options.use_mix_quant
self._quantize_options.quant_scheme = ptq_dataset_options.quant_scheme
self._quantize_options.quant_scheme_strict_mode = ptq_dataset_options.quant_scheme_strict_mode
self._quantize_options.export_quant_scheme = ptq_dataset_options.export_quant_scheme
self._quantize_options.export_weight_range_by_channel = ptq_dataset_options.export_weight_range_by_channel
self._quantize_options.dump_quant_error = ptq_dataset_options.dump_quant_error
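
For context, a rough sketch of where `use_ptq` sits in the compile flow (API names follow docs/USAGE_v2_EN.md; the target string and model path are placeholders):

```python
import nncase

compile_options = nncase.CompileOptions()
compile_options.target = "k230"  # placeholder target
compile_options.dump_ir = True
compiler = nncase.Compiler(compile_options)

with open("model.tflite", "rb") as f:
    compiler.import_tflite(f.read(), nncase.ImportOptions())

# use_ptq() copies every PTQTensorOptions field, including the new
# quant_scheme_strict_mode, onto the compiler's quantize options.
compiler.use_ptq(ptq_options)  # ptq_options configured as shown earlier
compiler.compile()
kmodel = compiler.gencode_tobytes()
```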
5 changes: 5 additions & 0 deletions python/nncase/native/ffi.cpp
@@ -185,6 +185,11 @@ PYBIND11_MODULE(_nncase, m) {
py::overload_cast<>(&quantize_options::quant_scheme),
py::overload_cast<std::string_view>(
&quantize_options::quant_scheme))
.def_property(
"quant_scheme_strict_mode",
py::overload_cast<>(&quantize_options::quant_scheme_strict_mode),
py::overload_cast<bool>(
&quantize_options::quant_scheme_strict_mode))
.def_property(
"export_quant_scheme",
py::overload_cast<>(&quantize_options::export_quant_scheme),
8 changes: 8 additions & 0 deletions src/Native/include/nncase/compiler.h
@@ -199,6 +199,8 @@ typedef struct {
void (*quantize_options_set_quant_scheme)(
clr_object_handle_t quantize_options, const char *quant_scheme,
size_t quant_scheme_length);
void (*quantize_options_set_quant_scheme_strict_mode)(
clr_object_handle_t quantize_options, bool quant_scheme_strict_mode);
void (*quantize_options_set_export_quant_scheme)(
clr_object_handle_t quantize_options, bool export_quant_scheme);
void (*quantize_options_set_export_weight_range_by_channel)(
@@ -401,6 +403,12 @@ class quantize_options : public clr_object_base {
obj_.get(), value.data(), value.length());
}

bool quant_scheme_strict_mode() { return false; }
void quant_scheme_strict_mode(bool value) {
nncase_clr_api()->quantize_options_set_quant_scheme_strict_mode(
obj_.get(), value);
}

bool export_quant_scheme() { return false; }
void export_quant_scheme(bool value) {
nncase_clr_api()->quantize_options_set_export_quant_scheme(obj_.get(),
18 changes: 18 additions & 0 deletions src/Nncase.Compiler/Interop/CApi.cs
@@ -84,6 +84,7 @@ public unsafe struct CApiMT
public delegate* unmanaged<IntPtr, FineTuneWeightsMethod, void> QuantOptionsSetFineTuneWeightsMethodPtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetUseMixQuantPtr;
public delegate* unmanaged<IntPtr, byte*, nuint, void> QuantOptionsSetQuantSchemePtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetQuantSchemeStrictModePtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetExportQuantSchemePtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetExportWeightRangeByChannelPtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetDumpQuantErrorPtr;
@@ -154,6 +155,7 @@ public static void Initialize(CApiMT* mt)
mt->QuantOptionsSetFineTuneWeightsMethodPtr = &QuantizeOptionsSetFineTuneWeightsMethod;
mt->QuantOptionsSetUseMixQuantPtr = &QuantOptionsSetUseMixQuant;
mt->QuantOptionsSetQuantSchemePtr = &QuantizeOptionsSetQuantScheme;
mt->QuantOptionsSetQuantSchemeStrictModePtr = &QuantizeOptionsSetQuantSchemeStrictMode;
mt->QuantOptionsSetExportQuantSchemePtr = &QuantizeOptionsSetExportQuantScheme;
mt->QuantOptionsSetExportWeightRangeByChannelPtr = &QuantizeOptionsSetExportWeightRangeByChannel;
mt->QuantOptionsSetDumpQuantErrorPtr = &QuantizeOptionsSetDumpQuantError;
@@ -603,6 +605,22 @@ private static void QuantizeOptionsSetQuantScheme(IntPtr quantizeOptionsHandle,
Get<QuantizeOptions>(quantizeOptionsHandle).QuantScheme = ToString(quantSchemePtr, quantSchemeLength);
}

[UnmanagedCallersOnly]
private static void QuantizeOptionsSetQuantSchemeStrictMode(IntPtr quantizeOptionsHandle, byte quantSchemeStrictMode)
{
switch (quantSchemeStrictMode)
{
case 0:
Get<QuantizeOptions>(quantizeOptionsHandle).QuantSchemeStrictMode = false;
break;
case 1:
Get<QuantizeOptions>(quantizeOptionsHandle).QuantSchemeStrictMode = true;
break;
default:
throw new ArgumentException("Invalid QuantSchemeStrictMode Flag");
}
}

[UnmanagedCallersOnly]
private static void QuantizeOptionsSetExportQuantScheme(IntPtr quantizeOptionsHandle, byte exportQuantScheme)
{
5 changes: 4 additions & 1 deletion src/Nncase.Importer/TFLite/MatMul.cs
@@ -69,9 +69,12 @@ private Expr VisitMatMul(in tflite.Operator op, bool isFullyConnected = true)
List<string> outputNames = new() { GetOutputTensor(op, 0).Name + "_matmul" };
matmul.Metadata.OutputNames = outputNames;
outputNames.Clear();
outputNames.Add(GetOutputTensor(op, 0).Name);
outputNames.Add(GetOutputTensor(op, 0).Name + "_bias");
bias.Metadata.OutputNames = outputNames;
var mm = matmul + bias;
outputNames.Clear();
outputNames.Add(GetOutputTensor(op, 0).Name);
mm.Metadata.OutputNames = outputNames;

return fusedActivationFunction switch
{
1 change: 1 addition & 0 deletions tests/config.toml
@@ -40,6 +40,7 @@ finetune_weights_method = 'NoFineTuneWeights'
input_mean = 0.5
input_std = 0.5
quant_scheme = ""
quant_scheme_strict_mode = false

[infer_report_opt]
enabled = false