diff --git a/docs/MixQuant.md b/docs/MixQuant.md
index c70de011f0..76197cc40d 100644
--- a/docs/MixQuant.md
+++ b/docs/MixQuant.md
@@ -16,11 +16,13 @@ compiler.use_ptq(ptq_options)
 
 ```python
 ptq_options.quant_scheme = ""
+ptq_options.quant_scheme_strict_mode = False
 ptq_options.export_quant_scheme = False
 ptq_options.export_weight_range_by_channel = False
 ```
 
 * **quant_scheme: path of the quantization scheme file to import**
+* **quant_scheme_strict_mode: whether to quantize strictly according to quant_scheme**
 * **export_quant_scheme: whether to export the quantization scheme file**
 * **export_weight_range_by_channel: whether to export the weights quantization parameters in `bychannel` form; to preserve quantization accuracy, setting this to `True` is recommended**
@@ -36,6 +38,7 @@ compile_options.dump_ir = True
 
 ```python
 ptq_options.quant_scheme = ""
+ptq_options.quant_scheme_strict_mode = False
 ptq_options.export_quant_scheme = True
 ptq_options.export_weight_range_by_channel = True
 ```
@@ -108,6 +111,7 @@ ptq_options.export_weight_range_by_channel = True
 
 ```python
 ptq_options.quant_scheme = "./QuantScheme.json" # path to your 'QuantScheme.json'
+ptq_options.quant_scheme_strict_mode = False # whether to strictly follow quant_scheme for quantization
 ptq_options.export_quant_scheme = False
 ptq_options.export_weight_range_by_channel = False # value is ignored when importing a scheme
 ```
diff --git a/docs/USAGE_v2.md b/docs/USAGE_v2.md
index 68944d7bbf..a7aa4d3809 100644
--- a/docs/USAGE_v2.md
+++ b/docs/USAGE_v2.md
@@ -228,6 +228,7 @@ The PTQTensorOptions class configures nncase PTQ options; its attributes are described below
 | dump_quant_error | bool | No | Whether to dump the quantization error, False by default. Takes effect when `dump_ir=True` |
 | dump_quant_error_symmetric_for_signed | bool | No | Whether to dump the quantization error using symmetric ranges for signed numbers, True by default. Takes effect when `dump_ir=True` |
 | quant_scheme | string | No | Path of the quantization scheme file, "" by default. Takes effect when `dump_ir=True` |
+| quant_scheme_strict_mode | bool | No | Whether to quantize strictly according to quant_scheme, False by default. Takes effect when `quant_scheme` is not empty |
 | export_quant_scheme | bool | No | Whether to export the quantization scheme file, False by default. Takes effect when `dump_ir=True` |
 | export_weight_range_by_channel | bool | No | Whether to collect weight ranges per channel when exporting the quantization scheme file, False by default. Takes effect when `dump_ir=True` |
diff --git a/docs/USAGE_v2_EN.md b/docs/USAGE_v2_EN.md
index 5800ccfc7e..8a19714c32 100644
--- a/docs/USAGE_v2_EN.md
+++ b/docs/USAGE_v2_EN.md
@@ -226,6 +226,7 @@ PTQTensorOptions is used to configure PTQ options. The details of all attributes
 | dump_quant_error | bool | N | Specify whether to dump the quantization error, False by default. This and the following parameters take effect when `dump_ir=True`. |
 | dump_quant_error_symmetric_for_signed | bool | N | Specify whether to dump the quantization error using symmetric ranges for signed numbers, True by default. |
 | quant_scheme | string | N | Specify the path of the quantization scheme file, "" by default. |
+| quant_scheme_strict_mode | bool | N | Specify whether to strictly follow quant_scheme during quantization, False by default. |
 | export_quant_scheme | bool | N | Specify whether to export the quantization scheme file, False by default. |
 | export_weight_range_by_channel | bool | N | Specify whether to export weight ranges by channel, False by default. |
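The docs hunks above describe a two-pass mix-quant workflow. As a cross-check of the new flag against the documented API, here is a minimal sketch of the export pass, assuming the compile pipeline from the user guide; the target name is illustrative and calibration-data setup is omitted:

```python
import nncase

# Pass 1: calibrate and export a quant scheme (per-channel weight ranges).
compile_options = nncase.CompileOptions()
compile_options.target = "k230"  # illustrative target
compile_options.dump_ir = True   # scheme export takes effect when dump_ir=True

compiler = nncase.Compiler(compile_options)

ptq_options = nncase.PTQTensorOptions()
ptq_options.quant_scheme = ""                      # nothing to import yet
ptq_options.quant_scheme_strict_mode = False       # new flag, default off
ptq_options.export_quant_scheme = True             # write QuantScheme.json
ptq_options.export_weight_range_by_channel = True  # recommended for accuracy
compiler.use_ptq(ptq_options)
```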
diff --git a/examples/user_guide/k230_simulate-EN.ipynb b/examples/user_guide/k230_simulate-EN.ipynb
index a8630394df..b4c5aa2b91 100644
--- a/examples/user_guide/k230_simulate-EN.ipynb
+++ b/examples/user_guide/k230_simulate-EN.ipynb
@@ -150,6 +150,7 @@
     "    # mix quantize options\n",
     "    # more details in docs/MixQuant.md\n",
     "    ptq_options.quant_scheme = \"\"\n",
+    "    ptq_options.quant_scheme_strict_mode = False\n",
     "    ptq_options.export_quant_scheme = False\n",
     "    ptq_options.export_weight_range_by_channel = False\n",
     "    ############################################\n",
diff --git a/examples/user_guide/k230_simulate-ZH.ipynb b/examples/user_guide/k230_simulate-ZH.ipynb
index e9b9dc5329..3a099a1ea3 100644
--- a/examples/user_guide/k230_simulate-ZH.ipynb
+++ b/examples/user_guide/k230_simulate-ZH.ipynb
@@ -150,6 +150,7 @@
     "    # mix quantize options\n",
     "    # more details in docs/MixQuant.md\n",
     "    ptq_options.quant_scheme = \"\"\n",
+    "    ptq_options.quant_scheme_strict_mode = False\n",
     "    ptq_options.export_quant_scheme = False\n",
     "    ptq_options.export_weight_range_by_channel = False\n",
     "    ############################################\n",
diff --git a/python/nncase/__init__.py b/python/nncase/__init__.py
index 0663332e10..3c6aa2b69f 100644
--- a/python/nncase/__init__.py
+++ b/python/nncase/__init__.py
@@ -66,6 +66,7 @@ class PTQTensorOptions:
     input_mean: float
     input_std: float
     quant_scheme: str
+    quant_scheme_strict_mode: bool
     samples_count: int
     cali_data: List[RuntimeTensor]
@@ -83,6 +84,7 @@ def __init__(self) -> None:
         self.input_mean: float = 0.5
         self.input_std: float = 0.5
         self.quant_scheme: str = ""
+        self.quant_scheme_strict_mode: bool = False
         self.samples_count: int = 5
         self.cali_data: List[RuntimeTensor] = []
@@ -244,6 +246,7 @@ def use_ptq(self, ptq_dataset_options: PTQTensorOptions) -> None:
         self._quantize_options.use_mix_quant = ptq_dataset_options.use_mix_quant
         self._quantize_options.quant_scheme = ptq_dataset_options.quant_scheme
+        self._quantize_options.quant_scheme_strict_mode = ptq_dataset_options.quant_scheme_strict_mode
         self._quantize_options.export_quant_scheme = ptq_dataset_options.export_quant_scheme
         self._quantize_options.export_weight_range_by_channel = ptq_dataset_options.export_weight_range_by_channel
         self._quantize_options.dump_quant_error = ptq_dataset_options.dump_quant_error
diff --git a/python/nncase/native/ffi.cpp b/python/nncase/native/ffi.cpp
index 8bd6bd6ba9..b8c99ed966 100644
--- a/python/nncase/native/ffi.cpp
+++ b/python/nncase/native/ffi.cpp
@@ -185,6 +185,11 @@ PYBIND11_MODULE(_nncase, m) {
             py::overload_cast<>(&quantize_options::quant_scheme),
             py::overload_cast<std::string_view>(
                 &quantize_options::quant_scheme))
+        .def_property(
+            "quant_scheme_strict_mode",
+            py::overload_cast<>(&quantize_options::quant_scheme_strict_mode),
+            py::overload_cast<bool>(
+                &quantize_options::quant_scheme_strict_mode))
         .def_property(
             "export_quant_scheme",
             py::overload_cast<>(&quantize_options::export_quant_scheme),
diff --git a/src/Native/include/nncase/compiler.h b/src/Native/include/nncase/compiler.h
index 6b7b33ef92..7339d0b546 100644
--- a/src/Native/include/nncase/compiler.h
+++ b/src/Native/include/nncase/compiler.h
@@ -199,6 +199,8 @@ typedef struct {
     void (*quantize_options_set_quant_scheme)(
         clr_object_handle_t quantize_options, const char *quant_scheme,
         size_t quant_scheme_length);
+    void (*quantize_options_set_quant_scheme_strict_mode)(
+        clr_object_handle_t quantize_options, bool quant_scheme_strict_mode);
     void (*quantize_options_set_export_quant_scheme)(
         clr_object_handle_t quantize_options, bool export_quant_scheme);
     void (*quantize_options_set_export_weight_range_by_channel)(
@@ -401,6 +403,12 @@ class quantize_options : public clr_object_base {
             obj_.get(), value.data(), value.length());
     }
 
+    bool quant_scheme_strict_mode() { return false; }
+    void quant_scheme_strict_mode(bool value) {
+        nncase_clr_api()->quantize_options_set_quant_scheme_strict_mode(
+            obj_.get(), value);
+    }
+
     bool export_quant_scheme() { return false; }
     void export_quant_scheme(bool value) {
         nncase_clr_api()->quantize_options_set_export_quant_scheme(obj_.get(),
diff --git a/src/Nncase.Compiler/Interop/CApi.cs b/src/Nncase.Compiler/Interop/CApi.cs
index 3ce2d0e289..69bacc8397 100644
--- a/src/Nncase.Compiler/Interop/CApi.cs
+++ b/src/Nncase.Compiler/Interop/CApi.cs
@@ -84,6 +84,7 @@ public unsafe struct CApiMT
     public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetFineTuneWeightsMethodPtr;
     public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetUseMixQuantPtr;
     public delegate* unmanaged<IntPtr, byte*, nuint, void> QuantOptionsSetQuantSchemePtr;
+    public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetQuantSchemeStrictModePtr;
     public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetExportQuantSchemePtr;
     public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetExportWeightRangeByChannelPtr;
     public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetDumpQuantErrorPtr;
@@ -154,6 +155,7 @@ public static void Initialize(CApiMT* mt)
     mt->QuantOptionsSetFineTuneWeightsMethodPtr = &QuantizeOptionsSetFineTuneWeightsMethod;
     mt->QuantOptionsSetUseMixQuantPtr = &QuantOptionsSetUseMixQuant;
     mt->QuantOptionsSetQuantSchemePtr = &QuantizeOptionsSetQuantScheme;
+    mt->QuantOptionsSetQuantSchemeStrictModePtr = &QuantizeOptionsSetQuantSchemeStrictMode;
     mt->QuantOptionsSetExportQuantSchemePtr = &QuantizeOptionsSetExportQuantScheme;
     mt->QuantOptionsSetExportWeightRangeByChannelPtr = &QuantizeOptionsSetExportWeightRangeByChannel;
     mt->QuantOptionsSetDumpQuantErrorPtr = &QuantizeOptionsSetDumpQuantError;
@@ -603,6 +605,22 @@ private static void QuantizeOptionsSetQuantScheme(IntPtr quantizeOptionsHandle, byte* quantSchemePtr, nuint quantSchemeLength)
     {
         Get(quantizeOptionsHandle).QuantScheme = ToString(quantSchemePtr, quantSchemeLength);
     }
 
+    [UnmanagedCallersOnly]
+    private static void QuantizeOptionsSetQuantSchemeStrictMode(IntPtr quantizeOptionsHandle, byte quantSchemeStrictMode)
+    {
+        switch (quantSchemeStrictMode)
+        {
+            case 0:
+                Get(quantizeOptionsHandle).QuantSchemeStrictMode = false;
+                break;
+            case 1:
+                Get(quantizeOptionsHandle).QuantSchemeStrictMode = true;
+                break;
+            default:
+                throw new ArgumentException("Invalid QuantSchemeStrictMode Flag");
+        }
+    }
+
     [UnmanagedCallersOnly]
     private static void QuantizeOptionsSetExportQuantScheme(IntPtr quantizeOptionsHandle, byte exportQuantScheme)
     {
diff --git a/src/Nncase.Importer/TFLite/MatMul.cs b/src/Nncase.Importer/TFLite/MatMul.cs
index 6b1724e5f5..59bbfcd1d7 100644
--- a/src/Nncase.Importer/TFLite/MatMul.cs
+++ b/src/Nncase.Importer/TFLite/MatMul.cs
@@ -69,9 +69,12 @@ private Expr VisitMatMul(in tflite.Operator op, bool isFullyConnected = true)
         List<string> outputNames = new() { GetOutputTensor(op, 0).Name + "_matmul" };
         matmul.Metadata.OutputNames = outputNames;
         outputNames.Clear();
-        outputNames.Add(GetOutputTensor(op, 0).Name);
+        outputNames.Add(GetOutputTensor(op, 0).Name + "_bias");
         bias.Metadata.OutputNames = outputNames;
         var mm = matmul + bias;
+        outputNames.Clear();
+        outputNames.Add(GetOutputTensor(op, 0).Name);
+        mm.Metadata.OutputNames = outputNames;
         return fusedActivationFunction switch
         {
diff --git a/tests/config.toml b/tests/config.toml
index 11fe4ef48b..56f1c0f1bb 100644
--- a/tests/config.toml
+++ b/tests/config.toml
@@ -40,6 +40,7 @@ finetune_weights_method = 'NoFineTuneWeights'
 input_mean = 0.5
 input_std = 0.5
 quant_scheme = ""
+quant_scheme_strict_mode = false
 
 [infer_report_opt]
 enabled = false
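For completeness, the matching import pass under the same assumptions: after hand-editing the exported `QuantScheme.json`, enabling the new flag makes nncase apply the scheme exactly as written (the default remains `false`, as in the `tests/config.toml` hunk above):

```python
import nncase

# Pass 2: re-import the (possibly hand-edited) scheme and quantize strictly.
compile_options = nncase.CompileOptions()
compile_options.target = "k230"  # illustrative target
compile_options.dump_ir = True

compiler = nncase.Compiler(compile_options)

ptq_options = nncase.PTQTensorOptions()
ptq_options.quant_scheme = "./QuantScheme.json"  # file produced by pass 1
ptq_options.quant_scheme_strict_mode = True      # follow the scheme exactly
ptq_options.export_quant_scheme = False
compiler.use_ptq(ptq_options)
```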