
GNNE-1714: Feature/add quant strict mode to config #1116

Merged 3 commits on Oct 31, 2023
4 changes: 4 additions & 0 deletions docs/MixQuant.md
@@ -16,11 +16,13 @@ compiler.use_ptq(ptq_options)

```python
ptq_options.quant_scheme = ""
ptq_options.quant_scheme_strict_mode = False
ptq_options.export_quant_scheme = False
ptq_options.export_weight_range_by_channel = False
```

* **quant_scheme: the path of the quantization scheme file to import**
* **quant_scheme_strict_mode: whether to strictly follow quant_scheme when quantizing**
* **export_quant_scheme: whether to export the quantization scheme file**
* **export_weight_range_by_channel: whether to export** weights quantization parameters in `bychannel` form; to preserve quantization accuracy, setting this to `True` is recommended
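
Putting the four options together, a minimal sketch of the export pass (assuming a `compiler` built with the nncase Python API as in the surrounding docs; calibration-data setup is omitted):

```python
import nncase

# First pass: compile with IR dumping on so the quantization scheme
# can be exported alongside the IR (see the options described above).
compile_options = nncase.CompileOptions()
compile_options.dump_ir = True
compiler = nncase.Compiler(compile_options)

# Export a per-channel scheme on this run; nothing is imported yet.
ptq_options = nncase.PTQTensorOptions()
ptq_options.quant_scheme = ""
ptq_options.quant_scheme_strict_mode = False
ptq_options.export_quant_scheme = True
ptq_options.export_weight_range_by_channel = True  # recommended for accuracy
compiler.use_ptq(ptq_options)
```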

@@ -36,6 +38,7 @@ compile_options.dump_ir = True

```python
ptq_options.quant_scheme = ""
ptq_options.quant_scheme_strict_mode = False
ptq_options.export_quant_scheme = True
ptq_options.export_weight_range_by_channel = True
```
@@ -108,6 +111,7 @@ ptq_options.export_weight_range_by_channel = True

```python
ptq_options.quant_scheme = "./QuantScheme.json" # path to your 'QuantScheme.json'
ptq_options.quant_scheme_strict_mode = False # whether to strictly follow quant_scheme during quantization
ptq_options.export_quant_scheme = False
ptq_options.export_weight_range_by_channel = False # value does not matter when importing a scheme
```
1 change: 1 addition & 0 deletions docs/USAGE_v2.md
@@ -228,6 +228,7 @@ The PTQTensorOptions class configures nncase PTQ options; its attributes are described below
| dump_quant_error | bool | No | Whether to generate the quantization error, False by default. Takes effect when `dump_ir=True`. |
| dump_quant_error_symmetric_for_signed | bool | No | Whether to generate the quantization error using symmetric ranges for signed values, True by default. Takes effect when `dump_ir=True`. |
| quant_scheme | string | No | Path of the quantization scheme file, "" by default. Takes effect when `dump_ir=True`. |
| quant_scheme_strict_mode | bool | No | Whether to strictly follow quant_scheme when quantizing, False by default. Takes effect when `quant_scheme` is not empty. |
| export_quant_scheme | bool | No | Whether to export the quantization scheme file, False by default. Takes effect when `dump_ir=True`. |
| export_weight_range_by_channel | bool | No | When exporting the quantization scheme file, whether to collect weight ranges per channel, False by default. Takes effect when `dump_ir=True`. |

1 change: 1 addition & 0 deletions docs/USAGE_v2_EN.md
@@ -226,6 +226,7 @@ PTQTensorOptions is used to configure PTQ options. The details of all attributes
| dump_quant_error | bool | N | Specify whether to dump the quantization error, False by default. This and the following options take effect when `dump_ir=True`. |
| dump_quant_error_symmetric_for_signed | bool | N | Specify whether to dump the quantization error using symmetric ranges for signed numbers, True by default. |
| quant_scheme | string | N | Specify the path of the quantization scheme file, "" by default. |
| quant_scheme_strict_mode | bool | N | Specify whether to strictly follow quant_scheme during quantization, False by default. Takes effect when `quant_scheme` is not empty. |
| export_quant_scheme | bool | N | Specify whether to export the quantization scheme, False by default. |
| export_weight_range_by_channel | bool | N | Specify whether to export weight ranges by channel, False by default. |
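
For illustration, a hedged sketch of wiring these options up from Python (attribute names follow the tables above; the input shape and the `RuntimeTensor.from_numpy` call are assumptions):

```python
import numpy as np
import nncase

ptq_options = nncase.PTQTensorOptions()
ptq_options.samples_count = 5

# Calibration samples; the shape is a placeholder for the model's input.
samples = [np.random.rand(1, 3, 224, 224).astype(np.float32)
           for _ in range(ptq_options.samples_count)]
ptq_options.cali_data = [nncase.RuntimeTensor.from_numpy(s) for s in samples]

# Import an existing scheme and apply it verbatim: strict mode only
# has an effect because quant_scheme is non-empty.
ptq_options.quant_scheme = "./QuantScheme.json"
ptq_options.quant_scheme_strict_mode = True
```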

1 change: 1 addition & 0 deletions examples/user_guide/k230_simulate-EN.ipynb
@@ -150,6 +150,7 @@
" # mix quantize options\n",
" # more details in docs/MixQuant.md\n",
" ptq_options.quant_scheme = \"\"\n",
" ptq_options.quant_scheme_strict_mode = False\n",
" ptq_options.export_quant_scheme = False\n",
" ptq_options.export_weight_range_by_channel = False\n",
" ############################################\n",
1 change: 1 addition & 0 deletions examples/user_guide/k230_simulate-ZH.ipynb
@@ -150,6 +150,7 @@
" # mix quantize options\n",
" # more details in docs/MixQuant.md\n",
" ptq_options.quant_scheme = \"\"\n",
" ptq_options.quant_scheme_strict_mode = False\n",
" ptq_options.export_quant_scheme = False\n",
" ptq_options.export_weight_range_by_channel = False\n",
" ############################################\n",
3 changes: 3 additions & 0 deletions python/nncase/__init__.py
@@ -66,6 +66,7 @@ class PTQTensorOptions:
input_mean: float
input_std: float
quant_scheme: str
quant_scheme_strict_mode: bool
samples_count: int
cali_data: List[RuntimeTensor]

@@ -83,6 +84,7 @@ def __init__(self) -> None:
self.input_mean: float = 0.5
self.input_std: float = 0.5
self.quant_scheme: str = ""
self.quant_scheme_strict_mode: bool = False
self.samples_count: int = 5
self.cali_data: List[RuntimeTensor] = []

@@ -244,6 +246,7 @@ def use_ptq(self, ptq_dataset_options: PTQTensorOptions) -> None:

self._quantize_options.use_mix_quant = ptq_dataset_options.use_mix_quant
self._quantize_options.quant_scheme = ptq_dataset_options.quant_scheme
self._quantize_options.quant_scheme_strict_mode = ptq_dataset_options.quant_scheme_strict_mode
self._quantize_options.export_quant_scheme = ptq_dataset_options.export_quant_scheme
self._quantize_options.export_weight_range_by_channel = ptq_dataset_options.export_weight_range_by_channel
self._quantize_options.dump_quant_error = ptq_dataset_options.dump_quant_error
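
For context, a rough sketch of where `use_ptq` sits in the compile flow (API names follow docs/USAGE_v2_EN.md; the target string and model path are placeholders):

```python
import nncase

compile_options = nncase.CompileOptions()
compile_options.target = "k230"  # placeholder target
compile_options.dump_ir = True
compiler = nncase.Compiler(compile_options)

with open("model.tflite", "rb") as f:
    compiler.import_tflite(f.read(), nncase.ImportOptions())

# use_ptq() copies every PTQTensorOptions field, including the new
# quant_scheme_strict_mode, onto the compiler's quantize options.
compiler.use_ptq(ptq_options)  # ptq_options configured as shown earlier
compiler.compile()
kmodel = compiler.gencode_tobytes()
```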
5 changes: 5 additions & 0 deletions python/nncase/native/ffi.cpp
@@ -185,6 +185,11 @@ PYBIND11_MODULE(_nncase, m) {
py::overload_cast<>(&quantize_options::quant_scheme),
py::overload_cast<std::string_view>(
&quantize_options::quant_scheme))
.def_property(
"quant_scheme_strict_mode",
py::overload_cast<>(&quantize_options::quant_scheme_strict_mode),
py::overload_cast<bool>(
&quantize_options::quant_scheme_strict_mode))
.def_property(
"export_quant_scheme",
py::overload_cast<>(&quantize_options::export_quant_scheme),
8 changes: 8 additions & 0 deletions src/Native/include/nncase/compiler.h
@@ -199,6 +199,8 @@ typedef struct {
void (*quantize_options_set_quant_scheme)(
clr_object_handle_t quantize_options, const char *quant_scheme,
size_t quant_scheme_length);
void (*quantize_options_set_quant_scheme_strict_mode)(
clr_object_handle_t quantize_options, bool quant_scheme_strict_mode);
void (*quantize_options_set_export_quant_scheme)(
clr_object_handle_t quantize_options, bool export_quant_scheme);
void (*quantize_options_set_export_weight_range_by_channel)(
@@ -401,6 +403,12 @@ class quantize_options : public clr_object_base {
obj_.get(), value.data(), value.length());
}

bool quant_scheme_strict_mode() { return false; }
void quant_scheme_strict_mode(bool value) {
nncase_clr_api()->quantize_options_set_quant_scheme_strict_mode(
obj_.get(), value);
}

bool export_quant_scheme() { return false; }
void export_quant_scheme(bool value) {
nncase_clr_api()->quantize_options_set_export_quant_scheme(obj_.get(),
18 changes: 18 additions & 0 deletions src/Nncase.Compiler/Interop/CApi.cs
@@ -84,6 +84,7 @@ public unsafe struct CApiMT
public delegate* unmanaged<IntPtr, FineTuneWeightsMethod, void> QuantOptionsSetFineTuneWeightsMethodPtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetUseMixQuantPtr;
public delegate* unmanaged<IntPtr, byte*, nuint, void> QuantOptionsSetQuantSchemePtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetQuantSchemeStrictModePtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetExportQuantSchemePtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetExportWeightRangeByChannelPtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetDumpQuantErrorPtr;
@@ -154,6 +155,7 @@ public static void Initialize(CApiMT* mt)
mt->QuantOptionsSetFineTuneWeightsMethodPtr = &QuantizeOptionsSetFineTuneWeightsMethod;
mt->QuantOptionsSetUseMixQuantPtr = &QuantOptionsSetUseMixQuant;
mt->QuantOptionsSetQuantSchemePtr = &QuantizeOptionsSetQuantScheme;
mt->QuantOptionsSetQuantSchemeStrictModePtr = &QuantizeOptionsSetQuantSchemeStrictMode;
mt->QuantOptionsSetExportQuantSchemePtr = &QuantizeOptionsSetExportQuantScheme;
mt->QuantOptionsSetExportWeightRangeByChannelPtr = &QuantizeOptionsSetExportWeightRangeByChannel;
mt->QuantOptionsSetDumpQuantErrorPtr = &QuantizeOptionsSetDumpQuantError;
@@ -603,6 +605,22 @@ private static void QuantizeOptionsSetQuantScheme(IntPtr quantizeOptionsHandle,
Get<QuantizeOptions>(quantizeOptionsHandle).QuantScheme = ToString(quantSchemePtr, quantSchemeLength);
}

[UnmanagedCallersOnly]
private static void QuantizeOptionsSetQuantSchemeStrictMode(IntPtr quantizeOptionsHandle, byte quantSchemeStrictMode)
{
switch (quantSchemeStrictMode)
{
case 0:
Get<QuantizeOptions>(quantizeOptionsHandle).QuantSchemeStrictMode = false;
break;
case 1:
Get<QuantizeOptions>(quantizeOptionsHandle).QuantSchemeStrictMode = true;
break;
default:
throw new ArgumentException("Invalid QuantSchemeStrictMode Flag");
}
}

[UnmanagedCallersOnly]
private static void QuantizeOptionsSetExportQuantScheme(IntPtr quantizeOptionsHandle, byte exportQuantScheme)
{
5 changes: 4 additions & 1 deletion src/Nncase.Importer/TFLite/MatMul.cs
@@ -69,9 +69,12 @@ private Expr VisitMatMul(in tflite.Operator op, bool isFullyConnected = true)
List<string> outputNames = new() { GetOutputTensor(op, 0).Name + "_matmul" };
matmul.Metadata.OutputNames = outputNames;
outputNames.Clear();
outputNames.Add(GetOutputTensor(op, 0).Name);
outputNames.Add(GetOutputTensor(op, 0).Name + "_bias");
bias.Metadata.OutputNames = outputNames;
var mm = matmul + bias;
outputNames.Clear();
outputNames.Add(GetOutputTensor(op, 0).Name);
mm.Metadata.OutputNames = outputNames;

return fusedActivationFunction switch
{
1 change: 1 addition & 0 deletions tests/config.toml
@@ -40,6 +40,7 @@ finetune_weights_method = 'NoFineTuneWeights'
input_mean = 0.5
input_std = 0.5
quant_scheme = ""
quant_scheme_strict_mode = false

[infer_report_opt]
enabled = false