refactor: modify template.yaml

yuanchaoa · yuanchaoa · commit a1502b20d519 · 2025-04-11T20:21:21.000+08:00
diff --git a/agent/src/flow_generator/perf/mod.rs b/agent/src/flow_generator/perf/mod.rs
@@ -232,6 +232,10 @@ impl FlowLog {
                 self.last_fail = None;
                 self.is_skip = false;
             }
+
+            if !self.is_success {
+                self.l7_protocol_log_parser = None
+            }
         }
     }
 
diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md
@@ -588,7 +588,8 @@ global:
 
 **详细描述**:
 
-操作系统尽可能使用指定 ID 的 CPU 核运行 deepflow-agent 进程。无效的 ID 将被忽略。举例：
+操作系统尽可能使用指定 ID 的 CPU 核运行 deepflow-agent 进程。无效的 ID 将被忽略。当前仅对
+dispatcher 线程生效。举例：
 ```yaml
 global:
   tunning:
@@ -6521,7 +6522,7 @@ Upgrade from old version: `static_config.l7-protocol-inference-max-fail-count`
 processors:
   request_log:
     application_protocol_inference:
-      inference_max_retries: 5
+      inference_max_retries: 128
 ```
 
 **模式**:
@@ -6532,10 +6533,14 @@ processors:
 
 **详细描述**:
 
-deepflow-agent 会周期性标记每一个 `<vpc, ip, protocol, port>` 四元组承载的应用协议类型，以加速
-后续数据的应用协议采集过程。如果一个时间周期内，连续多次尝试解析 Packet 数据、Socket 数据无法推断
-出该四元组承载的应用协议，agent 会将该四元组标记为 unknown 类型，并在本周期内暂停对后续数据的应用
-协议解析，以避免更多的无效运算。该参数控制每个时间周期内的应用协议解析重试次数。
+flow 应用协议的初始化判定会根据 IP、Port 等信息从哈希表中获取。若未能判定为具体应用协议时通过轮询
+所有应用协议方式检查具体的应用协议，识别成功后通过哈希表记录该应用协议；若未识别成功累计记录失败次数，
+若失败次数大于 inference_max_retries 并且记录未超时（inference_result_ttl），该 flow 会打上 skip 标记
+并记录当前时间到 last_fail，后续该流在未超时前不会再进行应用协议解析。若已经判定为具体的应用协议时，
+应用解析前会检查 flow 若有 skip 标记并且通过 last_fail 和 inference_result_ttl 计算已经超时会去掉 skip
+标记。后续第一次应用协议解析成功后使用哈希表记录应用协议，若从未解析成功累计记录失败次数，若失败次数大于
+inference_max_retries 并且记录未超时（inference_result_ttl），该 flow 会打上 skip 标记并记录当前时间到
+last_fail，后续该流在未超时前不会再进行应用协议解析；若该 flow 已经解析成功过解析失败不记录失败次数。
 
 #### 推断结果 TTL {#processors.request_log.application_protocol_inference.inference_result_ttl}
 
diff --git a/server/agent_config/README.md b/server/agent_config/README.md
@@ -598,7 +598,8 @@ global:
 **Description**:
 
 CPU affinity is the tendency of a process to run on a given CPU for as long as possible
-without being migrated to other processors. Invalid ID will be ignored. Example:
+without being migrated to other processors. Invalid ID will be ignored. Currently only
+works for dispatcher threads. Example:
 ```yaml
 global:
   tunning:
@@ -6681,7 +6682,7 @@ Upgrade from old version: `static_config.l7-protocol-inference-max-fail-count`
 processors:
   request_log:
     application_protocol_inference:
-      inference_max_retries: 5
+      inference_max_retries: 128
 ```
 
 **Schema**:
@@ -6692,12 +6693,22 @@ processors:
 
 **Description**:
 
-deepflow-agent will mark the long live stream and application protocol for each
-<vpc, ip, protocol, port> tuple, when the traffic corresponding to a tuple fails
-to be identified for many times (for multiple packets, Socket Data, Function Data),
-the tuple will be marked as an unknown type to avoid deepflow-agent continuing to
-try (incurring significant computational overhead) until the duration exceeds
-l7-protocol-inference-ttl.
+The initial application protocol of a flow is looked up in a hash table based on information
+such as IP and port. If no specific application protocol is found, the agent tries every
+application protocol parser in turn to identify the protocol, and records the result in the
+hash table once identification succeeds. If identification fails, the failure count is
+incremented. If the number of failures is greater than inference_max_retries and the record has
+not timed out (inference_result_ttl), the flow is marked as skip and the current time is
+recorded in last_fail; the flow will not be parsed for application protocols again until the
+timeout expires. If a specific application protocol has already been determined, the flow is
+checked before parsing: if it carries the skip mark and, according to last_fail and
+inference_result_ttl, the record has timed out, the skip mark is removed. After the first
+successful parse, the application protocol is recorded in the hash table. If parsing has never
+succeeded, the failure count is incremented, and if the number of failures is greater than
+inference_max_retries and the record has not timed out (inference_result_ttl), the flow is
+marked as skip and the current time is recorded in last_fail; the flow will not be parsed for
+application protocols again until the timeout expires. If the flow has already been parsed
+successfully, subsequent parse failures are not counted.
 
 #### Inference Result TTL {#processors.request_log.application_protocol_inference.inference_result_ttl}
 
diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml
@@ -394,14 +394,16 @@ global:
     # description:
     #   en: |-
     #     CPU affinity is the tendency of a process to run on a given CPU for as long as possible
-    #     without being migrated to other processors. Invalid ID will be ignored. Example:
+    #     without being migrated to other processors. Invalid ID will be ignored. Currently only
+    #     works for dispatcher threads. Example:
     #     ```yaml
     #     global:
     #       tunning:
     #         cpu_affinity: [1, 3, 5, 7, 9]
     #     ```
     #   ch: |-
-    #     操作系统尽可能使用指定 ID 的 CPU 核运行 deepflow-agent 进程。无效的 ID 将被忽略。举例：
+    #     操作系统尽可能使用指定 ID 的 CPU 核运行 deepflow-agent 进程。无效的 ID 将被忽略。当前仅对
+    #     dispatcher 线程生效。举例：
     #     ```yaml
     #     global:
     #       tunning:
@@ -4501,19 +4503,33 @@ processors:
       # ee_feature: false
       # description:
       #   en: |-
-      #     deepflow-agent will mark the long live stream and application protocol for each
-      #     <vpc, ip, protocol, port> tuple, when the traffic corresponding to a tuple fails
-      #     to be identified for many times (for multiple packets, Socket Data, Function Data),
-      #     the tuple will be marked as an unknown type to avoid deepflow-agent continuing to
-      #     try (incurring significant computational overhead) until the duration exceeds
-      #     l7-protocol-inference-ttl.
+      #     The initial application protocol of a flow is looked up in a hash table based on information
+      #     such as IP and port. If no specific application protocol is found, the agent tries every
+      #     application protocol parser in turn to identify the protocol, and records the result in the
+      #     hash table once identification succeeds. If identification fails, the failure count is
+      #     incremented. If the number of failures is greater than inference_max_retries and the record has
+      #     not timed out (inference_result_ttl), the flow is marked as skip and the current time is
+      #     recorded in last_fail; the flow will not be parsed for application protocols again until the
+      #     timeout expires. If a specific application protocol has already been determined, the flow is
+      #     checked before parsing: if it carries the skip mark and, according to last_fail and
+      #     inference_result_ttl, the record has timed out, the skip mark is removed. After the first
+      #     successful parse, the application protocol is recorded in the hash table. If parsing has never
+      #     succeeded, the failure count is incremented, and if the number of failures is greater than
+      #     inference_max_retries and the record has not timed out (inference_result_ttl), the flow is
+      #     marked as skip and the current time is recorded in last_fail; the flow will not be parsed for
+      #     application protocols again until the timeout expires. If the flow has already been parsed
+      #     successfully, subsequent parse failures are not counted.
       #   ch: |-
-      #     deepflow-agent 会周期性标记每一个 `<vpc, ip, protocol, port>` 四元组承载的应用协议类型，以加速
-      #     后续数据的应用协议采集过程。如果一个时间周期内，连续多次尝试解析 Packet 数据、Socket 数据无法推断
-      #     出该四元组承载的应用协议，agent 会将该四元组标记为 unknown 类型，并在本周期内暂停对后续数据的应用
-      #     协议解析，以避免更多的无效运算。该参数控制每个时间周期内的应用协议解析重试次数。
+      #     flow 应用协议的初始化判定会根据 IP、Port 等信息从哈希表中获取。若未能判定为具体应用协议时通过轮询
+      #     所有应用协议方式检查具体的应用协议，识别成功后通过哈希表记录该应用协议；若未识别成功累计记录失败次数，
+      #     若失败次数大于 inference_max_retries 并且记录未超时（inference_result_ttl），该 flow 会打上 skip 标记
+      #     并记录当前时间到 last_fail，后续该流在未超时前不会再进行应用协议解析。若已经判定为具体的应用协议时，
+      #     应用解析前会检查 flow 若有 skip 标记并且通过 last_fail 和 inference_result_ttl 计算已经超时会去掉 skip
+      #     标记。后续第一次应用协议解析成功后使用哈希表记录应用协议，若从未解析成功累计记录失败次数，若失败次数大于
+      #     inference_max_retries 并且记录未超时（inference_result_ttl），该 flow 会打上 skip 标记并记录当前时间到
+      #     last_fail，后续该流在未超时前不会再进行应用协议解析；若该 flow 已经解析成功过解析失败不记录失败次数。
       # upgrade_from: static_config.l7-protocol-inference-max-fail-count
-      inference_max_retries: 5
+      inference_max_retries: 128
       # type: duration
       # name:
       #   en: Inference Result TTL

Original file line number	Diff line number	Diff line change
`@@ -232,6 +232,10 @@ impl FlowLog {`
`232`	`232`	`self.last_fail = None;`
`233`	`233`	`self.is_skip = false;`
`234`	`234`	`}`
	`235`	`+`
	`236`	`+ if !self.is_success {`
	`237`	`+ self.l7_protocol_log_parser = None`
	`238`	`+ }`
`235`	`239`	`}`
`236`	`240`	`}`
`237`	`241`