Skip to content

Commit 93e4534

Browse files
committed
more noqas
1 parent 192e905 commit 93e4534

File tree

13 files changed: +56 additions, −34 deletions

airbyte_cdk/cli/source_declarative_manifest/_run.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def __init__(
7272
super().__init__(
7373
catalog=catalog,
7474
config=config,
75-
state=state,
75+
state=state, # type: ignore [arg-type]
7676
path_to_yaml="manifest.yaml",
7777
)
7878

airbyte_cdk/connector_builder/message_grouper.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ def _get_message_groups(
274274
if message.trace.type == TraceType.ERROR: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has trace.type
275275
yield message.trace
276276
elif message.type == MessageType.RECORD:
277-
current_page_records.append(message.record.data) # type: ignore[union-attr] # AirbyteMessage with MessageType.RECORD has record.data
277+
current_page_records.append(message.record.data) # type: ignore[arg-type, union-attr] # AirbyteMessage with MessageType.RECORD has record.data
278278
records_count += 1
279279
schema_inferrer.accumulate(message.record)
280280
datetime_format_inferrer.accumulate(message.record)
@@ -355,7 +355,7 @@ def _close_page(
355355
StreamReadPages(
356356
request=current_page_request,
357357
response=current_page_response,
358-
records=deepcopy(current_page_records),
358+
records=deepcopy(current_page_records), # type: ignore [arg-type]
359359
) # type: ignore
360360
)
361361
current_page_records.clear()

airbyte_cdk/destinations/vector_db_based/writer.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,22 @@ def write(
8383
yield message
8484
elif message.type == Type.RECORD:
8585
record_chunks, record_id_to_delete = self.processor.process(message.record)
86-
self.chunks[(message.record.namespace, message.record.stream)].extend(record_chunks)
86+
self.chunks[
87+
( # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]"
88+
message.record.namespace, # type: ignore [union-attr] # record not None
89+
message.record.stream, # type: ignore [union-attr] # record not None
90+
)
91+
].extend(record_chunks)
8792
if record_id_to_delete is not None:
8893
if message.record is None:
8994
raise ValueError("Record messages cannot have null `record` property.")
9095

91-
self.ids_to_delete[(message.record.namespace, message.record.stream)].append(
92-
record_id_to_delete
93-
)
96+
self.ids_to_delete[
97+
( # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]"
98+
message.record.namespace, # type: ignore [union-attr] # record not None
99+
message.record.stream, # type: ignore [union-attr] # record not None
100+
)
101+
].append(record_id_to_delete)
94102
self.number_of_chunks += len(record_chunks)
95103
if self.number_of_chunks >= self.batch_size:
96104
self._process_batch()

airbyte_cdk/sources/declarative/datetime/min_max_datetime.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
4141
self.datetime = InterpolatedString.create(self.datetime, parameters=parameters or {})
4242
self._parser = DatetimeParser()
4343
self.min_datetime = (
44-
InterpolatedString.create(self.min_datetime, parameters=parameters)
44+
InterpolatedString.create(self.min_datetime, parameters=parameters) # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str"
4545
if self.min_datetime
4646
else None
4747
) # type: ignore
4848
self.max_datetime = (
49-
InterpolatedString.create(self.max_datetime, parameters=parameters)
49+
InterpolatedString.create(self.max_datetime, parameters=parameters) # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str"
5050
if self.max_datetime
5151
else None
5252
) # type: ignore
@@ -66,7 +66,13 @@ def get_datetime(
6666
datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z"
6767

6868
time = self._parser.parse(
69-
str(self.datetime.eval(config, **additional_parameters)), datetime_format
69+
str(
70+
self.datetime.eval( # type: ignore[union-attr] # str has no attribute "eval"
71+
config,
72+
**additional_parameters,
73+
)
74+
),
75+
datetime_format,
7076
) # type: ignore # datetime is always cast to an interpolated string
7177

7278
if self.min_datetime:

airbyte_cdk/sources/declarative/interpolation/jinja.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def _literal_eval(self, result: Optional[str], valid_types: Optional[Tuple[Type[
120120
def _eval(self, s: Optional[str], context: Mapping[str, Any]) -> Optional[str]:
121121
try:
122122
undeclared = self._find_undeclared_variables(s)
123-
undeclared_not_in_context = {var for var in undeclared if var not in context}
123+
undeclared_not_in_context = {var for var in undeclared if var not in context} # type: ignore [attr-defined] # `Template` class not iterable
124124
if undeclared_not_in_context:
125125
raise ValueError(
126126
f"Jinja macro has undeclared variables: {undeclared_not_in_context}. Context: {context}"
@@ -137,11 +137,11 @@ def _find_undeclared_variables(self, s: Optional[str]) -> Template:
137137
Find undeclared variables and cache them
138138
"""
139139
ast = self._environment.parse(s) # type: ignore # parse is able to handle None
140-
return meta.find_undeclared_variables(ast)
140+
return meta.find_undeclared_variables(ast) # type: ignore [return-value] # Expected `Template` but got `set[str]`
141141

142142
@cache
143143
def _compile(self, s: Optional[str]) -> Template:
144144
"""
145145
We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
146146
"""
147-
return self._environment.from_string(s)
147+
return self._environment.from_string(s) # type: ignore [arg-type] # Expected `str | Template` but passed `str | None`

airbyte_cdk/sources/embedded/base_integration.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,9 @@ def _load_data(
5252
for message in self.source.read(self.config, configured_catalog, state):
5353
if message.type == Type.RECORD:
5454
output = self._handle_record(
55-
message.record, get_defined_id(stream, message.record.data)
56-
) # type: ignore[union-attr] # record has `data`
55+
message.record,
56+
get_defined_id(stream, message.record.data), # type: ignore[union-attr, arg-type]
57+
)
5758
if output:
5859
yield output
5960
elif message.type is Type.STATE and message.state:

airbyte_cdk/sources/file_based/file_types/avro_parser.py

+13-10
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,20 @@ async def infer_schema(
6464
raise ValueError(f"Expected ParquetFormat, got {avro_format}")
6565

6666
with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
67-
avro_reader = fastavro.reader(fp)
67+
avro_reader = fastavro.reader(fp) # type: ignore [arg-type]
6868
avro_schema = avro_reader.writer_schema
69-
if not avro_schema["type"] == "record":
70-
unsupported_type = avro_schema["type"]
69+
if not avro_schema["type"] == "record": # type: ignore [index, call-overload]
70+
unsupported_type = avro_schema["type"] # type: ignore [index, call-overload]
7171
raise ValueError(
7272
f"Only record based avro files are supported. Found {unsupported_type}"
7373
)
7474
json_schema = {
75-
field["name"]: AvroParser._convert_avro_type_to_json(
76-
avro_format, field["name"], field["type"]
75+
field["name"]: AvroParser._convert_avro_type_to_json( # type: ignore [index]
76+
avro_format,
77+
field["name"], # type: ignore [index]
78+
field["type"], # type: ignore [index]
7779
)
78-
for field in avro_schema["fields"]
80+
for field in avro_schema["fields"] # type: ignore [index, call-overload]
7981
}
8082
return json_schema
8183

@@ -180,18 +182,19 @@ def parse_records(
180182
line_no = 0
181183
try:
182184
with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
183-
avro_reader = fastavro.reader(fp)
185+
avro_reader = fastavro.reader(fp) # type: ignore [arg-type]
184186
schema = avro_reader.writer_schema
185187
schema_field_name_to_type = {
186-
field["name"]: cast(dict, field["type"]) for field in schema["fields"]
188+
field["name"]: cast(dict[str, Any], field["type"]) # type: ignore [index]
189+
for field in schema["fields"] # type: ignore [index, call-overload] # If schema is not dict, it is not subscriptable by strings
187190
}
188191
for record in avro_reader:
189192
line_no += 1
190193
yield {
191194
record_field: self._to_output_value(
192195
avro_format,
193-
schema_field_name_to_type[record_field],
194-
record[record_field],
196+
schema_field_name_to_type[record_field], # type: ignore [index] # Any not subscriptable
197+
record[record_field], # type: ignore [index] # Any not subscriptable
195198
)
196199
for record_field, record_value in schema_field_name_to_type.items()
197200
}

airbyte_cdk/sources/file_based/file_types/excel_parser.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,10 @@ async def infer_schema(
7070
for column, df_type in df.dtypes.items():
7171
# Choose the broadest data type if the column's data type differs in dataframes
7272
prev_frame_column_type = fields.get(column)
73-
fields[column] = self.dtype_to_json_type(prev_frame_column_type, df_type)
73+
fields[column] = self.dtype_to_json_type( # type: ignore [index]
74+
prev_frame_column_type,
75+
df_type,
76+
)
7477

7578
schema = {
7679
field: (
@@ -187,4 +190,4 @@ def open_and_parse_file(fp: Union[IOBase, str, Path]) -> pd.DataFrame:
187190
Returns:
188191
pd.DataFrame: Parsed data from the Excel file.
189192
"""
190-
return pd.ExcelFile(fp, engine="calamine").parse()
193+
return pd.ExcelFile(fp, engine="calamine").parse() # type: ignore [arg-type]

airbyte_cdk/sources/http_logger.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def format_http_message(
1414
title: str,
1515
description: str,
1616
stream_name: Optional[str],
17-
is_auxiliary: bool = None,
17+
is_auxiliary: bool | None = None,
1818
) -> LogMessage:
1919
request = response.request
2020
log_message = {
@@ -42,10 +42,10 @@ def format_http_message(
4242
"url": {"full": request.url},
4343
}
4444
if is_auxiliary is not None:
45-
log_message["http"]["is_auxiliary"] = is_auxiliary
45+
log_message["http"]["is_auxiliary"] = is_auxiliary # type: ignore [index]
4646
if stream_name:
4747
log_message["airbyte_cdk"] = {"stream": {"name": stream_name}}
48-
return log_message
48+
return log_message # type: ignore [return-value] # got "dict[str, object]", expected "dict[str, JsonType]"
4949

5050

5151
def _normalize_body_string(body_str: Optional[Union[str, bytes]]) -> Optional[str]:

airbyte_cdk/sources/streams/concurrent/adapters.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ def read(self) -> Iterable[Record]:
300300
yield Record(
301301
data=data_to_return,
302302
stream_name=self.stream_name(),
303-
associated_slice=self._slice,
303+
associated_slice=self._slice, # type: ignore [arg-type]
304304
)
305305
else:
306306
self._message_repository.emit_message(record_data)

airbyte_cdk/sources/streams/core.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ def read( # type: ignore # ignoring typing for ConnectorStateManager because o
216216
# Some connectors have streams that implement get_updated_state(), but do not define a cursor_field. This
217217
# should be fixed on the stream implementation, but we should also protect against this in the CDK as well
218218
stream_state_tracker = self.get_updated_state(
219-
stream_state_tracker, record_data
219+
stream_state_tracker,
220+
record_data,
220221
)
221222
self._observe_state(checkpoint_reader, stream_state_tracker)
222223
record_counter += 1

airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def __init__(
164164
self._client_id = (
165165
client_id
166166
if client_id is not None
167-
else dpath.get(connector_config, ("credentials", "client_id"))
167+
else dpath.get(connector_config, ("credentials", "client_id")), # type: ignore [arg-type]
168168
)
169169
self._client_secret = (
170170
client_secret

airbyte_cdk/sources/utils/transform.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def __get_normalizer(
145145
self,
146146
schema_key: str,
147147
original_validator: Callable, # type: ignore[type-arg]
148-
) -> Generator[Any, Any, None]:
148+
) -> Callable[[Any, Any, Any, dict[str, Any]], Generator[Any, Any, None]]:
149149
"""
150150
Traverse through object fields using native jsonschema validator and apply normalization function.
151151
:param schema_key related json schema key that currently being validated/normalized.

Comments (0)