-
Notifications
You must be signed in to change notification settings - Fork 208
/
Copy pathregex_str_replace_transformer.py
52 lines (35 loc) · 1.45 KB
/
regex_str_replace_transformer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Copyright Contributors to the Amundsen project.
# SPDX-License-Identifier: Apache-2.0
import logging
from typing import Any
from pyhocon import ConfigTree
from databuilder.transformer.base_transformer import Transformer
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
# Config keys
# Key of the list of (search, replacement) pairs; read with ConfigTree.get_list.
REGEX_REPLACE_TUPLE_LIST = 'regex_replace_tuple_list'
# Key of the name of the record attribute (or dict key) whose value is
# rewritten; read with ConfigTree.get_string.
ATTRIBUTE_NAME = 'attribute_name'
class RegexStrReplaceTransformer(Transformer):
    """
    Generic string replacement transformer.

    The user passes a list of (search, replacement) pairs under the
    ``regex_replace_tuple_list`` config key and the name of the field to
    rewrite under ``attribute_name``. The pairs are applied in list order,
    each one operating on the result of the previous one.

    NOTE: despite the class and config key names, every pair is applied with
    ``str.replace``, so the search text is matched literally and is NOT
    interpreted as a regular expression.

    Records whose target value is missing, None, or not a string are returned
    unchanged.
    """

    def init(self, conf: ConfigTree) -> None:
        """
        Read the replacement pairs and the target attribute name from config.

        :param conf: config scoped to this transformer; must provide
            ``regex_replace_tuple_list`` and ``attribute_name``.
        """
        self._regex_replace_tuples = conf.get_list(REGEX_REPLACE_TUPLE_LIST)
        self._attribute_name = conf.get_string(ATTRIBUTE_NAME)

    def transform(self, record: Any) -> Any:
        """
        Apply every configured replacement to the target field of ``record``.

        :param record: either a dict (the field is looked up by key) or an
            arbitrary object (the field is looked up by attribute name).
        :return: the same record object, modified in place when the field
            holds a string; otherwise the record untouched.
        """
        is_dict_record = isinstance(record, dict)
        if is_dict_record:
            val = record.get(self._attribute_name)
        else:
            # Default to None so an object lacking the attribute is skipped,
            # exactly like a dict lacking the key, instead of raising
            # AttributeError.
            val = getattr(record, self._attribute_name, None)

        # isinstance() is False for None, so this also covers missing values.
        if not isinstance(val, str):
            return record

        for regex_replace_tuple in self._regex_replace_tuples:
            # Literal (non-regex) replacement of every occurrence.
            val = val.replace(regex_replace_tuple[0], regex_replace_tuple[1])

        if is_dict_record:
            record[self._attribute_name] = val
        else:
            setattr(record, self._attribute_name, val)
        return record

    def get_scope(self) -> str:
        """Return the config scope under which this transformer's settings live."""
        return 'transformer.regex_str_replace'