Rename "pipeline" to "cleaners"

keithito · keithito · commit c4e14ad3b9b7 · 2017-09-04T21:54:23.000-07:00
No need to introduce new terminology.
diff --git a/TRAINING_DATA.md b/TRAINING_DATA.md
@@ -50,21 +50,21 @@ following the example of the other preprocessors in that file.
 ### Non-English Data
 
 If your training data is in a language other than English, you will probably want to change the
-text cleaning pipeline by setting the `cleaners` hyperparameter.
+text cleaners by setting the `cleaners` hyperparameter.
 
   * If your text is in a Latin script or can be transliterated to ASCII using the
     [Unidecode](https://pypi.python.org/pypi/Unidecode) library, you can use the transliteration
-    pipeline by setting the hyperparameter `cleaners=transliteration_pipeline`.
+    cleaners by setting the hyperparameter `cleaners=transliteration_cleaners`.
 
   * If you don't want to transliterate, you can define a custom character set.
     This allows you to train directly on the character set used in your data.
 
     To do so, edit [symbols.py](text/symbols.py) and change the `_characters` variable to be a
-    string containing the UTF-8 characters in your data. Then set the hyperparameter `cleaners=basic_pipeline`.
+    string containing the UTF-8 characters in your data. Then set the hyperparameter `cleaners=basic_cleaners`.
 
-  * If you're not sure which option to use, you can evaluate the transliteration pipeline like so:
+  * If you're not sure which option to use, you can evaluate the transliteration cleaners like this:
 
     ```python
     from text import cleaners
-    cleaners.transliteration_pipeline('Здравствуйте')   # Replace with the text you want to try
+    cleaners.transliteration_cleaners('Здравствуйте')   # Replace with the text you want to try
     ```
diff --git a/hparams.py b/hparams.py
@@ -4,8 +4,8 @@
 # Default hyperparameters:
 hparams = tf.contrib.training.HParams(
   # Comma-separated list of cleaners to run on text prior to training and eval. For non-English
-  # text, you may want to use "basic_pipeline" or "transliteration_pipeline" See TRAINING_DATA.md.
-  cleaners='english_pipeline',
+  # text, you may want to use "basic_cleaners" or "transliteration_cleaners". See TRAINING_DATA.md.
+  cleaners='english_cleaners',
 
   # Audio:
   num_mels=80,
diff --git a/tests/text_test.py b/tests/text_test.py
@@ -14,7 +14,7 @@ def test_text_to_sequence():
   assert text_to_sequence('"A"_B', []) == [2, 3, 1]
   assert text_to_sequence('A {AW1 S} B', []) == [2, 64, 83, 132, 64, 3, 1]
   assert text_to_sequence('Hi', ['lowercase']) == [35, 36, 1]
-  assert text_to_sequence('A {AW1 S}  B', ['english_pipeline']) == [28, 64, 83, 132, 64, 29, 1]
+  assert text_to_sequence('A {AW1 S}  B', ['english_cleaners']) == [28, 64, 83, 132, 64, 29, 1]
 
 
 def test_sequence_to_text():
@@ -52,9 +52,9 @@ def test_expand_numbers():
   assert cleaners.expand_numbers('$3.50 for gas.') == 'three dollars, fifty cents for gas.'
 
 
-def test_pipelines():
+def test_cleaner_pipelines():
   text = 'Mr. Müller ate  2 Apples'
-  assert cleaners.english_pipeline(text) == 'mister muller ate two apples'
-  assert cleaners.transliteration_pipeline(text) == 'mr. muller ate 2 apples'
-  assert cleaners.basic_pipeline(text) == 'mr. müller ate 2 apples'
+  assert cleaners.english_cleaners(text) == 'mister muller ate two apples'
+  assert cleaners.transliteration_cleaners(text) == 'mr. muller ate 2 apples'
+  assert cleaners.basic_cleaners(text) == 'mr. müller ate 2 apples'
 
diff --git a/text/cleaners.py b/text/cleaners.py
@@ -3,10 +3,10 @@
 
 Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
 hyperparameter. Some cleaners are English-specific. You'll typically want to use:
-  1. "english_pipeline" for English text
-  2. "transliteration_pipeline" for non-English text that can be transliterated to ASCII using
+  1. "english_cleaners" for English text
+  2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
      the Unidecode library (https://pypi.python.org/pypi/Unidecode)
-  3. "basic_pipeline" if you do not want to transliterate (in this case, you should also update
+  3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
      the symbols in symbols.py to match your data).
 '''
 
@@ -63,22 +63,22 @@ def convert_to_ascii(text):
   return unidecode(text)
 
 
-def basic_pipeline(text):
+def basic_cleaners(text):
   '''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
   text = lowercase(text)
   text = collapse_whitespace(text)
   return text
 
 
-def transliteration_pipeline(text):
+def transliteration_cleaners(text):
   '''Pipeline for non-English text that transliterates to ASCII.'''
   text = convert_to_ascii(text)
   text = lowercase(text)
   text = collapse_whitespace(text)
   return text
 
 
-def english_pipeline(text):
+def english_cleaners(text):
   '''Pipeline for English text, including number and abbreviation expansion.'''
   text = convert_to_ascii(text)
   text = lowercase(text)