
Commit 739fbff

[AIRFLOW-3044] Dataflow operators accept templated job_name param (apache#3887)
* Default value of new job_name param is templated task_id, to match the existing behavior as much as possible.
* Change expected value in test_mlengine_operator_utils.py to match default for new job_name param.
1 parent e60b208 commit 739fbff
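
Since ``job_name`` joins the operators' ``template_fields``, it can carry Jinja expressions like the other templated parameters. A minimal usage sketch of what this enables (the DAG id, pipeline path, and project below are hypothetical placeholders, not part of this commit):

from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.dataflow_operator import DataFlowPythonOperator

# Hypothetical DAG used only to illustrate the templated job_name.
dag = DAG('example_dataflow_job_name',
          start_date=datetime(2018, 1, 1),
          schedule_interval='@daily')

start_python_job = DataFlowPythonOperator(
    task_id='start_python_job',
    py_file='/path/to/pipelines/wordcount.py',
    # Rendered per run, e.g. 'wordcount-20180901'; if omitted, job_name falls
    # back to the new default '{{task.task_id}}', i.e. the task's own id.
    job_name='wordcount-{{ ds_nodash }}',
    dataflow_default_options={'project': 'my-gcp-project'},
    dag=dag)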

5 files changed, +138 −60 lines changed


airflow/contrib/hooks/gcp_dataflow_hook.py

+18 −24
@@ -190,11 +190,9 @@ def get_conn(self):
         return build(
             'dataflow', 'v1b3', http=http_authorized, cache_discovery=False)

-    def _start_dataflow(self, task_id, variables, name,
-                        command_prefix, label_formatter):
+    def _start_dataflow(self, variables, name, command_prefix, label_formatter):
         variables = self._set_variables(variables)
-        cmd = command_prefix + self._build_cmd(task_id, variables,
-                                               label_formatter)
+        cmd = command_prefix + self._build_cmd(variables, label_formatter)
         job_id = _Dataflow(cmd).wait_for_done()
         _DataflowJob(self.get_conn(), variables['project'], name,
                      variables['region'],
@@ -208,58 +206,54 @@ def _set_variables(variables):
             variables['region'] = DEFAULT_DATAFLOW_LOCATION
         return variables

-    def start_java_dataflow(self, task_id, variables, dataflow, job_class=None,
+    def start_java_dataflow(self, job_name, variables, dataflow, job_class=None,
                             append_job_name=True):
-        name = self._build_dataflow_job_name(task_id, append_job_name)
+        name = self._build_dataflow_job_name(job_name, append_job_name)
         variables['jobName'] = name

         def label_formatter(labels_dict):
             return ['--labels={}'.format(
                 json.dumps(labels_dict).replace(' ', ''))]
         command_prefix = (["java", "-cp", dataflow, job_class] if job_class
                           else ["java", "-jar", dataflow])
-        self._start_dataflow(task_id, variables, name,
-                             command_prefix, label_formatter)
+        self._start_dataflow(variables, name, command_prefix, label_formatter)

-    def start_template_dataflow(self, task_id, variables, parameters, dataflow_template,
+    def start_template_dataflow(self, job_name, variables, parameters, dataflow_template,
                                 append_job_name=True):
-        name = self._build_dataflow_job_name(task_id, append_job_name)
+        name = self._build_dataflow_job_name(job_name, append_job_name)
         self._start_template_dataflow(
             name, variables, parameters, dataflow_template)

-    def start_python_dataflow(self, task_id, variables, dataflow, py_options,
+    def start_python_dataflow(self, job_name, variables, dataflow, py_options,
                               append_job_name=True):
-        name = self._build_dataflow_job_name(task_id, append_job_name)
+        name = self._build_dataflow_job_name(job_name, append_job_name)
         variables['job_name'] = name

         def label_formatter(labels_dict):
             return ['--labels={}={}'.format(key, value)
                     for key, value in labels_dict.items()]
-        # TODO: Change python2 to python when Beam supports both python 2 and 3
-        # Remember to change the test case too
-        self._start_dataflow(task_id, variables, name,
-                             ["python2"] + py_options + [dataflow],
+        self._start_dataflow(variables, name, ["python2"] + py_options + [dataflow],
                              label_formatter)

     @staticmethod
-    def _build_dataflow_job_name(task_id, append_job_name=True):
-        task_id = str(task_id).replace('_', '-')
+    def _build_dataflow_job_name(job_name, append_job_name=True):
+        base_job_name = str(job_name).replace('_', '-')

-        if not re.match(r"^[a-z]([-a-z0-9]*[a-z0-9])?$", task_id):
+        if not re.match(r"^[a-z]([-a-z0-9]*[a-z0-9])?$", base_job_name):
             raise ValueError(
                 'Invalid job_name ({}); the name must consist of'
                 'only the characters [-a-z0-9], starting with a '
-                'letter and ending with a letter or number '.format(task_id))
+                'letter and ending with a letter or number '.format(base_job_name))

         if append_job_name:
-            job_name = task_id + "-" + str(uuid.uuid1())[:8]
+            safe_job_name = base_job_name + "-" + str(uuid.uuid4())[:8]
         else:
-            job_name = task_id
+            safe_job_name = base_job_name

-        return job_name
+        return safe_job_name

     @staticmethod
-    def _build_cmd(task_id, variables, label_formatter):
+    def _build_cmd(variables, label_formatter):
         command = ["--runner=DataflowRunner"]
         if variables is not None:
             for attr, value in variables.items():
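
For readers tracing the renamed helper above, the behavior of ``_build_dataflow_job_name`` can be summarized with a standalone sketch (an illustration mirroring the diff, not the hook itself): underscores are replaced with hyphens, the result must match Dataflow's job-name pattern, and a short ``uuid4`` suffix is appended unless ``append_job_name=False``.

import re
import uuid

def build_dataflow_job_name(job_name, append_job_name=True):
    # Mirror of the hook logic shown in the diff above.
    base_job_name = str(job_name).replace('_', '-')
    if not re.match(r"^[a-z]([-a-z0-9]*[a-z0-9])?$", base_job_name):
        raise ValueError(
            'Invalid job_name ({}); the name must consist of only the '
            'characters [-a-z0-9], starting with a letter and ending with '
            'a letter or number'.format(base_job_name))
    if append_job_name:
        return base_job_name + "-" + str(uuid.uuid4())[:8]
    return base_job_name

print(build_dataflow_job_name('start_python_job'))  # e.g. start-python-job-1f3a9c2e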

airflow/contrib/operators/dataflow_operator.py

+86 −9
@@ -33,6 +33,43 @@ class DataFlowJavaOperator(BaseOperator):
     Start a Java Cloud DataFlow batch job. The parameters of the operation
     will be passed to the job.

+    .. seealso::
+        For more detail on job submission have a look at the reference:
+        https://cloud.google.com/dataflow/pipelines/specifying-exec-params
+
+    :param jar: The reference to a self executing DataFlow jar (templated).
+    :type jar: str
+    :param job_name: The 'jobName' to use when executing the DataFlow job
+        (templated). This ends up being set in the pipeline options, so any entry
+        with key ``'jobName'`` in ``options`` will be overwritten.
+    :type job_name: str
+    :param dataflow_default_options: Map of default job options.
+    :type dataflow_default_options: dict
+    :param options: Map of job specific options.
+    :type options: dict
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud
+        Platform.
+    :type gcp_conn_id: str
+    :param delegate_to: The account to impersonate, if any.
+        For this to work, the service account making the request must have
+        domain-wide delegation enabled.
+    :type delegate_to: str
+    :param poll_sleep: The time in seconds to sleep between polling Google
+        Cloud Platform for the dataflow job status while the job is in the
+        JOB_STATE_RUNNING state.
+    :type poll_sleep: int
+    :param job_class: The name of the dataflow job class to be executued, it
+        is often not the main class configured in the dataflow jar file.
+    :type job_class: str
+
+    ``jar``, ``options``, and ``job_name`` are templated so you can use variables in them.
+
+    Note that both
+    ``dataflow_default_options`` and ``options`` will be merged to specify pipeline
+    execution parameter, and ``dataflow_default_options`` is expected to save
+    high-level options, for instances, project and zone information, which
+    apply to all dataflow operators in the DAG.
+
     It's a good practice to define dataflow_* parameters in the default_args of the dag
     like the project, zone and staging location.

@@ -68,13 +105,14 @@ class DataFlowJavaOperator(BaseOperator):

     Both ``jar`` and ``options`` are templated so you can use variables in them.
     """
-    template_fields = ['options', 'jar']
+    template_fields = ['options', 'jar', 'job_name']
     ui_color = '#0273d4'

     @apply_defaults
     def __init__(
             self,
             jar,
+            job_name='{{task.task_id}}',
             dataflow_default_options=None,
             options=None,
             gcp_conn_id='google_cloud_default',
@@ -125,6 +163,7 @@ def __init__(
         self.gcp_conn_id = gcp_conn_id
         self.delegate_to = delegate_to
         self.jar = jar
+        self.job_name = job_name
         self.dataflow_default_options = dataflow_default_options
         self.options = options
         self.poll_sleep = poll_sleep
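
A usage sketch for the change above (the jar path, bucket, and project are hypothetical, and a ``dag`` object is assumed to exist):

from airflow.contrib.operators.dataflow_operator import DataFlowJavaOperator

start_java_job = DataFlowJavaOperator(
    task_id='start_java_job',
    jar='gs://my-bucket/jars/wordcount-bundled.jar',
    # job_name is templated; leaving it out falls back to '{{task.task_id}}',
    # so the Dataflow job is named after the task (underscores become hyphens
    # when the hook builds the final name).
    job_name='wordcount-{{ ds_nodash }}',
    options={'output': 'gs://my-bucket/output'},
    dataflow_default_options={'project': 'my-gcp-project',
                              'zone': 'europe-west1-d'},
    dag=dag)  # assumes an existing DAG object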
@@ -141,14 +180,35 @@ def execute(self, context):
         dataflow_options = copy.copy(self.dataflow_default_options)
         dataflow_options.update(self.options)

-        hook.start_java_dataflow(self.task_id, dataflow_options,
+        hook.start_java_dataflow(self.job_name, dataflow_options,
                                  self.jar, self.job_class)


 class DataflowTemplateOperator(BaseOperator):
     """
     Start a Templated Cloud DataFlow batch job. The parameters of the operation
     will be passed to the job.
+
+    :param template: The reference to the DataFlow template.
+    :type template: str
+    :param job_name: The 'jobName' to use when executing the DataFlow template
+        (templated).
+    :param dataflow_default_options: Map of default job environment options.
+    :type dataflow_default_options: dict
+    :param parameters: Map of job specific parameters for the template.
+    :type parameters: dict
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud
+        Platform.
+    :type gcp_conn_id: str
+    :param delegate_to: The account to impersonate, if any.
+        For this to work, the service account making the request must have
+        domain-wide delegation enabled.
+    :type delegate_to: str
+    :param poll_sleep: The time in seconds to sleep between polling Google
+        Cloud Platform for the dataflow job status while the job is in the
+        JOB_STATE_RUNNING state.
+    :type poll_sleep: int
+
     It's a good practice to define dataflow_* parameters in the default_args of the dag
     like the project, zone and staging location.

@@ -183,16 +243,27 @@ class DataflowTemplateOperator(BaseOperator):
            gcp_conn_id='gcp-airflow-service-account',
            dag=my-dag)

-    ``template``, ``dataflow_default_options`` and ``parameters`` are templated so you can
-    use variables in them.
+    ``template``, ``dataflow_default_options``, ``parameters``, and ``job_name`` are
+    templated so you can use variables in them.
+
+    Note that ``dataflow_default_options`` is expected to save high-level options
+    for project information, which apply to all dataflow operators in the DAG.
+
+    .. seealso::
+        https://cloud.google.com/dataflow/docs/reference/rest/v1b3
+        /LaunchTemplateParameters
+        https://cloud.google.com/dataflow/docs/reference/rest/v1b3/RuntimeEnvironment
+        For more detail on job template execution have a look at the reference:
+        https://cloud.google.com/dataflow/docs/templates/executing-templates
     """
-    template_fields = ['parameters', 'dataflow_default_options', 'template']
+    template_fields = ['parameters', 'dataflow_default_options', 'template', 'job_name']
     ui_color = '#0273d4'

     @apply_defaults
     def __init__(
             self,
             template,
+            job_name='{{task.task_id}}',
             dataflow_default_options=None,
             parameters=None,
             gcp_conn_id='google_cloud_default',
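
A corresponding sketch for ``DataflowTemplateOperator`` (the template path, bucket, and project are placeholders, and a ``dag`` object is assumed to exist):

from airflow.contrib.operators.dataflow_operator import DataflowTemplateOperator

start_template_job = DataflowTemplateOperator(
    task_id='start_template_job',
    template='gs://dataflow-templates/latest/Word_Count',
    # Templated job_name; omit it to default to '{{task.task_id}}'.
    job_name='templated-wordcount-{{ ds_nodash }}',
    parameters={'inputFile': 'gs://my-bucket/input/shakespeare.txt',
                'output': 'gs://my-bucket/output/wordcount'},
    dataflow_default_options={'project': 'my-gcp-project'},
    dag=dag)  # assumes an existing DAG object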
@@ -240,14 +311,15 @@ def __init__(
         self.dataflow_default_options = dataflow_default_options
         self.poll_sleep = poll_sleep
         self.template = template
+        self.job_name = job_name
         self.parameters = parameters

     def execute(self, context):
         hook = DataFlowHook(gcp_conn_id=self.gcp_conn_id,
                             delegate_to=self.delegate_to,
                             poll_sleep=self.poll_sleep)

-        hook.start_template_dataflow(self.task_id, self.dataflow_default_options,
+        hook.start_template_dataflow(self.job_name, self.dataflow_default_options,
                                      self.parameters, self.template)

@@ -266,6 +338,10 @@ class DataFlowPythonOperator(BaseOperator):
     :param py_file: Reference to the python dataflow pipleline file.py, e.g.,
         /some/local/file/path/to/your/python/pipeline/file.
     :type py_file: string
+    :param job_name: The 'job_name' to use when executing the DataFlow job
+        (templated). This ends up being set in the pipeline options, so any entry
+        with key ``'jobName'`` or ``'job_name'`` in ``options`` will be overwritten.
+    :type job_name: str
     :param py_options: Additional python options.
     :type pyt_options: list of strings, e.g., ["-m", "-v"].
     :param dataflow_default_options: Map of default job options.
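
The overwrite noted in the docstring above happens because the hook assigns the sanitized name into the option dictionary last; a tiny plain-Python illustration (not Airflow code, values are made up):

# Any 'job_name' the user put into options/dataflow_default_options...
variables = {'project': 'my-gcp-project', 'job_name': 'user-supplied-name'}

# ...is replaced when the hook stores the name it built from the operator's
# job_name parameter (see start_python_dataflow in the hook diff above).
variables['job_name'] = 'start-python-job-1f3a9c2e'
print(variables['job_name'])  # -> start-python-job-1f3a9c2e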
@@ -284,13 +360,13 @@ class DataFlowPythonOperator(BaseOperator):
         JOB_STATE_RUNNING state.
     :type poll_sleep: int
     """
-
-    template_fields = ['options', 'dataflow_default_options']
+    template_fields = ['options', 'dataflow_default_options', 'job_name']

     @apply_defaults
     def __init__(
             self,
             py_file,
+            job_name='{{task.task_id}}',
             py_options=None,
             dataflow_default_options=None,
             options=None,
@@ -303,6 +379,7 @@ def __init__(
         super(DataFlowPythonOperator, self).__init__(*args, **kwargs)

         self.py_file = py_file
+        self.job_name = job_name
         self.py_options = py_options or []
         self.dataflow_default_options = dataflow_default_options or {}
         self.options = options or {}
@@ -328,7 +405,7 @@ def execute(self, context):
         formatted_options = {camel_to_snake(key): dataflow_options[key]
                              for key in dataflow_options}
         hook.start_python_dataflow(
-            self.task_id, formatted_options,
+            self.job_name, formatted_options,
             self.py_file, self.py_options)
