@@ -67,7 +67,10 @@ class DataprocClusterCreateOperator(BaseOperator):
67
67
to add to all instances
68
68
:type metadata: dict
69
69
:param image_version: the version of software inside the Dataproc cluster
70
- :type image_version: string
70
+ :type image_version: str
71
+ :param custom_image: custom Dataproc image; for more info see
72
+ https://cloud.google.com/dataproc/docs/guides/dataproc-images
73
+ :type custom_image: str
71
74
:param properties: dict of properties to set on
72
75
config files (e.g. spark-defaults.conf), see
73
76
https://cloud.google.com/dataproc/docs/reference/rest/v1/ \
@@ -138,6 +141,7 @@ def __init__(self,
138
141
init_actions_uris = None ,
139
142
init_action_timeout = "10m" ,
140
143
metadata = None ,
144
+ custom_image = None ,
141
145
image_version = None ,
142
146
properties = None ,
143
147
master_machine_type = 'n1-standard-4' ,
@@ -168,6 +172,7 @@ def __init__(self,
168
172
self .init_actions_uris = init_actions_uris
169
173
self .init_action_timeout = init_action_timeout
170
174
self .metadata = metadata
175
+ self .custom_image = custom_image
171
176
self .image_version = image_version
172
177
self .properties = properties
173
178
self .master_machine_type = master_machine_type
@@ -187,6 +192,9 @@ def __init__(self,
187
192
self .auto_delete_time = auto_delete_time
188
193
self .auto_delete_ttl = auto_delete_ttl
189
194
195
+ assert not (self .custom_image and self .image_version ), \
196
+ "custom_image and image_version can't be both set"
197
+
190
198
def _get_cluster_list_for_project (self , service ):
191
199
result = service .projects ().regions ().clusters ().list (
192
200
projectId = self .project_id ,
@@ -321,6 +329,12 @@ def _build_cluster_data(self):
321
329
cluster_data ['config' ]['gceClusterConfig' ]['tags' ] = self .tags
322
330
if self .image_version :
323
331
cluster_data ['config' ]['softwareConfig' ]['imageVersion' ] = self .image_version
332
+ elif self .custom_image :
333
+ custom_image_url = 'https://www.googleapis.com/compute/beta/projects/' \
334
+ '{}/global/images/{}' .format (self .project_id ,
335
+ self .custom_image )
336
+ cluster_data ['config' ]['masterConfig' ]['imageUri' ] = custom_image_url
337
+ cluster_data ['config' ]['workerConfig' ]['imageUri' ] = custom_image_url
324
338
if self .properties :
325
339
cluster_data ['config' ]['softwareConfig' ]['properties' ] = self .properties
326
340
if self .idle_delete_ttl :
0 commit comments