Skip to content

Commit 1541e98

Browse files
authored
feat: model extract (#832)
* feat: adds model extract method and tests * fix types & tests * update docstrings * lint * updates tests * updates test * fix year
1 parent 01f9fe1 commit 1541e98

File tree

3 files changed

+635
-13
lines changed

3 files changed

+635
-13
lines changed

src/model.ts

+290-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,53 @@
1515
*/
1616

1717
import * as common from '@google-cloud/common';
18-
import {Dataset} from './dataset';
18+
import {promisifyAll} from '@google-cloud/promisify';
19+
import arrify = require('arrify');
20+
import * as extend from 'extend';
21+
import {
22+
BigQuery,
23+
Job,
24+
Dataset,
25+
ResourceCallback,
26+
RequestCallback,
27+
JobRequest,
28+
} from '.';
29+
import {JobMetadata} from './job';
30+
import bigquery from './types';
31+
32+
/**
 * Minimal local stand-in for the `File` type from `@google-cloud/storage`.
 *
 * The storage module ships the real type, but it is currently installed only
 * as a devDependency, so its types would not be available to consumers unless
 * it became a production dependency. The module is fairly large and is only
 * needed here for typing, so this structural subset is declared instead.
 *
 * TODO: figure out how to include the real storage types properly.
 */
export interface File {
  /** Object name; combined with `bucket.name` to build a `gs://` URI. */
  name: string;
  /** Owning bucket; only its `name` property is read in this file. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  bucket: any;
  generation?: number;
  kmsKeyName?: string;
  userProject?: string;
}
45+
46+
/** Callback accepted by {@link Model#extract}; receives the job metadata. */
export type JobMetadataCallback = RequestCallback<JobMetadata>;
/** Tuple resolved by {@link Model#extract} when no callback is supplied. */
export type JobMetadataResponse = [JobMetadata];

/** Callback accepted by {@link Model#createExtractJob}. */
export type JobCallback = ResourceCallback<Job, bigquery.IJob>;
/** Tuple resolved by {@link Model#createExtractJob}: the Job and the raw API response. */
export type JobResponse = [Job, bigquery.IJob];

/**
 * Options accepted by {@link Model#createExtractJob} and {@link Model#extract}:
 * the extract job configuration plus a `format` shorthand that is translated
 * to `destinationFormat` before the job is created.
 */
export type CreateExtractJobOptions =
  JobRequest<bigquery.IJobConfigurationExtract> & {
    format?: 'ML_TF_SAVED_MODEL' | 'ML_XGBOOST_BOOSTER';
  };
57+
58+
/**
 * Model export formats accepted by BigQuery. `createExtractJob` validates the
 * upper-cased `options.format` value against this list.
 *
 * @type {array}
 * @private
 */
const FORMATS: string[] = [
  'ML_TF_SAVED_MODEL',
  'ML_XGBOOST_BOOSTER',
];
1965

2066
/**
2167
* Model objects are returned by methods such as {@link Dataset#model} and
@@ -33,6 +79,9 @@ import {Dataset} from './dataset';
3379
* const model = dataset.model('my-model');
3480
*/
3581
class Model extends common.ServiceObject {
82+
dataset: Dataset;
83+
bigQuery: BigQuery;
84+
3685
constructor(dataset: Dataset, id: string) {
3786
const methods = {
3887
/**
@@ -175,9 +224,249 @@ class Model extends common.ServiceObject {
175224
id,
176225
methods,
177226
});
227+
228+
this.dataset = dataset;
229+
this.bigQuery = dataset.bigQuery;
230+
}
231+
232+
createExtractJob(
  destination: string | File,
  options?: CreateExtractJobOptions
): Promise<JobResponse>;
createExtractJob(
  destination: string | File,
  options: CreateExtractJobOptions,
  callback: JobCallback
): void;
createExtractJob(destination: string | File, callback: JobCallback): void;
/**
 * Export model to Cloud Storage.
 *
 * The `options` object passed in is never modified; the job configuration is
 * built from a deep copy of it.
 *
 * @see [Jobs: insert API Documentation]{@link https://cloud.google.com/bigquery/docs/reference/v2/jobs/insert}
 *
 * @param {string|File} destination Where the model should be exported
 *     to. A string or {@link
 *     https://cloud.google.com/nodejs/docs/reference/storage/latest/File File}
 *     object.
 * @param {object} [options] The configuration object.
 * @param {string} [options.format] The format to export the data in.
 *     Allowed options are "ML_TF_SAVED_MODEL" or "ML_XGBOOST_BOOSTER"
 *     (matched case-insensitively). Default: "ML_TF_SAVED_MODEL".
 * @param {string} [options.jobId] Custom job id.
 * @param {string} [options.jobPrefix] Prefix to apply to the job id.
 * @param {function} [callback] The callback function.
 * @param {?error} callback.err An error returned while making this request.
 * @param {Job} callback.job The job used to export the model.
 * @param {object} callback.apiResponse The full API response.
 *
 * @throws {Error} If a destination isn't a string or File object.
 * @throws {Error} If `options.format` is not a recognized export format.
 *
 * @example
 * const {BigQuery} = require('@google-cloud/bigquery');
 * const bigquery = new BigQuery();
 * const dataset = bigquery.dataset('my-dataset');
 * const model = dataset.model('my-model');
 *
 * const extractedModel = 'gs://my-bucket/extracted-model';
 *
 * function callback(err, job, apiResponse) {
 *   // `job` is a Job object that can be used to check the status of the
 *   // request.
 * }
 *
 * //-
 * // To use the default options, just pass a string or a File object.
 * //
 * // Note: The default format is 'ML_TF_SAVED_MODEL'.
 * //-
 * model.createExtractJob(extractedModel, callback);
 *
 * //-
 * // If you need more customization, pass an `options` object.
 * //-
 * const options = {
 *   format: 'ML_TF_SAVED_MODEL',
 *   jobId: '123abc'
 * };
 *
 * model.createExtractJob(extractedModel, options, callback);
 *
 * //-
 * // If the callback is omitted, we'll return a Promise.
 * //-
 * model.createExtractJob(extractedModel, options).then((data) => {
 *   const job = data[0];
 *   const apiResponse = data[1];
 * });
 */
createExtractJob(
  destination: string | File,
  optionsOrCallback?: CreateExtractJobOptions | JobCallback,
  cb?: JobCallback
): void | Promise<JobResponse> {
  const suppliedOptions: CreateExtractJobOptions =
    typeof optionsOrCallback === 'object' ? optionsOrCallback : {};
  const callback =
    typeof optionsOrCallback === 'function' ? optionsOrCallback : cb;

  // Resolve every destination to a gs:// URI up front so an invalid
  // destination throws before any configuration is assembled.
  const destinationUris = (arrify(destination) as Array<File | string>).map(
    dest => {
      if (common.util.isCustomType(dest, 'storage/file')) {
        return (
          'gs://' + (dest as File).bucket.name + '/' + (dest as File).name
        );
      }

      if (typeof dest === 'string') {
        return dest;
      }
      throw new Error('Destination must be a string or a File object.');
    }
  );

  // Merge into a fresh object rather than into the caller's options. A deep
  // `extend` onto the caller's object would add `destinationUris`,
  // `destinationFormat` and `sourceModel` to it and strip `format`, `jobId`
  // and `jobPrefix`; because arrays are merged index by index, reusing that
  // object for a later call would also leak stale destination URIs.
  const options: CreateExtractJobOptions = extend(true, {}, suppliedOptions, {
    destinationUris,
  });

  if (options.format) {
    const format = options.format.toUpperCase();

    if (!FORMATS.includes(format)) {
      throw new Error('Destination format not recognized: ' + format);
    }

    // `format` is a convenience alias; the API field is `destinationFormat`.
    options.destinationFormat = format;
    delete options.format;
  }

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const body: any = {
    configuration: {
      // `extend` returns its target, so `extract` is the local `options`
      // copy; the deletions below therefore also remove the job id fields
      // from the extract configuration.
      extract: extend(true, options, {
        sourceModel: {
          datasetId: this.dataset.id,
          projectId: this.bigQuery.projectId,
          modelId: this.id,
        },
      }),
    },
  };

  // Job identity belongs on the request body, not in the extract config.
  if (options.jobPrefix) {
    body.jobPrefix = options.jobPrefix;
    delete options.jobPrefix;
  }

  if (options.jobId) {
    body.jobId = options.jobId;
    delete options.jobId;
  }

  this.bigQuery.createJob(body, callback!);
}
367+
368+
extract(
  destination: string | File,
  options?: CreateExtractJobOptions
): Promise<JobMetadataResponse>;
extract(
  destination: string | File,
  options: CreateExtractJobOptions,
  callback?: JobMetadataCallback
): void;
extract(destination: string | File, callback?: JobMetadataCallback): void;
/**
 * Export model to Cloud Storage.
 *
 * Starts an extract job via {@link Model#createExtractJob} and invokes the
 * callback once that job emits `complete` (or `error`).
 *
 * @param {string|File} destination Where the model should be exported
 *     to. A string or {@link
 *     https://cloud.google.com/nodejs/docs/reference/storage/latest/File File}
 *     object.
 * @param {object} [options] The configuration object.
 * @param {string} [options.format] The format to export
 *     the data in. Allowed options are "ML_TF_SAVED_MODEL" or
 *     "ML_XGBOOST_BOOSTER". Default: "ML_TF_SAVED_MODEL".
 * @param {string} [options.jobId] Custom id for the underlying job.
 * @param {string} [options.jobPrefix] Prefix to apply to the underlying job id.
 * @param {function} [callback] The callback function.
 * @param {?error} callback.err An error returned while making this request
 * @param {object} callback.apiResponse The full API response.
 * @returns {Promise}
 *
 * @throws {Error} If destination isn't a string or File object.
 *
 * @example
 * const {BigQuery} = require('@google-cloud/bigquery');
 * const bigquery = new BigQuery();
 * const dataset = bigquery.dataset('my-dataset');
 * const model = dataset.model('my-model');
 *
 * const extractedModel = 'gs://my-bucket/extracted-model';
 *
 * function callback(err, apiResponse) {
 *   // Called once the underlying extract job has completed or failed.
 * }
 *
 * //-
 * // To use the default options, just pass a string or a File object.
 * //
 * // Note: The default format is 'ML_TF_SAVED_MODEL'.
 * //-
 * model.extract(extractedModel, callback);
 *
 * //-
 * // If you need more customization, pass an `options` object.
 * //-
 * const options = {
 *   format: 'ML_TF_SAVED_MODEL',
 *   jobId: '123abc'
 * };
 *
 * model.extract(extractedModel, options, callback);
 *
 * //-
 * // If the callback is omitted, we'll return a Promise.
 * //-
 * model.extract(extractedModel, options).then((data) => {
 *   const apiResponse = data[0];
 * });
 */
extract(
  destination: string | File,
  optionsOrCallback?: CreateExtractJobOptions | JobMetadataCallback,
  cb?: JobMetadataCallback
): void | Promise<JobMetadataResponse> {
  // Sort out the (destination, callback) and (destination, options, callback)
  // call shapes.
  let options: CreateExtractJobOptions = {};
  let callback = cb;
  if (typeof optionsOrCallback === 'function') {
    callback = optionsOrCallback;
  } else if (typeof optionsOrCallback === 'object') {
    options = optionsOrCallback;
  }

  this.createExtractJob(destination, options, (err, job, resp) => {
    if (!err) {
      // The job was created; report back only when it finishes or fails.
      job!.on('error', callback!).on('complete', metadata => {
        callback!(null, metadata);
      });
      return;
    }

    // Job creation itself failed: forward the error with the raw response.
    callback!(err, resp);
  });
}
179461
}
180462

463+
/*! Developer Documentation
 *
 * All async methods (except for streams) will return a Promise in the event
 * that a callback is omitted.
 */
promisifyAll(Model);
469+
181470
/**
182471
* Reference to the {@link Model} class.
183472
* @name module:@google-cloud/bigquery.Model

system-test/bigquery.ts

+14-2
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,7 @@ describe('BigQuery', () => {
612612
});
613613
});
614614

615-
it('should extract the table', () => {
615+
it('should extract a table', () => {
616616
return table.createExtractJob(extractFile).then(data => {
617617
const job = data[0];
618618

@@ -627,8 +627,13 @@ describe('BigQuery', () => {
627627

628628
describe('BigQuery/Model', () => {
629629
let model: Model;
630+
const bucket = storage.bucket(generateName('bucket'));
631+
const extractDest =
632+
'gs://' + bucket.name + '/' + generateName('model-export');
633+
634+
before(async () => {
635+
await bucket.create();
630636

631-
before(() => {
632637
model = dataset.model('testmodel');
633638
return bigquery.query(`
634639
CREATE MODEL \`${dataset.id}.${model.id}\`
@@ -674,6 +679,13 @@ describe('BigQuery', () => {
674679
const [metadata] = await model.getMetadata();
675680
assert.strictEqual(metadata.friendlyName, friendlyName);
676681
});
682+
683+
it('should extract a model', async () => {
684+
const jobId = generateName('model-export-job');
685+
686+
const [job] = await model.createExtractJob(extractDest, {jobId});
687+
assert.strictEqual(job.id, jobId);
688+
});
677689
});
678690

679691
describe('BigQuery/Routine', () => {

0 commit comments

Comments
 (0)