Commit 90a691d

model params

1 parent fae0bea · commit 90a691d

4 files changed: +57 -35 lines changed

.gitignore (+2 -1)

@@ -131,5 +131,6 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
-# Trained sample model
+# Test models
 /sample_model/
+/test_model/

loda/ml/keras/program_generation_rnn.py (+35 -11)
@@ -31,15 +31,21 @@
 class Model(tf.keras.Model):
     """Keras model for program generation using RNN."""
 
-    def __init__(self, vocabulary: list, num_ops_per_sample: int, num_nops_separator: int,
-                 embedding_dim: int, num_rnn_units: int):
+    def __init__(self, vocabulary: list,
+                 embedding_dim: int, num_rnn_units: int,
+                 num_samples: int, sample_size: int,
+                 num_ops_per_sample: int, num_nops_separator: int,
+                 program_ids: list):
 
         super().__init__(self)
         self.vocabulary = vocabulary
-        self.num_ops_per_sample = num_ops_per_sample
-        self.num_nops_separator = num_nops_separator
         self.embedding_dim = embedding_dim
         self.num_rnn_units = num_rnn_units
+        self.num_samples = num_samples
+        self.sample_size = sample_size
+        self.num_ops_per_sample = num_ops_per_sample
+        self.num_nops_separator = num_nops_separator
+        self.program_ids = program_ids
 
         # Initialize token <-> ID lookup layers.
         self.tokens_to_ids = tf.keras.layers.StringLookup(
@@ -73,10 +79,22 @@ def call(self, inputs, states=None, return_state=False, training=False):
 
     def get_config(self):
         return {"vocabulary": self.vocabulary,
+                "embedding_dim": self.embedding_dim,
+                "num_rnn_units": self.num_rnn_units,
+                "num_samples": self.num_samples,
+                "sample_size": self.sample_size,
                 "num_ops_per_sample": self.num_ops_per_sample,
                 "num_nops_separator": self.num_nops_separator,
-                "embedding_dim": self.embedding_dim,
-                "num_rnn_units": self.num_rnn_units}
+                "program_ids": self.program_ids}
+
+    def summary(self, line_length=None, positions=None, print_fn=None,
+                expand_nested=False, show_trainable=False, layer_range=None):
+        super().summary(line_length, positions, print_fn,
+                        expand_nested, show_trainable, layer_range)
+        print("Vocabulary size:", self.get_vocab_size())
+        print("Sample size:", self.sample_size)
+        print("Trained samples:", self.num_samples)
+        print("Trained programs:", len(self.program_ids))
 
     @classmethod
     def from_config(cls, config):
@@ -287,20 +305,26 @@ def train_model(program_cache: ProgramCache, num_programs: int = -1,
     Return:
         This function returns the trained Keras model.
     """
+    # Get random program IDs.
+    program_ids = util.get_random_program_ids(program_cache, num_programs)
+
     # Load programs and convert to tokens and vocabulary.
-    merged_programs, _, sample_size = util.merge_programs(
-        program_cache,
-        num_programs=num_programs,
+    merged_programs, num_samples, sample_size = util.merge_programs(
+        program_cache, program_ids,
         num_ops_per_sample=num_ops_per_sample,
         num_nops_separator=num_nops_separator)
     tokens, vocabulary = util.program_to_tokens(merged_programs)
 
     # Create Keras model and dataset, run the training, and save the model.
+    program_ids = sorted(program_ids)
     model = Model(vocabulary,
+                  embedding_dim=embedding_dim,
+                  num_rnn_units=num_rnn_units,
+                  num_samples=num_samples,
+                  sample_size=sample_size,
                   num_ops_per_sample=num_ops_per_sample,
                   num_nops_separator=num_nops_separator,
-                  embedding_dim=embedding_dim,
-                  num_rnn_units=num_rnn_units)
+                  program_ids=program_ids)
     ids = model.tokens_to_ids(tokens)
     dataset = __create_dataset(ids, sample_size=sample_size)
     loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
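
Since every constructor argument is now mirrored in get_config, a model's config alone is enough to rebuild an equivalent instance via from_config, and the new metadata fields (num_samples, sample_size, program_ids) survive the round trip. A minimal sketch, assuming a trained model as returned by train_model above; the cache path and num_programs value are illustrative:

    # Round-trip sketch: from_config() feeds the dict returned by
    # get_config() back into __init__, so the metadata added in this
    # commit is preserved when the model is reconstructed (weights are
    # restored separately by Keras when loading a saved model).
    from loda.ml.keras.program_generation_rnn import Model, train_model
    from loda.oeis import ProgramCache

    program_cache = ProgramCache("programs")  # illustrative path
    model = train_model(program_cache, num_programs=10)

    restored = Model.from_config(model.get_config())
    assert restored.sample_size == model.sample_size
    assert restored.program_ids == model.program_ids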

loda/ml/util.py (+7 -4)
@@ -65,22 +65,25 @@ def append_nops(program: Program, num_nops: int):
         program.operations.append(Operation())  # nop
 
 
-def merge_programs(program_cache: ProgramCache, num_programs: int,
-                   num_ops_per_sample: int, num_nops_separator: int) -> Program:
-
+def get_random_program_ids(program_cache: ProgramCache, num_programs: int = -1):
     # Get IDs of all existing programs. Shuffle them and reduce
     # the number of program IDs if requested.
     ids = program_cache.all_ids()
     random.shuffle(ids)
     if num_programs >= 0 and len(ids) > num_programs:
         ids = ids[0:num_programs]
+    return ids
+
+
+def merge_programs(program_cache: ProgramCache, program_ids: list,
+                   num_ops_per_sample: int, num_nops_separator: int):
 
     # Merge all programs into one program. Invidual programs are
     # separated by (multiple) nops. The number nops equals the
     # number of operations per sample.
     merged = Program()
     num_loaded = 0
-    for id in ids:
+    for id in program_ids:
         program = program_cache.get(id)
         append_nops(merged, num_nops_separator)
         for op in program.operations:
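
This refactoring splits ID selection out of merge_programs, so callers pass an explicit ID list and can reuse or sort it before storing it on the model. A short sketch of the new two-step flow; the cache path and parameter values are illustrative:

    from loda.ml import util
    from loda.oeis import ProgramCache

    program_cache = ProgramCache("programs")  # illustrative path

    # Step 1: pick a random subset of program IDs (-1 selects all).
    program_ids = util.get_random_program_ids(program_cache, num_programs=100)

    # Step 2: merge the selected programs into one nop-separated program.
    merged, num_samples, sample_size = util.merge_programs(
        program_cache, program_ids,
        num_ops_per_sample=3, num_nops_separator=2)
    tokens, vocabulary = util.program_to_tokens(merged)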

tests/test_ml.py (+13 -19)
@@ -1,33 +1,26 @@
 # -*- coding: utf-8 -*-
 
 from unittest import TestCase
-from loda.ml.keras.program_generation_rnn import Model
+from loda.ml.keras.program_generation_rnn import *
 
 from loda.oeis import ProgramCache
 from loda.ml import util
 from tests.helpers import PROGRAMS_TEST_DIR
 
 
-class KerasTests(TestCase):
+class ProgramGenerationRNNTests(TestCase):
 
     def setUp(self):
         self.program_cache = ProgramCache(PROGRAMS_TEST_DIR)
-        self.num_ops_per_sample = 3
-        self.num_nops_separator = 2
-        merged_programs, self.num_samples, _ = util.merge_programs(
-            self.program_cache,
-            num_programs=-1,
-            num_ops_per_sample=self.num_ops_per_sample,
-            num_nops_separator=self.num_nops_separator)
-        self.tokens, self.vocabulary = util.program_to_tokens(merged_programs)
-
-    def test_model_tokens_to_ids(self):
-        model = Model(
-            self.vocabulary, self.num_ops_per_sample, self.num_nops_separator)
-        ids = model.tokens_to_ids(self.tokens)
-        self.assertGreater(len(ids), 0)
-        self.assertEqual(len(self.tokens), len(ids))
-        self.assertGreater(self.num_samples, 0)
+
+    def test_model(self):
+        model = train_model(self.program_cache)
+        model.save("test_model")
+        loaded = load_model("test_model")
+        loaded.summary()
+        generator = Generator(loaded, num_lanes=10)
+        for _ in range(10):
+            generator()
 
 
 class UtilTests(TestCase):
@@ -45,8 +38,9 @@ def test_program_to_tokens_A000005(self):
         self.assertEqual(program, program2)
 
     def __merge_progs(self):
+        program_ids = util.get_random_program_ids(self.program_cache)
         merged, _, _ = util.merge_programs(
-            self.program_cache, program_ids=program_ids, num_ops_per_sample=3, num_nops_separator=3)
+            self.program_cache, program_ids=program_ids, num_ops_per_sample=3, num_nops_separator=3)
         return merged
 
     def test_program_to_tokens(self):
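
The rewritten test exercises the full pipeline end to end: train, save, load, summarize, generate. One way to run just this module with the standard unittest runner, assuming the repository root as the working directory:

    # Equivalent to `python -m unittest tests.test_ml` on the command line.
    import unittest

    suite = unittest.defaultTestLoader.discover("tests", pattern="test_ml.py")
    unittest.TextTestRunner(verbosity=2).run(suite)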
