Skip to content

Commit 2f261c2

Browse files
committed
basic mining
1 parent a5ab58e commit 2f261c2

File tree

5 files changed

+107
-5
lines changed

5 files changed

+107
-5
lines changed

loda/mine/miner.py

+38-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
"""Miner class for finding integer sequence programs."""
22

3+
import os
4+
import time
5+
6+
from loda.lang import Program
37
from loda.oeis import PrefixIndex, Sequence
48
from loda.runtime import Evaluator, Interpreter
59

@@ -10,22 +14,52 @@ def __init__(self, sequences: list, interpreter: Interpreter, generator):
1014
self.__index = PrefixIndex(sequences)
1115
self.__interpreter = interpreter
1216
self.__generator = generator
17+
self.__basic_timeout = 60
18+
self.__extended_timeout = 600
1319

1420
def __call__(self):
    """Generate one candidate program and look for sequences it reproduces.

    A program is obtained from the generator and evaluated term by term.
    Each term is fed to the index's ``refine_match`` until that call
    returns a falsy value. Every sequence id then reported by
    ``get_match_ids`` is passed on to ``__check_match``.

    The attempt is abandoned, after printing a message, when evaluation
    raises or when refinement runs longer than the basic timeout
    (in seconds).
    """
    program = self.__generator()
    evaluator = Evaluator(program, self.__interpreter)
    match = self.__index.global_match()
    # Use the monotonic clock for the time budget: unlike time.time() it
    # cannot jump when the system clock is adjusted.
    start_time = time.monotonic()
    refine = True
    try:
        while refine:
            term = evaluator()
            refine = self.__index.refine_match(match, term)
            if (time.monotonic() - start_time) > self.__basic_timeout:
                print("timeout")
                return
    except Exception as e:
        # Generated programs may fail during evaluation; report the error
        # and give up on this candidate.
        print("evaluation error: {}".format(e))
        return
    # "seq_id" rather than "id" so the builtin is not shadowed.
    for seq_id in self.__index.get_match_ids(match):
        self.__check_match(program, seq_id)
39+
40+
def __check_match(self, program: Program, id: int):
    """Validate a candidate program against the b-file terms of a sequence.

    The sequence is looked up in the index and its b-file terms are loaded
    from ``~/loda/oeis``. The check is skipped when no terms are returned
    or when the b-file has exactly as many terms as the sequence already
    holds. Otherwise the program output is compared against at most the
    first 1000 b-file terms. If all of them agree within the extended
    timeout (in seconds), the program is written to
    ``~/loda/programs/local/<id_str>.asm``, preceded by a comment line
    naming the sequence.

    Args:
        program: The candidate program to verify.
        id: Id of the sequence the program is believed to generate.
    """
    seq = self.__index.get(id)
    terms = seq.load_b_file(os.path.expanduser("~/loda/oeis"))
    if terms is None or len(terms) == len(seq.terms):
        print("Skipping check for {}".format(seq))
        return
    print("Checking match for {}".format(seq))
    # Monotonic clock: the time budget must not be affected by system
    # clock adjustments.
    start_time = time.monotonic()
    evaluator = Evaluator(program, self.__interpreter)
    try:
        # Slicing caps the work at 1000 terms and is a no-op for shorter
        # lists.
        for expected in terms[:1000]:
            if evaluator() != expected:
                # Mismatch: the program does not generate this sequence.
                return
            if (time.monotonic() - start_time) > self.__extended_timeout:
                print("timeout")
                return
    except Exception as e:
        # A program can match the short prefix and still fail on later
        # terms; report it instead of letting the error end the mining
        # loop.
        print("evaluation error: {}".format(e))
        return
    print("Found match for {}".format(seq))
    asm_path = os.path.expanduser(
        "~/loda/programs/local/{}.asm".format(seq.id_str()))
    # Make sure the output folder exists before the first program is saved.
    os.makedirs(os.path.dirname(asm_path), exist_ok=True)
    with open(asm_path, "w") as asm_file:
        asm_file.write("; {}\n".format(seq))
        asm_file.write(str(program))

loda/oeis/sequence.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import functools
44
import os.path
55
import re
6+
import requests
7+
import subprocess
68

79

810
@functools.total_ordering
@@ -86,10 +88,18 @@ def load_b_file(self, path: str) -> list:
8688
folder that contains the b-files in sub-directories, e.g. `b/123/b123456.txt`.
8789
"""
8890
terms = []
91+
txt = "b{:06}.txt".format(self.id)
8992
if len(path) == 0 or os.path.isdir(path):
9093
dir = "{:03}".format(self.id//1000)
91-
txt = "b{:06}.txt".format(self.id)
9294
path = os.path.join(path, "b", dir, txt)
95+
if not os.path.isfile(path):
96+
b_url = "http://api.loda-lang.org/miner/v1/oeis/{}.gz".format(txt)
97+
print("Fetching {}".format(b_url))
98+
req = requests.get(b_url)
99+
gz_path = path + ".gz"
100+
with open(gz_path, 'wb') as gz:
101+
gz.write(req.content)
102+
subprocess.run(["gunzip", gz_path])
93103
with open(path) as b_file:
94104
expected_index = -1
95105
for line in b_file:

mine.py

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import datetime
2+
import os.path
3+
4+
from loda.oeis import ProgramCache, Sequence
5+
from loda.runtime import Interpreter
6+
from loda.mine import Miner
7+
from loda.ml.keras.program_generation_rnn import load_model, Generator
8+
9+
10+
def mine(model_path: str):
    """Run the mining loop with a trained program-generation model.

    Loads the model stored at ``model_path``, prints its summary and wraps
    it in a generator. Sequences are loaded from ``~/loda/oeis`` and kept
    only if they have at least 8 terms and their id is not reported by the
    program cache for ``~/loda/programs/oeis``. A miner over the remaining
    sequences is then invoked in an endless loop; after every 100
    invocations the current time and the generator statistics are printed.

    Args:
        model_path: Location of the saved model to load.
    """
    model = load_model(model_path)
    model.summary()
    generator = Generator(model, num_lanes=10)

    program_cache = ProgramCache(os.path.expanduser("~/loda/programs/oeis"))
    known_ids = set(program_cache.all_ids())

    seqs = Sequence.load_oeis(os.path.expanduser("~/loda/oeis"))
    # Rebind the same name so the unfiltered list can be released.
    seqs = [s for s in seqs if len(s.terms) >= 8 and s.id not in known_ids]
    print("Loaded {} sequences".format(len(seqs)))

    miner = Miner(seqs, Interpreter(program_cache), generator)
    iterations = 0
    while True:
        miner()
        iterations += 1
        if iterations % 100 == 0:
            print(datetime.datetime.now(), generator.get_stats_info_str())
31+
32+
33+
if __name__ == "__main__":
    # Script entry point: mine with the model saved under ~/scripts/model-001.
    _model_path = os.path.expanduser("~/scripts/model-001")
    mine(_model_path)

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
nose2
22
parameterized
3+
requests
34
tensorflow; sys_platform != 'darwin'
45
tensorflow-macos; sys_platform == 'darwin'

train.py

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import os.path
2+
3+
from loda.oeis import ProgramCache
4+
from loda.ml.keras.program_generation_rnn import train_model
5+
6+
7+
# Train a program-generation model on a share of the cached programs and
# save it.
#
# programs_percentage: percentage of the programs in the cache built from
#   ~/loda/programs/oeis to train on. For values below 100 the number of
#   training programs is derived from the number of ids in the cache;
#   otherwise num_programs stays at -1 (presumably "use all programs" --
#   confirm against train_model).
#
# The trained model is saved as "model-NNN", where NNN is the percentage
# zero-padded to three digits.
#
# NOTE(review): indentation is not visible in this view, so it cannot be
# determined here whether the "Training using ..." message is printed only
# for percentages below 100 or unconditionally.
def train(programs_percentage: int):
8+
programs_dir = os.path.expanduser("~/loda/programs/oeis")
9+
program_cache = ProgramCache(programs_dir)
10+
# -1 is the value passed on when no subset size is computed below.
num_train_programs = -1
11+
if programs_percentage < 100:
12+
num_total_programs = len(program_cache.all_ids())
13+
# Integer division: the subset size is rounded down.
num_train_programs = (programs_percentage * num_total_programs) // 100
14+
print("Training using {} programs".format(num_train_programs))
15+
model = train_model(program_cache, num_programs=num_train_programs)
16+
model.save("model-{:03}".format(programs_percentage))
17+
18+
19+
if __name__ == "__main__":
    # Script entry point: train one model per corpus percentage, in
    # ascending order.
    for percentage in (1, 25, 50, 100):
        train(percentage)

0 commit comments

Comments
 (0)