Skip to content

Commit b8b7580

Browse files
committed
sequence index
1 parent d5cbaaf commit b8b7580

File tree

7 files changed

+217
-18
lines changed

7 files changed

+217
-18
lines changed

.vscode/settings.json

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"editor.formatOnSave": true
3+
}

loda/oeis.py

+118-16
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,141 @@
11
# -*- coding: utf-8 -*-
22

3+
import copy
4+
import functools
35
import os.path
6+
import re
7+
48
from loda.lang import Program
59

610

11+
@functools.total_ordering
class Sequence:
    """An integer sequence with a numeric ID, a name and a list of terms.

    Sequences are ordered by their terms first and by their ID second, so
    sorting a list of sequences groups those that share a common prefix.
    Equality compares ID and terms only; the name is ignored.
    """

    def __init__(self, id: int, name="", terms=None):
        """Create a sequence.

        Args:
            id: Numeric sequence ID (e.g. ``5`` for ``A000005``).
            name: Human-readable name of the sequence.
            terms: List of integer terms; defaults to a new empty list.
        """
        self.id = id
        self.name = name
        # Allocate a fresh list per instance: a literal ``[]`` default would
        # be one shared object mutated by every sequence created without terms.
        self.terms = [] if terms is None else terms

    def __str__(self) -> str:
        return "{}: {}".format(self.id_str(), self.name)

    def __eq__(self, other) -> bool:
        # Let Python fall back to its default handling for foreign types
        # instead of failing with an AttributeError.
        if not isinstance(other, Sequence):
            return NotImplemented
        return self.id == other.id and self.terms == other.terms

    def __lt__(self, other) -> bool:
        if not isinstance(other, Sequence):
            return NotImplemented
        # Lexicographic on terms, ties broken by ID.
        return (self.terms, self.id) < (other.terms, other.id)

    def id_str(self) -> str:
        """Return the ID formatted as ``A`` followed by six zero-padded digits."""
        return "A{:06}".format(self.id)
1533

1634

17-
class SequenceCache:
18-
def __init__(self, path: str, auto_fetch = False):
35+
class SequenceMatch:
    """Mutable state of an incremental prefix match over a sorted index.

    Holds the number of terms matched so far, the half-open index range
    ``[start_index, end_index)`` of candidates, and the IDs collected in
    ``finished_ids``.
    """

    def __init__(self, size: int):
        # Collected separately from the candidate range.
        self.finished_ids = []
        # Initially every position 0 .. size-1 is a candidate (end exclusive).
        self.start_index, self.end_index = 0, size
        # No terms have been matched yet.
        self.prefix_length = 0
41+
42+
43+
class SequenceIndex:
    """Lazily loaded, sorted index of sequences read from a directory.

    The directory must contain a ``stripped`` file (sequence terms) and a
    ``names`` file (sequence names). Both are read on first use. Sequences
    are kept sorted by (terms, id) so that prefix matches form a contiguous
    range, and a lookup table maps each ID to its position in that order.
    """

    # Data line of the "stripped" file, e.g. "A000001 ,0,1,-1,".
    # Terms may be negative, so an optional minus sign is accepted.
    _TERMS_PATTERN = re.compile(r"^A([0-9]+) ,(-?[0-9]+(?:,-?[0-9]+)*),$")
    # Data line of the "names" file, e.g. "A000004 The zero sequence."
    _NAMES_PATTERN = re.compile(r"^A([0-9]+) (.+)$")

    def __init__(self, path: str):
        """Remember the data directory; nothing is read until first use."""
        self.__path = path
        self.__index = None   # sequences sorted by (terms, id); None until loaded
        self.__lookup = None  # __lookup[id] -> position in __index; slot 0 unused

    def size(self) -> int:
        """Return the number of sequences in the index."""
        self.__ensure_loaded()
        return len(self.__index)

    def get(self, id: int):
        """Return a shallow copy of the sequence with the given ID.

        Raises:
            IndexError: If ``id`` is not a known sequence ID.
        """
        self.__ensure_loaded()
        return copy.copy(self.__get(id))

    def __ensure_loaded(self):
        """Load the index from disk on first use."""
        if self.__index is None:
            self.__load()

    def __get(self, id: int):
        # Slot 0 of the lookup table is padding and negative values would
        # wrap around, so both must be rejected explicitly instead of
        # silently returning an unrelated sequence.
        if id < 1 or id >= len(self.__lookup):
            raise IndexError("unknown sequence ID: {}".format(id))
        return self.__index[self.__lookup[id]]

    def __parse_line(self, line: str, pattern):
        """Match one file line; return None for blank and ``#`` comment lines.

        Raises:
            ValueError: If a non-comment line does not match ``pattern``.
        """
        line = line.strip()
        if not line or line.startswith("#"):
            return None
        match = pattern.match(line)
        if not match:
            raise ValueError("parse error: {}".format(line))
        return match

    def __read_entries(self, filename: str, pattern):
        """Yield ``(id, payload)`` for every data line of a file.

        IDs must be consecutive and start at 1; anything else raises
        ValueError.
        """
        expected_id = 1
        full_path = os.path.join(self.__path, filename)
        # The OEIS data files are UTF-8; do not depend on the platform default.
        with open(full_path, encoding="utf-8") as file:
            for line in file:
                match = self.__parse_line(line, pattern)
                if not match:
                    continue
                seq_id = int(match.group(1))
                if seq_id != expected_id:
                    raise ValueError("unexpected ID: {}".format(line))
                yield seq_id, match.group(2)
                expected_id += 1

    def __load(self):
        """Read terms and names, then build the sorted index and lookup table."""
        seqs = [
            Sequence(seq_id, "", [int(t) for t in terms.split(",")])
            for seq_id, terms in self.__read_entries(
                "stripped", self._TERMS_PATTERN)
        ]
        for seq_id, name in self.__read_entries("names", self._NAMES_PATTERN):
            seqs[seq_id - 1].name = name
        index = sorted(seqs)
        lookup = [0] * (len(seqs) + 1)
        for position, seq in enumerate(index):
            lookup[seq.id] = position
        # Publish only after everything was parsed successfully.
        self.__index = index
        self.__lookup = lookup

    def global_match(self) -> "SequenceMatch":
        """Return a match that covers every sequence in the index."""
        self.__ensure_loaded()
        return SequenceMatch(len(self.__index))

    def refine_match(self, match: "SequenceMatch", term: int) -> bool:
        """Narrow ``match`` to the sequences whose next term equals ``term``.

        Sequences whose last term is consumed by this step are moved to
        ``match.finished_ids``. Returns True if the candidate range is still
        non-empty afterwards. An already empty match is left untouched.
        """
        if match.start_index >= match.end_index:
            return False
        index = self.__index
        pos = match.prefix_length
        match.prefix_length += 1
        limit = match.end_index
        start = match.start_index
        # Skip candidates whose term at this position is too small.
        while start < limit and index[start].terms[pos] < term:
            start += 1
        # Among equal prefixes the shorter sequence sorts first, so fully
        # matched sequences sit at the front of the matching run.
        while (start < limit and index[start].terms[pos] == term
               and len(index[start].terms) == match.prefix_length):
            match.finished_ids.append(index[start].id)
            start += 1
        end = start
        while end < limit and index[end].terms[pos] == term:
            end += 1
        match.start_index = start
        match.end_index = end
        return start < end

    def get_match_ids(self, match: "SequenceMatch") -> list[int]:
        """Return the sorted IDs of all candidate and finished sequences."""
        ids = [self.__index[i].id
               for i in range(match.start_index, match.end_index)]
        ids.extend(match.finished_ids)
        return sorted(ids)
37139

38140

39141
class ProgramCache:
@@ -47,7 +149,7 @@ def path(self, id: int) -> str:
47149
asm = "{}.asm".format(Sequence(id).id_str())
48150
return os.path.join(self.__path, dir, asm)
49151

50-
def get(self, id: int) -> Program:
152+
def get(self, id: int):
51153
if id not in self.__cache:
52154
with open(self.path(id), "r") as file:
53155
self.__cache[id] = Program(file.read())

sample.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
from loda.oeis import ProgramCache
44
from loda.runtime import Interpreter
55

6-
# Sample program using the LODA Python module
7-
if __name__ == "__main__":
86

7+
def basic_loda():
98
# Initialize LODA programs cache using *.asm files from tests folder
109
program_dir = os.path.join('tests', 'programs', 'oeis')
1110
program_cache = ProgramCache(program_dir)
@@ -19,3 +18,7 @@
1918
interpreter = Interpreter(program_cache=program_cache)
2019
sequence, _ = interpreter.eval_to_seq(program, num_terms=20)
2120
print(sequence)
21+
22+
23+
if __name__ == "__main__":
24+
basic_loda()

tests/helpers.py

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os.path
55
from loda.oeis import Sequence
66

7+
OEIS_TEST_DIR = os.path.join('tests', 'oeis')
78
OPERATIONS_TEST_DIR = os.path.join('tests', 'operations')
89
PROGRAMS_TEST_DIR = os.path.join('tests', 'programs', 'oeis')
910

tests/oeis/names

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# OEIS Sequence Names (http://oeis.org/names.gz)
2+
# Last Modified: February 19 00:58 EST 2023
3+
# Use of this content is governed by the
4+
# OEIS End-User License: http://oeis.org/LICENSE
5+
A000001 Number of groups of order n.
6+
A000002 Kolakoski sequence: a(n) is length of n-th run; a(1) = 1; sequence consists just of 1's and 2's.
7+
A000003 Number of classes of primitive positive definite binary quadratic forms of discriminant D = -4n; or equivalently the class number of the quadratic order of discriminant D = -4n.
8+
A000004 The zero sequence.
9+
A000005 d(n) (also called tau(n) or sigma_0(n)), the number of divisors of n.

tests/oeis/stripped

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# OEIS Sequence Data (http://oeis.org/stripped.gz)
2+
# Last Modified: February 19 05:58 UTC 2023
3+
# Use of this content is governed by the
4+
# OEIS End-User License: http://oeis.org/LICENSE
5+
A000001 ,0,1,1,1,2,1,2,1,5,2,2,1,5,1,2,1,14,1,5,1,5,2,2,1,15,2,2,5,4,1,4,1,51,1,2,1,14,1,2,2,14,1,6,1,4,2,2,1,52,2,5,1,5,1,15,2,13,2,2,1,13,1,2,4,267,1,4,1,5,1,4,1,50,1,2,3,4,1,6,1,52,15,2,1,15,1,2,1,12,1,10,1,4,2,
6+
A000002 ,1,2,2,1,1,2,1,2,2,1,2,2,1,1,2,1,1,2,2,1,2,1,1,2,1,2,2,1,1,2,1,1,2,1,2,2,1,2,2,1,1,2,1,2,2,1,2,1,1,2,1,1,2,2,1,2,2,1,1,2,1,2,2,1,2,2,1,1,2,1,1,2,1,2,2,1,2,1,1,2,2,1,2,2,1,1,2,1,2,2,1,2,2,1,1,2,1,1,2,2,1,2,1,1,2,1,2,2,
7+
A000003 ,1,1,1,1,2,2,1,2,2,2,3,2,2,4,2,2,4,2,3,4,4,2,3,4,2,6,3,2,6,4,3,4,4,4,6,4,2,6,4,4,8,4,3,6,4,4,5,4,4,6,6,4,6,6,4,8,4,2,9,4,6,8,4,4,8,8,3,8,8,4,7,4,4,10,6,6,8,4,5,8,6,4,9,8,4,10,6,4,12,8,6,6,4,8,8,8,4,8,6,4,
8+
A000004 ,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
9+
A000005 ,1,2,2,3,2,4,2,4,3,4,2,6,2,4,4,5,2,6,2,6,4,4,2,8,3,4,4,6,2,8,2,6,4,4,4,9,2,4,4,8,2,8,2,6,6,4,2,10,3,6,4,6,2,8,4,8,4,4,2,12,2,4,6,7,4,8,2,6,4,8,2,12,2,4,6,6,4,8,2,10,5,4,2,12,4,4,4,8,2,12,4,6,4,4,4,12,2,6,6,9,2,8,2,8,

tests/test_oeis.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from unittest import TestCase
4+
5+
from loda.oeis import SequenceIndex, Sequence
6+
from tests.helpers import OEIS_TEST_DIR
7+
8+
NUM_SEQS = 5
9+
10+
11+
class SequenceIndexTests(TestCase):
    """Checks SequenceIndex against the fixture files in OEIS_TEST_DIR."""

    def setUp(self):
        # A fresh index per test.
        self.index = SequenceIndex(OEIS_TEST_DIR)

    def test_index_size(self):
        self.assertEqual(NUM_SEQS, self.index.size())

    def test_index_get_A000004(self):
        zero_seq: Sequence = self.index.get(4)
        self.assertEqual(4, zero_seq.id)
        self.assertEqual("The zero sequence.", zero_seq.name)
        self.assertEqual([0]*102, zero_seq.terms)

    def test_index_get_A000005(self):
        expected_name = "d(n) (also called tau(n) or sigma_0(n)), the number of divisors of n."
        expected_terms = [1, 2, 2, 3, 2, 4, 2, 4, 3, 4, 2, 6, 2, 4,
                          4, 5, 2, 6, 2, 6, 4, 4, 2, 8, 3, 4, 4, 6,
                          2, 8, 2, 6, 4, 4, 4, 9, 2, 4, 4, 8, 2, 8,
                          2, 6, 6, 4, 2, 10, 3, 6, 4, 6, 2, 8, 4, 8,
                          4, 4, 2, 12, 2, 4, 6, 7, 4, 8, 2, 6, 4, 8,
                          2, 12, 2, 4, 6, 6, 4, 8, 2, 10, 5, 4, 2, 12,
                          4, 4, 4, 8, 2, 12, 4, 6, 4, 4, 4, 12, 2, 6,
                          6, 9, 2, 8, 2, 8]
        divisors: Sequence = self.index.get(5)
        self.assertEqual(5, divisors.id)
        self.assertEqual(expected_name, divisors.name)
        self.assertEqual(expected_terms, divisors.terms)

    def test_global_match(self):
        # The unrefined match must report every ID from 1 to NUM_SEQS.
        everything = self.index.global_match()
        self.assertEqual(list(range(1, NUM_SEQS + 1)),
                         self.index.get_match_ids(everything))

    def test_refine_match_A000001(self):
        steps = [
            (0, [1, 4], True),
            (1, [1], True),
            (1, [1], True),
            (47, [], False),  # test incorrect term
        ]
        self.__test_refine(steps)

    def test_refine_match_A000004(self):
        # One step shared with A000001, 100 further zeros, then a final zero
        # after which no candidate range remains.
        first = [(0, [1, 4], True)]
        middle = [(0, [4], True)] * 100
        last = [(0, [4], False)]
        self.__test_refine(first + middle + last)

    def test_refine_match_A000005(self):
        steps = [
            (1, [2, 3, 5], True),
            (2, [2, 5], True),
            (2, [2, 5], True),
            (3, [5], True),
            (2, [5], True),
            (47, [], False),  # test incorrect term
        ]
        self.__test_refine(steps)

    def __test_refine(self, refinements):
        """Apply each (term, expected_ids, more) step to a fresh global match."""
        state = self.index.global_match()
        for step in refinements:
            term, expected_ids, more = step
            still_open = self.index.refine_match(state, term)
            self.assertEqual(more, still_open)
            self.assertEqual(expected_ids, self.index.get_match_ids(state))

0 commit comments

Comments
 (0)