@@ -28,51 +28,115 @@ def __lt__(self, other) -> bool:
28
28
def id_str(self) -> str:
    """Return the ID rendered as `A` followed by the number zero-padded to six digits."""
    padded_number = format(self.id, "06")
    return "A" + padded_number
30
30
31
@classmethod
def load_oeis(cls, oeis_path: str) -> list:
    """
    Load sequences from the `stripped` and `names` files.

    Args:
        oeis_path: Folder that contains the `stripped` and `names` files.

    Returns:
        A list in which the entry at index `i` carries the terms and the
        name parsed for the sequence with numeric ID `i`.

    Raises:
        ValueError: If a non-comment line does not match the expected
            format, or a term is not a valid integer.
    """
    seqs = []
    # load sequence terms; lines look like `A000045 ,0,1,1,2,3,5,`
    # NOTE: `-` is listed first in the character class so that it is a
    # literal minus sign. The earlier `[\ -0-9,]` form was parsed as a
    # space-to-'0' range plus `-`, `9` and `,`, which rejects digits 1-8.
    terms_pattern = re.compile(r"^A([0-9]+) ,([-0-9,]+),$")
    stripped = os.path.join(oeis_path, "stripped")
    with open(stripped) as file:
        for line in file:
            match = cls.__parse_line(line, terms_pattern)
            if not match:
                # blank line or comment
                continue
            seq_id = int(match.group(1))
            cls.__fill_seqs(seqs, seq_id)
            seqs[seq_id].id = seq_id
            seqs[seq_id].terms = [int(t) for t in match.group(2).split(",")]
    # load sequence names; lines look like `A000045 Fibonacci numbers`
    name_pattern = re.compile("^A([0-9]+) (.+)$")
    names = os.path.join(oeis_path, "names")
    with open(names) as file:
        for line in file:
            match = cls.__parse_line(line, name_pattern)
            if not match:
                continue
            seq_id = int(match.group(1))
            # the names file may mention IDs beyond those in `stripped`
            cls.__fill_seqs(seqs, seq_id)
            seqs[seq_id].name = match.group(2)
    return seqs
31
63
32
def __parse_line(line: str, pattern):
    """
    Match one input line against `pattern`.

    Returns the match object, or None when the line is blank or a `#`
    comment. Raises ValueError for any other line that does not match.
    """
    text = line.strip()
    if not text or text.startswith("#"):
        return None
    found = pattern.match(text)
    if found:
        return found
    raise ValueError("parse error: {}".format(text))
64
@classmethod
def __parse_line(cls, line: str, pattern):
    """
    Match one input line against a compiled pattern.

    Args:
        line: Raw line; surrounding whitespace is stripped before matching.
        pattern: Compiled regular expression applied from the line start.

    Returns:
        The match object, or None for blank lines and `#` comments.

    Raises:
        ValueError: If a non-blank, non-comment line does not match.
    """
    content = line.strip()
    is_blank_or_comment = not content or content.startswith("#")
    if is_blank_or_comment:
        return None
    result = pattern.match(content)
    if result:
        return result
    raise ValueError("parse error: {}".format(content))
40
73
74
@classmethod
def __fill_seqs(cls, seqs: list, id: int):
    """
    Grow `seqs` in place so that index `id` is valid.

    Empty placeholder sequences (no name, no terms) are appended for every
    missing index up to and including `id + 1`; nothing happens when the
    list is already that long.
    """
    missing_indices = range(len(seqs), id + 2)
    seqs.extend(Sequence(index, "", []) for index in missing_indices)
41
79
42
def __fill_seqs(seqs: list, id: int):
    """
    Append empty placeholder sequences to `seqs` until it has `id + 2`
    entries, so that `seqs[id]` can be assigned afterwards.
    """
    next_index = len(seqs)
    while next_index < id + 2:
        seqs.append(Sequence(next_index, "", []))
        next_index += 1
80
def load_b_file(self, path: str) -> list:
    """
    Load additional terms from a b-file.

    Args:
        path: Either path to a b-file (uncompressed `b*.txt` file) or a
            folder that contains the b-files in sub-directories, e.g.
            `b/123/b123456.txt`. An empty string is handled like a folder,
            giving a path relative to the current working directory.

    Returns:
        The terms read from the b-file, aligned with `self.terms`. If the
        b-file provides fewer terms than `self.terms`, `self.terms` is
        returned instead.

    Raises:
        ValueError: If a data line has fewer than two fields or a field is
            not an integer, if the indices are not consecutive, or if the
            terms cannot be reconciled with `self.terms`.
    """
    if len(path) == 0 or os.path.isdir(path):
        # derive `<path>/b/<id // 1000>/b<id>.txt` from the sequence ID
        sub_dir = "{:03}".format(self.id // 1000)
        file_name = "b{:06}.txt".format(self.id)
        path = os.path.join(path, "b", sub_dir, file_name)
    terms = []
    # None means "no data line seen yet". A numeric sentinel such as -1 is
    # not safe here because the running index can legitimately reach -1
    # when a file starts at a negative index, which would hide a gap.
    expected_index = None
    with open(path) as b_file:
        for line in b_file:
            line = line.strip()
            if not line or line[0] == "#":
                continue
            fields = line.split()
            if len(fields) < 2:
                raise ValueError("unexpected line: {}".format(line))
            index = int(fields[0])
            value = int(fields[1])
            if expected_index is None:
                # the first data line defines the starting index
                expected_index = index
            if index != expected_index:
                raise ValueError("unexpected index: {}".format(index))
            terms.append(value)
            expected_index += 1
    # the b-file may start at a different position than the known terms
    terms = self.__align(terms)
    if terms is None:
        raise ValueError("unexpected terms in b-file")
    if len(terms) < len(self.terms):
        # the b-file adds nothing beyond what is already known
        terms = self.terms
    elif terms[0:len(self.terms)] != self.terms:
        raise ValueError("unexpected terms in b-file")
    return terms
47
118
48
def load(oeis_path: str) -> list:
    """
    Load sequences from the `stripped` and `names` files.

    Args:
        oeis_path: Folder that contains the `stripped` and `names` files.

    Returns:
        A list in which the entry at index `i` carries the terms and the
        name parsed for the sequence with numeric ID `i`.

    Raises:
        ValueError: If a non-comment line does not match the expected
            format, or a term is not a valid integer.
    """
    seqs = []
    # load sequence terms; lines look like `A000045 ,0,1,1,2,3,5,`
    # NOTE: `-` is listed first in the character class so that it is a
    # literal minus sign. The earlier `[\ -0-9,]` form was parsed as a
    # space-to-'0' range plus `-`, `9` and `,`, which rejects digits 1-8.
    terms_pattern = re.compile(r"^A([0-9]+) ,([-0-9,]+),$")
    stripped = os.path.join(oeis_path, "stripped")
    with open(stripped) as file:
        for line in file:
            match = __parse_line(line, terms_pattern)
            if not match:
                # blank line or comment
                continue
            seq_id = int(match.group(1))
            __fill_seqs(seqs, seq_id)
            seqs[seq_id].id = seq_id
            seqs[seq_id].terms = [int(t) for t in match.group(2).split(",")]
    # load sequence names; lines look like `A000045 Fibonacci numbers`
    name_pattern = re.compile("^A([0-9]+) (.+)$")
    names = os.path.join(oeis_path, "names")
    with open(names) as file:
        for line in file:
            match = __parse_line(line, name_pattern)
            if not match:
                continue
            seq_id = int(match.group(1))
            __fill_seqs(seqs, seq_id)
            seqs[seq_id].name = match.group(2)
    return seqs
119
def __align(self, terms: list, max_offset: int = 10) -> list:
    """
    Align terms from a b-file with the known terms, shifting if necessary.

    Args:
        terms: Terms read from the b-file.
        max_offset: Largest shift (in positions) that is tried.

    Returns:
        `terms` unchanged when both lists already agree on their common
        prefix; `terms` without its first `offset` entries when the b-file
        has extra leading terms; the first `offset` known terms followed by
        `terms` when the b-file starts later; None if no shift up to
        `max_offset` makes the lists agree.
    """
    known = self.terms
    overlap = min(len(known), len(terms))
    # check if they agree on prefix already
    if known[:overlap] == terms[:overlap]:
        return terms
    # try to align them; a shift must stay smaller than the overlap
    for shift in range(1, min(max_offset + 1, overlap)):
        # b-file has `shift` extra leading terms
        drop_leading_ok = not any(
            b_term != known_term
            for b_term, known_term in zip(terms[shift:], known[:overlap])
        )
        # b-file starts `shift` terms after the known terms
        prepend_known_ok = not any(
            b_term != known_term
            for b_term, known_term in zip(terms[:overlap], known[shift:])
        )
        if drop_leading_ok:
            return terms[shift:]
        if prepend_known_ok:
            combined = known[:shift]
            combined.extend(terms)
            return combined
    return None
0 commit comments