Skip to content

Commit c485c89

Browse files
committed
FEAT: improved the speed of the soundex code and fixed it to produce the same results as PHP.
1 parent 0b0b6ec commit c485c89

File tree

3 files changed

+91
-65
lines changed

3 files changed

+91
-65
lines changed

src/boot/sysobj.reb

+1
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ modules: object [
256256
httpd: https://src.rebol.tech/modules/httpd.reb
257257
prebol: https://src.rebol.tech/modules/prebol.reb
258258
scheduler: https://src.rebol.tech/modules/scheduler.reb
259+
soundex: https://src.rebol.tech/modules/soundex.reb
259260
spotify: https://src.rebol.tech/modules/spotify.reb
260261
thru-cache: https://src.rebol.tech/modules/thru-cache.reb
261262
to-ascii: https://src.rebol.tech/modules/to-ascii.reb

src/modules/soundex.reb

+68-65
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,81 @@
11
REBOL [
2-
Title: "Soundex"
3-
Date: 16-Jul-2024
4-
File: %soundex.r
5-
Author: "Allen Kamp, Oldes"
6-
Purpose: {Soundex Encoding returns similar codes for similar sounding words or names. eg Stephens, Stevens are both S315, Smith and Smythe are both S53. Useful for adding Sounds-like searching to databases}
7-
Comment: {
8-
This is the basic Soundex algorithm (There are a number of different
9-
one floating around)
2+
Title: "Soundex"
3+
Date: 16-Jul-2024
4+
File: %soundex.reb
5+
Author: "Allen Kamp, Oldes"
6+
Purpose: {Soundex Encoding returns similar codes for similar sounding words or names. eg Stephens, Stevens are both S315, Smith and Smythe are both S53. Useful for adding Sounds-like searching to databases}
7+
Comment: {
8+
This is the basic Soundex algorithm: https://en.wikipedia.org/wiki/Soundex
109
11-
1. Remove vowels, H, W and Y
12-
2. Encode each char with its code value
13-
3. Remove adjacent duplicate numbers
10+
1. Remove vowels, H, W and Y
11+
2. Encode each char with its code value
12+
3. Remove adjacent duplicate numbers
1413
15-
4. Return First letter, followed by the next 3 letter's code
16-
numbers, if they exist.
14+
4. Return First letter, followed by the next 3 letter's code
15+
numbers, if they exist.
1716
18-
TODO: Other algorithms: Extended Soundex, Metaphone and the LC Cutter table
19-
}
20-
Language: "English"
21-
Email: %allenk--powerup--com--au
22-
library: [
23-
level: 'intermediate
24-
platform: 'all
25-
type: 'tool
26-
domain: [DB text text-processing]
27-
tested-under: none
28-
support: none
29-
license: none
30-
see-also: none
31-
]
32-
Version: 1.1.0
33-
Type: module
34-
Exports: [soundex]
35-
Needs: 3.0.0
36-
History: [
37-
17-Jul-1999 @Allen "Initial version"
38-
16-Jul-2024 @Oldes "Ported to Rebol3"
17+
TODO: Other algorithms: Extended Soundex, Metaphone and the LC Cutter table
18+
}
19+
Version: 2.0.0
20+
Type: module
21+
Name: soundex
22+
Exports: [soundex]
23+
Needs: 3.0.0
24+
History: [
25+
17-Jul-1999 @Allen "Initial version"
26+
16-Jul-2024 @Oldes "Ported to Rebol3"
3927

40-
]
28+
]
4129
]
4230

4331
soundex: function/with [
44-
{Returns the Census Soundex Code for the given string}
45-
string [any-string!] "String to Encode"
32+
{Returns the Census Soundex Code for the given string}
33+
string [any-string!] "String to Encode"
4634
][
35+
code: make string! 4
36+
prev: none
4737

48-
code: make string! ""
38+
if empty? string [return "0000"]
4939

50-
either all [string? string string <> ""] [
51-
string: uppercase trim copy string
52-
53-
foreach letter string [
54-
parse to string! letter [soundex-match | soundex-no-match]
55-
if 4 = length? code [break] ;maximum length for code is 4
56-
]
57-
] [
58-
return string ; return unchanged
59-
]
60-
change code first string ; replace first number with first letter
61-
return code
40+
foreach letter string [
41+
either val: mapping/:letter [
42+
if val != prev [append code val prev: val]
43+
][
44+
if find "aeiouhwy" letter [prev: #" "]
45+
if empty? code [append code #"0"]
46+
]
47+
if 4 = length? code [break] ;maximum length for code is 4
48+
]
49+
change code uppercase first string
50+
pad/with code 4 #"0"
51+
code
6252
][
63-
code: val: none
64-
; Create Rules
65-
set1: [[#"B" | #"F" | #"P" | #"V"](val: #"1")]
66-
set2: [[#"C" | #"G" | #"J" | #"K" | #"Q" | #"S" | #"X" | #"Z"](val: #"2")]
67-
set3: [[#"D" | #"T"](val: #"3")]
68-
set4: [[#"L"](val: "4")]
69-
set5: [[#"M" | #"N"] (val: #"5")]
70-
set6: [[#"R"](val: #"6")]
71-
; Append val to code if not a duplicate of previous code val
72-
soundex-match: [[set1 | set2 | set3 | set4 | set5 | set6 ]
73-
(if val <> back tail code [append code val]) ]
53+
code: val: prev: none
54+
mapping: make map! [
55+
;Set1
56+
#"B" #"1"
57+
#"F" #"1"
58+
#"P" #"1"
59+
#"V" #"1"
60+
;Set2
61+
#"C" #"2"
62+
#"G" #"2"
63+
#"J" #"2"
64+
#"K" #"2"
65+
#"Q" #"2"
66+
#"S" #"2"
67+
#"X" #"2"
68+
#"Z" #"2"
69+
;Set3
70+
#"D" #"3"
71+
#"T" #"3"
72+
;Set4
73+
#"L" #"4"
74+
;Set5
75+
#"M" #"5"
76+
#"N" #"5"
77+
;Set6
78+
#"R" #"6"
79+
]
80+
]
7481

75-
; If letter not a matched letter its val is 0, but we only care
76-
; about it if it is the first letter.
77-
soundex-no-match: [(if (length? code) = 0 [append code "0"])]
78-
]

src/tests/test-soundex.r3

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Rebol [
2+
Title: "Test Soundex function"
3+
Date: 16-Jul-2024
4+
Author: "Oldes"
5+
File: %test-soundex.r3
6+
Version: 1.0.0
7+
]
8+
use [tmp][
9+
tmp: none
10+
foreach [code name] [
11+
"R163" "Robert"
12+
"R163" "Rupert"
13+
"R150" "Rubin"
14+
"A226" "Ashcraft"
15+
"A226" "Ashcroft"
16+
"T522" "Tymczak" ;; the chars 'z' and 'k' in the name are coded as 2 twice since a vowel lies in between them
17+
"P236" "Pfister"
18+
"H555" "Honeyman"
19+
][
20+
printf [5 9 5] reduce [code name tmp: soundex name code == tmp]
21+
]
22+
]

0 commit comments

Comments
 (0)