Skip to content

Commit 7eaabf8

Browse files
committed
Add text code benchmark
1 parent 366185f commit 7eaabf8

File tree

2 files changed

+31
-28
lines changed

2 files changed

+31
-28
lines changed

.github/workflows/tests.yml

+3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ jobs:
3232
- name: Run Tests (With Extension Modules)
3333
run: poetry run pytest -q tests --turbo
3434

35+
- name: Benchmark (With Extension Modules)
36+
run: poetry run python tests/benchmark.py  # forward slash: bash on Linux/macOS runners treats "\b" as an escape and would look for "testsbenchmark.py"
37+
3538
- name: Upload coverage to Codecov
3639
uses: codecov/codecov-action@v3
3740
if: matrix.os == 'ubuntu-20.04' && matrix.python-version == '3.9'

tests/benchmark.py

+28-28
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
import time
33
import psutil
44
import random
5-
import string
6-
import unicodedata
75
import platform
86
import iscc_core as ic
97
from iscc_core.code_content_text import gen_text_code
@@ -16,33 +14,35 @@
1614

1715

1816
def generate_text(length, seed=42):
19-
random.seed(seed)
20-
unicode_ranges = [
21-
(0x0021, 0x007E), # Basic Latin
17+
"""
18+
Generates deterministic random Unicode text with a given length and seed.
19+
20+
Parameters:
21+
length (int): The number of characters to generate.
22+
seed (int): The seed for the random generator to make the function deterministic.
23+
24+
Returns:
25+
str: A deterministic random Unicode string of the specified length.
26+
"""
27+
# UTF-8 encodable Unicode character ranges
28+
ranges = [
29+
(0x0020, 0x007E), # Basic Latin (includes common characters, numbers, punctuation)
2230
(0x00A1, 0x00FF), # Latin-1 Supplement
23-
(0x0100, 0x017F), # Latin Extended-A
24-
(0x0180, 0x024F), # Latin Extended-B
25-
(0x0370, 0x03FF), # Greek and Coptic
2631
(0x0400, 0x04FF), # Cyrillic
27-
(0x3040, 0x309F), # Hiragana
28-
(0x4E00, 0x9FFF), # CJK Unified Ideographs (subset)
32+
(0x0370, 0x03FF), # Greek
33+
(0x0530, 0x058F), # Armenian
34+
(0x4E00, 0x9FFF), # CJK Unified Ideographs (common in Chinese, Japanese, Korean)
35+
(0x1F300, 0x1F5FF), # Miscellaneous Symbols and Pictographs (includes emojis)
2936
]
3037

31-
text = []
32-
while len("".join(text)) < length:
33-
char_type = random.choice(["letter", "digit", "punctuation", "whitespace"])
34-
if char_type == "letter":
35-
range_start, range_end = random.choice(unicode_ranges)
36-
char = chr(random.randint(range_start, range_end))
37-
elif char_type == "digit":
38-
char = random.choice(string.digits)
39-
elif char_type == "punctuation":
40-
char = random.choice(string.punctuation)
41-
else:
42-
char = " "
43-
text.append(char)
38+
random.seed(seed)
39+
40+
def get_random_char():
41+
# Choose a random range and then pick a random character within that range
42+
char_range = random.choice(ranges)
43+
return chr(random.randint(*char_range))
4444

45-
return "".join(text)[:length]
45+
return "".join(get_random_char() for _ in range(length))
4646

4747

4848
def benchmark_gen_text_code(text_length, iterations=100):
@@ -63,10 +63,8 @@ def benchmark_gen_text_code(text_length, iterations=100):
6363

6464

6565
def main():
66-
text_length = 3000 * 10 # 10 pages
67-
iterations = 10
68-
69-
pages_per_second, memory_increase = benchmark_gen_text_code(text_length, iterations)
66+
text_length = 3000 * 100 # 100 pages
67+
iterations = 3
7068

7169
print("System Information:")
7270
print(f"OS: {platform.system()} {platform.release()}")
@@ -103,6 +101,8 @@ def main():
103101
supported_flags = [flag for flag in relevant_flags if flag in cpu_info["flags"]]
104102
print(f"Instructions: {', '.join(supported_flags)}")
105103

104+
pages_per_second, memory_increase = benchmark_gen_text_code(text_length, iterations)
105+
106106
print("\nBenchmark results for gen_text_code:")
107107
print(f"Pages per second: {pages_per_second:.2f}")
108108
print(f"Max memory increase: {memory_increase:.2f} MB")

0 commit comments

Comments
 (0)