2
2
import time
3
3
import psutil
4
4
import random
5
- import string
6
- import unicodedata
7
5
import platform
8
6
import iscc_core as ic
9
7
from iscc_core .code_content_text import gen_text_code
16
14
17
15
18
16
def generate_text(length: int, seed: int = 42) -> str:
    """
    Generate deterministic pseudo-random Unicode text.

    Each character is produced by first picking one of the code point ranges
    below uniformly at random and then picking a code point uniformly inside
    that range (both bounds inclusive). Small ranges are therefore
    over-represented per code point compared to large ones such as CJK.

    A private ``random.Random`` instance is used, so calling this function
    does not reseed or otherwise disturb the module-level ``random`` state.
    For a given seed the result is identical to seeding the global generator
    and drawing with ``random.choice`` / ``random.randint``.

    Parameters:
        length (int): The number of characters to generate. Zero or a
            negative value yields an empty string.
        seed (int): The seed for the random generator to make the function
            deterministic.

    Returns:
        str: A deterministic random Unicode string of the specified length.
    """
    # Inclusive (first, last) code point ranges. None of them overlaps the
    # surrogate block (U+D800..U+DFFF), so the result is always UTF-8 encodable.
    # The order of the entries is part of the deterministic output.
    ranges = (
        (0x0020, 0x007E),  # Basic Latin (includes common characters, numbers, punctuation)
        (0x00A1, 0x00FF),  # Latin-1 Supplement
        (0x0400, 0x04FF),  # Cyrillic
        (0x0370, 0x03FF),  # Greek
        (0x0530, 0x058F),  # Armenian
        (0x4E00, 0x9FFF),  # CJK Unified Ideographs (common in Chinese, Japanese, Korean)
        (0x1F300, 0x1F5FF),  # Miscellaneous Symbols and Pictographs (includes emojis)
    )

    # Local generator: same sequence as random.seed(seed) on the global one,
    # but without the global side effect.
    rng = random.Random(seed)

    # Draw order per character is fixed: range first, then code point.
    return "".join(chr(rng.randint(*rng.choice(ranges))) for _ in range(length))
46
46
47
47
48
48
def benchmark_gen_text_code (text_length , iterations = 100 ):
@@ -63,10 +63,8 @@ def benchmark_gen_text_code(text_length, iterations=100):
63
63
64
64
65
65
def main ():
66
- text_length = 3000 * 10 # 10 pages
67
- iterations = 10
68
-
69
- pages_per_second , memory_increase = benchmark_gen_text_code (text_length , iterations )
66
+ text_length = 3000 * 100 # 100 pages
67
+ iterations = 3
70
68
71
69
print ("System Information:" )
72
70
print (f"OS: { platform .system ()} { platform .release ()} " )
@@ -103,6 +101,8 @@ def main():
103
101
supported_flags = [flag for flag in relevant_flags if flag in cpu_info ["flags" ]]
104
102
print (f"Instructions: { ', ' .join (supported_flags )} " )
105
103
104
+ pages_per_second , memory_increase = benchmark_gen_text_code (text_length , iterations )
105
+
106
106
print ("\n Benchmark results for gen_text_code:" )
107
107
print (f"Pages per second: { pages_per_second :.2f} " )
108
108
print (f"Max memory increase: { memory_increase :.2f} MB" )
0 commit comments