Skip to content

Commit 9536307

Browse files
committed
Update glove2word2vec.py
1 parent f607d5f commit 9536307

File tree

1 file changed

+6
-11
lines changed

1 file changed

+6
-11
lines changed

glove2word2vec.py

+6 -11
Original file line number | Diff line number | Diff line change
@@ -31,17 +31,12 @@ def glove2word2vec(glove_vector_file, output_model_file):
31 31
"""Convert GloVe vectors into word2vec C format"""
32 32

33 33
def get_info(glove_file_name):
34-
"""
35-
Function to calculate the number of lines and dimensions of the GloVe vectors to make it Gensim compatible
36-
"""
37-
num_lines = sum(1 for line in smart_open.smart_open(glove_vector_file))
38-
if 'twitter' in glove_file_name:
39-
dims= re.findall('\d+',glove_vector_file.split('.')[3])
40-
dims=''.join(dims)
41-
else:
42-
dims=re.findall('\d+',glove_vector_file.split('.')[2])
43-
dims=''.join(dims)
44-
return num_lines, dims
34+
"""Return the number of vectors and dimensions in a file in GloVe format."""
35+
with smart_open(glove_file_name) as f:
36+
num_lines = sum(1 for line in f)
37+
with smart_open(glove_file_name) as f:
38+
num_dims = len(f.readline().split()) - 1
39+
return num_lines, num_dims
45 40

46 41
def prepend_line(infile, outfile, line):
47 42
"""

0 commit comments

Comments
 (0)