-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathrun.sh
executable file
·62 lines (45 loc) · 1.87 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Abort on the first failed command, on use of an unset variable,
# and on any failed stage of a pipeline.
set -euo pipefail

# dataset directory
dataset=datasets/arxiv
# text file name; one document per line
text_file=phrase_text.txt
# category name file; one category per line; each line contains "[category id] [category names]"
category_file=category_names.txt
# taxonomy file; one edge per line; each line contains "[parent id] [child id]"
taxo_file=taxonomy.txt
# taxonomy file in matrix form; generated by read_taxo.py
matrix_file="matrix_${taxo_file}"
# node level information file; generated by read_taxo.py
level_file="level_${taxo_file}"
# Generate the taxonomy matrix/level files once; skip when both already exist.
# All expansions are quoted so paths with spaces cannot word-split (SC2086),
# and a preprocessing failure aborts instead of silently continuing to training.
if [ ! -f "${dataset}/${matrix_file}" ] || [ ! -f "${dataset}/${level_file}" ]; then
	python src/read_taxo.py --dataset "${dataset}" --category_file "${category_file}" --taxo_file "${taxo_file}" || {
		echo "error: taxonomy preprocessing (src/read_taxo.py) failed" >&2
		exit 1
	}
fi
# category (tree-node) embedding output file name
tree_emb_file="tree_emb.txt"
# word embedding output file name
word_emb_file="emb.txt"
# dimensionality of the learned word embeddings
word_dim=100
# size of the local context window
window_size=5
# words occurring fewer than this many times in the corpus are discarded
min_count=5
# total number of passes over the corpus
iter=10
# number of parallel worker threads
threads=20
# file holding the pretrained word embeddings to initialize from
pretrain_emb="jose_100.txt"
# If the pretrained embedding is not yet extracted but the downloaded
# archive is present, unzip it. The archive is removed only after a
# successful extraction; a failed unzip aborts instead of letting the
# trainer run with a missing embedding file.
if [ ! -f "$pretrain_emb" ] && [ -f "jose_100.zip" ]; then
	echo "Unzipping downloaded pretrained embedding"
	unzip jose_100.zip && rm jose_100.zip || {
		echo "error: failed to extract jose_100.zip" >&2
		exit 1
	}
fi
# Build the JoSH trainer (no-op when already up to date). `make -C`
# replaces the original unchecked `cd ./src && make && cd ..`, which
# could run make in the wrong directory if the cd failed.
make -C src josh || {
	echo "error: build of src/josh failed" >&2
	exit 1
}

# Train joint word / category / taxonomy embeddings. Every expansion is
# quoted so paths containing spaces survive word-splitting (SC2086).
./src/josh -train "${dataset}/${text_file}" \
	-category-file "${dataset}/${category_file}" -matrix-file "${dataset}/${matrix_file}" -level-file "${dataset}/${level_file}" \
	-load-emb "${pretrain_emb}" -res "${dataset}/res.txt" -k 10 \
	-word-emb "${dataset}/${word_emb_file}" -tree-emb "${dataset}/${tree_emb_file}" \
	-size "${word_dim}" -window "${window_size}" -sample 1e-3 -word-margin 0.25 -cat-margin 0.9 \
	-alpha 0.025 -tree-period 128 -global-lambda 1.5 -lambda-cat 1.0 -lambda-tree 1.0 -negative 2 \
	-expand 1 -pretrain 2 \
	-min-count "${min_count}" -iter "${iter}" -threads "${threads}"