-
Notifications
You must be signed in to change notification settings - Fork 0
/
4gram-feats.sh
executable file
·47 lines (31 loc) · 1.16 KB
/
4gram-feats.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/bin/bash
#
# 4gram-feats.sh - featurize, train, tag and score a CRFsuite model on the
# 4-gram data set.
#
# Pipeline (all paths are relative to the current working directory):
#   1. Run the featurizer over the train and dev files in ${DATA_DIR},
#      writing ${OUT_DIR}/<name>.feats.
#   2. Train a model with `crfsuite learn -a ap` and dump it as text to
#      ${MODEL}.txt.
#   3. Tag the dev features with `crfsuite tag -r`, writing
#      ${TEST_FEAT}.results.
#   4. Post-process the tagger output, pipe it through ngram2token.pl and the
#      evaluation script, store the report in ${TEST_FEAT}.SUMMARY and print it.
#
# Required in the working directory: featurizer_chardistance.py,
# ngram2token.pl, connlleval.pl, and the data files under ${DATA_DIR}.

# Abort on the first failing command, on any unset variable, and when any
# stage of a pipeline fails, so a broken step can never feed empty or stale
# files to the steps after it.
set -euo pipefail

FLAG=dbpedia-ngram-dic
DATA_DIR="./data"
OUT_DIR="./result/${FLAG}"

TRAIN_DATA="train_4gram_sep"
TEST_DATA="dev_4gram_sep"
TRAIN_FEAT="${OUT_DIR}/${TRAIN_DATA}.feats"
TEST_FEAT="${OUT_DIR}/${TEST_DATA}.feats"
MODEL="${OUT_DIR}/${TRAIN_DATA}.model"

# Commands and their options are kept as arrays so each argument stays a
# separate word without relying on unquoted word-splitting or `eval`.
FEATURIZER=(python ./featurizer_chardistance.py)
CRF="../../crfsuite-0.12/bin/crfsuite"
EVAL=(perl connlleval.pl)
TRAIN_OPTS=(learn -a ap)
TEST_OPTS=(tag -r)

mkdir -p -- "${OUT_DIR}"

# --- feature extraction -----------------------------------------------------
echo "***** Running ${FEATURIZER[*]} on ${TRAIN_DATA} ($(date)) *****"
"${FEATURIZER[@]}" < "${DATA_DIR}/${TRAIN_DATA}" > "${TRAIN_FEAT}"

echo "***** Running ${FEATURIZER[*]} on ${TEST_DATA} ($(date)) *****"
"${FEATURIZER[@]}" < "${DATA_DIR}/${TEST_DATA}" > "${TEST_FEAT}"

# --- training ---------------------------------------------------------------
"${CRF}" "${TRAIN_OPTS[@]}" -m "${MODEL}" "${TRAIN_FEAT}"

# Dump the trained model in text form next to the binary model.
"${CRF}" dump "${MODEL}" > "${MODEL}.txt"

# --- prediction -------------------------------------------------------------
"${CRF}" "${TEST_OPTS[@]}" -m "${MODEL}" "${TEST_FEAT}" > "${TEST_FEAT}.results"
echo "${TEST_FEAT} finish prediction"

# --- evaluation -------------------------------------------------------------
# Turn tabs into spaces and strip carriage returns from the tagger output,
# then hand it to ngram2token.pl (called with "4" and the dev file;
# presumably it maps 4-gram predictions back to tokens - confirm against that
# script) before scoring with the evaluation script.
tr '\t' ' ' < "${TEST_FEAT}.results" \
  | perl -ne '{chomp;s/\r//g;print $_,"\n";}' \
  | perl ngram2token.pl 4 "${DATA_DIR}/dev" \
  | "${EVAL[@]}" > "${TEST_FEAT}.SUMMARY"

cat -- "${TEST_FEAT}.SUMMARY"