使用 jTessBoxEditor 训练 Tesseract 字库的过程比较复杂,
因此考虑把各个步骤集成到一个脚本中。
集成后配合 jTessBoxEditor 工具,
只需要修正(标注)一下 box,
即可生成训练文件以供使用。
具体脚本如下:
#!/bin/bash
# First stage of the training helper: enter the work directory, run
# tesseract in makebox mode on the merged .tif image, then ask the user to
# correct the resulting .box file in JtessBoxEditor.

# Prefix used for the generated training files (shapetable, inttemp, ...).
font_name='myfont'
# Directory that contains the merged .tif image.
workpath="./"
# Base name (without extension) of the merged .tif image and its .box/.tr files.
mergedtif_file='myfont.arial.exp0'

echo 'cd '"${workpath}"
cd "${workpath}" || {
  echo "cannot cd to ${workpath}" >&2
  exit 1
}

# Let ls expand the glob itself. Passing an unquoted *.tif to grep makes the
# shell turn the matches into grep's pattern and file arguments, which only
# works by accident when exactly one .tif file exists.
ls -l -- *.tif

echo 'generator box file from tif file...'
# Stop here when tesseract fails: there would be no box file to edit.
if ! tesseract "${mergedtif_file}.tif" "${mergedtif_file}" batch.nochop makebox; then
  echo "tesseract makebox failed for ${mergedtif_file}.tif" >&2
  exit 1
fi
echo 'generator box file from tif file sucess!'
ls -l -- "${mergedtif_file}.box"
echo '请在JtessBoxEditor中编辑每个字符的box定位数据'
# pause
#######################################
# Wait for a single key press.
# The terminal on stdin is put into raw, no-echo mode while one byte is read
# from /dev/tty, and its previous settings are restored afterwards.
# Outputs: the byte read, written to stdout by dd
#######################################
get_char() {
  local saved_stty
  # stty -g prints the current settings in a form stty can read back.
  saved_stty=$(stty -g)
  stty -echo raw
  dd if=/dev/tty bs=1 count=1 2>/dev/null
  if [[ -n "${saved_stty}" ]]; then
    stty "${saved_stty}"
  else
    # Settings could not be saved; at least leave raw/no-echo mode again.
    stty -raw echo
  fi
}
# Pause until a key is pressed. When the script's first argument is
# non-empty it is printed as the prompt (echo -e interprets backslash
# escapes); otherwise the default prompt is shown.
if [[ -n "$1" ]]; then
  echo -e "$1"
else
  echo '请按任意键继续...'
fi
get_char
# confirm cleanup: ask whether to delete the intermediate training files
#######################################
# Ask whether the intermediate training files should be deleted and act on
# the answer. Unrecognised answers repeat this same question.
# Globals:   font_name, mergedtif_file (read)
# Returns:   0 once a yes/no answer was handled,
#            1 if input ends before a valid answer was given
#######################################
confim_clear() {
  local input
  while true; do
    # read fails at end-of-input; give up instead of asking forever. A final
    # line without a trailing newline is still honoured.
    if ! read -r -p "是否删除训练过程文件? [Y/n] " input && [[ -z "${input}" ]]; then
      return 1
    fi
    case "${input}" in
      [yY][eE][sS] | [yY])
        rm -rf -- font_properties \
          "${mergedtif_file}.tr" \
          "${font_name}.shapetable" \
          "${font_name}.normproto" \
          "${font_name}.inttemp" \
          "${font_name}.pffmtable" \
          "${font_name}.unicharset"
        echo "删除成功"
        return 0
        ;;
      [nN][oO] | [nN])
        echo "未删除训练过程文件"
        return 0
        ;;
      *)
        # Loop back to this prompt (not to the box-confirmation prompt).
        echo "Invalid input..."
        ;;
    esac
  done
}
# clustering
#######################################
# Run mftraining and cntraining on the .tr file, prefix their output files
# with the font name, combine them with combine_tessdata and finally offer
# to delete the intermediate files.
# Globals:   font_name, mergedtif_file (read)
# Returns:   1 as soon as one step fails (later steps are skipped),
#            otherwise the status of confim_clear
#######################################
clustering() {
  local tr_file="${mergedtif_file}.tr"
  local artifact

  if ! mftraining -F font_properties -U unicharset \
    -O "${font_name}.unicharset" "${tr_file}"; then
    echo "mftraining failed for ${tr_file}" >&2
    return 1
  fi
  if ! cntraining "${tr_file}"; then
    echo "cntraining failed for ${tr_file}" >&2
    return 1
  fi

  # NOTE(review): mftraining was already asked to write
  # ${font_name}.unicharset via -O; moving "unicharset" last replaces that
  # file with the -U input file. Kept as in the original - confirm intent.
  for artifact in shapetable normproto inttemp pffmtable unicharset; do
    if ! mv -- "${artifact}" "${font_name}.${artifact}"; then
      echo "cannot rename ${artifact}" >&2
      return 1
    fi
  done

  if ! combine_tessdata "${font_name}."; then
    echo "combine_tessdata failed for ${font_name}." >&2
    return 1
  fi
  confim_clear
}
# define font properties
#######################################
# Make sure the font_properties file contains the "<font_name> 0 0 0 0 0"
# entry, then continue with the clustering step.
# Globals:   font_name (read)
# Returns:   the status of clustering
#######################################
generator_font_properties() {
  local entry="${font_name} 0 0 0 0 0"
  echo "generator font properties..."
  # Append only when the exact line is missing, so repeated runs that keep
  # the intermediate files do not pile up duplicate entries. A missing file
  # makes grep fail, which also leads to the append.
  if ! grep -qxF -- "${entry}" font_properties 2>/dev/null; then
    printf '%s\n' "${entry}" >> font_properties
  fi
  clustering
}
# generate unicharset
#######################################
# Run unicharset_extractor on the box file, then hand over to the
# font-properties step.
# Globals:   mergedtif_file (read)
#######################################
generator_unicharset() {
  local box_file="${mergedtif_file}.box"
  printf '%s\n' "generator unicharset..."
  unicharset_extractor "${box_file}"
  generator_font_properties
}
# train
#######################################
# Run tesseract in box.train mode on the merged image, list the resulting
# .tr file and continue with the unicharset step.
# Globals:   mergedtif_file (read)
# Returns:   1 if tesseract fails (the following steps are skipped),
#            otherwise the status of generator_unicharset
#######################################
train() {
  local base="${mergedtif_file}"
  echo "start train..."
  # Do not announce completion or continue when tesseract reports an error.
  if ! tesseract "${base}.tif" "${base}" box.train; then
    echo "tesseract box.train failed for ${base}.tif" >&2
    return 1
  fi
  echo "train has complete!"
  ls -l -- "${base}.tr"
  generator_unicharset
}
# confirm that the box corrections are finished
#######################################
# Ask whether the box corrections are finished. "yes" starts train; "no" and
# unrecognised answers repeat the question.
# Returns:   the status of train after a "yes" answer,
#            1 if input ends before "yes" was given
#######################################
confim_msg() {
  local input
  while true; do
    # read fails at end-of-input; stop instead of re-asking without bound.
    # A final line without a trailing newline is still honoured.
    if ! read -r -p "确认已经完成box定位偏移的修正? [Y/n] " input && [[ -z "${input}" ]]; then
      return 1
    fi
    case "${input}" in
      [yY][eE][sS] | [yY])
        train
        return
        ;;
      [nN][oO] | [nN])
        echo "请完成box定位偏移的修正后继续"
        ;;
      *)
        echo "Invalid input..."
        ;;
    esac
  done
}
# Script entry point: ask the user to confirm that the box corrections are
# done; a "yes" answer starts the training chain through train.
confim_msg