使用JtessBoxEditor训练

训练过程比较复杂,因此考虑把各个步骤集成到一个脚本中。集成后结合JtessBoxEditor工具,只需要标注(校正)一下box,即可生成训练文件以供使用。

具体脚本如下:

#!/bin/bash
# First stage of the workflow: enter the working directory and have
# tesseract generate the initial .box file for the merged training image.
# The operator is then asked to correct the boxes in JtessBoxEditor.
font_name='myfont'
workpath="./"
mergedtif_file='myfont.arial.exp0'
echo "cd ${workpath}"
# Every later step uses relative paths, so stop if the directory cannot
# be entered instead of running the tools in the wrong place.
cd "${workpath}" || { echo "cannot cd to ${workpath}" >&2; exit 1; }
# List the .tif images by handing the glob to ls. An unquoted glob passed
# to grep is expanded by the shell into "pattern file..." operands, so
# grep would search those files and never filter the ls output.
ls -l -- *.tif
echo 'generator box file from tif file...'
if ! tesseract "${mergedtif_file}.tif" "${mergedtif_file}" batch.nochop makebox; then
  echo "tesseract makebox failed for ${mergedtif_file}.tif" >&2
  exit 1
fi
echo 'generator box file from tif file sucess!'
# Show the generated box file (exact name, not a regex match).
ls -l -- "${mergedtif_file}.box"
echo '请在JtessBoxEditor中编辑每个字符的box定位数据'

# pause
#######################################
# Read exactly one byte from /dev/tty with echo off and raw mode on,
# and write that byte to stdout.
# Globals:   SAVEDSTTY (written) - terminal settings reported by stty -g
# Arguments: none
# Returns:   exit status of the final stty call that restores the settings
#######################################
get_char() {
  local mode

  SAVEDSTTY=$(stty -g)
  for mode in -echo raw; do
    stty "${mode}"
  done
  dd if=/dev/tty bs=1 count=1 2>/dev/null
  for mode in -raw echo; do
    stty "${mode}"
  done
  # Left unquoted on purpose: an empty saved value must expand to no
  # argument at all, exactly as before.
  stty $SAVEDSTTY
}
  
# Print the pause message, then block until a key is pressed.
# With no (or an empty) first script argument the default prompt is used;
# otherwise the argument itself is printed through echo -e.
case "$1" in
  '')
    echo '请按任意键继续...'
    ;;
  *)
    echo -e "$1"
    ;;
esac
get_char
# confim clear
#######################################
# Ask whether the intermediate training files should be deleted and
# remove them when the answer is yes.
# Globals:   mergedtif_file (read), font_name (read)
# Arguments: none
# Inputs:    answers on stdin, one per line (y/yes/n/no, any letter case)
# Outputs:   status messages on stdout
# Returns:   0 once a yes/no answer was handled, 1 if stdin ends first
#######################################
confim_clear() {
  local input

  while true; do
    # A failed read with nothing captured means stdin is exhausted; stop
    # instead of asking again forever.
    if ! read -r -p "是否删除训练过程文件? [Y/n] " input && [[ -z "${input}" ]]; then
      echo "未删除训练过程文件"
      return 1
    fi
    case "${input}" in
      [yY][eE][sS] | [yY])
        rm -rf -- font_properties \
          "${mergedtif_file}.tr" \
          "${font_name}.shapetable" \
          "${font_name}.normproto" \
          "${font_name}.inttemp" \
          "${font_name}.pffmtable" \
          "${font_name}.unicharset"
        echo "删除成功"
        return 0
        ;;
      [nN][oO] | [nN])
        echo "未删除训练过程文件"
        return 0
        ;;
      *)
        # Re-ask this same question. Jumping to confim_msg here would
        # restart the box confirmation and run the training again.
        echo "Invalid input..."
        ;;
    esac
  done
}
# clustering
#######################################
# Run mftraining and cntraining on the .tr file, rename their outputs to
# carry the "${font_name}." prefix, call combine_tessdata on that prefix,
# and finally offer to delete the intermediate files.
# Globals:   font_name (read), mergedtif_file (read)
# Arguments: none
# Returns:   status of confim_clear when every step succeeded, 1 as soon
#            as one of the tools or a rename fails
#######################################
clustering() {
  local tr_file="${mergedtif_file}.tr"
  local part

  if ! mftraining -F font_properties -U unicharset \
    -O "${font_name}.unicharset" "${tr_file}"; then
    echo "mftraining failed for ${tr_file}" >&2
    return 1
  fi
  if ! cntraining "${tr_file}"; then
    echo "cntraining failed for ${tr_file}" >&2
    return 1
  fi

  # NOTE(review): mftraining was already told to write
  # ${font_name}.unicharset via -O, and the rename of "unicharset" below
  # replaces that file with the input unicharset. Kept as it was; confirm
  # which of the two combine_tessdata is supposed to receive.
  for part in shapetable normproto inttemp pffmtable unicharset; do
    if ! mv -- "${part}" "${font_name}.${part}"; then
      echo "cannot rename ${part} to ${font_name}.${part}" >&2
      return 1
    fi
  done

  if ! combine_tessdata "${font_name}."; then
    echo "combine_tessdata failed for ${font_name}." >&2
    return 1
  fi
  confim_clear
}
# define font properties
#######################################
# Make sure font_properties contains the "<font_name> 0 0 0 0 0" entry,
# then continue with clustering.
# Globals:   font_name (read)
# Arguments: none
# Outputs:   progress message on stdout; may append to ./font_properties
# Returns:   status of clustering
#######################################
generator_font_properties() {
  local entry="${font_name} 0 0 0 0 0"

  echo "generator font properties..."
  # Append only when the exact line is missing. An unconditional >> added
  # a duplicate entry on every re-run that kept the intermediate files.
  # NOTE(review): the entry is keyed by ${font_name}; confirm whether the
  # tools expect the font component of mergedtif_file here instead.
  if ! grep -qxF -- "${entry}" font_properties 2>/dev/null; then
    echo "${entry}" >> font_properties
  fi
  clustering
}
# generator unicharset
#######################################
# Run unicharset_extractor on the corrected .box file and, when it
# succeeds, continue with the font-properties stage.
# Globals:   mergedtif_file (read)
# Arguments: none
# Returns:   status of generator_font_properties, or 1 when
#            unicharset_extractor fails
#######################################
generator_unicharset() {
  echo "generator unicharset..."
  # Do not carry on to the later stages without a fresh unicharset.
  if ! unicharset_extractor "${mergedtif_file}.box"; then
    echo "unicharset_extractor failed for ${mergedtif_file}.box" >&2
    return 1
  fi
  generator_font_properties
}
# train
#######################################
# Run tesseract in box.train mode on the merged image, show the resulting
# .tr file and hand over to the unicharset stage.
# Globals:   mergedtif_file (read)
# Arguments: none
# Returns:   status of generator_unicharset, or 1 when tesseract fails
#######################################
train() {
  echo "start train..."
  # Report completion and continue only when tesseract actually succeeded.
  if ! tesseract "${mergedtif_file}.tif" "${mergedtif_file}" box.train; then
    echo "tesseract box.train failed for ${mergedtif_file}.tif" >&2
    return 1
  fi
  echo "train has complete!"
  # List the exact file rather than grepping ls output with the file name
  # used as an unquoted regular expression.
  ls -l -- "${mergedtif_file}.tr"
  generator_unicharset
}
# confim
#######################################
# Ask until the operator confirms that the box corrections are finished,
# then start the training.
# Arguments: none
# Inputs:    answers on stdin, one per line (y/yes/n/no, any letter case)
# Outputs:   reminder / error messages on stdout
# Returns:   status of train after a "yes"; 1 if stdin ends before that
#######################################
confim_msg() {
  local input

  # A loop replaces the self-recursion: the question is repeated without
  # growing the call stack, and end-of-input ends the loop instead of
  # recursing without bound.
  while true; do
    if ! read -r -p "确认已经完成box定位偏移的修正? [Y/n] " input && [[ -z "${input}" ]]; then
      echo "no answer received; training not started" >&2
      return 1
    fi
    case "${input}" in
      [yY][eE][sS] | [yY])
        train
        return
        ;;
      [nN][oO] | [nN])
        echo "请完成box定位偏移的修正后继续"
        ;;
      *)
        echo "Invalid input..."
        ;;
    esac
  done
}
# Entry point of the training stage: ask whether the box corrections are
# finished; a "yes" answer makes confim_msg call train.
confim_msg