#!/bin/bash
# Purpose:
# To carry out OCR on a PDF file
# Requires ImageMagick (the "convert" command) and Tesseract (the "tesseract"
# command) on PATH. For information on how to install the needed software,
# see the installation documentation of those two projects.

#######################################
# Convert a PDF file to a TIFF image and run OCR on that image.
# Arguments:
#   $1 - name of the PDF file, including the .pdf extension
# Outputs:
#   Progress messages on stdout; usage and error messages on stderr.
#   convert is asked to write <name>.tiff and tesseract is given the output
#   base <name> (reported to the user as <name>.txt), where <name> is the
#   input's base name with its last extension removed. Both are relative
#   paths, so they land in the current working directory.
# Returns:
#   0 on success, 1 if the conversion or the OCR step fails,
#   2 if the number of arguments is not exactly one.
#######################################
ocr_pdf() {
  if [[ $# -ne 1 ]]; then
    {
      echo "This script expects one argument."
      echo " This argument is the name of the pdf file"
      echo " including the .pdf extension"
      echo "Usage: $0 file.pdf "
    } >&2
    return 2
  fi

  local pdf=$1
  local filename

  # Strip the directory part, then the last extension. "--" keeps a name
  # that starts with "-" from being parsed as an option by basename.
  filename=$(basename -- "$pdf") || return 1
  filename="${filename%.*}"

  echo "Converting $pdf to a tiff file named $filename.tiff"
  echo "... "
  # Every expansion is quoted so names containing spaces or glob characters
  # reach convert/tesseract as single arguments.
  if ! convert -density 300 "$pdf" -depth 8 -strip -background white \
      -alpha off "$filename.tiff"; then
    echo "Error: could not convert $pdf to $filename.tiff" >&2
    return 1
  fi

  echo "Carrying out OCR on $filename.tiff to create $filename.txt"
  if ! tesseract "$filename.tiff" "$filename"; then
    echo "Error: OCR failed on $filename.tiff" >&2
    return 1
  fi

  echo "The recognizable text in $pdf has been output to the $filename.txt file."
}

# Must stay the last command so the script's exit status is ocr_pdf's status.
ocr_pdf "$@"