#!/usr/bin/env bash
# HEADER
#=======================================================================
#
#   2utf8 -- Shell script to get safe UTF-8 text files.
#
# SYNOPSIS
#    2utf8 <infile> <...>
#    2utf8 [-i|--input] <infile> [-o|--output <outfile>]
#
# DESCRIPTION
#    Shell script to get safe UTF-8 text files.
#
# EXAMPLES
#    $ 2utf8 -i infile.tex -o outfile.tex
#    $ 2utf8 file.tex
#    $ 2utf8 *.tex
#
# POSITIONAL ARGUMENTS
#    <infile> <...>  input text files
#
# OPTIONAL ARGUMENTS
#    -h, --help      show this help message and exit
#    -i <infile>, --input <infile>
#                    input text file (single)
#    -o <outfile>, --output <outfile>
#                    output text file (single)
#
# DEPENDENCIES
#    - GNU coreutils: head, tail, cp, cut
#    - iconv (2.24)
#    - file (5.30)
#    - sed (4.4)
#    - grep
#
# Copyright (C) 2018-2020 Nicolas Mesnier
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3 or
# above as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
#=======================================================================
# END_OF_HEADER
#-----------------------------------------------------------------------
set -e
#=======================================================================
# *** functions ***
#=======================================================================
#-----------------------------------------------------------------------
# *** read this header to display help
#-----------------------------------------------------------------------
SCRIPT_NAME=${0##*/}
# Line numbers of the two marker lines that delimit the help text in
# this very file.  Only the first 200 lines are searched and only the
# first match of each marker is kept, so the values are single integers.
BOH=$(head -n 200 < "${0}" | grep -n "^# HEADER" | head -n 1 | cut -f1 -d:)
EOH=$(head -n 200 < "${0}" | grep -n "^# END_OF_HEADER" | head -n 1 | cut -f1 -d:)

# Print the help text: the comment lines found strictly between the two
# header markers, with the leading '#' (and any '=' rule) removed.
# Returns 1 with a message on stderr when a marker could not be located.
Help() {
    if [[ -z ${BOH} || -z ${EOH} ]]; then
        echo "${SCRIPT_NAME}: help header not found." >&2
        return 1
    fi
    # keep lines BOH+1 .. EOH-1, then only "#" and "# ..." lines
    head -n "$((EOH - 1))" < "${0}" \
        | tail -n "$((EOH - BOH - 1))" \
        | grep -e "^#$" -e "^# " \
        | sed -e "s/^#=*//g" \
              -e "s/\${SCRIPT_NAME}/${SCRIPT_NAME}/g"
}
#-----------------------------------------------------------------------
# *** to rewrite a file through a filter command
#-----------------------------------------------------------------------
# Arguments: $1   - file to rewrite
#            $2.. - command reading stdin and writing stdout
# The command output goes to a mktemp scratch file first and is copied
# back only when the command succeeded, so a failing filter never
# truncates $1.  Returns 1 on failure.
__FilterInPlace__() {
    local target=${1} scratch
    shift
    scratch=$(mktemp "${TMPDIR:-/tmp}/2utf8.XXXXXX")
    if "$@" < "${target}" > "${scratch}" \
        && cat "${scratch}" > "${target}"; then
        rm -f -- "${scratch}"
    else
        rm -f -- "${scratch}"
        return 1
    fi
}
#-----------------------------------------------------------------------
# *** to recode file (core function)
#-----------------------------------------------------------------------
# Arguments: $1 - input text file
#            $2 - output file (created or overwritten)
# Writes $1 to $2 as UTF-8, with Unix line feeds and without a leading
# UTF-8 byte order mark.  Uses file, iconv, sed, tr, od, head and tail.
__Recode__() {
    local src=${1} dst=${2}
    local desc infos charset lead

    # -b leaves the file name out of the description, so the pattern
    # tests below only ever look at what `file` says about the content.
    desc=$(file -b -- "${src}")

    # recode to UTF-8
    if [[ ${desc} == *"UTF-8"* ]]; then
        cp -- "${src}" "${dst}"
    else
        # the MIME option is spelled -I on Darwin and -i elsewhere
        case "$(uname -s)" in
            Darwin ) infos=$(file -bI -- "${src}") ;;
            * )      infos=$(file -bi -- "${src}") ;;
        esac
        charset=${infos##*charset=}
        # in case of non-ISO, may be windows-1252/cp1252, a superset of
        # ISO-8859-1 or ISO-8859-15
        if [[ ${infos} == *"unknown-8bit"* ]]; then
            charset=cp1252
        fi
        # read through stdin so a file name is never parsed as an option
        iconv -f "${charset}" -t UTF-8 < "${src}" > "${dst}"
    fi

    # change MS windows EOL (CR LF) to Unix line feed; $'\r' hands sed a
    # literal carriage return, which does not rely on GNU escapes
    if [[ ${desc} == *"CRLF"*"line terminators"* ]]; then
        __FilterInPlace__ "${dst}" sed $'s/\r$//'
    fi

    # change old Mac OS EOL (lone CR) to Unix line feed; "CRLF" is
    # removed from the description first so only a lone "CR" matches
    if [[ ${desc//CRLF/} == *" CR"*"line terminators"* ]]; then
        __FilterInPlace__ "${dst}" tr '\r' '\n'
    fi

    # suppress BOM: look at the first three bytes actually written
    # rather than at the wording of the description
    lead=$(head -c 3 < "${dst}" | od -An -tx1 | tr -d ' \t\n')
    if [[ ${lead} == "efbbbf" ]]; then
        __FilterInPlace__ "${dst}" tail -c +4
    fi
}
#-----------------------------------------------------------------------
# *** to generate a temp file
#-----------------------------------------------------------------------
# Arguments: $1 - path to start from
# Prints the first of "$1", "$1.tmp", "$1.tmp.tmp", ... that does not
# exist yet.  Existence is tested with -e so a directory of that name is
# skipped as well.
__TmpFile__() {
    local candidate=${1}
    while [[ -e ${candidate} ]]; do
        candidate="${candidate}.tmp"
    done
    printf '%s\n' "${candidate}"
}
#-----------------------------------------------------------------------
# *** IO prompt
#-----------------------------------------------------------------------
# Arguments: $1 - input file, $2 - output file
# Exits 1 when $1 is not a regular file.  When $2 already exists the
# user is asked for confirmation and anything but y/Y exits 1.
# Otherwise hands both paths to __Recode__.
__RecodeIO__() {
    local src=${1} dst=${2} rep
    if [[ ! -f ${src} ]]; then
        echo " File \"${src}\" doesn't exists." >&2
        exit 1
    fi
    if [[ -f ${dst} ]]; then
        read -r -p " File \"${dst}\" allready exists. Replace? y/[n] " rep
        case "${rep}" in
            [Yy]* ) ;;
            * ) exit 1 ;;
        esac
    fi
    __Recode__ "${src}" "${dst}"
}
#-----------------------------------------------------------------------
# *** in-place recoding
#-----------------------------------------------------------------------
# EXIT handler armed by __InPlaceRecodeIO__: if the script terminates
# while the original is still parked under its backup name, move it back
# over whatever partial output was written.
__RestoreOriginal__() {
    if [[ -n ${__inplace_backup-} && -e ${__inplace_backup} ]]; then
        mv -f -- "${__inplace_backup}" "${__inplace_target}"
    fi
}

# Arguments: $1 - file to recode in place
# Moves $1 aside, recodes the copy back to the original name and removes
# the copy.  The two paths are kept in globals because the EXIT handler
# runs outside this function's scope.
__InPlaceRecodeIO__() {
    local target=${1}
    if [[ ! -f ${target} ]]; then
        echo " File \"${target}\" doesn't exists." >&2
        exit 1
    fi
    __inplace_target=${target}
    __inplace_backup=$(__TmpFile__ "${target}")
    trap __RestoreOriginal__ EXIT
    mv -- "${target}" "${__inplace_backup}"
    __RecodeIO__ "${__inplace_backup}" "${target}"
    trap - EXIT
    rm -f -- "${__inplace_backup}"
    __inplace_backup=""
}
#=======================================================================
# *** get options ***
#=======================================================================
infile=""
outfile=""
infiles=()
while (( $# > 0 )); do
    case "${1}" in
        -h|--help)
            Help
            exit 0
            ;;
        -i|--input|-o|--output)
            # these options consume the following word as their value
            if (( $# < 2 )); then
                echo 'Missing parameter argument.' >&2
                exit 1
            fi
            case "${1}" in
                -i|--input)  infile=${2} ;;
                *)           outfile=${2} ;;
            esac
            shift
            ;;
        *)
            # positional files are refused once -i has been seen
            if [[ -z ${infile} ]]; then
                infiles+=("${1}")
            else
                echo "Input file \"$infile\" allready given." >&2
                exit 1
            fi
            ;;
    esac
    shift
done
nfiles=${#infiles[@]}
if (( nfiles >= 1 )); then
    # positional files are recoded in place and exclude -i / -o
    if [[ -z ${infile} && -z ${outfile} ]]; then
        for f in "${infiles[@]}"; do
            __InPlaceRecodeIO__ "${f}"
        done
    else
        echo 'Incompatible arguments.' >&2
        exit 1
    fi
elif [[ -z ${infile} ]]; then
    echo 'No input file given.' >&2
    exit 1
elif [[ -z ${outfile} || ${infile} -ef ${outfile} ]]; then
    # no output name, or output is the input itself: writing straight
    # onto the input would truncate it before it is read
    __InPlaceRecodeIO__ "${infile}"
else
    __RecodeIO__ "${infile}" "${outfile}"
fi
#====================================================================eof
# vim: set tw=72 ts=4 sw=4 nu: