#!/usr/bin/env bash
# HEADER
#=======================================================================
#
#  2utf8 -- Shell script to get safe UTF-8 text files.
#
#  SYNOPSIS
#    2utf8 <inputfile1> <inputfile2> <...>
#    2utf8 [-i|--input] <inputfile> [-o|--output <outputfile>]
#
#  DESCRIPTION
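#    Convert text files to UTF-8 from the encoding detected by file(1),
#    change CRLF and CR line endings to LF, and remove the UTF-8 BOM.
#    Without -o/--output, each input file is replaced in place.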
#
#  EXAMPLES
#    $ 2utf8 -i infile.tex -o outfile.tex
#    $ 2utf8 file.tex
#    $ 2utf8 *.tex
#
#  POSITIONAL ARGUMENTS
#    <inputfile>           input text files
#
#  OPTIONAL ARGUMENTS
#    -h, --help            show this help message and exit
#    -i <inputfile>, --input <inputfile>
#                          input text file (single)
#    -o <outputfile>, --output <outputfile>
#                          output text file (single)
#
#  DEPENDENCIES
#    - GNU coreutils: head, tail, cp, cut
#    - iconv (2.24)
#    - file (5.30)
#    - sed (4.4)
#    - grep
#
#  Copyright (C) 2018-2020 Nicolas Mesnier <nmesnier@free.fr>
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 3 or
#  above as published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#=======================================================================
# END_OF_HEADER
#-----------------------------------------------------------------------
# errexit: leave the script as soon as a command returns a non-zero
# status. Commands tested by if/while/&&/|| are exempt from this.
set -e
#=======================================================================
# *** functions ***
#=======================================================================
#-----------------------------------------------------------------------
# *** read this header to display help
#-----------------------------------------------------------------------
# Name the script was invoked with, directory part removed.
SCRIPT_NAME="$(basename -- "${0}")"
# Line numbers of the two marker lines that enclose the help text; only
# the first 200 lines of this file are searched.
BOH=$(head -n 200 -- "${0}" | grep -n "^# HEADER" | cut -f1 -d:)
EOH=$(head -n 200 -- "${0}" | grep -n "^# END_OF_HEADER" | cut -f1 -d:)
# Help -- print the help text stored in the header of this file.
#   Globals: SCRIPT_NAME, BOH, EOH (read)
#   Outputs: the lines strictly between the two markers that are "#" or
#            start with "# ", without the leading "#"; a literal
#            ${SCRIPT_NAME} in the text is replaced by the script name.
#   Returns: 1 (message on stderr) if a marker was not found.
Help(){
  # Without both line numbers the ranges below cannot be computed.
  if [[ -z ${BOH} || -z ${EOH} ]]; then
    echo "${SCRIPT_NAME}: help text not found." >&2
    return 1
  fi
  head -n "$((EOH - 1))" -- "${0}" | tail -n "$((EOH - BOH - 1))" \
    | grep -e "^#$" -e "^# " \
    | sed -e "s/^#=*//g" \
          -e "s/\${SCRIPT_NAME}/${SCRIPT_NAME}/g"
}
#-----------------------------------------------------------------------
# *** to recode file (core function)
#-----------------------------------------------------------------------
__Recode__(){
    # Usage: __Recode__ <source> <destination>
    #
    # Write a UTF-8 copy of <source> to <destination> (overwritten if it
    # exists); <source> itself is never modified.
    #   1. Encoding: unless file(1) already reports UTF-8, convert with
    #      iconv from the encoding reported by file(1).
    #   2. Line endings: CR+LF and lone CR become LF.
    #   3. A leading UTF-8 byte order mark is removed.
    # Steps 2 and 3 are applied only when file(1) reports them.
    # Returns non-zero as soon as one of the tools fails.
    # Needs GNU sed (-i, and the \r / \n escapes).
    local src=${1} dst=${2}
    local desc enc
    local -a fixups=()

    # Describe the source once. -b leaves the file name out of the text,
    # so a name containing "UTF-8", "with BOM", ... cannot fake a match.
    desc=$(file -b -- "${src}") || return

    # recode to UTF-8
    if [[ ${desc} != *"UTF-8"* ]]; then
        enc=$(file -b --mime-encoding -- "${src}") || return
        # file(1) could not name the 8-bit encoding: assume
        # windows-1252 (cp1252), which covers ISO-8859-1 text
        if [[ ${enc} == *"unknown-8bit"* ]]; then
            enc=cp1252
        fi
        # redirections instead of file operands: safe for any file name
        iconv -f "${enc}" -t UTF-8 < "${src}" > "${dst}" || return
    else
        cp -- "${src}" "${dst}" || return
    fi

    # MS Windows EOL: drop the CR of every CR+LF pair. The pattern also
    # matches mixed reports such as "with CRLF, LF line terminators".
    if [[ ${desc} == *" CRLF"*"line terminators"* ]]; then
        fixups+=(-e 's/\r$//')
    fi
    # lone CR terminators (classic Mac OS): turn them into line feeds
    if [[ ${desc} == *" CR line terminators"* \
          || ${desc} == *" CR,"*"line terminators"* ]]; then
        fixups+=(-e 's/\r/\n/g')
    fi
    # suppress BOM (bytes EF BB BF at the very start of the file)
    if [[ ${desc} == *"with BOM"* ]]; then
        fixups+=(-e $'1s/^\xef\xbb\xbf//')
    fi

    # One in-place pass; LC_ALL=C makes sed compare plain bytes.
    if (( ${#fixups[@]} > 0 )); then
        LC_ALL=C sed -i "${fixups[@]}" -- "${dst}" || return
    fi
}
#-----------------------------------------------------------------------
# *** to generate a temp file
#-----------------------------------------------------------------------
__TmpFile__(){
    # Usage: __TmpFile__ <path>
    #
    # Print a path under which nothing exists yet: <path> itself if it
    # is free, otherwise <path> with ".tmp" appended as often as needed.
    # Nothing is created, so the name is only free at the time of the
    # check.
    local candidate=${1}
    # -e also counts directories and other non-regular entries as
    # taken; -L catches symbolic links whose target is missing.
    while [[ -e ${candidate} || -L ${candidate} ]]; do
        candidate+=".tmp"
    done
    printf '%s\n' "${candidate}"
}
#-----------------------------------------------------------------------
# *** IO prompt
#-----------------------------------------------------------------------
__RecodeIO__(){
    # Usage: __RecodeIO__ <source> <destination>
    #
    # Check the two paths, then hand them to __Recode__.
    #   - <source> must be a regular file; otherwise a message is
    #     written to stderr and the script exits with status 1.
    #   - If <destination> is an existing regular file the user is asked
    #     for confirmation; any answer not starting with y/Y exits the
    #     script with status 1.
    # Returns the status of __Recode__.
    local src=${1} dst=${2}
    local rep=""

    if [[ ! -f ${src} ]]; then
        echo " File \"${src}\" doesn't exist." >&2
        exit 1
    fi

    if [[ -f ${dst} ]]; then
        # A failing read (end of input) is not an error by itself: the
        # answer collected so far, possibly empty, decides.
        read -r -p " File \"${dst}\" already exists. Replace? y/[n]  " rep || true
        case ${rep} in
            [Yy]* ) ;;
            * ) exit 1 ;;
        esac
    fi

    __Recode__ "${src}" "${dst}"
}
#-----------------------------------------------------------------------
# *** recode a file in place (the input file is replaced)
#-----------------------------------------------------------------------
__InPlaceRecodeIO__(){
    # Usage: __InPlaceRecodeIO__ <file>
    #
    # Replace <file> by its recoded version. The result is first written
    # next to <file> under an unused name obtained from __TmpFile__ and
    # only moved over <file> once __RecodeIO__ has succeeded, so <file>
    # keeps its name and content if the conversion fails.
    # Returns the status of the step that failed, 0 on success.
    local target=${1}
    local scratch status

    scratch=$(__TmpFile__ "${target}") || return

    # Deliberately not used as an if/&&/|| operand: that would switch
    # off errexit inside the called functions.
    __RecodeIO__ "${target}" "${scratch}"
    status=$?
    if (( status != 0 )); then
        # Drop the partial result, but never <file> itself.
        if [[ ${scratch} != "${target}" ]]; then
            rm -f -- "${scratch}"
        fi
        return "${status}"
    fi

    mv -- "${scratch}" "${target}"
}
#=======================================================================
# *** get options ***
#=======================================================================
infile=""     # value of -i/--input
outfile=""    # value of -o/--output
infiles=()    # positional arguments: files to recode in place

# Testing $# rather than the value of $1 keeps the loop going when an
# argument is the empty string.
while (( $# > 0 )); do
    case "${1}" in
        -h|--help)
            Help
            exit 0 ;;
        -i|--input)
            if (( $# < 2 )); then
                echo 'Missing parameter argument.' >&2
                exit 1
            fi
            infile=${2}
            shift 2
            ;;
        -o|--output)
            if (( $# < 2 )); then
                echo 'Missing parameter argument.' >&2
                exit 1
            fi
            outfile=${2}
            shift 2
            ;;
        *)
            # a positional file is refused once -i has been given
            if [[ -z ${infile} ]]; then
                infiles+=("${1}")
            else
                echo "Input file \"${infile}\" already given." >&2
                exit 1
            fi
            shift
            ;;
    esac
done

nfiles=${#infiles[@]}
if (( nfiles >= 1 )); then
    # positional files are recoded in place and cannot be combined
    # with -i / -o
    if [[ -z ${infile} && -z ${outfile} ]]; then
        for f in "${infiles[@]}"; do
            __InPlaceRecodeIO__ "${f}"
        done
    else
        echo 'Incompatible arguments.' >&2
        exit 1
    fi
elif [[ -z ${infile} ]]; then
    # nothing to work on (no argument at all, or -o without -i)
    echo 'No input file given.' >&2
    exit 1
elif [[ -z ${outfile} ]]; then
    # -i alone: recode that file in place
    __InPlaceRecodeIO__ "${infile}"
else
    __RecodeIO__ "${infile}" "${outfile}"
fi
#====================================================================eof
# vim: set tw=72 ts=4 sw=4 nu: