#!/bin/bash

###########################################################
## Copyright (c) 2002-2023 Alexey Kuryakin daqgroup@mail.ru
###########################################################

###########################################################
## fixutf8 -e e f => fix encoding (e) of files (f) to UTF-8.
## Checks files before conversion, converts only files that
## really need to be fixed, and keeps other files unchanged.
## Option -e e specifies the original encoding (e) of files.
###########################################################

# Print the file name of this script with the directory part stripped.
# Outputs: the base name of ${BASH_SOURCE[0]} on stdout.
function selfname(){
 # quoted, so a script path containing spaces is not split into words
 basename -- "${BASH_SOURCE[0]}";
};
# Print "<script name> version 1.0" on stdout.
print_version() {
 printf '%s version 1.0\n' "$(selfname)"
}
# Print the copyright notice on stdout.
print_copyright() {
 printf '%s\n' "Copyright (c) 2002-2023 Alexey Kuryakin daqgroup@mail.ru"
}
# Print the help screen on stdout: version, copyright, description,
# options, parameters and usage examples.
function print_help(){
 local me;
 me="$(selfname)"; # resolve the script name once, not once per line
 print_version;
 print_copyright;
 echo "about:";
 echo " $me is utility to fix (validate) text encoding to UTF8.";
 echo " $me converts a text files with given encoding to UTF8.";
 echo " $me also can check validity of files UTF8 encoding.";
 echo "usage:";
 echo " $me [options] [parameters]";
 echo "options:";
 echo " --version        - print version";
 echo " -h,--help        - print help screen";
 echo " -t,--test        - run in test mode, no file conversion";
 echo " -e,--encoding  e - set original file encoding to convert";
 echo " -cs,--charset  c - set original file encoding to convert";
 # the option parsed by main is --codepage; the help used to say --copepage
 echo " -cp,--codepage p - set original file encoding to convert";
 echo "parameters:";
 echo " text file(s) to fix encoding to UTF8";
 echo " if '-' or no parameters then uses stdin/stdout";
 echo "examples:";
 echo " $me -h";
 echo " $me -t test.txt";
 echo " $me -cp 1251 *.txt";
 echo " $me -e CP1251 *.txt";
 echo " cat test.txt | $me -e CP1251 > fixed.txt";
};

# detect location of script (with symlink resolved)
# all expansions are quoted so that paths containing spaces stay intact

readonly scriptfile="${BASH_SOURCE[0]}";             # caller location of script
readonly scriptFILE="$(realpath -- "$scriptfile")";  # physical location of script
readonly scriptHOME="$(dirname -- "$scriptFILE")";   # physical location of folder

# Test whether $1 is a non-empty string made only of the digits 0-9.
# Returns: 0 if it is, 1 otherwise (including an empty or missing $1).
is_number() {
 [[ -n "$1" && "$1" != *[!0-9]* ]]
}

# Make sure the iconv command is available.
# Exits: status 1 with a message on stderr when iconv cannot be found.
function check_iconv(){
 # command -v is a shell builtin, so the check does not depend on the
 # external `which` utility being installed
 if ! command -v iconv >/dev/null 2>&1; then
  1>&2 echo "$(selfname): iconv not found";
  exit 1;
 fi;
};

# Print the octal file mode (like 755) of file $1 on stdout.
# Prints nothing when $1 is empty or names something that does not exist.
function oct_file_mode(){
 if [ -n "$1" ] && [ -e "$1" ]; then
  # "$1" is quoted and preceded by -- so names with spaces, glob
  # characters or a leading dash reach stat as one operand
  stat -c "%a" -- "$1";
 fi;
};

# Script entry point: for each file, check it and call the converter if needed.
#
# Arguments: [option] [file ...] - at most one leading option is parsed
#            (the options are listed by print_help). With no file, or
#            with '-' as the first file, text is converted from stdin
#            to stdout.
# Outputs:   "fixed" and "faulty" reports on stdout; "passed", "failed",
#            "missed" reports and error messages on stderr.
# Returns:   0 on success; 1 if any file was faulty (test mode), could
#            not be converted, or does not exist. In stdin mode the
#            status of iconv is returned.
# Exits:     1 on an invalid option, a missing option argument or an
#            encoding that iconv does not accept.
function main(){
 check_iconv;
 local code=0; # exit code
 local test=0; # test mode
 local from="UTF8"; # source file encoding
 local temp="$HOME/.local/share/daqgroup/$(selfname)";
 local file file_tmp mode;
 case "$1" in
  -t|--test) test=1; shift; ;;
  -h|--help) print_help; return 0; ;;
  --version) print_version; return 0; ;;
  -e|--encoding|-cs|--charset|-cp|--codepage)
   # the encoding name is mandatory: an empty value would shift the
   # remaining iconv arguments into the wrong positions
   if [ -z "$2" ]; then
    1>&2 echo "$(selfname): option $1 requires an argument";
    exit 1;
   fi;
   from="$2"; shift 2; ;;
  -)         ;; # a lone '-' selects stdin, it is not an option
  -*)        1>&2 echo "$(selfname): invalid option $1"; exit 1; ;;
  *)         ;;
 esac;
 # a bare number like 1251 means code page CP1251; normalize it before
 # the stdin branch so that both modes pass the same name to iconv
 if is_number "$from"; then
  from="CP$from";
 fi;
 if [ $# = 0 ] || [ "$1" = "-" ]; then
  iconv -f "$from" -t UTF8;
  return $?;
 fi;
 # validate the encoding by asking iconv to open the conversion on empty
 # input; this does not depend on the output format of `iconv -l`
 if ! iconv -f "$from" -t UTF8 </dev/null >/dev/null 2>&1; then
  1>&2 echo "$(selfname): unknown encoding $from";
  exit 1;
 fi;
 if [ ! -d "$temp" ]; then
  mkdir -p -- "$temp";
 fi;
 for file in "$@"; do
  if [ ! -e "$file" ]; then
   1>&2 echo "$(selfname): missed - $file";
   code=1;
   continue;
  fi;
  if [ -d "$file" ]; then continue; fi; # skip directories
  if [ -L "$file" ]; then continue; fi; # skip symbolic links
  if [ ! -f "$file" ]; then continue; fi; # only regular files
  # the file is valid UTF-8 when dropping invalid sequences (-c) leaves
  # it byte-identical; cmp's exit status also catches an unreadable file
  if iconv -c -f UTF8 -t UTF8 < "$file" | cmp -s -- - "$file"; then
   1>&2 echo "$(selfname): passed - $file";
   continue;
  fi;
  if [ "$test" = 1 ]; then
   echo "$(selfname): faulty - $file";
   code=1;
   continue;
  fi;
  file_tmp="";
  mode="$(oct_file_mode "$file")";
  # convert into a private temporary file first, then copy the result
  # over the original so that a failed conversion leaves it untouched
  if file_tmp="$(mktemp "$temp/iconv.XXXXXX" 2>/dev/null)" \
     && iconv -f "$from" -t UTF8 < "$file" > "$file_tmp" 2>/dev/null \
     && cp -f -- "$file_tmp" "$file" 2>/dev/null; then
   echo "$(selfname): fixed  - $file";
   # restore the mode saved before the copy
   if [ -n "$mode" ]; then
    chmod -f -- "$mode" "$file";
   fi;
  else
   1>&2 echo "$(selfname): failed - $file";
   code=1;
  fi;
  if [ -n "$file_tmp" ]; then
   rm -f -- "$file_tmp";
  fi;
 done;
 return $code;
};

# run main with the script's command line arguments; as the last command,
# its return status becomes the exit status of the script
main "$@";

##############
## END OF FILE
##############
