#!/bin/bash

###########################################################
## Copyright (c) 2002-2023 Alexey Kuryakin daqgroup@mail.ru
###########################################################

###########################################################
## utility to call fixutf8 . with confirmation using zenity
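## options:
##  -t --test    test mode
##  -c --check   check mode (runs chkutf8 on the files)
##  -l --list f  take list of files from file f
##  -w --wait n  if n is nonzero, wait n seconds on exit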
###########################################################

###########################################################
source $(crwkit which crwlib_base.sh); # Use base library #
###########################################################

## Print the script name ($scriptname) followed by the version number.
function print_version(){
 printf '%s\n' "$scriptname version 1.0";
};
## Print the one-line copyright notice.
function print_copyright(){
 printf '%s\n' "Copyright (c) 2002-2023 Alexey Kuryakin daqgroup@mail.ru";
};
## Print the help screen: version, copyright, description, usage,
## the list of options and some examples.
## The option list includes -c,--check, which main accepts.
function print_help(){
 print_version;
 print_copyright;
 printf '%s\n' \
  "about:" \
  " $scriptname is utility to fix (validate) text to UTF8." \
  " Zenity based dialog uses to choose files to be fixed." \
  " $scriptname converts a text files to UTF8." \
  "usage:" \
  " $scriptname [options] [parameters]" \
  "options:" \
  " --version   - print version" \
  " -h,--help   - print help screen" \
  " -w,--wait n - if nonzero, wait n seconds on exit" \
  " -t,--test   - run in test mode, no file conversion" \
  " -c,--check  - run in check mode (calls chkutf8 on the files)" \
  " -l,--list f - use file f with list of files to fix" \
  "parameters:" \
  " text file(s) to fix UTF8" \
  " if '-' or no parameters then uses stdin/stdout" \
  "examples:" \
  " $scriptname -h" \
  " $scriptname *.txt" \
  " $scriptname -t -w 60 test.txt";
};

## Filter a fixutf8 report read from stdin.
## Only lines whose first space-separated word is "fixutf8:" are kept:
##  - second word passed|fixed          -> line is written to stdout
##  - second word missed|failed|faulty  -> line is written to stderr
##  - anything else                     -> line is dropped
## Words are cut with parameter expansion (single space as delimiter,
## like 'cut -d " "'), so no external process is started per line.
function split(){
 local line="" tag="" rest="" status="";
 # '|| [[ -n ... ]]' keeps a last line that has no trailing newline.
 while read -r line || [[ -n "$line" ]]; do
  tag="${line%% *}";
  [[ "$tag" == "fixutf8:" ]] || continue;
  # Drop the first word; without any space the line stays unchanged,
  # which mirrors what 'cut -f 2' prints for a line without delimiter.
  rest="${line#* }";
  status="${rest%% *}";
  case "$status" in
   passed|fixed)         printf '%s\n' "$line"; ;;
   missed|failed|faulty) printf '%s\n' "$line" 1>&2; ;;
   *)                    ;;
  esac;
 done;
};

## Call 'fatal 1' with an error message if iconv is not available.
## The lookup uses the shell builtin 'command -v', so the result does
## not depend on an external 'which' program being installed.
function check_iconv(){
 if ! command -v iconv >/dev/null 2>&1; then
  fatal 1 "$scriptname: iconv not found";
 fi;
};

## Call 'fatal 1' with an error message if enca is not available.
## The lookup uses the shell builtin 'command -v', so the result does
## not depend on an external 'which' program being installed.
function check_enca(){
 if ! command -v enca >/dev/null 2>&1; then
  fatal 1 "$scriptname: enca not found";
 fi;
};

## Call 'fatal 1' with an error message if enconv is not available.
## The lookup uses the shell builtin 'command -v', so the result does
## not depend on an external 'which' program being installed.
function check_enconv(){
 if ! command -v enconv >/dev/null 2>&1; then
  fatal 1 "$scriptname: enconv not found";
 fi;
};

## Call 'fatal 1' with an error message when 'unix which fixutf8'
## prints nothing, i.e. the fixutf8 tool could not be located.
function check_fixutf8(){
 local location="";
 location="$(unix which fixutf8)";
 [ -n "$location" ] || fatal 1 "$scriptname: fixutf8 not found";
};

## Detect the charset of a file with enca.
## Arguments: $1 - file to examine; nothing is printed when it is empty.
## Output:    the last word of the enca output line containing
##            'code page' (as returned by extract_last_word),
##            or "NONE" when no such word was found.
function detect_charset(){
 [ -n "$1" ] || return 0;
 local info="" charset="";
 # The file name is quoted so that paths with spaces or glob
 # characters reach enca as a single argument.
 info="$(enca "$1" | grep 'code page')";
 # $info is left unquoted on purpose: extract_last_word is handed the
 # separate words of the line (assumes it picks the last argument).
 charset="$(extract_last_word $info)";
 printf '%s\n' "${charset:-NONE}";
};

## Print the output of 'iconv -l' with the first "//" removed
## from every line.
function list_iconv_cp(){
 iconv -l | sed -e 's|//||';
};

## Print zenity radiolist rows "FLAG NAME" for every code page name
## reported by list_iconv_cp, in reversed order.
## Arguments: $1 - code page to preselect (default CP1251); the row
##                 whose name equals it gets TRUE, all others FALSE.
function list_iconv_check_items(){
 local cp="${1:-CP1251}";
 # Loop variables are local so that they do not leak into the caller.
 local item="" flag="";
 # The substitution is unquoted on purpose: word splitting turns the
 # listing into single names, so no extra 'xargs' stage is needed.
 for item in $(list_iconv_cp | tac); do
  flag="FALSE";
  if [[ "$item" == "$cp" ]]; then flag="TRUE"; fi;
  echo "$flag $item";
 done;
};

## Check that $1 is a code page name listed by list_iconv_cp.
## Arguments: $1 - code page name (compared case-insensitively).
## Returns:   0 when the name equals a whole line of the listing,
##            1 otherwise (also for an empty name).
function is_valid_codepage(){
 [[ -n "$1" ]] || return 1;
 # -F/-x: compare the name literally against whole lines, so regex
 # metacharacters in $1 (like '.') cannot match other names;
 # -e protects a name that starts with '-'.
 if list_iconv_cp | grep -q -i -x -F -e "$1"; then return 0; fi;
 return 1;
};

## Print the summary line "<n> processed, <n> succeed, <n> failed."
## by means of 'unix ansi'.
## Arguments: $1 - total count, $2 - passed count, $3 - failed count.
## All three must satisfy is_number, otherwise 'fatal 1' is called.
## Colour options are passed only when stdout is a terminal.
function report_summ_pass_fail(){
 local total="$1" passed="$2" failed="$3";
 if ! { is_number "$total" && is_number "$passed" && is_number "$failed"; }; then
  fatal 1 "Error: invalid result.";
  return;
 fi;
 local -a style_summ=() style_pass=() style_fail=();
 if [[ -t 1 ]]; then
  style_summ=(--bg-black --bold --white-intense);
  if [[ $passed -eq 0 ]]; then
   style_pass=(--bg-black --cyan-intense);
  else
   style_pass=(--bg-black --bold --green-intense);
  fi;
  if [[ $failed -gt 0 ]]; then
   style_fail=(--bg-black --bold --red-intense);
  else
   style_fail=(--bg-black --bold --green-intense);
  fi;
 fi;
 unix ansi "${style_summ[@]}" -n "$total $(langstr ru "файл(ов) обработал" en "file(s) processed"), ";
 unix ansi "${style_pass[@]}" -n "$passed $(langstr ru "успешно" en "succeed"), ";
 unix ansi "${style_fail[@]}"    "$failed $(langstr ru "неудачно" en "failed").";
};

## Ask the user (zenity question dialog, 180 s timeout, Cancel as the
## default) to confirm the charset chosen for a file.
## Arguments: $1 - charset name, $2 - file name.
## Returns:   1 when either argument is empty, otherwise the exit
##            status of zenity.
function zen_confirm_charset(){
 local charset="$1" path="$2";
 if [[ -z "$charset" || -z "$path" ]]; then return 1; fi;
 local title="" text="";
 title="$(langstr ru "Подтверждение исходной КОДИРОВКИ файла" en "Confirm original file CHARSET") …";
 text="$(langstr \
  ru "<b><big>Подтверждаете исходную КОДИРОВКУ файла <span color='green'>${charset}</span>?</big><small>\n\nФайл:\n\n${path}\n</small></b>" \
  en "<b><big>Confirm the original file CODEPAGE <span color='green'>${charset}</span>?</big><small>\n\nFile:\n\n${path}\n</small></b>")";
 local -a dialog=(--question --modal --timeout 180 --default-cancel --ellipsize);
 zenity "${dialog[@]}" --title "$title" --text "$text" 2>/dev/null;
};

## Show a zenity radiolist with all code pages from
## list_iconv_check_items (WINDOWS-1251 preselected) and print the
## name chosen by the user.
## Arguments: $1 - optional file; when it exists its name and the
##                 type reported by 'file' are shown in the dialog.
## The type is taken from 'file -b' (brief mode, no "name:" prefix),
## so descriptions or names containing ':' are shown unmodified.
function zen_choose_charset(){
 local info="";
 if [[ -e "$1" ]]; then
  local fn="$1";
  local ft="";
  ft="$(file -b -- "$1")";
  info="\n\n<small><b><span color='blue'>$fn</span></b>:\n<span color='green'>$ft</span></small>\n";
 fi;
 # The list_iconv_check_items substitution is unquoted on purpose:
 # every word of its output is one zenity list cell.
 zenity --title "$scriptname: $(langstr ru "Выбрать исходную КОДИРОВКУ файла для преобразования в" en "Choose original file CHARSET for encoding to") UTF-8 …" \
        --list --modal --timeout 180 --width 680 --height 600 \
        --text "<big><b>$(langstr ru "Пожалуйста выбирете <span color='blue'>исходную КОДИРОВКУ</span> файла:" en "Please select <span color='blue'>original CHARSET</span> of file:")</b></big>$info" \
        --radiolist --column "$(langstr ru Выбор en Choice)" --column "$(langstr ru "КОДИРОВКА символов" en "CHARSET/CodePage")" \
        --separator " " --print-column 2 \
        $(list_iconv_check_items WINDOWS-1251) \
        2>/dev/null;
};

## Show a zenity radiolist with the available actions and print the
## keyword of the selected one: test, fix, check, checkv4 or checkv7
## ("test" is preselected). Dialog timeout is 180 seconds.
function zen_choose_action(){
 local title="" prompt="";
 title="$scriptname: $(langstr ru "Проверить/Исправить кодировку" en "Check or Fix encoding ") UTF-8 …";
 prompt="<big><b>$(langstr ru "Пожалуйста выбирете Действие для выполнения:" en "Please select Action you want to execute:")</b></big>";
 # Column headers of the list.
 local -a columns=(
  --column "$(langstr ru Выбор en Choice)"
  --column "$(langstr ru Действие en Action)"
  --column "$(langstr ru Комментарий en Comment)"
 );
 # One row per action: selection flag, keyword, description.
 local -a rows=(
  "TRUE"  "test"    "$(langstr ru "Проверить кодировку текста" en "Verify (test)  text encoding") (UTF-8)"
  "FALSE" "fix"     "$(langstr ru "Исправить кодировку текста" en "Fix (validate) text encoding") (UTF-8)"
  "FALSE" "check"   "$(langstr ru "Проверить кодировку текста" en "Verify (check) text encoding") (UTF-8) $(langstr ru "рекурсивно" en "recursive")"
  "FALSE" "checkv4" "$(langstr ru "Проверить кодировку текста" en "Verify (check) text encoding") (UTF-8) $(langstr ru "рекурсивно+подробно" en "recursive+verbose")"
  "FALSE" "checkv7" "$(langstr ru "Проверить кодировку текста" en "Verify (check) text encoding") (UTF-8) $(langstr ru "рекурсивно+более_подробно" en "recursive+more_verbose")"
 );
 zenity --title "$title" \
        --list --modal --timeout 180 --width 680 --height 230 \
        --text "$prompt" \
        --radiolist "${columns[@]}" \
        --separator " " --print-column 2 \
        "${rows[@]}" \
        2>/dev/null;
};

## Let the user pick which faulty files should be fixed.
## Arguments: files to examine; nothing happens without arguments.
## 'fixutf8 -t' is run on the files; for every report line containing
## 'fixutf8: faulty' the 4th space-separated word is taken as the file
## name. Each such file becomes a checklist row (flag, code page,
## file); the row is preselected when detect_charset returned a number
## (shown as CP<number>). Prints the files chosen in the zenity dialog,
## separated by spaces; prints nothing when no faulty file was found.
function zenselection(){
 if [ -z "$*" ]; then return; fi;
 # Loop variables are local so that they do not leak into the caller.
 local item="" check="" codepage="";
 # Rows are collected in an array: every cell stays one argument even
 # if a value is empty or contains glob characters.
 local -a rows=();
 # Unquoted substitution on purpose: one word per faulty file name.
 for item in $(unix fixutf8 -t "$@" 2>&1 | grep 'fixutf8: faulty' | cut -d ' ' -f 4); do
  check="false"; codepage="$(detect_charset "$item")";
  if is_number "$codepage"; then check="true"; codepage="CP$codepage"; fi;
  rows+=("$check" "$codepage" "$item");
 done;
 if [ "${#rows[@]}" -eq 0 ]; then return; fi;
 zenity --title "$scriptname: $(langstr ru "выбрать файлы для исправления" en "select files to fix") UTF8 …" \
        --list --modal --timeout 120 --width 800 --height 600 \
        --text "<big><b>$(langstr ru "Выбирайте файлы для исправления" en "Check files to fix") UTF8:</b></big>" \
        --checklist --column "$(langstr ru Выбор en Check)" --column "CodePage" --column "$(langstr ru "Файлы для исправления" en "Files to fix") UTF8" \
        --separator " " --print-column 3 "${rows[@]}" 2>/dev/null;
};

## Run 'fixutf8 --charset <charset>' on the given files and pass its
## combined stdout/stderr report through split.
## Globals:   fixutf8_charset (read) - source charset of the files.
## Arguments: files to convert.
## Returns:   1 when fixutf8_charset is empty, otherwise the exit
##            status of the pipeline.
function run_fixutf8_work(){
 if [ -z "$fixutf8_charset" ]; then return 1; fi;
 # The charset is quoted so that it always reaches fixutf8 as exactly
 # one argument.
 unix fixutf8 --charset "$fixutf8_charset" "$@" 2>&1 | split;
 #notify-send -t 30000 "unix fixutf8 --charset $fixutf8_charset $*";
};

## Run 'fixutf8 -t' on the given files and pass its combined
## stdout/stderr report through split.
function run_fixutf8_test(){
 { unix fixutf8 -t "$@"; } 2>&1 | split;
};

## Print those of the given files whose 'file' description contains
## " text" (case-insensitive), one name per line.
## Arguments: files to examine; nothing is printed without arguments.
## NOTE: the name is everything before the first ':' of the 'file'
## output, so names containing ':' are printed truncated.
function list_only_text_files(){
 if [ -z "$*" ]; then return 0; fi;
 # "$@" keeps every argument intact (no re-splitting, no globbing);
 # diagnostics of 'file' itself are discarded.
 file -- "$@" 2>/dev/null | grep -i -e ' text' | cut -d ':' -f 1;
};

 # DoubleCommander (DC) creates its temporary file list in /tmp/_dc~~~/,
 # so we need to grant write access to that directory for all users to
 # avoid access-denied problems when several DC instances are running.
## Temporary directory used by DoubleCommander for its file lists.
readonly tmp_dc_dir="/tmp/_dc~~~";
## Make the DoubleCommander temporary directory accessible (mode 777).
## Arguments: $1 - directory; only handled when it equals $tmp_dc_dir
##                 and exists.
## Does nothing unless COMMANDER_PATH is set. When oct_file_mode does
## not report 777, the chmod command is echoed and then run through
## 'sudo -n'.
function validate_dc_temp_dir(){
 local target="$1";
 [ -n "$COMMANDER_PATH" ] || return 0;
 [ "$target" = "$tmp_dc_dir" ] || return 0;
 [ -d "$target" ] || return 0;
 local perms="";
 perms="$(oct_file_mode "$target")";
 if [ "$perms" = "777" ]; then return 0; fi;
 echo "sudo -n chmod 777 $target";
 sudo -n chmod 777 "$target";
};
## Validate the DoubleCommander temporary directory ($tmp_dc_dir).
## Does nothing unless COMMANDER_PATH is set and is_iam_root succeeds.
## NOTE(review): the directory is validated twice when is_iam_sudo
## also succeeds; the sudo check never enables anything the root check
## does not. Possibly a different condition was intended - confirm.
function fix_tmp_dc(){
 [ -n "$COMMANDER_PATH" ] || return 0;
 if is_iam_root; then
  if is_iam_sudo; then
   validate_dc_temp_dir "$tmp_dc_dir";
  fi;
  validate_dc_temp_dir "$tmp_dc_dir";
 fi;
};

## Report on stderr that none of the received files is a text file.
## Globals: items (read) - list of received files; its word count is
##          shown in the message.
function no_text_files_found_message(){
 local count="";
 count="$(echo $items | wc -w)";
 {
  echo "$scriptname received $count file(s) to check/fix UTF8.";
  echo "But was NOT found any TEXT file(s) to process.";
  echo "All files are binary,directory,link,device etc";
 } 1>&2;
};

## Print caller information (script file, command line, working
## directory, COMMANDER_PATH), then call fix_tmp_dc and print an
## empty line. Does nothing unless COMMANDER_PATH is set.
## Arguments: the command-line arguments to show in "CmdLine=".
function welcome_doublecmd(){
 [ -n "$COMMANDER_PATH" ] || return 0;
 printf '%s\n' \
  "Caller information:" \
  "scriptfile=$scriptfile" \
  "scriptFILE=$scriptFILE" \
  "CmdLine=$0 $*" \
  "PWD=$PWD" \
  "COMMANDER_PATH=$COMMANDER_PATH";
 fix_tmp_dc;
 echo "";
};

# Source charset handed to 'fixutf8 --charset' by run_fixutf8_work.
# "UTF8" is the initial value; main re-exports the variable with the
# code page selected for each file before converting it.
export fixutf8_charset="UTF8";

## Script entry point.
## Arguments: [options] [files]
##   -t|--test     test mode: only check the files
##   -c|--check    check mode: run chkutf8 on the files
##   -l|--list f   append the file names stored in file f
##   -w|--wait n   if n is nonzero, wait n seconds (wait_any_key)
##                 before exit
##   -h|--help     print help and return
##   --version     print version and return
## Without -t and -c the action is chosen in a zenity dialog.
## In fix mode the faulty files are offered for selection, the source
## code page of every selected file is detected (or asked from the
## user) and the file is converted by run_fixutf8_work. Test and fix
## mode finish with a summary of processed/passed/failed files.
function main(){
 welcome_doublecmd "$@";
 check_iconv;
 check_fixutf8;
 local test=0;
 local wait=0;
 local check=0;
 local list="";
 local files="";
 local items="";   # also read by no_text_files_found_message
 local optck="";
 local act="";
 local file="";
 local codepage="";
 local cp="";
 local npass=0;
 local nsumm=0;
 local nfail=0;
 while is_option "$1"; do
  case $1 in
   -t|--test)  test=1; ;;
   -c|--check) check=1; ;;
   -l|--list)  list=$2; shift; fix_tmp_dc; ;;
   -w|--wait)  wait=$2; shift; ;;
   -h|--help)  print_help; return 0; ;;
   --version)  print_version; return 0; ;;
   -*)         fatal 1 "$scriptname: invalid option $1"; ;;
   *)          ;;
  esac;
  shift;
 done;
 # no mode given on the command line: ask the user
 if [ "$test" = "0" ] && [ "$check" = "0" ]; then
  act="$(zen_choose_action)";
  case $act in
   test)    test=1; ;;
   fix)     test=0; ;;
   check)   check=1; ;;
   checkv4) check=1; optck="-v4"; ;;
   checkv7) check=1; optck="-v7"; ;;
   *) cancel 0 "$scriptname: $(langstr ru Отмена en Cancel)."; ;;
  esac;
 fi;
 files="$*";
 if [ -n "$list" ] && [ -e "$list" ]; then
  validate_dc_temp_dir "$(dirname "$list")";
  files="$files $(cat "$list")";
 fi;
 # trim list of files: strip leading and trailing whitespace, so that
 # a list consisting of blanks only is recognized as empty
 files="${files#"${files%%[![:space:]]*}"}";
 files="${files%"${files##*[![:space:]]}"}";
 items="$files";
 if [ -z "$files" ]; then
  echo "$scriptname: no arguments. use $scriptname -h for help." 1>&2;
  if [ "$wait" = "0" ]; then return; fi;
 fi;
 if [ "$check" = "1" ]; then
  colorize_head echo "$(langstr ru "Проверка кодировки текста (UTF8) рекурсивно:" en "Check text encoding validity (UTF8):")";
  colorize_head echo "#########################################################";
  # $optck and $files are unquoted on purpose (word list)
  unix chkutf8 $optck $files;
  colorize_head echo "#########################################################";
  if [ "$wait" = "0" ]; then return; fi;
  wait_any_key $wait; echo "$scriptname: done.";
  return;
 fi;
 # skip all non-text files
 files="$(list_only_text_files $files)";
 if [ -n "$files" ]; then
  # fix mode needs enca to detect the source code page
  if [ "$test" != "1" ]; then check_enca; fi;
  colorize_head echo "$(langstr ru "Проверка кодировки текста (UTF8):" en "Check text encoding validity (UTF8):")";
  colorize_head echo "##############################################";
  colorize_bold run_fixutf8_test $files;
  if [ "$test" != "1" ]; then
   colorize_head echo "$(langstr ru "Правка кодировки текста (UTF8):" en "Fix (validate) text encoding (UTF8):")";
   colorize_head echo "##############################################";
   files="$(zenselection $files)";
   if [ -z "$files" ]; then
    colorize_bold echo_to_stderr -ne "\n\n$(langstr ru "Нет файлов для обработки." en "No files to process.")\n\n";
    # like every other exit path: do not wait when wait is 0
    if [ "$wait" = "0" ]; then return; fi;
    wait_any_key $wait; echo "$scriptname: done.";
    return;
   fi;
   for file in $files; do
    codepage="$(detect_charset "$file")";
    if is_number "$codepage"; then
     codepage="CP$codepage";
    else
     # code page was not detected: let the user choose and confirm it
     cp="$(zen_choose_charset "$file")";
     if [[ -n $cp ]]; then codepage="$cp"; else continue; fi;
     if zen_confirm_charset "$codepage" "$file"; then
      echo "$(langstr ru "Выбрана КОДИРОВКА" en "Selected CHARSET is") $codepage $(langstr ru "для" en "for") $file";
     else
      continue;
     fi;
    fi;
    if is_valid_codepage "$codepage"; then
     export fixutf8_charset="$codepage";
     colorize_bold run_fixutf8_work "$file";
    else
     colorize_bold echo_to_stderr "Error: invalid CodePage $codepage";
    fi;
   done;
  fi;
  # summary: split writes good lines to stdout and bad lines to
  # stderr, so stdout lines = passed, all lines = processed
  npass="$(run_fixutf8_test $files 2>/dev/null | wc -l)";
  nsumm="$(run_fixutf8_test $files 2>&1        | wc -l)";
  nfail=$(( nsumm - npass ));
  colorize_head echo "##############################################";
  report_summ_pass_fail "$nsumm" "$npass" "$nfail";
 else
  colorize_head echo "##############################################";
  colorize_bold no_text_files_found_message;
  colorize_head echo "##############################################";
 fi;
 # wait press enter or timeout
 if [ "$wait" = "0" ]; then return; fi;
 wait_any_key $wait; echo "$scriptname: done.";
};

# Run the script: pass all command-line arguments on to main.
main "$@";

##############
## END OF FILE
##############
