#!/bin/bash

# chkconfig: 345 99 01
# description: VictoriaMetrics time series db

### BEGIN INIT INFO
# Provides:       victoriametrics
# Required-Start:
# Required-Stop:
# Default-Start:  2 3 5
# Default-Stop:
# Description:   VictoriaMetrics time series db
### END INIT INFO

# Author: Ulrike Klusik <ulrike.klusik@consol.de>

# Notes for OMD init script requirements
# - Must handle omd config options like daemon enabling/disabling
# - When a daemon is disabled by omd config it needs
#   to return an exit code of 5.
# - The init script must output an exit code of 2 when
#   an unknown param is used.
# - In general the exit code for succeeded actions is
#   0 and for failed actions it is 1.
# - There are exceptions for the exit code handling:
#   - When a service is already stopped and should be
#     restarted/stopped, it should result in an exit code of 0.
#   - When a service is already running and should be started
#     this also should result in an exit code of 0.
# - When a restart is requested and the program is not running,
#   the script should only execute a start
# - When a restart is requested and the program can not be stopped the
#   script should terminate without starting the daemon
# - When a reload is requested and the program is not running
#   the init script should execute a start instead

# Enter the site directory and load the site environment. The files below are
# sourced via relative paths, so abort right away if the directory cannot be
# entered instead of sourcing whatever happens to be in the current directory.
cd /omd/sites/monitor || exit 1
. .profile
. lib/omd/init_profile
. etc/omd/site.conf
# Exit code 5 tells the caller that the daemon is disabled by omd config.
[ "$CONFIG_VICTORIAMETRICS" = "on" ] || exit 5

# Daemon binary and the site user that owns the process.
USR=monitor
BIN=/omd/sites/monitor//bin/victoria-metrics-prod

# Runtime locations: log directory, log file, time series data and lock file.
LOG_DIR=/omd/sites/monitor/var/victoriametrics
DATA_DIR=${LOG_DIR}/data
LOGFILE=${LOG_DIR}/victoriametrics.log
PID_FILE=/omd/sites/monitor/tmp/lock/victoriametrics.lock

# Active configuration and the template it is generated from.
CFGIN_FILE=/omd/sites/monitor/etc/victoriametrics/victoriametrics.conf
CFGTEMP_FILE=${CFGIN_FILE}_template

# Create the working directories (no-op when they already exist).
mkdir -p /omd/sites/monitor/tmp/victoriametrics ${DATA_DIR}

export LC_ALL=C

# First run only: when no victoriametrics.conf exists yet, derive it from the
# template and fill in freshly generated random credentials.
if [ ! -f ${CFGIN_FILE} ] ; then
  if [ ! -f ${CFGTEMP_FILE} ] ; then
    echo "Config File template ${CFGTEMP_FILE} does not exist -> Abort"
    exit 8
  fi
  echo "create victoriametrics.conf..."

  # Two independent secrets, 16 random alphanumeric characters each.
  httpAuth_password=$( tr -cd A-Za-z0-9 < /dev/urandom | head -c 16 )
  snapshotAuthKey=$( tr -cd A-Za-z0-9 < /dev/urandom | head -c 16 )

  # Replace the placeholder values of the template with the new secrets.
  sed \
    -e "s/vm_httpAuth_password=.*/vm_httpAuth_password=${httpAuth_password}/" \
    -e "s/vm_snapshotAuthKey=.*/vm_snapshotAuthKey=${snapshotAuthKey}/" \
    ${CFGTEMP_FILE} > ${CFGIN_FILE}

  echo "created ${CFGIN_FILE} with random passwords, for remote access check this file"

  # Keep the grafana datasource definition in sync with the new password.
  if [ -f ${OMD_ROOT}/etc/victoriametrics/grafana_datasources.yml ] ; then
    echo "patching password in grafana_datasources.yml"
    sed -i \
      -e "s/basicAuthPassword: .*/basicAuthPassword: ${httpAuth_password}/" \
      -e "s/basicAuthUser: .*/basicAuthUser: prom/" \
      ${OMD_ROOT}/etc/victoriametrics/grafana_datasources.yml
    # Restart grafana only when it is currently running.
    if omd status grafana | grep "grafana.*running" > /dev/null 2>&1 ; then
      omd restart grafana
    fi
  fi
fi

# Export every variable that is assigned after this line. The daemon is
# started with -envflag.enable and -envflag.prefix=vm_ (see BASE_OPTIONS), so
# the vm_* variables below reach it through the environment.
set -a
# Default parameters; they may be overridden by assignments in CFGIN_FILE.
# The following parameters cannot be overridden there:
# - retentionPeriod, httpListenAddr (taken from omd config) and
# - storageDataPath (fixed)
vm_selfScrapeInstance="$(uname -n)-$(id -un)"
vm_enableTCP6=1
vm_search_maxLookback=5m
vm_loggerLevel=INFO
vm_loggerOutput=stdout
vm_selfScrapeInterval=30s
vm_search_maxPointsPerTimeseries=1000000000
vm_search_maxUniqueTimeseries=1000000000
vm_search_minStalenessInterval=5m

# Load the site specific settings; give up when the file cannot be sourced
# or its last command fails.
. "${CFGIN_FILE}" || exit 8

# Base options; they are also used to identify the running instance.
BASE_OPTIONS=(
    "-envflag.enable"
    "-envflag.prefix=vm_"
    "-httpListenAddr=$CONFIG_VICTORIAMETRICS_TCP_ADDR:$CONFIG_VICTORIAMETRICS_PORT"
    "-storageDataPath=$DATA_DIR"
)

# Retention is always taken from the omd config. It is assigned after
# CFGIN_FILE has been sourced, so a value set there is replaced.
vm_retentionPeriod="${CONFIG_VICTORIAMETRICS_RETENTION}"

if [ "${CONFIG_VICTORIAMETRICS_MODE}" = "ssl" ] ; then
  # TLS needs both files. The paths are quoted so that a value containing
  # whitespace is tested as one path instead of making the test itself fail.
  if [ -z "$vm_tlsCertFile" ] ; then echo "vm_tlsCertFile missing"; exit 8; fi
  if [ -z "$vm_tlsKeyFile" ] ; then echo "vm_tlsKeyFile missing"; exit 8; fi
  if [ ! -f "$vm_tlsCertFile" ] ; then printf '%s does not exist\n' "$vm_tlsCertFile"; exit 8; fi
  if [ ! -f "$vm_tlsKeyFile" ] ; then printf '%s does not exist\n' "$vm_tlsKeyFile"; exit 8; fi
  export vm_tls=1
else
  # Without ssl mode no TLS setting may reach the daemon's environment.
  unset vm_tls
  unset vm_tlsCertFile
  unset vm_tlsKeyFile
fi

# A value without any digit 1-9 (unset, empty or zero) is dropped.
if [ "$( printf '%s' "${vm_dedup_minScrapeInterval}" | tr -cd 1-9 )" = "" ] ; then
  # The export must be removed, otherwise the value is used via env!
  unset vm_dedup_minScrapeInterval
fi

set +a # stop exporting new assignments again

# No-op: does nothing and always returns 0.
rebuild_config() {
  :
}

# Looks up the running victoriametrics process of user $USR.
#
# The lookup is done with pgrep: the full command line has to match
# "$BIN" followed by the BASE_OPTIONS exactly (-f -x), and only the oldest
# matching process is reported (-o).
#
# Echoes the PID and returns 0 when a process is found, returns 1 without
# output otherwise. The result is also left in the global variable PID.
victoriadb_proc() {
    PID=$(pgrep -u $USR -o -fx "$BIN ${BASE_OPTIONS[*]}"  2>/dev/null)
    [ -n "$PID" ] || return 1
    echo "$PID"
    return 0
}

# Reports the PID of the running victoriametrics process.
#
# Thin wrapper around victoriadb_proc (process list lookup only): it prints
# whatever victoriadb_proc prints and returns its exit status, i.e. 0 with
# the PID on stdout when a process is found and 1 when none is found.
pidof_victoriadb() {
    victoriadb_proc
}


# Configuration check. Not implemented yet: every call succeeds, whatever
# arguments are passed.
# TODO: check parameters from db1.secrets against help page
verify_config() {
    true
}

# Makes sure the lock/PID file $PID_FILE exists before the daemon is started.
# The path is quoted so that it is treated as one file name even when it
# contains whitespace or glob characters.
prep_start() {
    touch -- "$PID_FILE"
}

# Stops the running victoriametrics process and waits until it is gone.
#
# Prints 'Not running. ' and returns 0 when no process is found. Otherwise
# the process gets a single TERM signal and is polled once per second (one
# dot is printed per second). After the process has vanished the lookup is
# repeated, so that a further matching process is stopped as well.
#
# Returns 0 once no process is left (the PID file is removed then) and 1
# when processes are still alive after 60 seconds of waiting in total.
victoriadb_wait_stop() {
    local pid
    local waited=0
    local max_wait=60

    pid=$(pidof_victoriadb) || true
    if ! kill -0 "${pid:-}" >/dev/null 2>&1; then
        echo -n 'Not running. '
        return 0
    fi

    # It might happen that victoriametrics has a subprocess which is left
    # running after the main process has been killed. So fetch the process
    # id again after each kill until all processes are gone.
    while kill -0 "${pid:-}" >/dev/null 2>&1; do
        # Send a single kill per process
        kill "$pid"
        while kill -0 "$pid" >/dev/null 2>&1; do
            # The budget is shared by all processes handled in this call.
            if [ "$waited" -ge "$max_wait" ]; then
                return 1
            fi
            echo -n "."
            waited=$((waited + 1))
            sleep 1
        done
        # Is there another matching process?
        pid=$(pidof_victoriadb | head -n1) || true
    done

    [ -f "$PID_FILE" ] && rm -f "$PID_FILE"
    return 0
}

# Launches victoriametrics in the background and waits until it shows up.
#
# The daemon is started via nohup with BASE_OPTIONS, its output is appended
# to $LOGFILE and the PID of the background job is written to $PID_FILE.
# Afterwards pidof_victoriadb is polled once per second (one dot is printed
# per second).
#
# Returns 0 as soon as the process is found and 1 when it has not shown up
# after 10 seconds.
victoriadb_wait_start() {
    local waited=0
    local max_wait=10

    prep_start
    # Paths are quoted so that whitespace in them neither splits the command
    # nor makes the redirections ambiguous.
    nohup "$BIN" "${BASE_OPTIONS[@]}" >>"$LOGFILE" 2>&1 &
    printf '%d' "$!" > "$PID_FILE"

    while ! pidof_victoriadb >/dev/null 2>&1; do
        if [ "$waited" -ge "$max_wait" ]; then
            return 1
        fi
        echo -n "."
        waited=$((waited + 1))
        sleep 1
    done
    return 0
}

# Nothing can be done without the daemon binary.
if [ ! -f "$BIN" ]; then
    echo "Victoriametrics binary $BIN not found. Terminating..."
    exit 1
fi

# Dispatch the requested action. Exit codes: 0 on success, 1 on failure and
# 2 for an unknown action.
__init_hook $0 $1 pre
case "$1" in
    start)
        echo -n "Starting victoriametrics..."
        if pidof_victoriadb >/dev/null 2>&1; then
            # Starting an already running service counts as success.
            echo 'Already running.'
            exit 0
        fi

        if ! verify_config quiet; then
            exit 1
        fi

        if victoriadb_wait_start; then
            echo 'OK'
            __init_hook $0 $1 post 0
            exit 0
        else
            echo 'ERROR'
            __init_hook $0 $1 post 1
            exit 1
        fi
    ;;
    stop)
        echo -n "Stopping victoriametrics..."
        if victoriadb_wait_stop; then
            echo 'OK'
            __init_hook $0 $1 post 0
            exit 0
        else
            echo 'ERROR'
            __init_hook $0 $1 post 1
            exit 1
        fi
    ;;
    check|checkconfig)
        if ! verify_config; then
            exit 1
        fi
        exit 0
    ;;
    status)
        PID=$(pidof_victoriadb 2>&1) || true
        if kill -0 "${PID:-}" >/dev/null 2>&1; then
            echo "Running ($PID)."
            exit 0
        else
            echo 'Not running. '
            exit 1
        fi
    ;;
    restart)
        if ! verify_config quiet; then
            exit 1
        fi

        # When the daemon cannot be stopped, terminate without starting it.
        # The exit must happen in this shell: inside a ( ... ) subshell it
        # would only leave the subshell and the start below would still run.
        if ! "$0" stop; then
            echo "Unable to stop Victoriametrics. Terminating..."
            exit 1
        fi

        echo -n "Starting Victoriametrics ..."
        if victoriadb_wait_start; then
            echo 'OK'
            exit 0
        else
            echo 'ERROR'
            exit 1
        fi
    ;;

    *)
        echo "Usage: victoriametrics {start|stop|restart|status|checkconfig}"
        exit 2
    ;;
esac

# EOF

