CentOS: cleanup mysql DB, generate report

I want to share my solution for persistence DB cleanup and report generation.

You can configure the defaults in “dbTools.conf”.
And you can change some of the configuration values via BasicUI (If you create the items and sitemap).
If there are no items, the defaults from the file “dbTools.conf” will be used.

You also need “jq - Command-line JSON processor” installed on your server.
-> yum install jq

I execute the script daily via crontab rule.
At least one entry remains in the DB, also if it is older than the defined cleanup value.

Tested on CentOS 7.7

/etc/openhab2/items/dbtools.items

/************************************************** Gruppen ********************************************/
//--> persist elements
Group		gPersist

/************************************************** Items ********************************************/
Switch		dbCleanupSwitch
			"Housekeeping DB"
			<switch>
			(gPersist)

Switch		dbReportSwitch
			"Erstelle DB Report"
			<switch>
			(gPersist)

Number		dbCleanupRetention
			"Lösche Elemente älter als [%d Tage]"
			<text>
			(gPersist)

Number		dbReportNumber
			"Behalte Anzahl Reports [%d]"
			<text>
			(gPersist)

String		dbToolsSize
			"DB Grösse [%s]"
			<text>
			(gPersist)

String		dbReportTop1
			"Top 1 Table [%s]"
			<text>
			(gPersist)

String		dbReportTop2
			"Top 2 Table [%s]"
			<text>
			(gPersist)

String		dbReportTop3
			"Top 3 Table [%s]"
			<text>
			(gPersist)

String		dbReportTop4
			"Top 4 Table [%s]"
			<text>
			(gPersist)

/etc/openhab2/sitemap/dbTools.sitemap

sitemap dbTools label="dbTools" {
	Frame label="Systeminfo" {
    	Text item=dbToolsSize label="Grösse DB" {
        	Default	  item=dbToolsSize
			Text	  item=EmptyValue
			Switch	  item=dbCleanupSwitch
			Setpoint  item=dbCleanupRetention minValue=1 maxValue=5000 step=1
			Switch	  item=dbReportSwitch
			Setpoint  item=dbReportNumber minValue=1 maxValue=1000 step=1
        	Default	  item=dbReportTop1
        	Default   item=dbReportTop2
        	Default   item=dbReportTop3
        	Default   item=dbReportTop4
		}
	}
}

/etc/openhab2/rules/dbTools.rules

// dbTools.sh
rule "dbTools script"
when 
    Time cron "15 5 0 * * ?"
then
    logInfo("RULES","START dbTools script")
    executeCommandLine("/etc/openhab2/scripts/dbTools.sh")
end

/etc/openhab2/scripts/dbTools.sh

#!/bin/bash
#
# dbTools.sh
# Remo Martinelli
#
#-----------#
# FUNCTIONS #
#-----------#
# read db.conf
config () {
  if [[ -f /etc/openhab2/scripts/dbTools.conf ]]
  then
    # source config file
    . /etc/openhab2/scripts/dbTools.conf
    # get defaults
    database=$database
    databaseUser=$databaseUser
    databasePassword=$databasePassword
    databaseTop=$databaseTop
    openhabAPIPort=$openhabAPIPort
    report=$report
    reportNumber=$reportNumber
    reportOutput=$reportOutput
    cleanup=$cleanup
    cleanupRetention=$cleanupRetention
    log=$log
    logPath=$logPath
    logLength=$logLength
  else
    echo "ERROR: Input file not found."
    exit 2
  fi
}

log () {
  if [[ ${log}=YES ]]
  then
    echo "$(date +%Y-%m-%d\ %H:%M:%S) $1" >> ${logFile}
  fi
}

#------#
# MAIN #
#------#
# read config
config

# logfile output
logFile=${logPath}dbTools.log
log "INFO:    Start dbTools script."

##################################################
# get values from OpenHAB items via REST API
##################################################
log "INFO:    Get values from OpenHAB via REST API."
# item dbCleanupSwtch
RESPONSE=`curl -s --request GET -H "Content-Type:application/json" http://localhost:"${openhabAPIPort}"/rest/items/dbCleanupSwitch/state`
if [[ ${RESPONSE} != *"does not exist!"* && ${RESPONSE} != *"Error"* && ${RESPONSE} != "NULL" && ! -z ${RESPONSE} ]]
then
  cleanup=${RESPONSE}
  log "INFO:    Get value for 'cleanup' (${cleanup}) from OpenHAB via REST API."
else
  log "INFO:    Use default value for 'cleanup' (${cleanup})."
fi

# item dbCleanupRetention
RESPONSE=`curl -s --request GET -H "Content-Type:application/json" http://localhost:"${openhabAPIPort}"/rest/items/dbCleanupRetention/state`
if [[ ${RESPONSE} != *"does not exist!"* && ${RESPONSE} != *"Error"* && ${RESPONSE} != "NULL" && ! -z ${RESPONSE} ]]
then
  cleanupRetention=`echo ${RESPONSE} | cut -f1 -d"."`
  log "INFO:    Get value for 'cleanupRetention' (${cleanupRetention}) from OpenHAB via REST API."
else
  log "INFO:    Use default value for 'cleanupRetention' (${cleanupRetention})."
fi

# item dbReportSwicth
RESPONSE=`curl -s --request GET -H "Content-Type:application/json" http://localhost:"${openhabAPIPort}"/rest/items/dbReportSwitch/state`
if [[ ${RESPONSE} != *"does not exist!"* && ${RESPONSE} != *"Error"* && ${RESPONSE} != "NULL" && ! -z ${RESPONSE} ]]
then
  report=${RESPONSE}
  log "INFO:    Get value for 'report' (${report}) from OpenHAB via REST API."
else
  log "INFO:    Use default value for 'report' (${report})."
fi

# item dbReportNumber
RESPONSE=`curl -s --request GET -H "Content-Type:application/json" http://localhost:"${openhabAPIPort}"/rest/items/dbReportNumber/state`
if [[ ${RESPONSE} != *"does not exist!"* && ${RESPONSE} != *"Error"* && ${RESPONSE} != "NULL" && ! -z ${RESPONSE} ]]
then
  reportNumber=`echo ${RESPONSE} | cut -f1 -d"."`
  log "INFO:    Get value for 'reportNumber' (${reportNumber}) from OpenHAB via REST API."
else
  log "INFO:    Use default value for 'reportNumber' (${reportNumber})."
fi

##################################################
# get db size and biggest tables
##################################################
log "INFO:    Get db size and biggest tables."
SIZE=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT SUM( data_length + index_length)/1024/1024 "Database Size (MB)"  FROM information_schema.TABLES where table_schema = "'${database}'";'`
SIZE=$(printf '%.2f\n' ${SIZE})
# dbToolsSize
log "INFO:    Db size is ${SIZE} MB."
RESPONSE=`curl -X POST --silent --output /dev/null -H "Content-Type: text/plain" -H "Accept:application/json" -d "${SIZE} MB" http://localhost:"${openhabAPIPort}"/rest/items/dbToolsSize`

# dbReportTop1, dbReportTop2, dbReportTop...
ITEMS=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT table_name, round( (data_length + index_length) / 1024 / 1024, 2)  as total_size FROM information_schema.tables WHERE table_schema not in ("information_schema", "mysql","performance_schema" ,"sys") AND table_type = "BASE TABLE" AND table_schema = "'${database}'" ORDER BY total_size desc limit '${databaseTop}';'`
STRINGARRAY=($ITEMS)

INITIAL=1
STEP=2
COUNT=${databaseTop}

for ((i=0;i<${COUNT};i++))
do
 let VALUE=$((${STEP}*${i}))
 let VALUE1=$((${i}+1))
  log "INFO:    Table ${STRINGARRAY[VALUE]} [${STRINGARRAY[VALUE+1]} MB]."
  RESPONSE=`curl -X POST --silent --output /dev/null -H "Content-Type: text/plain" -H "Accept:application/json" -d "${STRINGARRAY[VALUE]} [${STRINGARRAY[VALUE+1]} MB]" http://localhost:"${openhabAPIPort}"/rest/items/dbReportTop"${VALUE1}"`
done

##################################################
# create report
##################################################
if [[ $report == "ON" ]]
then
  log "INFO:    Create report."
  FILENAME=${reportOutput}"OpenHAB_Export_"$(date +%Y-%m-%d\_%H-%M-%S)".csv"
  echo "itemname;tablename;size[MB];numberOfEntries;oldestEntry;newestEntry;type;label;groups;state" > ${FILENAME}
  # entries
  # itemname; tablename;      size;  numberOfEntries;           oldestEntry;           newestEntry;     type;      label;     group;    state;
  # ItemName; item0123;        1.0;              123;   2018-04-12 08:14:45;   2019-12-31 17:03:33;   Switch;  ItemLabel;  gPersist;       ON;
  # ItemName; not_persisted;     -;                -;                     -;                     -; 
  ITEMS=`curl -s --request GET -H "Content-Type:application/json" "http://localhost:"${openhabAPIPort}"/rest/items?recursive=true"`
  NUMBEROFITEMS=`echo ${ITEMS} | jq '. | length'`

  # get all items from db
  ITEMSDB="- "`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT itemname,itemId FROM '${database}'.items;'`" -"
  # loop through the items
  for((i=0;i<${NUMBEROFITEMS};++i))
  do
    SELECTEDITEM=`echo ${ITEMS} | jq '.['${i}']'`
    SELECTEDITEMNAME=`echo ${SELECTEDITEM} | jq '.name' | sed 's/^"\(.*\)"$/\1/'`
    STATE=`echo ${SELECTEDITEM} | jq '.state' | sed 's/^"\(.*\)"$/\1/'`
    TYPE=`echo ${SELECTEDITEM} | jq '.type' | sed 's/^"\(.*\)"$/\1/'`
    LABEL=`echo ${SELECTEDITEM} | jq '.label' | sed 's/^"\(.*\)"$/\1/'`
    GROUP=`echo "${SELECTEDITEM}"| jq '.groupNames[]'| sed 's/^"\(.*\)"$/\1/' | sed ':a;N;$!ba;s/\n/,/g'`

    if [[ ${STATE} == *"data:image"* ]]
    then
      STATE="*imageData*"
    fi

    # check if item is persisted
    if [[ `echo ${ITEMSDB} | grep " ${SELECTEDITEMNAME} "` ]]
    then
      ITEMID=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT itemId FROM '${database}'.items WHERE itemname LIKE "'${SELECTEDITEMNAME}'";'`
      ITEMIDFULL=`printf "item%04d\n" ${ITEMID}`
      OLDEST=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT time FROM '${database}'.'${ITEMIDFULL}' ORDER BY time ASC LIMIT 1;'`
      NEWEST=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT time FROM '${database}'.'${ITEMIDFULL}' ORDER BY time DESC LIMIT 1;'`
      COUNT=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT COUNT(*) FROM '${database}'.'${ITEMIDFULL}';'`
      SIZE=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT SUM( data_length + index_length)/1024/1024 FROM information_schema.TABLES where table_schema = "'${database}'" AND TABLE_NAME = "'${ITEMIDFULL}'";'`
      echo "${SELECTEDITEMNAME};${ITEMIDFULL};${SIZE};${COUNT};${OLDEST};${NEWEST};${TYPE};${LABEL};${GROUP};${STATE}" >> ${FILENAME}
    else
      echo "${SELECTEDITEMNAME};;;;;;${TYPE};${LABEL};${GROUP};${STATE}" >> ${FILENAME}
    fi
  done
  # remove oldest reports
  let reportNumber=${reportNumber}-1
  find ${reportOutput} -name "OpenHAB_Export*" -mtime +${reportNumber} -exec rm -Rf {} \;
else
  log "INFO:    Create report is disabled."
fi

##################################################
# cleanup db
##################################################
if [[ $cleanup == "ON" ]]
then
  log "INFO:    Cleanup db table rows older than $cleanupRetention days."
  # get all items from db
  ITEMSDB=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT itemId FROM '${database}'.items;'`
  # loop through the items
  for SELECTEDITEMDB in ${ITEMSDB}
  do
    SELECTEDITEMDBFULL=`printf "item%04d\n" ${SELECTEDITEMDB}`
    # check if there are rows to delete
    COUNTITEMSTABLE=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT COUNT(*) FROM '${database}'.'${SELECTEDITEMDBFULL}';'`
	if [[ ${COUNTITEMSTABLE} -eq 0 ]]
    then
      log "INFO:    Table ${SELECTEDITEMDBFULL} has no entries -> skip it."
    else
	    # check if there are rows in the table between delete date and now
      COUNTITEMSKEEP=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT COUNT(*) FROM '${database}'.'${SELECTEDITEMDBFULL}' WHERE time > NOW() - INTERVAL '${cleanupRetention}' DAY;'`
      if [[ ${COUNTITEMSKEEP} -eq 0 ]]
      then
        # evaluate latest timestamp if latest row is older than retention time
        LATESTITEMDATE=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT time FROM '${database}'.'${SELECTEDITEMDBFULL}' ORDER BY time DESC LIMIT 1;'`
        let REALCLEANUPRETENTION=$((($(date +%s)-$(date +%s --date "${LATESTITEMDATE}"))))
      else
	      let REALCLEANUPRETENTION=$((${cleanupRetention}*3600*24))
      fi
      # finally delete rows in table with adjusted retention date
      let REALCLEANUPRETENTIONDAYS=$((${REALCLEANUPRETENTION}/(3600*24)))
      COUNTITEMSDELETE=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'SELECT COUNT(*) FROM '${database}'.'${SELECTEDITEMDBFULL}' WHERE time < NOW() - INTERVAL '${REALCLEANUPRETENTION}' SECOND;'`
      if [[ ${COUNTITEMSDELETE} -gt 0 ]]
      then
        log "INFO:    Table ${SELECTEDITEMDBFULL} has ${COUNTITEMSKEEP} rows younger than ${cleanupRetention} days."
        if [[ ${COUNTITEMSKEEP} -eq 0 ]]
        then
          log "INFO:    Table ${SELECTEDITEMDBFULL} youngest entry is from ${LATESTITEMDATE}."
        fi
        log "INFO:    Table ${SELECTEDITEMDBFULL} delete entries older than ${REALCLEANUPRETENTIONDAYS} days."
        log "INFO:    Table ${SELECTEDITEMDBFULL} delete ${COUNTITEMSDELETE} rows."
        DELETEITEMS=`mysql -u${databaseUser} -p${databasePassword} -s -N -e 'DELETE FROM '${database}'.'${SELECTEDITEMDBFULL}' WHERE time < NOW() - INTERVAL '${REALCLEANUPRETENTION}' SECOND;'`
      fi
    fi	
  done
  log "INFO:    Cleanup db table rows done."
else
  log "INFO:    Cleanup db disabled."
fi

##################################################
# maintain logfile
##################################################
if [[ $log == "YES" ]]
then
  log "INFO:    Trim logfile."
  let logLength=${logLength}-1
  echo "$(tail -n ${logLength} ${logFile})" > ${logFile}
fi

log "INFO:    End dbTools script."

exit

/etc/openhab2/scripts/dbTools.conf

# input for dbTools.sh
# put it in /etc/openhab2/scripts/dbCleanup.conf
#####################################################
# databse settings
#####################################################
database=enter_database _name_here
databaseUser=database_user
databasePassword=database_user_password
databaseTop=4
openhabAPIPort=8080
#####################################################
# report defaults
#####################################################
report=OFF
reportNumber=100
reportOutput=/etc/openhab2/data/
#####################################################
# cleanup defaults
#####################################################
cleanup=OFF
cleanupRetention=5000
#####################################################
# log
#####################################################
log=YES
logPath=/etc/openhab2/data/
logLength=10000

Changelog:
6. Jan 2020: updated script dbTools.sh (bug fixing)
23. Jan 2020: cleanup old reports fixed

3 Likes