#!/usr/bin/perl -w
#2345678911234567892123456789312345678941234567895123456789612345678971234567898

# License: GPL v3
# Author: Marc MERLIN <marc_soft at merlins.org> 2009/12/26
# $Id: cacti_owfs 1401 2019-08-24 23:41:15Z svnuser $

use strict;
use Date::Manip;
use Getopt::Long;

# Verbosity threshold compared against the level passed to verbose().
my $VERBOSE = 0;

# how many minutes between each data sample
my $SAMPLE_TIME = 1;

# Log to read: TEMPLOGFILE, when set to a true value, replaces the default
# path, and "-" selects standard input.
my $LOGFILE = $ENV{'TEMPLOGFILE'} ? $ENV{'TEMPLOGFILE'} : "/var/log/temperatures";
if ($LOGFILE eq "-")
{
    warn("reading from STDIN\n");
    # understood by 2-argument open() as "duplicate the STDIN handle"
    $LOGFILE = "<&STDIN";
}

# State shared between the subs below: %data holds one hash of readings per
# sample time, @timeslot lists those sample times in the order they were read.
my (%data, @timeslot);

# Command line switches, filled in by GetOptions() at the end of the file.
my ($CACTI, $CACTI_DUMP, $CACTI_DUMP_HEADER, $VAR);

# when adding fields here, go to Data Input Methods
# https://server/cacti/data_input.php?action=edit&id=14
# you can add fields more quickly by clicking add, submit, then back,
# change, and submit again.
# After adding fields, you have to add them to a Data Source
# https://server/cacti/data_sources.php?action=ds_edit&id=17

# This script will also convert the format of UV and Moisture readings
# as they are logged raw in the original file and the conversion table 
# may change over time, so it's better to log raw data and convert here.
# this is the painful and slow step. Cacti takes a while to add fields
# one per one (add field creates 'ds', which you then edit).

# when all done adding the fields, Turn On Data Source Debug Mode.
# and go to the end of this file

# Build the table of known sensor slots.
#
# Returns a reference to a brand new hash on every call, keyed by slot index
# and holding "Label:U" for each slot, so callers may overwrite entries
# without affecting the next call.
sub reset_idx_hash
{
    # [ index, label ] pairs; ":U" is appended to every label below.
    # An index with a letter suffix (U, H, M) is the slot for the extra
    # reading that load_file_data() files under "$idx$extra_type".
    # The ### markers are carried over unchanged from the previous layout
    # of this table.
    my @slots = (
        [ "01",  'Corridor' ],
        [ "02",  'MasterBR' ],
        [ "11",  'Family_Room' ],
        [ "12",  'Living_Room' ],
        [ "15",  'Garage' ],
        [ "16",  'Garage_Attic_Streetside' ],
        [ "17",  'Garage_Attic_Creekside' ],
        [ "18",  'Garage_Outside_Creekside_Attic' ],
        [ "21",  'Attic' ],
        [ "22",  'Roof' ],
        [ "23",  'Outdoors_Roof' ],
        [ "25",  'Roof_UV' ],
        [ "25U", 'Roof_UV_U' ],
        [ "31",  'Crawlspace' ],
        [ "32",  'Outdoors_Crawlspace' ],
        [ "35",  'Front_Lawn' ],
        [ "35M", 'Front_Lawn_M' ],
        [ "36",  'Side_Lawn1' ],                  ###
        [ "36M", 'Side_Lawn1_M' ],                ###
        [ "37",  'Side_Lawn2' ],                  ###
        [ "37M", 'Side_Lawn2_M' ],                ###
        [ "38",  'Side_Lawn3' ],                  ###
        [ "38M", 'Side_Lawn3_M' ],                ###
        [ "41",  'BR2' ],
        [ "42",  'BR3' ],                         ###
        [ "43",  'BR4' ],                         ###
        [ "51",  'Garage_Fridge' ],
        [ "52",  'Garage_Freezer' ],
        [ "55",  'Computer_Closet' ],
        [ "56",  'Hall_Closet' ],
        [ "56H", 'Hall_Closet_H' ],
        [ "60",  'Dining_Room' ],
        [ "61",  'Kitchen_Fridge' ],
        [ "62",  'Kitchen_Freezer' ],
        [ "80",  'spare0' ],                      ###
        [ "81",  'spare1' ],                      ###
        [ "82",  'spare2' ],                      ###
        [ "90",  'HVAC_MainVent' ],
        [ "91",  'HVAC_MBR_ZoneVent' ],
        [ "92",  'HVAC_MBR_BoostVent' ],
        [ "93",  'HVAC_FMR_BoostVent' ],
        # This is the vent from HVAC output going to the crawlspace after the damper point
        [ "94",  'HVAC_Vent_Crawlspace_Damper' ],
        [ "95",  'HVAC_Outside_Air_Damper' ],
        [ "99",  'Full_HVAC_Status' ],
    );

    my %fresh = map { ($_->[0], $_->[1] . ':U') } @slots;
    return \%fresh;
}


# Derive, from the slot table, the values the rest of the script needs:
#   $HASH_ELTS       - number of slots in a complete sample
#   $REC_LINES       - how many trailing log lines data_tail() reads
#   @template_labels - slot labels cut to DS name length, in sorted index order
#   %label_to_idx    - label => slot index, used to resolve --value
#   $template_str    - @template_labels joined with ":" (--cacti-dump-header)
my $temp = reset_idx_hash();
my $HASH_ELTS = keys(%{$temp});
my $REC_LINES = $HASH_ELTS * 2;
my @template_labels = ();
my %label_to_idx;

foreach my $key (sort keys %{$temp})
{
    my $label = $temp->{$key};
    $label =~ s/:U//;
    $label_to_idx{$label} = $key;

    # DS fields in cacti/RRD are limited to 19 characters
    my $ds_name = substr($label, 0, 19);

    # The usage message lists @template_labels as the valid --value names, so
    # the shortened spelling has to resolve too.  A full label always wins
    # over an alias because full labels are assigned unconditionally above.
    $label_to_idx{$ds_name} = $key unless (exists $label_to_idx{$ds_name});

    push (@template_labels, $ds_name);
}

my $template_str = join(":", @template_labels);

# Print $mesg (plus a newline) on STDERR when the global $VERBOSE is at least
# $level.  A missing or false $level counts as 1.
sub verbose
{
    my $mesg = shift;
    my $level = shift || 1;

    if ($VERBOSE >= $level)
    {
        warn("$mesg\n");
    }
}

# Return the time elapsed between two dates, as a "%5.2f" formatted string.
#
# Arguments: the earlier and the later date, in any form DateCalc() accepts.
# The delta is rendered with Delta_Format's "%mh" directive with one decimal;
# see the Date::Manip documentation for exactly which delta fields that
# directive folds into the minute count.
#
# Lexicals are used throughout so the caller's $_ is left untouched.
sub delta_mins
{
    my ($from, $to) = @_;

    my $delta = DateCalc($from, $to);
    #print "Got delta $delta from ".join("|", @_)."\n";

    return sprintf("%5.2f", Delta_Format($delta, 1, "%mh"));
}

# Format a date as "%Y/%m/%d %T" through UnixDate().
sub printable_date
{
    my ($when) = @_;

    return UnixDate($when, "%Y/%m/%d %T");
}

# Format the time of day of a date as "%T" through UnixDate().
sub printable_time
{
    my ($when) = @_;

    return UnixDate($when, "%T");
}

# Extract the two-digit minute from a timestamp that ends in HH:MM:SS.
#
# Returns the minute as a string, leading zero included.
# Dies with a message naming the offending input when the argument does not
# end in HH:MM:SS.
sub date_min
{
    my ($stamp) = @_;

    if (defined $stamp and $stamp =~ /\d\d:(\d\d):\d\d$/)
    {
        return $1;
    }

    # report the argument itself, not whatever $_ happens to hold
    $stamp = "(undefined)" if (not defined $stamp);
    die "Couldn't get minute from $stamp\n";
}

# Convert a date to seconds since the epoch through UnixDate()'s "%s".
sub epoch
{
    my ($when) = @_;

    return UnixDate($when, "%s");
}


# Parse raw log lines into the global %data and @timeslot.
#
# Arguments:
#   $FH       - reference to an array of log lines (not a file handle,
#               despite the name)
#   $fromdate - start of the requested window, in any form ParseDate() accepts
#   $todate   - end of the requested window, same
#
# NOTE(review): the window is only reported through verbose(); no line is
# filtered on it in this function.
#
# Lines are grouped by the minute of their timestamp: every time the minute
# changes, the readings gathered so far are stored as one sample.  A sample is
# a hash as returned by reset_idx_hash() in which every slot that got a
# reading holds "name:value" and the others keep their "label:U" default.
# %data is keyed by, and @timeslot lists in reading order, the timestamp of
# the first line of each sample, kept exactly as found in the log
# ("YYYY/MM/DD HH:MM:SS").  Both are emptied at the start of every call, and
# nothing is stored when no line could be parsed.
#
# Unparsable lines are reported with warn and skipped; a slot index that
# reset_idx_hash() does not know is fatal.
sub load_file_data
{
    my ($FH, $fromdate, $todate) = @_;
    # parsed once here; only used for the verbose message below
    my ($parsed_fromdate, $parsed_todate) = (ParseDate($fromdate), ParseDate($todate));

    # The moisture calibration changes on this date.  It is written in the
    # log's own timestamp layout because $date is compared to it as a string:
    # a Date::Manip style "2010091700:00:00" sorts after every "2010/..."
    # stamp, which would move the switch to 2011/01/01.
    my $moisture_recal_date = "2010/09/17 00:00:00";

    my $process_min;      # minute of the sample being gathered
    my $datatime;         # timestamp of the sample being gathered
    my $prev_datatime;    # timestamp of the sample before it
    my $values;           # readings of the sample being gathered

    # reset to null since we can be called multiple times
    %data = ();
    @timeslot = ();

    verbose("load_file_data: between $parsed_fromdate and $parsed_todate");

    foreach my $line (@{$FH})
    {
        next if (not defined $line);

        my ($date, $idx, $name, $value, $extra_type, $extra_value);

        # 2010/07/08 18:42:15 56 Hall_Closet F: 70.7 H: 57.6217%
        # 2010/06/22 07:05:13 35 Front_Lawn F: 65.075 M: -1.31125
        if ($line =~ /^(\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d) (\d\d) (\S+) F: (\S+) ([UHM]): ([-0-9.]+)/)
        {
            ($date, $idx, $name, $value, $extra_type, $extra_value) = ($1, $2, $3, $4, $5, $6);
        }
        # 2010/07/08 18:42:04 12 Living_Room F: 70.925
        elsif ($line =~ /^(\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d) (\d\d) (\S+) F: (\S+)/)
        {
            ($date, $idx, $name, $value) = ($1, $2, $3, $4);
        }
        # 2010/07/08 18:41:30 06 MBR_HVAC: 0
        elsif ($line =~ /^(\d\d\d\d\/\d\d\/\d\d \d\d:\d\d:\d\d) (\d\d) (\S+): (\S+)/)
        {
            ($date, $idx, $name, $value) = ($1, $2, $3, $4);
        }
        else
        {
            warn "could not parse $line";
            next;
        }

        my $read_min = date_min($date);

        # we gather data one minute at a time on the minute. Any data missing
        # will not be part of this sample
        if (not defined $process_min or $read_min != $process_min)
        {
            $prev_datatime = $datatime if (defined $datatime);
            # the raw log stamp is kept as is; running it through ParseDate
            # is unnecessary and slows things down
            $datatime = $date;

            if (defined $prev_datatime)
            {
                my $sample_delta = delta_mins($prev_datatime, $datatime);
                if ($sample_delta > $SAMPLE_TIME * 2)
                {
                    warn "Warning data log time jumped by $sample_delta mn at $datatime\n";
                }
            }

            # the minute changed: file the sample that just ended
            if (defined $process_min)
            {
                $data{$prev_datatime} = $values;
                push (@timeslot, $prev_datatime);
            }

            # (re)set variables to 'U'
            $values = reset_idx_hash();
            $process_min = $read_min;
        }

        die "Got unknown index $idx for $datatime / $name:$value" unless (defined $values->{$idx});
        $values->{$idx} = "$name:$value";

        if ($extra_type)
        {
            # the second reading of the line goes into its own slot,
            # e.g. "56" + "H" -> "56H" / "Hall_Closet_H"
            $idx = "$idx$extra_type";
            $name = $name."_$extra_type";
            $value = $extra_value;

            if ($extra_type eq "M")
            {
                # 0.56625/0.5675 bone dry, but once got 0.335625.
                # glass of water yielded 1.74 but I've sometimes seen 2.555. What to use?
                # After the recalibration date: sensor got damaged, readings are off.
                my ($min, $max) = ($date lt $moisture_recal_date) ? (0.5, 1.74) : (0.5, 1.02);

                # I should really temperature adjust the wet readings,
                # but I'll worry about this later because I have no idea
                # what tables to use right now.
                # Flip the sign, clamp to [$min, $max], then scale to 0..100.
                $value *= -1;
                $value = $min if ($value < $min);
                $value = $max if ($value > $max);
                $value = (100 * ($value - $min) / ($max - $min));
            }

            die "Got unknown index $idx for $datatime / $name:$value" unless (defined $values->{$idx});
            $values->{$idx} = "$name:$value";
        }
    }

    # save data we last gathered before EOF, if any line was parsed at all
    if (defined $datatime)
    {
        $data{$datatime} = $values;
        push (@timeslot, $datatime);
    }
}


# Dump the log as "epoch:value:value:..." lines on STDOUT, one per sample.
#
# Arguments (both optional, a false value means "not given"):
#   $_[0] - start date, handed to load_file_data()
#   $_[1] - when set, only the last that many log lines are read (through
#           tail); otherwise $LOGFILE is read from the start
#
# The input is processed in blocks of $lineblock lines.  Within a sample the
# values are printed in sorted slot-index order.  A sample is skipped, with a
# warning, when it has no values, when it does not have exactly $HASH_ELTS
# values, or when a value contains a character matched by [A-TV-z].
# Dies when the log cannot be opened or tail cannot be started.
sub cacti_dump
{
    my $from = $_[0] ? $_[0] : 0;
    my $tail = $_[1] ? $_[1] : 0;
    my $lineblock = 100000;
    my $pass = 1;
    my $exit = 0;
    my $temps;

    if ($tail)
    {
        verbose("Will gather stats from $from working on $tail lines");
        # List form: no shell is involved, so neither the line count nor the
        # file name can be interpreted as shell syntax.  When the log is
        # STDIN no file name is given and tail reads the inherited STDIN.
        my @tail_cmd = ("tail", "-n", $tail);
        push(@tail_cmd, $LOGFILE) unless ($LOGFILE eq "<&STDIN");
        open($temps, "-|", @tail_cmd) or die "Cannot run @tail_cmd: $!\n";
    }
    else
    {
        verbose("Will gather stats from $from (reading the whole file)");
        # 2-argument open on purpose: $LOGFILE may be the "<&STDIN" spec
        open($temps, $LOGFILE) or die "Cannot open $LOGFILE: $!\n";
    }

    while (not $exit)
    {
        my @lines;

        while (scalar(@lines) < $lineblock)
        {
            my $line = <$temps>;
            # only a real end of file stops the read, not a false-looking line
            if (not defined $line)
            {
                $exit = 1;
                last;
            }
            push(@lines, $line);
        }

        my $nlines = scalar(@lines);
        # nothing left to parse (empty input, or the previous block ended
        # exactly at end of file)
        last if ($nlines == 0);

        # 2mn for 100,000 lines
        warn("Read block of $nlines lines, now parsing (pass $pass)\n");
        load_file_data(\@lines, $from, "now");
        # the raw lines are not needed once parsed
        @lines = ();

        # 50s for 100,000 lines
        my $first_slot = defined $timeslot[0] ? $timeslot[0] : "(no samples)";
        warn("Parsed block of $nlines lines, now dumping to STDOUT (pass $pass) from ".$first_slot."\n");

        foreach my $time (@timeslot)
        {
            next if (not defined $time);

            my $sample = $data{$time};
            my $values = "";
            my $foundvalues = 0;

            # we order keys here, and when we feed into an rrd, the order is
            # specified by using --cacti-dump-header so that the field order
            # matches
            foreach my $key ( ref($sample) eq 'HASH' ? sort keys %{$sample} : () )
            {
                $foundvalues++;
                my $field = $sample->{$key};
                # Hall_Closet_H:63.1241 -> 63.1241
                $field =~ s/.*://;
                $values .= ":$field";
            }

            if (not $values)
            {
                warn "No values for ".epoch($time).", skipping...\n";
            }
            elsif ($foundvalues != $HASH_ELTS)
            {
                warn "No enough values for ".epoch($time).": $values, skipping...\n";
            }
            elsif ($values =~  /[A-TV-z]/)
            {
                # 'U' (unknown) is the only letter allowed in a value
                warn "Illegal characters in values for ".epoch($time).": $values, skipping...\n";
            }
            else
            {
                print epoch($time)."$values\n";
            }
        }

        $pass++;
    }
    close($temps);
}


# Print the most recent sample found in the last $REC_LINES lines of the log.
#
# Argument:
#   $var - optional slot index; when given only that slot's value is printed,
#          otherwise every slot is printed as "name:value " in sorted index
#          order
#
# When the newest sample is older than $SAMPLE_TIME*2 + 1 minutes, or no
# sample could be loaded, a FAIL warning is issued and the values are
# reported as U instead of stale numbers: every "name:value" becomes "name:U"
# in the full listing, and a single requested value becomes "U".
# Dies when tail cannot be started.
sub data_tail
{
    my ($var) = @_;

    # List form: no shell is involved.  When the log is STDIN no file name is
    # given and tail reads the inherited STDIN.
    my @tail_cmd = ("tail", "-n", $REC_LINES);
    push(@tail_cmd, $LOGFILE) unless ($LOGFILE eq "<&STDIN");
    open(my $temps, "-|", @tail_cmd) or die "Cannot run @tail_cmd: $!\n";
    my @lines = <$temps>;
    close($temps);
    load_file_data(\@lines, 0, "now");

    my $lastdate = @timeslot ? $timeslot[$#timeslot] : undef;
    my $sample = (defined $lastdate and ref($data{$lastdate}) eq 'HASH') ? $data{$lastdate} : {};

    # how old samples can be before we reject them
    my $acceptable_time = DateCalc("now", "-".($SAMPLE_TIME*2 + 1)." minutes");
    my $stale = (not defined $lastdate or $acceptable_time gt ParseDate($lastdate));

    my $str = "";
    if ($var)
    {
        $str = defined $sample->{$var} ? $sample->{$var} : "";
        # Hall_Closet_H:63.1241 -> 63.1241
        $str =~ s/.*://;
    }
    else
    {
        # we don't need to order for cacti, but it's nice for debugging and reading
        foreach my $key ( sort keys %{$sample} )
        {
            $str .= $sample->{$key}." ";
        }
    }

    if ($stale)
    {
        warn "FAIL: $acceptable_time > ".(defined $lastdate ? $lastdate : "(no sample)")."\n";
        if ($var)
        {
            # the "name:" prefix is already gone, so the substitution used
            # for the full listing could never match here
            $str = "U";
        }
        else
        {
            $str =~ s/:\S* /:U /g;
        }
    }

    print "$str\n";
}


# Print an optional complaint followed by the command line synopsis on
# STDERR, then leave the program.
sub usage
{
    my ($complaint) = @_;

    if (defined($complaint))
    {
        warn ("$complaint\n");
    }

    print STDERR <<EOF;

    $0 [--cacti | --cacti-dump | --cacti-dump-header | --value label]
EOF
    exit;
}

# Parse the command line; an unknown option ends in usage().
GetOptions(
    "cacti"             => \$CACTI,
    "cacti-dump"        => \$CACTI_DUMP,
    "cacti-dump-header" => \$CACTI_DUMP_HEADER,
    "value:s"           => \$VAR,
) or usage();

# Exactly one mode runs; when several switches are given the first match in
# this chain wins.
if ($CACTI)
{
    # every slot of the latest sample on one line
    data_tail();
}
elsif ($VAR)
{
    # a single slot of the latest sample, looked up by label
    my $slot = $label_to_idx{$VAR};

    if (not defined $slot)
    {
        usage("$VAR is not a known value\nValid options are: @template_labels");
    }
    data_tail($slot);
}
elsif ($CACTI_DUMP)
{
    # remaining arguments: start date, then number of trailing lines
    cacti_dump(@ARGV);
}
elsif ($CACTI_DUMP_HEADER)
{
    print "$template_str\n";
}
else
{
    usage();
}

# vim:sts=4:sw=4