# Provenance (from the original patch header):
#   commit  655f6d2adaadfe5d70775a18023301491d799419
#   From:   John-Mark Gurney
#   Date:   Wed, 1 Jan 2020 22:52:50 -0800
#   Subject: add module to parse the log files that we output...
#   File:   RainEagle/parse.py (new file)

'''Parse the meter log files that we output.

A log is a text file of whitespace separated records, one per line:

    m <readts> <meterts-local> <status> <load> <maxload> <loadunit>
      <sink> <source> <ssunit>
    z <tzname> <local> <utc> <offset>

Meter ("m") records carry the meter's timestamp in the meter's local
time; zone ("z") records carry the information needed to convert it to
UTC.
'''

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

import collections
import itertools
import unittest


class MeterRead(collections.namedtuple('MeterRead', [
        'meterts', 'readts', 'status', 'load', 'maxload', 'loadunit',
        'sink', 'source', 'ssunit'])):
    '''One parsed meter ("m") record.

    meterts is the meter's timestamp converted to UTC (an int), readts
    is the second field of the line as a float, status/loadunit/ssunit
    are kept as strings and load/maxload/sink/source are floats.
    '''


class ZoneInfo(collections.namedtuple('ZoneInfo', [
        'tz', 'lcl', 'utc', 'offset'])):
    '''One parsed zone ("z") record.

    offset is the textual UTC offset in [+-]HHMM form, e.g. '-0700'.
    '''

    def getOffset(self):
        '''Return the offset from UTC in seconds (negative west of UTC).

        This value should be added to UTC to get localtime, or
        subtracted from localtime to get UTC.
        '''

        off = int(self.offset)

        # Split the magnitude so that the sign applies to both the hours
        # and the minutes.  Relying on "/" here only worked with
        # Python 2's floor division; with true division '-0730' came out
        # as -6.5 hours.
        hours, minutes = divmod(abs(off), 100)
        seconds = hours * 60 * 60 + minutes * 60

        return -seconds if off < 0 else seconds


def _pread(fp, off, sz):
    '''Read sz characters at offset off of fp without moving the
    file pointer: the position is restored even if the read fails.'''

    origpos = fp.tell()
    try:
        fp.seek(off)
        return fp.read(sz)
    finally:
        fp.seek(origpos)


#
# Notes:
#
# We are assuming that the first line of the file is in the same timezone
# as the line that immediately follows.  We also assume that the remaining
# lines in a file are the same timezone.  This will be fine as long as no
# one starts or stops the program around a time change.  This is pretty safe
# in that it only happens twice a year.  This is a .023% chance that a
# random reboot/start of the program will hit this.
#
# The meter updates the timezone info when it gets the new info.  This
# means that we don't know exactly when the time changed, BUT there will
# be a discontinuity, where it skips forward or backward an hour, and this
# is when the new timezone takes effect.
#

class ParseLog(object):
    '''Iterate over the meter records of a log file, with the meter
    timestamps converted to UTC.'''

    def __init__(self, fp):
        self._fp = fp

    @staticmethod
    def parseline(data, tz=None):
        '''Parse a single log line.

        data is the text of the line.  tz is the ZoneInfo used to
        convert the meter timestamp of an "m" line to UTC; it is not
        needed for "z" lines.

        Returns a MeterRead for an "m" line and a ZoneInfo for a "z"
        line.  Raises ValueError for a blank line, an unknown record
        type, or an "m" line when tz is None.
        '''

        p = data.split()
        if not p:
            # previously an IndexError; report it like any other bad line
            raise ValueError('unknown type: %s' % repr(data))

        kind = p[0]
        if kind == 'm':
            if tz is None:
                raise ValueError('cannot parse meter line w/o timezone info')

            # the meter reports local time, local - offset == UTC
            meterts = int(p[2]) - tz.getOffset()

            return MeterRead(meterts=meterts, readts=float(p[1]),
                status=p[3], load=float(p[4]), maxload=float(p[5]),
                loadunit=p[6], sink=float(p[7]),
                source=float(p[8]), ssunit=p[9])
        elif kind == 'z':
            return ZoneInfo(p[1], int(p[2]), int(p[3]), p[4])

        raise ValueError('unknown type: %s' % repr(kind))

    def __iter__(self):
        '''Yield a MeterRead for each meter line, in file order.

        The log is handled one segment at a time, a segment being the
        meter lines up to the next zone line (or the end of the file).
        The first zone line of the file applies to the lines before it.
        When a later zone line announces a different offset, the new
        zone takes effect at the first jump of more than 50 minutes in
        the converted meter timestamps.

        An empty log yields nothing.  Meter lines in a log without any
        zone line raise ValueError.
        '''

        # this can be suspended/resumed between yields, so
        # keep track of pointer.

        fp = self._fp

        pos = 0
        tz = None        # zone currently in effect
        nexttz = None    # most recently announced zone
        lastts = None    # meterts of the previously yielded line
        done = False
        while not done:
            fp.seek(pos)
            pendinglines = []

            # collect the meter lines up to the next timezone info
            while True:
                l = fp.readline()
                if l == '':
                    done = True
                    break

                if l[0] == 'z':
                    nexttz = self.parseline(l)
                    if tz is None:
                        tz = nexttz
                    break

                pendinglines.append(l)

            pos = fp.tell()

            # was there a tz change that has not taken effect yet
            tzchange = tz is not None and tz.offset != nexttz.offset
            for i in pendinglines:
                line = self.parseline(i, tz)
                if (tzchange and lastts is not None and
                    abs(line.meterts - lastts) > 50 * 60):
                    tz = nexttz
                    # need to reparse due to zone changed
                    line = self.parseline(i, tz)
                    tzchange = False

                yield line
                lastts = line.meterts

    @staticmethod
    def verifyIndex(idx, fp):
        '''Return True if idx, as returned by generateIndex, is
        consistent with fp: the length matches and every indexed
        position holds a 'z'.  The position of fp is preserved.'''

        origpos = fp.tell()
        try:
            # check size
            fp.seek(0, 2)	# End of file
            length = fp.tell() == idx['length']
        finally:
            fp.seek(origpos)

        # lazy, so the positions are only probed if the length matched
        return all(itertools.chain((length,),
            (_pread(fp, x[1], 1) == 'z' for x in idx['index'])))

    @staticmethod
    def generateIndex(fp, bufsize=65536):
        '''Build an index of the zone lines of fp.

        Returns a dict with two keys: 'length', the length of the file,
        and 'index', a list of (utc, position) tuples, one for each
        line that starts with 'z', in file order.  utc is the UTC
        timestamp of the zone record and position the offset of its
        'z'.

        The file is read bufsize characters at a time.  A trailing line
        that is not terminated by a newline is not indexed, as it may
        still be in the middle of being written.  The position of fp is
        preserved.
        '''

        origpos = fp.tell()
        try:
            fp.seek(0, 2)	# End of file
            idxs = []
            ret = dict(length=fp.tell(), index=idxs)

            fp.seek(0)
            pos = 0		# file offset of buf[0]
            buf = ''	# unconsumed data, always starts on a line
            while True:
                chunk = fp.read(bufsize)
                if chunk == '':
                    break

                buf += chunk

                # consume every complete line in the buffer
                start = 0
                while True:
                    nl = buf.find('\n', start)
                    if nl == -1:
                        break

                    if buf.startswith('z', start):
                        line = ParseLog.parseline(buf[start:nl], None)
                        idxs.append((line.utc, pos + start))

                    start = nl + 1

                # keep only the partial line for the next round
                pos += start
                buf = buf[start:]
        finally:
            # restore position
            fp.seek(origpos)

        return ret

    @classmethod
    def fromfile(cls, fp):
        '''Pass in a file like object that is the log.

        Note that this will claim ownership of the file object.  It
        may seek and read parts of the file, the fp should not be
        accessed after this call.

        NOTE: closing the file when the object is destroyed is not
        implemented yet (see Tests.test_close), the caller is still
        responsible for closing fp.
        '''

        return cls(fp)


class MiscTests(unittest.TestCase):
    def test_pread(self):
        s = 'this is a random test string'
        sio = StringIO(s)

        pos = 13
        sio.seek(pos)

        self.assertEqual(_pread(sio, 10, 4), s[10:10 + 4])
        self.assertEqual(_pread(sio, 7, 1), s[7])

        self.assertEqual(sio.tell(), pos)


class Tests(unittest.TestCase):
    oldlines = '''l 1571848576 Connected 1.1260 9.155000 W 65375.946 0.000 Wh
z GMT+7 1571848569 1571873769 -0700
l 1571848585 Connected 1.0890 9.155000 kW 15.946 0.000 kWh
l 1571848593 Connected 1.0500 9.155000 kW 15.946 0.000 kWh
'''

    def test_getoffset(self):
        zi = ZoneInfo(tz='GMT+8', lcl='1577132476', utc='1577161276',
            offset='-0800')

        self.assertEqual(zi.getOffset(), -8 * 60 * 60)

        zi = ZoneInfo(tz='GMT+8', lcl='1577132476', utc='1577161276',
            offset='-0700')

        self.assertEqual(zi.getOffset(), -7 * 60 * 60)

        zi = ZoneInfo(tz='GMT+8', lcl='1577132476', utc='1577161276',
            offset='-0730')

        self.assertEqual(zi.getOffset(), -(7 * 60 * 60 + 30 * 60))

    # notes:
    # 1572767994 Sun Nov 3 00:59:54 PDT 2019
    # 1572768005 Sun Nov 3 01:00:05 PDT 2019
    # 1572771545 Sun Nov 3 01:59:05 PDT 2019
    # 1572772464 Sun Nov 3 01:14:24 PST 2019
    # 1572772472 Sun Nov 3 01:14:32 PST 2019
    # 1572775140 Sun Nov 3 01:59:00 PST 2019
    # 1572778799 Sun Nov 3 02:59:59 PST 2019
    # 1572793200 Sun Nov 3 07:00:00 PST 2019
    # 1572793208 Sun Nov 3 07:00:08 PST 2019

    zonelines = '''m 1572767994.3 1572742790 Connected 0 0 kW 0 0 kWh
z GMT+7 1572742795 1572767995 -0700
m 1572768005.3 1572742805 Connected 0 0 kW 0 0 kWh
m 1572769505.3 1572744305 Connected 0 0 kW 0 0 kWh
m 1572771545.3 1572746345 Connected 0 0 kW 0 0 kWh
m 1572772464.3 1572747264 Connected 0 0 kW 0 0 kWh
m 1572772472.3 1572743672 Connected 0 0 kW 0 0 kWh
m 1572775140.3 1572746340 Connected 0 0 kW 0 0 kWh
m 1572778799.3 1572749999 Connected 0 0 kW 0 0 kWh
z GMT+8 1572750100 1572778900 -0800
m 1572793208.3 1572764405 Connected 0 0 kW 0 0 kWh
'''

    def test_zonemove(self):
        # test when DST moves
        s = StringIO(self.zonelines)
        pl = ParseLog.fromfile(s)

        lines = list(pl)

        self.assertEqual([ x.meterts for x in lines ], [
            1572767990, 1572768005, 1572769505, 1572771545,
            1572772464, 1572772472, 1572775140, 1572778799,
            1572793205 ])

    def test_genverifindex(self):
        s = StringIO(self.zonelines)
        pos = 10
        s.seek(pos)
        genidx = ParseLog.generateIndex(s, 2)

        zpos = [ i for i, x in enumerate(self.zonelines) if x == 'z' ]

        # that the position remained the same
        self.assertEqual(s.tell(), pos)

        self.assertEqual(genidx['length'], len(s.getvalue()))
        self.assertEqual(genidx['index'], [ (1572767995, zpos[0]),
            (1572778900, zpos[1]) ])

        self.assertTrue(ParseLog.verifyIndex(genidx, s))

        s.seek(pos)

        tmp = genidx.copy()
        tmp['length'] = 0
        self.assertFalse(ParseLog.verifyIndex(tmp, s))

        tmp = genidx.copy()
        tmp['index'][0] = (tmp['index'][0][0], 10)
        self.assertFalse(ParseLog.verifyIndex(tmp, s))

        # that the position remained the same
        self.assertEqual(s.tell(), pos)

    newlines = '''m 1577161278.22 1577132472 Connected 0.2580 1.992000 kW 90.404 1.660 kWh
z GMT+8 1577132476 1577161276 -0800
m 1577161288.39 1577132480 Connected 0.3410 1.992000 kW 90.404 1.660 kWh
m 1577161298.96 1577132488 Connected 0.1450 1.992000 kW 90.404 1.660 kWh
'''

    def test_parsenew(self):
        s = StringIO(self.newlines)
        pl = ParseLog.fromfile(s)

        lines = list(pl)

        self.assertEqual([ x.readts for x in lines ],
            [ 1577161278.22, 1577161288.39, 1577161298.96 ])
        self.assertEqual([ x.meterts for x in lines ],
            [ 1577161272, 1577161280, 1577161288 ])
        self.assertEqual([ x.load for x in lines ],
            [ 0.2580, 0.3410, 0.1450 ])

    def test_close(self):
        # test to make sure the file object is closed
        pass