Added a deduplicating log handler for the stderr logger

This commit is contained in:
Erik Thuning 2022-12-06 16:06:31 +01:00
parent b11f567903
commit 82eb9d110c
2 changed files with 96 additions and 1 deletions

@ -2,6 +2,7 @@ import logging
import multiprocessing as mp
from .collector import Collector
from .deduplogger import DedupLogHandler
from .distributor import Distributor
from .handlers import init_handlers
from .notifier import Notifier
@ -30,7 +31,7 @@ class Pipeline:
stderrlog = logging.StreamHandler()
stderrlog.setLevel(log_level)
stderrlog.setFormatter(fmt)
baselogger.addHandler(stderrlog)
baselogger.addHandler(DedupLogHandler(stderrlog, 3, 30))
# Mail logging settings
if 'mail_level' in config['Logging']:

94
pipeline/deduplogger.py Normal file

@ -0,0 +1,94 @@
from logging import Handler, LogRecord
from time import time
class DedupLogHandler(Handler):
    """
    Wrap another handler and suppress runs of repeated log messages.

    Records are forwarded to `target` until the same formatted output
    has been seen `max_repeats` times in a row; further duplicates are
    counted instead of forwarded. When the run ends (or the handler is
    closed) a summary record reporting the number of suppressed
    messages is forwarded.

    Can optionally be configured to forcibly un-suppress a single
    duplicate message on a certain time interval, in order to get
    periodic updates on suppressed messages.

    The target's level and filters are honoured, so wrapping a handler
    does not change which records it accepts.
    """

    def __init__(self, target, max_repeats, report_interval=None):
        """
        Initialize the handler.

        :param target: handler that non-suppressed records are
                       forwarded to; its formatter is also used to
                       compare records.
        :param max_repeats: number of identical records in a row that
                            are forwarded before suppression starts.
        :param report_interval: optional number of seconds; once this
                                much time has passed since the last
                                forwarded record, the next record is
                                forwarded even if it is a duplicate.
                                `None` disables periodic reports.
        """
        super().__init__()
        self.target = target
        self.max_repeats = max_repeats
        self.report_interval = report_interval
        # Most recently forwarded record and its formatted form (cached
        # so it is not re-formatted for every incoming record).
        self.last = None
        self._last_formatted = None
        # Consecutive duplicates of `last` seen so far.
        self.dupcount = 0
        # Records suppressed since the last summary was printed.
        self.supcount = 0
        # True while the most recent record handled was suppressed.
        self.suppressed_last = False
        self.last_emittime = time()

    def emit(self, record):
        """
        Emit a record.

        A record is emitted if it has been repeated less than
        `max_repeats` times in a row, or if it is time to provide
        a status update according to `report_interval`.

        Records are compared by passing them to `target.format()`
        and comparing the results. NOTE: a formatter that embeds
        per-record data (e.g. a timestamp) makes otherwise equal
        messages compare as different.
        """
        # The logging machinery only checks *this* handler's level, so
        # the target's own threshold has to be applied explicitly.
        # Doing it first keeps rejected records out of the dedup state.
        if record.levelno < self.target.level:
            return

        # Follow the stdlib handler convention: a record that cannot be
        # formatted is reported through handleError() rather than
        # raising into the code that issued the log call.
        try:
            formatted = self.target.format(record)
        except Exception:
            self.handleError(record)
            return

        identical = (self.last is not None
                     and formatted == self._last_formatted)
        if identical:
            self.dupcount += 1
        else:
            self.dupcount = 0
        should_suppress = self.dupcount >= self.max_repeats

        now = time()
        force_emit = (
            self.report_interval is not None
            and self.last_emittime + self.report_interval < now)

        if should_suppress and not force_emit:
            self.suppressed_last = True
            self.supcount += 1
            return

        # A suppressed run just ended (or a report is due): say how
        # many records were dropped before forwarding the new one.
        if self.suppressed_last:
            self.print_repeats()

        self.last = record
        self._last_formatted = formatted
        self.suppressed_last = False
        self.last_emittime = now
        # handle() rather than emit() so the target's filters and lock
        # are applied.
        self.target.handle(record)

    def print_repeats(self):
        """
        Emit a log message indicating the number of suppressed records.

        All attributes of this new record are derived from the latest
        forwarded record, except the message and its arguments. If no
        record has been forwarded yet there is nothing to derive the
        summary from; in that case nothing is emitted and the count is
        kept for a later summary.
        """
        template = self.last
        if template is None:
            return
        msg = "Suppressed %s identical messages"
        summary = LogRecord(template.name,
                            template.levelno,
                            template.pathname,
                            template.lineno,
                            msg,
                            (self.supcount,),
                            None)
        self.target.handle(summary)
        self.supcount = 0

    def close(self):
        """
        Close this handler. If messages are currently being suppressed,
        the number of suppressed messages is printed before chaining
        to the parent class' `close()`.

        The target handler is not closed here.
        """
        if self.suppressed_last:
            self.print_repeats()
            # Reset so that closing twice does not print a second,
            # empty summary.
            self.suppressed_last = False
        super().close()