guard the extra-merge loop with the formatter's own output keys (time/level/module/
message). stdlib LogRecord rejects extra keys colliding with real attribute names, but
time/level are NOT LogRecord attrs, so a caller's extra={"time":...}/{"level":...}
previously overwrote the UTC timestamp / levelname — the two fields Loki/Grafana alert
on. now those keys are reserved and a colliding extra is dropped.
Signed-off-by: disqualifier <dev@disqualifier.me>
73 lines
3.4 KiB
Python
73 lines
3.4 KiB
Python
"""log formats for the app-wide setup: human-readable text + structured JSON lines.
|
|
|
|
two output formats, two proven needs. `text` (default) is the human `tail -f` format
|
|
(`time | module | level | message`, local time). `json` is the Grafana/Loki path —
|
|
one JSON object per line (JSON Lines), fields parsed into labels natively, UTC
|
|
timestamps so logs aggregated from many machines/containers sort unambiguously.
|
|
`%(name)s` is the getLogger name the emitting module used, so each module shows.
|
|
"""
|
|
|
|
import datetime
|
|
import json
|
|
import logging
|
|
|
|
DEFAULT_FORMAT = "%(asctime)s | %(name)s | %(levelname)s | %(message)s"
|
|
DEFAULT_DATEFMT = "%Y-%m-%d %H:%M:%S"
|
|
|
|
_RESERVED = frozenset(vars(logging.makeLogRecord({})).keys()) | {"message", "asctime"}
|
|
# this formatter's own canonical output keys — stdlib's LogRecord rejects `extra` keys
|
|
# colliding with real attribute names (e.g. `module`), but `time`/`level` are NOT
|
|
# LogRecord attrs, so a caller's extra={"time":...}/{"level":...} would otherwise
|
|
# overwrite the UTC timestamp / levelname. guard them explicitly
|
|
_OUTPUT_KEYS = frozenset({"time", "level", "module", "message"})
|
|
|
|
|
|
class JsonLinesFormatter(logging.Formatter):
|
|
"""format each record as a single-line JSON object (JSON Lines / .jsonl)
|
|
|
|
emits at minimum time/level/module/message. time is UTC ISO-8601 with a `Z`
|
|
suffix (e.g. 2026-06-28T14:03:11Z) so logs aggregated across machines and
|
|
containers sort unambiguously — Grafana converts to local for display. any
|
|
field passed via logging `extra={...}` lands as a top-level JSON field, which
|
|
is how a caller stamps monitor/service/request-id for Loki labels without the
|
|
lib knowing those domain concepts. a traceback (exc_info) is rendered into an
|
|
`exc_info` string field rather than dropped.
|
|
"""
|
|
|
|
def format(self, record: logging.LogRecord) -> str:
|
|
when = datetime.datetime.fromtimestamp(record.created, datetime.timezone.utc)
|
|
payload = {
|
|
"time": when.strftime("%Y-%m-%dT%H:%M:%SZ"),
|
|
"level": record.levelname,
|
|
"module": record.name,
|
|
"message": record.getMessage(),
|
|
}
|
|
for key, value in record.__dict__.items():
|
|
if key not in _RESERVED and key not in _OUTPUT_KEYS and not key.startswith("_"):
|
|
payload[key] = value
|
|
if record.exc_info:
|
|
payload["exc_info"] = self.formatException(record.exc_info)
|
|
elif record.exc_text:
|
|
payload["exc_info"] = record.exc_text
|
|
if record.stack_info:
|
|
payload["stack_info"] = self.formatStack(record.stack_info)
|
|
return json.dumps(payload, default=str)
|
|
|
|
|
|
def build_formatter(output: str = "text", fmt=None, datefmt=None) -> logging.Formatter:
|
|
"""build the formatter for the chosen output format
|
|
|
|
`output="text"` (default) returns the human-readable text formatter, honoring
|
|
the raw `fmt`/`datefmt` format-string overrides. `output="json"` returns the
|
|
structured `JsonLinesFormatter` (which ignores `fmt`/`datefmt` — it builds
|
|
fields, not a format string). an unrecognized `output` falls back to text and
|
|
warns, never raising — a bad format arg must not take the app down.
|
|
"""
|
|
if output == "json":
|
|
return JsonLinesFormatter()
|
|
if output != "text":
|
|
logging.getLogger(__name__).warning(
|
|
"log_setup: unknown output %r; falling back to 'text'", output
|
|
)
|
|
return logging.Formatter(fmt or DEFAULT_FORMAT, datefmt or DEFAULT_DATEFMT)
|