log_setup/src/log_setup/formats.py
disqualifier ff29e05322 fix: JSON extra cannot clobber canonical time/level/module fields (v0.3.1)
guard the extra-merge loop with the formatter's own output keys (time/level/module/
message). stdlib LogRecord rejects extra keys colliding with real attribute names, but
time/level are NOT LogRecord attrs, so a caller's extra={"time":...}/{"level":...}
previously overwrote the UTC timestamp / levelname — the two fields Loki/Grafana alert
on. now those keys are reserved and a colliding extra is dropped.

Signed-off-by: disqualifier <dev@disqualifier.me>
2026-06-29 17:11:45 -04:00

73 lines
3.4 KiB
Python

"""log formats for the app-wide setup: human-readable text + structured JSON lines.
two output formats, two proven needs. `text` (default) is the human `tail -f` format
(`time | module | level | message`, local time). `json` is the Grafana/Loki path —
one JSON object per line (JSON Lines), fields parsed into labels natively, UTC
timestamps so logs aggregated from many machines/containers sort unambiguously.
`%(name)s` is the getLogger name the emitting module used, so each module shows.
"""
import datetime
import json
import logging
DEFAULT_FORMAT = "%(asctime)s | %(name)s | %(levelname)s | %(message)s"
DEFAULT_DATEFMT = "%Y-%m-%d %H:%M:%S"
_RESERVED = frozenset(vars(logging.makeLogRecord({})).keys()) | {"message", "asctime"}
# this formatter's own canonical output keys — stdlib's LogRecord rejects `extra` keys
# colliding with real attribute names (e.g. `module`), but `time`/`level` are NOT
# LogRecord attrs, so a caller's extra={"time":...}/{"level":...} would otherwise
# overwrite the UTC timestamp / levelname. guard them explicitly
_OUTPUT_KEYS = frozenset({"time", "level", "module", "message"})
class JsonLinesFormatter(logging.Formatter):
"""format each record as a single-line JSON object (JSON Lines / .jsonl)
emits at minimum time/level/module/message. time is UTC ISO-8601 with a `Z`
suffix (e.g. 2026-06-28T14:03:11Z) so logs aggregated across machines and
containers sort unambiguously — Grafana converts to local for display. any
field passed via logging `extra={...}` lands as a top-level JSON field, which
is how a caller stamps monitor/service/request-id for Loki labels without the
lib knowing those domain concepts. a traceback (exc_info) is rendered into an
`exc_info` string field rather than dropped.
"""
def format(self, record: logging.LogRecord) -> str:
when = datetime.datetime.fromtimestamp(record.created, datetime.timezone.utc)
payload = {
"time": when.strftime("%Y-%m-%dT%H:%M:%SZ"),
"level": record.levelname,
"module": record.name,
"message": record.getMessage(),
}
for key, value in record.__dict__.items():
if key not in _RESERVED and key not in _OUTPUT_KEYS and not key.startswith("_"):
payload[key] = value
if record.exc_info:
payload["exc_info"] = self.formatException(record.exc_info)
elif record.exc_text:
payload["exc_info"] = record.exc_text
if record.stack_info:
payload["stack_info"] = self.formatStack(record.stack_info)
return json.dumps(payload, default=str)
def build_formatter(output: str = "text", fmt=None, datefmt=None) -> logging.Formatter:
"""build the formatter for the chosen output format
`output="text"` (default) returns the human-readable text formatter, honoring
the raw `fmt`/`datefmt` format-string overrides. `output="json"` returns the
structured `JsonLinesFormatter` (which ignores `fmt`/`datefmt` — it builds
fields, not a format string). an unrecognized `output` falls back to text and
warns, never raising — a bad format arg must not take the app down.
"""
if output == "json":
return JsonLinesFormatter()
if output != "text":
logging.getLogger(__name__).warning(
"log_setup: unknown output %r; falling back to 'text'", output
)
return logging.Formatter(fmt or DEFAULT_FORMAT, datefmt or DEFAULT_DATEFMT)