From 595f0363b3c34b7ea1f4464bc75a8fc5f0ad58d9 Mon Sep 17 00:00:00 2001 From: disqualifier Date: Tue, 30 Jun 2026 03:13:43 -0400 Subject: [PATCH] feat: tiered restart/retention (keep_uncompressed/keep_compressed) + name normalize - keep_uncompressed/keep_compressed: newest N rolled logs plain, next M gzipped, rest deleted. applies to on_start/daily/size. opt-in by knob presence; without them the legacy backup_count + gzip-on-roll path is unchanged (existing consumers unaffected). - _normalize_name: 'latest' and 'latest.log' both -> live latest.log (no .log.log). - _gzip_file preserves source mtime (stable tier ordering across re-tiers). - rotate_on_start collision counter checks both .log and .log.gz (no duplicate logical roll when a same-stamp file was already compressed). execute-verified stdlib-only incl. a back-compat control proving the no-knobs path is unchanged. bump v0.3.2 -> v0.4.0 Signed-off-by: disqualifier --- README.md | 49 +++++++++++++-- pyproject.toml | 2 +- src/log_setup/__init__.py | 2 +- src/log_setup/rotation.py | 124 ++++++++++++++++++++++++++++++++------ src/log_setup/setup.py | 56 ++++++++++++++--- 5 files changed, 203 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index e675f51..e446a36 100644 --- a/README.md +++ b/README.md @@ -13,12 +13,12 @@ and emit; their records flow into the handlers `log_setup` wired. ## Install ``` -log_setup @ git+ssh://git@git.rethinkstudios.io/rethink-public/log_setup.git@v0.3.2 +log_setup @ git+ssh://git@git.rethinkstudios.io/rethink-public/log_setup.git@v0.4.0 ``` No dependencies — stdlib only. -Drop the `@v0.3.2` suffix from the line above to install the latest unpinned. +Drop the `@v0.4.0` suffix from the line above to install the latest unpinned. ## Quick start @@ -50,10 +50,49 @@ emits; the records land in the configured root. (`run..log[.gz]`) and starts fresh; prunes to `backup_count`. - `None` — single file, no rotation. 
- **compress=True** (default) gzips each rolled file (`run.log.2026-06-27.gz`). -- **Retention** = `backup_count` (default 14) for every mode. +- **Retention** = `backup_count` (default 14) for every mode — unless tiered retention is + enabled (below). - **console=True** (off by default) also logs to stdout in the same format — opt in when you want live terminal output alongside the file. +The `name` you pass is normalized so it produces exactly one `.log`: `name="latest"` and +`name="latest.log"` both yield the live file `latest.log` (never `latest.log.log`). + +## Tiered retention (`keep_uncompressed` / `keep_compressed`) + +The default is a flat `backup_count`: every rolled file is gzipped on roll and the oldest +are deleted past the count. If instead you want the recent logs **uncompressed** (read them +without `zcat`) and older ones **gzipped**, pass the two tier knobs: + +```python +setup_logging( + name="latest", + rotate="on_start", # works for on_start, daily, and size + keep_uncompressed=3, # newest 3 rolled logs kept PLAIN + keep_compressed=7, # next 7 kept GZIPPED; total retained = 10 +) +``` + +Result in `log_dir` (newest → oldest): + +``` +latest.log <- live "latest" (stable, tail -f) +latest.<stamp>.log latest.<stamp>.log latest.<stamp>.log <- 3 newest: plain +latest.<stamp>.log.gz ... latest.<stamp>.log.gz <- next 7: gzipped +(anything past 10 deleted) +``` + +- Each restart (`on_start`) or roll (`daily`/`size`) moves the live file into `log_dir`, + then re-tiers: newest `keep_uncompressed` stay plain, the next `keep_compressed` are + gzipped in place, the rest deleted. Total kept = `keep_uncompressed + keep_compressed`. +- **Opt-in by presence** — pass either knob to enable tiering. Pass **neither** and + rotation behaves exactly as before (`backup_count` + gzip-on-roll), so existing callers + are unaffected. +- In tiered mode `backup_count` and the gzip-on-roll behavior of `compress` are **ignored** + — the tier counts bound retention instead. 
+- `keep_uncompressed=0` → everything gzipped; `keep_compressed=0` → only the plain tier. + Retention is count-based (not time-based). + ## Output format (`output=`) Two formats, two needs. Default is `"text"`; the live-file name is the same either way @@ -94,7 +133,9 @@ setup_logging( level="INFO", # root level everything inherits (str name or logging constant) module_levels=None, # {logger_name: level} per-logger overrides (exact name match) rotate="daily", # "daily" | "size" | "on_start" | None - backup_count=14, # rotated files to keep (older auto-deleted) + backup_count=14, # rotated files to keep (flat retention; ignored if tiered) + keep_uncompressed=None, # tiered: newest N rolled logs kept PLAIN (opt-in) + keep_compressed=None, # tiered: next M rolled logs kept GZIPPED (opt-in) max_bytes=10_000_000, # only for rotate="size" compress=True, # gzip rolled files console=False, # also log to stdout (off by default; opt in) diff --git a/pyproject.toml b/pyproject.toml index c2ed493..55a1372 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "log_setup" -version = "0.3.2" +version = "0.4.0" description = "stdlib app-entry-point logging setup: live run.log, rotation, gzip, retention, consistent format" requires-python = ">=3.10" dependencies = [] diff --git a/src/log_setup/__init__.py b/src/log_setup/__init__.py index 699b84c..38e8eff 100644 --- a/src/log_setup/__init__.py +++ b/src/log_setup/__init__.py @@ -19,4 +19,4 @@ from .setup import setup_logging __all__ = ["setup_logging"] -__version__ = "0.3.2" +__version__ = "0.4.0" diff --git a/src/log_setup/rotation.py b/src/log_setup/rotation.py index 7d647d6..9db12d0 100644 --- a/src/log_setup/rotation.py +++ b/src/log_setup/rotation.py @@ -32,6 +32,23 @@ def _move(source: str, dest: str) -> None: shutil.move(source, dest) +def _gzip_file(source: str, dest: str) -> None: + """gzip source into dest then remove source (the rolled-file compression idiom) 
+ + the source mtime is carried onto dest so a file keeps its position when it crosses + the plain->gz tier boundary — retier ranks by mtime, and a fresh write would + otherwise make a just-compressed file look like the newest one and reshuffle tiers. + """ + mtime = _safe_mtime(source) + with open(source, "rb") as src, gzip.open(dest, "wb") as dst: + shutil.copyfileobj(src, dst) + os.remove(source) + try: + os.utime(dest, (mtime, mtime)) + except OSError: + pass + + def make_namer(log_dir: str, compress: bool) -> Callable[[str], str]: """namer: redirect a rolled filename into log_dir, adding .gz when compressing @@ -48,22 +65,36 @@ def make_namer(log_dir: str, compress: bool) -> Callable[[str], str]: def make_rotator( compress: bool, log_dir: Optional[str] = None, prune_stem: Optional[str] = None, backup_count: int = 0, + keep_uncompressed: Optional[int] = None, keep_compressed: Optional[int] = None, ) -> Callable[[str, str], None]: """rotator: move (or gzip) the source live file to the destination rolled path - prunes `log_dir` to `backup_count` newest rolled files after each roll when - `log_dir`/`prune_stem` are given. the stdlib handler's own retention + legacy mode (default): gzip on roll when `compress`, then prune `log_dir` to + `backup_count` newest rolled files. the stdlib handler's own retention (`getFilesToDelete`) only scans the live file's directory, so it never sees the - rolled files we redirect into `log_dir` — pruning here is what actually bounds - retention for the daily and size rolling modes. + rolled files we redirect into `log_dir` — pruning here is what bounds retention for + the daily and size rolling modes. + + tiered mode (when `keep_uncompressed`/`keep_compressed` are given): land the rolled + file PLAIN and re-tier `log_dir` — newest `keep_uncompressed` stay uncompressed, the + next `keep_compressed` are gzipped, the rest deleted. `compress`/`backup_count` are + ignored in this mode (the tier counts bound retention instead). 
""" + tiered = keep_uncompressed is not None or keep_compressed is not None + def rotator(source: str, dest: str) -> None: if not os.path.exists(source): return + if tiered: + # dest carries the namer's .gz suffix in compress mode; strip it so the + # freshly-rolled file lands plain and retier decides its tier + plain_dest = dest[:-3] if dest.endswith(".gz") else dest + _move(source, plain_dest) + if log_dir is not None and prune_stem is not None: + retier(log_dir, prune_stem, keep_uncompressed or 0, keep_compressed or 0) + return if compress: - with open(source, "rb") as src, gzip.open(dest, "wb") as dst: - shutil.copyfileobj(src, dst) - os.remove(source) + _gzip_file(source, dest) else: _move(source, dest) if log_dir is not None and prune_stem is not None: @@ -71,31 +102,85 @@ def make_rotator( return rotator -def rotate_on_start(live_path: str, log_dir: str, compress: bool, clock=time.localtime) -> None: +def rotate_on_start( + live_path: str, log_dir: str, compress: bool, clock=time.localtime, + keep_uncompressed: Optional[int] = None, keep_compressed: Optional[int] = None, +) -> None: """move an existing live file into log_dir with a timestamp, gzipped if asked no-op if the live file doesn't exist. used by rotate="on_start" before the fresh handler opens a new live file. the timestamp form is run.<%Y-%m-%d_%H-%M-%S>.log. + + tiered mode (when `keep_uncompressed`/`keep_compressed` are given): the rolled file + always lands PLAIN (so it can occupy the newest uncompressed tier) and `retier` + decides compression/deletion across the whole stem — `compress` is ignored for the + just-rolled file. 
""" if not os.path.exists(live_path): return + tiered = keep_uncompressed is not None or keep_compressed is not None stem = os.path.splitext(os.path.basename(live_path))[0] stamp = time.strftime("%Y-%m-%d_%H-%M-%S", clock()) - suffix = ".log.gz" if compress else ".log" + suffix = ".log.gz" if (compress and not tiered) else ".log" # the stamp is 1-second resolution; two starts in the same second would collide # and the second clobber the first. disambiguate with a numeric counter so a rapid - # crash-restart loop doesn't lose the earlier rolled file + # crash-restart loop doesn't lose the earlier rolled file. check BOTH the .log and + # .log.gz forms of each candidate: in tiered mode an earlier same-stamp roll may have + # already been compressed to .log.gz, and reusing its bare stem would create a second + # file for the same logical roll and break the tier counts + + def _taken(path: str) -> bool: + base = path[:-3] if path.endswith(".gz") else path + return os.path.exists(base) or os.path.exists(base + ".gz") + dest = os.path.join(log_dir, f"{stem}.{stamp}{suffix}") counter = 1 - while os.path.exists(dest): + while _taken(dest): dest = os.path.join(log_dir, f"{stem}.{stamp}.{counter}{suffix}") counter += 1 - if compress: - with open(live_path, "rb") as src, gzip.open(dest, "wb") as dst: - shutil.copyfileobj(src, dst) - os.remove(live_path) + if compress and not tiered: + _gzip_file(live_path, dest) else: _move(live_path, dest) + if tiered: + retier(log_dir, stem, keep_uncompressed or 0, keep_compressed or 0) + + +def retier(log_dir: str, stem: str, keep_uncompressed: int, keep_compressed: int) -> None: + """re-tier rolled files for stem: newest plain, next gzipped, rest deleted + + newest-first by mtime: the first `keep_uncompressed` stay uncompressed, the next + `keep_compressed` are gzipped in place (a still-plain file in that band is compressed + to .gz and the plain source removed), and everything beyond + keep_uncompressed+keep_compressed is deleted. 
the live <stem>.log is never touched. + fail-soft per file (skip on OSError) so retention never crashes setup. + """ + try: + names = [ + name for name in os.listdir(log_dir) + if name.startswith(f"{stem}.") and name != f"{stem}.log" + ] + except OSError: + return + entries = [os.path.join(log_dir, name) for name in names] + files = [(p, _safe_mtime(p)) for p in entries if os.path.isfile(p)] + files.sort(key=lambda pair: pair[1], reverse=True) + + keep = keep_uncompressed + keep_compressed + for index, (path, _) in enumerate(files): + if index >= keep: + try: + os.remove(path) + except OSError: + pass + elif index >= keep_uncompressed and not path.endswith(".gz"): + dest = path + ".gz" + if os.path.exists(dest): + continue + try: + _gzip_file(path, dest) + except OSError: + pass def prune(log_dir: str, stem: str, backup_count: int) -> None: @@ -134,14 +219,19 @@ def _safe_mtime(path: str) -> float: def attach_rolling( handler, log_dir: str, compress: bool, prune_stem: Optional[str] = None, backup_count: int = 0, + keep_uncompressed: Optional[int] = None, keep_compressed: Optional[int] = None, ) -> Tuple[Callable, Callable]: """wire the custom namer + rotator onto a rotating handler; return them pass `prune_stem`/`backup_count` so the rotator prunes `log_dir` after each roll - (the handler's own retention can't see the redirected rolled files). + (the handler's own retention can't see the redirected rolled files). pass + `keep_uncompressed`/`keep_compressed` instead to use tiered retention (newest plain, + next gzipped, rest deleted) — see make_rotator. 
""" namer = make_namer(log_dir, compress) - rotator = make_rotator(compress, log_dir, prune_stem, backup_count) + rotator = make_rotator( + compress, log_dir, prune_stem, backup_count, keep_uncompressed, keep_compressed, + ) handler.namer = namer handler.rotator = rotator return namer, rotator diff --git a/src/log_setup/setup.py b/src/log_setup/setup.py index 0b7ff1b..6c01aa0 100644 --- a/src/log_setup/setup.py +++ b/src/log_setup/setup.py @@ -107,25 +107,52 @@ def _tag(handler: logging.Handler) -> logging.Handler: return handler +def _normalize_name(name: str) -> str: + """strip one trailing '.log' (case-insensitive) so the stem is extension-free + + `name` is allowed to be passed with or without the extension — "latest" and + "latest.log" both yield stem "latest" (live file latest.log), never latest.log.log. + only one level is stripped: "app.log.log" -> "app.log" so a legit ".log" inside a + name survives. + """ + if name.lower().endswith(".log"): + return name[:-4] + return name + + def _file_handler( name: str, live_path: str, log_dir: str, rotate: Optional[str], backup_count: int, max_bytes: int, compress: bool, + keep_uncompressed: Optional[int], keep_compressed: Optional[int], ) -> logging.Handler: """build the configured file handler with custom rolling into log_dir""" + tiered = keep_uncompressed is not None or keep_compressed is not None if rotate == "size": handler = logging.handlers.RotatingFileHandler( live_path, maxBytes=max_bytes, backupCount=backup_count, encoding="utf-8", ) - attach_rolling(handler, log_dir, compress, prune_stem=name, backup_count=backup_count) + attach_rolling( + handler, log_dir, compress, prune_stem=name, backup_count=backup_count, + keep_uncompressed=keep_uncompressed, keep_compressed=keep_compressed, + ) elif rotate == "daily": handler = logging.handlers.TimedRotatingFileHandler( live_path, when="midnight", backupCount=backup_count, encoding="utf-8", ) - attach_rolling(handler, log_dir, compress, prune_stem=name, 
backup_count=backup_count) + attach_rolling( + handler, log_dir, compress, prune_stem=name, backup_count=backup_count, + keep_uncompressed=keep_uncompressed, keep_compressed=keep_compressed, + ) else: if rotate == "on_start": - rotate_on_start(live_path, log_dir, compress) - prune(log_dir, name, backup_count) + if tiered: + rotate_on_start( + live_path, log_dir, compress, + keep_uncompressed=keep_uncompressed, keep_compressed=keep_compressed, + ) + else: + rotate_on_start(live_path, log_dir, compress) + prune(log_dir, name, backup_count) handler = logging.FileHandler(live_path, encoding="utf-8") return handler @@ -137,6 +164,8 @@ def setup_logging( module_levels: Optional[Dict[str, Union[int, str]]] = None, rotate: Optional[str] = "daily", backup_count: int = 14, + keep_uncompressed: Optional[int] = None, + keep_compressed: Optional[int] = None, max_bytes: int = 10_000_000, compress: bool = True, console: bool = False, @@ -147,7 +176,16 @@ def setup_logging( ) -> logging.Logger: """configure the root logger for the whole process and return it - `name` -> <name>.log live file at cwd; rolled/compressed copies go to `log_dir`. + `name` -> <name>.log live file at cwd; rolled/compressed copies go to `log_dir`. a + trailing ".log" in `name` is stripped so "latest" and "latest.log" both produce the + live file latest.log (never latest.log.log). + `keep_uncompressed`/`keep_compressed` (default None) enable TIERED retention: when + either is given, rolled files are kept as the newest `keep_uncompressed` uncompressed + + the next `keep_compressed` gzipped, and the rest are deleted (total retained = + sum). this applies to "on_start", "daily", and "size". `backup_count` and the + gzip-on-roll behavior of `compress` are IGNORED in tiered mode (the tier counts bound + retention). pass NEITHER knob and rotation behaves exactly as before (backup_count + + compress) — existing callers are unaffected. `level` is the root default every logger inherits. 
`module_levels` is an optional map of exact logger name -> level applied after the root is set, the ergonomic way to quiet noisy dependencies (e.g. {"motor": "WARNING", "aiohttp": "WARNING"}) from @@ -177,7 +215,8 @@ def setup_logging( _clear_owned(root) formatter = build_formatter(output, fmt, datefmt) - live_path = f"{name}.log" + stem = _normalize_name(name) + live_path = f"{stem}.log" handlers = [] @@ -189,7 +228,10 @@ def setup_logging( if file_ok: try: - fh = _file_handler(name, live_path, log_dir, rotate, backup_count, max_bytes, compress) + fh = _file_handler( + stem, live_path, log_dir, rotate, backup_count, max_bytes, compress, + keep_uncompressed, keep_compressed, + ) fh.setFormatter(formatter) handlers.append(fh) except OSError: