Compare commits

..

No commits in common. "main" and "v0.1.0" have entirely different histories.
main ... v0.1.0

9 changed files with 41 additions and 121 deletions

2
.gitignore vendored
View File

@ -1,5 +1,5 @@
# claude # claude
.claude/ CLAUDE.md
# python # python
__pycache__/ __pycache__/

View File

@ -11,23 +11,21 @@ This reads codes from email; it does not generate them (that is `pyotp`'s job).
`requirements.txt`: `requirements.txt`:
``` ```
aiomail @ git+ssh://git@git.rethinkstudios.io/rethink-public/aiomail.git@v0.1.5 aiomail @ git+ssh://git@git.rethinkstudios.io/rethink-public/aiomail.git@v0.1.0
# OAuth token providers (Microsoft / Google) need the extra: # OAuth token providers (Microsoft / Google) need the extra:
aiomail[oauth] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aiomail.git@v0.1.5 aiomail[oauth] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aiomail.git@v0.1.0
``` ```
Direct: Direct:
```bash ```bash
pip install "aiomail @ git+ssh://git@git.rethinkstudios.io/rethink-public/aiomail.git@v0.1.5" pip install "aiomail @ git+ssh://git@git.rethinkstudios.io/rethink-public/aiomail.git@v0.1.0"
pip install "aiomail[oauth] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aiomail.git@v0.1.5" pip install "aiomail[oauth] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aiomail.git@v0.1.0"
``` ```
Requires `aioimaplib` and `beautifulsoup4` (pulled transitively). The `oauth` Requires `aioimaplib` and `beautifulsoup4` (pulled transitively). The `oauth`
extra adds `aiohttp` for the refresh-token providers. extra adds `aiohttp` for the refresh-token providers.
Drop the `@v0.1.5` suffix from the line above to install the latest unpinned.
## Password auth ## Password auth
```python ```python

View File

@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "aiomail" name = "aiomail"
version = "0.1.5" version = "0.1.0"
description = "async IMAP one-time-code retrieval with password/OAuth2 auth and dynamic matching" description = "async IMAP one-time-code retrieval with password/OAuth2 auth and dynamic matching"
requires-python = ">=3.10" requires-python = ">=3.10"
dependencies = [ dependencies = [

View File

@ -29,4 +29,4 @@ __all__ = [
"DEFAULT_FOLDERS", "DEFAULT_FOLDERS",
] ]
__version__ = "0.1.5" __version__ = "0.1.0"

View File

@ -38,15 +38,6 @@ class PasswordAuth:
raise RuntimeError(f"login failed: {result} {data}") raise RuntimeError(f"login failed: {result} {data}")
def _as_str(token) -> str:
"""coerce a token to str (a provider may hand back bytes)
both XOAUTH2 entrypoints downstream need a str (one .encode()s it, the SASL
builder interpolates it), so normalize here rather than crashing on bytes.
"""
return token.decode() if isinstance(token, bytes) else token
def _sasl_xoauth2(user: str, token: str) -> str: def _sasl_xoauth2(user: str, token: str) -> str:
"""build the base64 XOAUTH2 SASL initial-response string""" """build the base64 XOAUTH2 SASL initial-response string"""
raw = f"user={user}\x01auth=Bearer {token}\x01\x01".encode() raw = f"user={user}\x01auth=Bearer {token}\x01\x01".encode()
@ -80,23 +71,18 @@ class OAuth2Auth:
token = await result if hasattr(result, "__await__") else result token = await result if hasattr(result, "__await__") else result
if not token: if not token:
raise RuntimeError("token provider returned an empty token") raise RuntimeError("token provider returned an empty token")
return _as_str(token) return token
return _as_str(self._token) # type: ignore[arg-type] return self._token # type: ignore[return-value]
async def authenticate(self, mail) -> None: async def authenticate(self, mail) -> None:
token = await self._resolve_token() token = await self._resolve_token()
# aioimaplib's mail.xoauth2(user, token) builds the SASL string by f-string # aioimaplib exposes mail.xoauth2(user, token: bytes) — note the token must
# interpolating the token, so token MUST be str — passing bytes interpolates # be bytes, not str. older/other clients that lack it but expose a generic
# the b'...' repr and corrupts the Bearer value. _resolve_token already # authenticate() are driven via the SASL string from _sasl_xoauth2.
# returns str (via _as_str). clients lacking .xoauth2 are driven via the
# SASL callback from _sasl_xoauth2.
xoauth2 = getattr(mail, "xoauth2", None) xoauth2 = getattr(mail, "xoauth2", None)
if xoauth2 is not None: if xoauth2 is not None:
result, data = await xoauth2(self.user, token) result, data = await xoauth2(self.user, token.encode())
elif hasattr(mail, "authenticate"): elif hasattr(mail, "authenticate"):
# escape hatch for a non-aioimaplib client: the shipped aioimaplib IMAP4
# always has .xoauth2 and never .authenticate, so this branch never runs
# for it; the SASL-callback signature here is untested against any driver
result, data = await mail.authenticate( result, data = await mail.authenticate(
"XOAUTH2", lambda _: _sasl_xoauth2(self.user, token) "XOAUTH2", lambda _: _sasl_xoauth2(self.user, token)
) )

View File

@ -9,7 +9,6 @@ import asyncio
import email import email
import email.message import email.message
import logging import logging
import re
from typing import List, Optional from typing import List, Optional
from aioimaplib import IMAP4, IMAP4_SSL from aioimaplib import IMAP4, IMAP4_SSL
@ -18,22 +17,6 @@ from .auth import Auth
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# IMAP LIST reply: (flags) "<delim>" <name> — delim is server-defined (often "/" or
# "." or NIL); capture the trailing name regardless, quoted or bare
_LIST_RE = re.compile(rb'^\([^)]*\)\s+(?:"[^"]*"|NIL)\s+(.+)$')
def _folder_name(raw: bytes) -> str:
"""extract the folder name from a LIST reply line, delimiter-agnostic
parses the real reply form `(flags) "<delim>" <name>` so any server hierarchy
delimiter works (not just "/"); falls back to the last quoted/space token if the
line doesn't match the canonical shape.
"""
match = _LIST_RE.match(raw.strip())
name = match.group(1).decode() if match else raw.decode().rsplit(" ", 1)[-1]
return name.strip().strip('"')
class IMAPClient: class IMAPClient:
"""connection-managing IMAP client driven by an injected auth mechanism """connection-managing IMAP client driven by an injected auth mechanism
@ -84,34 +67,10 @@ class IMAPClient:
return True return True
except Exception as exc: except Exception as exc:
log.warning("connect attempt %d/%d failed: %s", attempt + 1, self.max_retries, exc) log.warning("connect attempt %d/%d failed: %s", attempt + 1, self.max_retries, exc)
if self._mail is not None: self._mail = None
await self._discard_mail(self._mail)
self._mail = None
await asyncio.sleep(2 * (attempt + 1)) await asyncio.sleep(2 * (attempt + 1))
return False return False
@staticmethod
async def _discard_mail(mail) -> None:
"""tear down a half-built IMAP4 without leaking its connect task
aioimaplib's IMAP4 schedules `create_connection` as a fire-and-forget task it
never retrieves; on a refused connection that task raises and asyncio logs a
noisy "Task exception was never retrieved" traceback. cancel/await it here (and
retrieve its exception) before discarding, so a failed connect stays quiet.
"""
task = getattr(mail, "_client_task", None)
if task is not None and not task.done():
task.cancel()
if task is not None:
try:
await task
except (asyncio.CancelledError, Exception):
pass
try:
await mail.logout()
except Exception as teardown:
log.debug("logout error ignored during failed connect: %s", teardown)
async def close(self) -> None: async def close(self) -> None:
"""log out and drop the connection, swallowing teardown errors""" """log out and drop the connection, swallowing teardown errors"""
if self._mail is not None: if self._mail is not None:
@ -131,6 +90,14 @@ class IMAPClient:
except Exception: except Exception:
return await self.connect() return await self.connect()
def is_throttled(self) -> bool:
"""best-effort detection of a provider throttling response"""
return bool(
self._mail is not None
and getattr(self._mail, "resp", None)
and "THROTTLED" in str(self._mail.resp)
)
async def get_folders(self) -> List[str]: async def get_folders(self) -> List[str]:
"""list mailbox folder names""" """list mailbox folder names"""
if not await self.ensure_connection(): if not await self.ensure_connection():
@ -143,7 +110,7 @@ class IMAPClient:
folders: List[str] = [] folders: List[str] = []
for folder in folder_list or []: for folder in folder_list or []:
try: try:
folders.append(_folder_name(folder)) folders.append(folder.decode().split(' "/" ')[-1].strip('"'))
except Exception: except Exception:
continue continue
return folders return folders
@ -171,14 +138,7 @@ class IMAPClient:
return [] return []
if result != "OK" or not data or not data[0]: if result != "OK" or not data or not data[0]:
return [] return []
ids = [] ids = [int(x) for x in data[0].split()]
for token in data[0].split():
try:
ids.append(int(token))
except (TypeError, ValueError):
# tolerate a malformed/non-numeric token in the SEARCH response
# instead of crashing the whole search
log.debug("skipping non-numeric search token: %r", token)
return sorted(set(ids), reverse=True) return sorted(set(ids), reverse=True)
async def fetch(self, email_id: int, *, icloud: bool = False) -> Optional[email.message.Message]: async def fetch(self, email_id: int, *, icloud: bool = False) -> Optional[email.message.Message]:
@ -197,14 +157,8 @@ class IMAPClient:
if result != "OK" or not data: if result != "OK" or not data:
return None return None
for item in data: for item in data:
# aioimaplib stores the literal message payload as the only bytearray in if isinstance(item, (bytes, bytearray)) and len(item) > 20:
# the response; every other line (including the `<id> FETCH (...` header)
# is plain bytes. select by structure, not length — a length heuristic
# mismatches the header line for any 2+ digit id or a BODY[]/UID fetch.
if isinstance(item, bytearray):
return email.message_from_bytes(bytes(item)) return email.message_from_bytes(bytes(item))
# cross-version fallback: aioimaplib 2.0.x never yields tuples here, but an
# imaplib-style (header, payload) tuple is handled if a future/alt driver does
if isinstance(item, tuple) and len(item) > 1: if isinstance(item, tuple) and len(item) > 1:
return email.message_from_bytes(item[1]) return email.message_from_bytes(item[1])
return None return None

View File

@ -76,7 +76,7 @@ def _scan(text: str, patterns: list[Pattern], lengths: set[int]) -> Optional[str
return m.group(1) if m.groups() else m.group(0) return m.group(1) if m.groups() else m.group(0)
for token in re.split(r"\s+", text): for token in re.split(r"\s+", text):
digits = "".join(c for c in token if c.isdigit()) digits = "".join(c for c in token if c.isdigit())
if digits and len(digits) in lengths: if digits and len(digits) in lengths and digits.isdigit():
return digits return digits
return None return None
@ -121,8 +121,6 @@ def as_predicate(spec: MatchSpec) -> Callable[[Optional[str]], bool]:
if isinstance(spec, re.Pattern): if isinstance(spec, re.Pattern):
return lambda value: bool(spec.search(value or "")) return lambda value: bool(spec.search(value or ""))
if callable(spec): if callable(spec):
# coalesce None like the string/regex branches so the documented Optional[str] return spec
# predicate contract holds even if a caller's callable assumes a real string
return lambda value: bool(spec(value or ""))
needle = str(spec).lower() needle = str(spec).lower()
return lambda value: needle in (value or "").lower() return lambda value: needle in (value or "").lower()

View File

@ -6,7 +6,6 @@ without aiohttp raises a clear error only when a provider is instantiated.
credentials (client_id, refresh_token) are always supplied by the caller. credentials (client_id, refresh_token) are always supplied by the caller.
""" """
import asyncio
import logging import logging
import time import time
from typing import Optional, Sequence from typing import Optional, Sequence
@ -76,21 +75,18 @@ class _RefreshTokenProvider:
async with aiohttp.ClientSession(timeout=timeout) as session: async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.post(endpoint, data=data) as resp: async with session.post(endpoint, data=data) as resp:
if resp.status == 200: if resp.status == 200:
# content_type=None: some token endpoints return a 200 with token = (await resp.json()).get("access_token")
# text/plain or text/javascript; default json() would raise
# ContentTypeError and discard a valid token body
token = (await resp.json(content_type=None)).get("access_token")
if token: if token:
self._failures = 0 self._failures = 0
return token return token
else: else:
# log a truncated error body only — a token-endpoint body = await resp.text()
# response can carry sensitive material; never dump it whole
body = (await resp.text())[:200]
log.warning("token endpoint %s -> %s: %s", endpoint, resp.status, body) log.warning("token endpoint %s -> %s: %s", endpoint, resp.status, body)
except Exception as exc: except Exception as exc:
log.warning("token request to %s failed: %s", endpoint, exc) log.warning("token request to %s failed: %s", endpoint, exc)
if attempt < self.max_retries - 1: if attempt < self.max_retries - 1:
import asyncio
await asyncio.sleep(2 ** attempt) await asyncio.sleep(2 ** attempt)
self._failures += 1 self._failures += 1

View File

@ -20,18 +20,16 @@ log = logging.getLogger(__name__)
DEFAULT_FOLDERS: Sequence[str] = ("INBOX", "Junk", "Spam", "Archive", "All Mail") DEFAULT_FOLDERS: Sequence[str] = ("INBOX", "Junk", "Spam", "Archive", "All Mail")
def _server_query(sender: MatchSpec, subject: MatchSpec, match_field: str = "from") -> str: def _server_query(sender: MatchSpec, subject: MatchSpec) -> str:
"""build a narrowing IMAP query from plain-string specs only """build a narrowing IMAP query from plain-string specs only
only plain strings translate to server-side filters; regex and callable specs only plain strings translate to server-side FROM/SUBJECT filters; regex and
fall back to ALL and are filtered client-side, so dynamic matching always works callable specs fall back to ALL and are filtered client-side, so dynamic
even when the server cannot express it. `match_field` selects which header the matching always works even when the server cannot express it.
`sender` spec searches: "from" filters by the sender address (default), "to"
filters by the recipient address (the per-user alias the code was sent to).
""" """
parts: List[str] = [] parts: List[str] = []
if isinstance(sender, str): if isinstance(sender, str):
parts.append(f'TO "{sender}"' if match_field == "to" else f'FROM "{sender}"') parts.append(f'FROM "{sender}"')
if isinstance(subject, str): if isinstance(subject, str):
parts.append(f'SUBJECT "{subject}"') parts.append(f'SUBJECT "{subject}"')
return f"({' '.join(parts)})" if parts else "ALL" return f"({' '.join(parts)})" if parts else "ALL"
@ -54,7 +52,6 @@ async def retrieve_otp(
*, *,
sender: MatchSpec = None, sender: MatchSpec = None,
subject: MatchSpec = None, subject: MatchSpec = None,
match_field: str = "from",
folders: Optional[Iterable[str]] = None, folders: Optional[Iterable[str]] = None,
patterns: Sequence[Union[str, Pattern]] = DEFAULT_PATTERNS, patterns: Sequence[Union[str, Pattern]] = DEFAULT_PATTERNS,
lengths: Iterable[int] = DEFAULT_LENGTHS, lengths: Iterable[int] = DEFAULT_LENGTHS,
@ -67,18 +64,14 @@ async def retrieve_otp(
) -> Optional[str]: ) -> Optional[str]:
"""return the newest OTP matching the filters, or None """return the newest OTP matching the filters, or None
sender/subject accept a substring, a compiled regex, or a callable. `match_field` sender/subject accept a substring, a compiled regex, or a callable. folders,
selects which header the `sender` spec is matched against: "from" (default) patterns, code lengths, max age and retry behavior are all tunable. set
matches the sender address; "to" matches the recipient address (the per-user `max_age=None` to disable the freshness check.
alias the code was sent to) and additionally accepts a forwarded match on the
From header, so a forwarded code still resolves. folders, patterns, code lengths,
max age and retry behavior are all tunable. set `max_age=None` to disable the
freshness check.
""" """
folders = list(folders) if folders is not None else list(DEFAULT_FOLDERS) folders = list(folders) if folders is not None else list(DEFAULT_FOLDERS)
sender_ok = as_predicate(sender) sender_ok = as_predicate(sender)
subject_ok = as_predicate(subject) subject_ok = as_predicate(subject)
query = _server_query(sender, subject, match_field) query = _server_query(sender, subject)
for attempt in range(retries + 1): for attempt in range(retries + 1):
for folder in folders: for folder in folders:
@ -98,12 +91,7 @@ async def retrieve_otp(
from_hdr = message.get("From", "") from_hdr = message.get("From", "")
subj_hdr = message.get("Subject", "") subj_hdr = message.get("Subject", "")
if match_field == "to": if not sender_ok(from_hdr) or not subject_ok(subj_hdr):
to_hdr = message.get("To", "")
matched = sender_ok(to_hdr) or sender_ok(from_hdr)
else:
matched = sender_ok(from_hdr)
if not matched or not subject_ok(subj_hdr):
continue continue
code = extract_code(message, patterns=patterns, lengths=lengths) code = extract_code(message, patterns=patterns, lengths=lengths)