Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ce0762a37c | |||
| ef9eb3010c | |||
| da8b0bf6f8 | |||
| 612861b76c | |||
| ccea880df0 | |||
| 5eb689ba73 | |||
| 87debe8465 | |||
| b2876d005e | |||
| ae4c653ecc | |||
| 7ea8ecf888 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,5 +1,5 @@
|
|||||||
# claude
|
# claude
|
||||||
CLAUDE.md
|
.claude/
|
||||||
|
|
||||||
# python
|
# python
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|||||||
27
README.md
27
README.md
@@ -22,17 +22,17 @@ you want; importing the package never fails because an extra is missing.
|
|||||||
`requirements.txt` (pick the extra you need):
|
`requirements.txt` (pick the extra you need):
|
||||||
|
|
||||||
```
|
```
|
||||||
aioweb_tls[curl] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0
|
aioweb_tls[curl] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3
|
||||||
aioweb_tls[noble] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0
|
aioweb_tls[noble] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3
|
||||||
aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0
|
aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3
|
||||||
```
|
```
|
||||||
|
|
||||||
Direct:
|
Direct:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install "aioweb_tls[curl] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0"
|
pip install "aioweb_tls[curl] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3"
|
||||||
pip install "aioweb_tls[noble] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0"
|
pip install "aioweb_tls[noble] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3"
|
||||||
pip install "aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0"
|
pip install "aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3"
|
||||||
```
|
```
|
||||||
|
|
||||||
- `[curl]` → curl_cffi backend · `[noble]` → noble_tls backend · `[all]` → both.
|
- `[curl]` → curl_cffi backend · `[noble]` → noble_tls backend · `[all]` → both.
|
||||||
@@ -44,6 +44,8 @@ pip install "aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-publi
|
|||||||
Constructing a backend whose client isn't installed raises that `RuntimeError` at
|
Constructing a backend whose client isn't installed raises that `RuntimeError` at
|
||||||
construction, never at import.
|
construction, never at import.
|
||||||
|
|
||||||
|
Drop the `@v0.1.3` suffix from any of the install lines above to install the latest unpinned revision.
|
||||||
|
|
||||||
## curl_cffi backend
|
## curl_cffi backend
|
||||||
|
|
||||||
```python
|
```python
|
||||||
@@ -55,8 +57,11 @@ async with TLSSession(backend=CurlCffi(impersonate="chrome"), proxies={"https":
|
|||||||
print(resp.json()["tls"]["ja3"])
|
print(resp.json()["tls"]["ja3"])
|
||||||
```
|
```
|
||||||
|
|
||||||
- `CurlCffi(impersonate="chrome")` sets the forged profile; override per call by
|
- `CurlCffi(impersonate="chrome")` sets the forged profile; override it per call by
|
||||||
passing `impersonate=` to any request method.
|
passing `impersonate=` to the low-level `request()` (which forwards `**kwargs` to the
|
||||||
|
backend). `request_with_retries` has a fixed signature and does **not** accept extra
|
||||||
|
backend kwargs — passing `impersonate=` there raises `TypeError`; set the profile on
|
||||||
|
the `CurlCffi` instance for the retrying path.
|
||||||
- curl_cffi forges JA3/JA4 + HTTP/2 fingerprints via the bundled curl-impersonate binary.
|
- curl_cffi forges JA3/JA4 + HTTP/2 fingerprints via the bundled curl-impersonate binary.
|
||||||
|
|
||||||
## noble backend
|
## noble backend
|
||||||
@@ -73,8 +78,8 @@ async with TLSSession(backend=Noble(client="chrome_133")) as s:
|
|||||||
|
|
||||||
- `Noble(client="chrome_133")` — accepts a `noble_tls.Client` enum or a string name.
|
- `Noble(client="chrome_133")` — accepts a `noble_tls.Client` enum or a string name.
|
||||||
- noble_tls downloads a Go shared library on first use. `await s.setup()` fetches it
|
- noble_tls downloads a Go shared library on first use. `await s.setup()` fetches it
|
||||||
once at startup; if you skip it, the first request fetches it lazily (guarded to run
|
once at startup; if you skip it, the first request fetches it lazily. The fetch is
|
||||||
once).
|
guarded by a lock, so even concurrent first requests download it exactly once.
|
||||||
|
|
||||||
## Writing your own backend (the `TLSBackend` protocol)
|
## Writing your own backend (the `TLSBackend` protocol)
|
||||||
|
|
||||||
@@ -170,4 +175,4 @@ are separate signals. Use this as one component, not a complete anti-bot solutio
|
|||||||
|
|
||||||
## Versioning
|
## Versioning
|
||||||
|
|
||||||
Tagged `vX.Y.Z`. Pin the tag in `requirements.txt`.
|
Releases are tagged `vX.Y.Z`. The install lines above pin a release; drop the `@vX.Y.Z` suffix to install the latest unpinned revision. Pin a tag deliberately for reproducible installs.
|
||||||
|
|||||||
@@ -4,11 +4,11 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "aioweb_tls"
|
name = "aioweb_tls"
|
||||||
version = "0.1.0"
|
version = "0.1.3"
|
||||||
description = "TLS-fingerprinting backends for aioweb — curl_cffi / noble_tls ExtendedSession subclasses, config-free, installable."
|
description = "TLS-fingerprinting backends (curl_cffi / noble_tls) for aioweb via one injectable TLSSession, config-free, installable."
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aioweb @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb.git@v0.1.0",
|
"aioweb @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb.git@v0.1.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|||||||
@@ -11,12 +11,27 @@ is not installed raises a clear RuntimeError naming the extra to install. import
|
|||||||
this module never fails because an extra is missing.
|
this module never fails because an extra is missing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import math
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
from aioweb import Response
|
from aioweb import Response
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _as_client_error(error: Exception, backend: str) -> aiohttp.ClientError:
|
||||||
|
"""wrap a backend-native network exception as an aiohttp.ClientError
|
||||||
|
|
||||||
|
aioweb's request() only re-wraps aiohttp.ClientError; curl_cffi raises
|
||||||
|
RequestException(OSError) and noble_tls raises TLSClientException(IOError), neither
|
||||||
|
of which is an aiohttp.ClientError. translating here gives TLS backends the same
|
||||||
|
typed failure contract as the aiohttp path on the bare request() route.
|
||||||
|
"""
|
||||||
|
return aiohttp.ClientError(f"{backend} request failed: {error}")
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from curl_cffi import AsyncSession as _CurlAsyncSession
|
from curl_cffi import AsyncSession as _CurlAsyncSession
|
||||||
_CURL_ERROR = None
|
_CURL_ERROR = None
|
||||||
@@ -45,7 +60,12 @@ def _coerce_timeout(value):
|
|||||||
|
|
||||||
|
|
||||||
def _jar_to_dict(session):
|
def _jar_to_dict(session):
|
||||||
"""best-effort map of a requests-style cookie jar on session to a plain dict"""
|
"""best-effort map of a requests-style cookie jar on session to a plain dict
|
||||||
|
|
||||||
|
intentionally broad: this feeds preview() only, the two backends expose differently-
|
||||||
|
shaped jars, and a cookie read must never crash a request — so any jar that doesn't
|
||||||
|
iterate cleanly degrades to {} rather than raising.
|
||||||
|
"""
|
||||||
jar = getattr(session, "cookies", None)
|
jar = getattr(session, "cookies", None)
|
||||||
if not jar:
|
if not jar:
|
||||||
return {}
|
return {}
|
||||||
@@ -60,7 +80,10 @@ class CurlCffi:
|
|||||||
|
|
||||||
config:
|
config:
|
||||||
impersonate: browser profile to forge (default "chrome"); override per call
|
impersonate: browser profile to forge (default "chrome"); override per call
|
||||||
by passing impersonate= to any request method.
|
by passing impersonate= to the low-level request()/_raw_request path,
|
||||||
|
which forwards **kwargs to the backend. NOT request_with_retries — its
|
||||||
|
signature is fixed (no **kwargs) and would raise TypeError. for a
|
||||||
|
per-call profile under retries, set it on the CurlCffi instance instead.
|
||||||
|
|
||||||
requires the [curl] extra (pip install "aioweb_tls[curl]").
|
requires the [curl] extra (pip install "aioweb_tls[curl]").
|
||||||
"""
|
"""
|
||||||
@@ -89,10 +112,18 @@ class CurlCffi:
|
|||||||
if proxy:
|
if proxy:
|
||||||
kwargs["proxy"] = proxy
|
kwargs["proxy"] = proxy
|
||||||
|
|
||||||
|
try:
|
||||||
response = await session.request(method, url, impersonate=impersonate, **kwargs)
|
response = await session.request(method, url, impersonate=impersonate, **kwargs)
|
||||||
content = response.content
|
except aiohttp.ClientError:
|
||||||
if content is None:
|
raise
|
||||||
content = response.text.encode() if response.text else b""
|
except asyncio.TimeoutError:
|
||||||
|
raise
|
||||||
|
except OSError as error:
|
||||||
|
# curl_cffi's RequestException subclasses OSError; translate the native
|
||||||
|
# network error into aiohttp.ClientError. narrowed from a bare Exception so a
|
||||||
|
# real bug (AttributeError/TypeError) isn't laundered into 'client error'
|
||||||
|
raise _as_client_error(error, "curl_cffi") from error
|
||||||
|
content = response.content if response.content is not None else b""
|
||||||
return Response(
|
return Response(
|
||||||
status_code=response.status_code,
|
status_code=response.status_code,
|
||||||
headers=dict(response.headers),
|
headers=dict(response.headers),
|
||||||
@@ -103,8 +134,17 @@ class CurlCffi:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def is_closed(self, session) -> bool:
|
def is_closed(self, session) -> bool:
|
||||||
"""whether the curl_cffi session is closed"""
|
"""whether the curl_cffi session is closed
|
||||||
return bool(getattr(session, "closed", False))
|
|
||||||
|
curl_cffi tracks closed state in the private `_closed` (no public `closed`
|
||||||
|
property), so read that; fall back to a public `closed` if a future version
|
||||||
|
adds one. TLSSession's own `_closed` flag is the primary signal — this is a
|
||||||
|
best-effort backend check for out-of-band closes.
|
||||||
|
"""
|
||||||
|
closed = getattr(session, "_closed", None)
|
||||||
|
if closed is None:
|
||||||
|
closed = getattr(session, "closed", False)
|
||||||
|
return bool(closed)
|
||||||
|
|
||||||
def cookies_for_url(self, session, url) -> dict:
|
def cookies_for_url(self, session, url) -> dict:
|
||||||
"""cookies curl_cffi would send for url (best-effort)"""
|
"""cookies curl_cffi would send for url (best-effort)"""
|
||||||
@@ -135,6 +175,7 @@ class Noble:
|
|||||||
) from _NOBLE_ERROR
|
) from _NOBLE_ERROR
|
||||||
self.client = self._resolve_client(client)
|
self.client = self._resolve_client(client)
|
||||||
self._updated = False
|
self._updated = False
|
||||||
|
self._setup_lock = asyncio.Lock()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _resolve_client(client):
|
def _resolve_client(client):
|
||||||
@@ -144,24 +185,42 @@ class Noble:
|
|||||||
return client
|
return client
|
||||||
|
|
||||||
async def setup(self) -> None:
|
async def setup(self) -> None:
|
||||||
"""fetch the noble_tls Go shared library once; idempotent
|
"""fetch the noble_tls Go shared library once; idempotent and concurrency-safe
|
||||||
|
|
||||||
download_if_necessary handles the first-time fetch (no lib present);
|
uses noble_tls.download_if_necessary (the current API: it fetches the asset on
|
||||||
update_if_necessary refreshes an existing one. try download first so a
|
first use and no-ops when it already exists). older noble_tls without that name
|
||||||
clean environment works, falling back to update.
|
is handled via update_if_necessary as a fallback.
|
||||||
|
|
||||||
|
guarded by an asyncio.Lock with a check-lock-recheck so concurrent first
|
||||||
|
requests don't both run the fetch: the fast path returns once _updated is
|
||||||
|
set, and only the first caller through the lock does the work.
|
||||||
"""
|
"""
|
||||||
|
if self._updated:
|
||||||
|
return
|
||||||
|
async with self._setup_lock:
|
||||||
if self._updated:
|
if self._updated:
|
||||||
return
|
return
|
||||||
download = getattr(noble_tls, "download_if_necessary", None)
|
download = getattr(noble_tls, "download_if_necessary", None)
|
||||||
if download is not None:
|
if download is not None:
|
||||||
await download()
|
await download()
|
||||||
else:
|
elif hasattr(noble_tls, "update_if_necessary"):
|
||||||
await noble_tls.update_if_necessary()
|
await noble_tls.update_if_necessary()
|
||||||
self._updated = True
|
self._updated = True
|
||||||
|
|
||||||
def create_session(self, headers, timeout, **kwargs):
|
def create_session(self, headers, timeout, **kwargs):
|
||||||
"""build the noble_tls Session"""
|
"""build the noble_tls Session, honoring session-default headers + timeout
|
||||||
return noble_tls.Session(client=self.client, **kwargs)
|
|
||||||
|
noble_tls.Session takes neither headers nor timeout in its constructor, so
|
||||||
|
aioweb's session-default headers (and the coerced timeout) are applied after
|
||||||
|
construction — matching CurlCffi, which passes both through. without this a
|
||||||
|
TLSSession(backend=Noble(...), headers=...) would silently drop the headers.
|
||||||
|
"""
|
||||||
|
session = noble_tls.Session(client=self.client, **kwargs)
|
||||||
|
if headers:
|
||||||
|
session.headers.update(headers)
|
||||||
|
if timeout is not None:
|
||||||
|
session.timeout_seconds = timeout
|
||||||
|
return session
|
||||||
|
|
||||||
async def raw_request(self, session, method, url, **kwargs) -> Response:
|
async def raw_request(self, session, method, url, **kwargs) -> Response:
|
||||||
"""send via noble_tls and adapt the result into an aioweb.Response"""
|
"""send via noble_tls and adapt the result into an aioweb.Response"""
|
||||||
@@ -169,13 +228,24 @@ class Noble:
|
|||||||
|
|
||||||
timeout = _coerce_timeout(kwargs.pop("timeout", None))
|
timeout = _coerce_timeout(kwargs.pop("timeout", None))
|
||||||
if timeout is not None:
|
if timeout is not None:
|
||||||
kwargs["timeout_seconds"] = int(timeout)
|
# noble takes whole seconds; round UP so a sub-second timeout (e.g. 0.5)
|
||||||
|
# doesn't truncate to 0 (which would mean no/instant timeout)
|
||||||
|
kwargs["timeout_seconds"] = max(1, math.ceil(timeout))
|
||||||
|
|
||||||
proxy = kwargs.pop("proxy", None)
|
proxy = kwargs.pop("proxy", None)
|
||||||
if proxy:
|
if proxy:
|
||||||
kwargs["proxy"] = proxy
|
kwargs["proxy"] = proxy
|
||||||
|
|
||||||
|
try:
|
||||||
response = await session.execute_request(method=method.upper(), url=url, **kwargs)
|
response = await session.execute_request(method=method.upper(), url=url, **kwargs)
|
||||||
|
except aiohttp.ClientError:
|
||||||
|
raise
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
raise
|
||||||
|
except OSError as error:
|
||||||
|
# noble_tls's TLSClientException subclasses IOError (== OSError); translate
|
||||||
|
# the native network error, narrowed from bare Exception so a real bug surfaces
|
||||||
|
raise _as_client_error(error, "noble_tls") from error
|
||||||
content = getattr(response, "content", None)
|
content = getattr(response, "content", None)
|
||||||
if content is None:
|
if content is None:
|
||||||
text = getattr(response, "text", "") or ""
|
text = getattr(response, "text", "") or ""
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user