Compare commits
No commits in common. "main" and "v0.1.0" have entirely different histories.
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,5 +1,5 @@
|
|||||||
# claude
|
# claude
|
||||||
.claude/
|
CLAUDE.md
|
||||||
|
|
||||||
# python
|
# python
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|||||||
27
README.md
27
README.md
@ -22,17 +22,17 @@ you want; importing the package never fails because an extra is missing.
|
|||||||
`requirements.txt` (pick the extra you need):
|
`requirements.txt` (pick the extra you need):
|
||||||
|
|
||||||
```
|
```
|
||||||
aioweb_tls[curl] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3
|
aioweb_tls[curl] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0
|
||||||
aioweb_tls[noble] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3
|
aioweb_tls[noble] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0
|
||||||
aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3
|
aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0
|
||||||
```
|
```
|
||||||
|
|
||||||
Direct:
|
Direct:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install "aioweb_tls[curl] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3"
|
pip install "aioweb_tls[curl] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0"
|
||||||
pip install "aioweb_tls[noble] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3"
|
pip install "aioweb_tls[noble] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0"
|
||||||
pip install "aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.3"
|
pip install "aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb_tls.git@v0.1.0"
|
||||||
```
|
```
|
||||||
|
|
||||||
- `[curl]` → curl_cffi backend · `[noble]` → noble_tls backend · `[all]` → both.
|
- `[curl]` → curl_cffi backend · `[noble]` → noble_tls backend · `[all]` → both.
|
||||||
@ -44,8 +44,6 @@ pip install "aioweb_tls[all] @ git+ssh://git@git.rethinkstudios.io/rethink-publi
|
|||||||
Constructing a backend whose client isn't installed raises that `RuntimeError` at
|
Constructing a backend whose client isn't installed raises that `RuntimeError` at
|
||||||
construction, never at import.
|
construction, never at import.
|
||||||
|
|
||||||
Drop the `@v0.1.3` suffix from the line above to install the latest unpinned.
|
|
||||||
|
|
||||||
## curl_cffi backend
|
## curl_cffi backend
|
||||||
|
|
||||||
```python
|
```python
|
||||||
@ -57,11 +55,8 @@ async with TLSSession(backend=CurlCffi(impersonate="chrome"), proxies={"https":
|
|||||||
print(resp.json()["tls"]["ja3"])
|
print(resp.json()["tls"]["ja3"])
|
||||||
```
|
```
|
||||||
|
|
||||||
- `CurlCffi(impersonate="chrome")` sets the forged profile; override it per call by
|
- `CurlCffi(impersonate="chrome")` sets the forged profile; override per call by
|
||||||
passing `impersonate=` to the low-level `request()` (which forwards `**kwargs` to the
|
passing `impersonate=` to any request method.
|
||||||
backend). `request_with_retries` has a fixed signature and does **not** accept extra
|
|
||||||
backend kwargs — passing `impersonate=` there raises `TypeError`; set the profile on
|
|
||||||
the `CurlCffi` instance for the retrying path.
|
|
||||||
- curl_cffi forges JA3/JA4 + HTTP/2 fingerprints via the bundled curl-impersonate binary.
|
- curl_cffi forges JA3/JA4 + HTTP/2 fingerprints via the bundled curl-impersonate binary.
|
||||||
|
|
||||||
## noble backend
|
## noble backend
|
||||||
@ -78,8 +73,8 @@ async with TLSSession(backend=Noble(client="chrome_133")) as s:
|
|||||||
|
|
||||||
- `Noble(client="chrome_133")` — accepts a `noble_tls.Client` enum or a string name.
|
- `Noble(client="chrome_133")` — accepts a `noble_tls.Client` enum or a string name.
|
||||||
- noble_tls downloads a Go shared library on first use. `await s.setup()` fetches it
|
- noble_tls downloads a Go shared library on first use. `await s.setup()` fetches it
|
||||||
once at startup; if you skip it, the first request fetches it lazily. The fetch is
|
once at startup; if you skip it, the first request fetches it lazily (guarded to run
|
||||||
guarded by a lock, so even concurrent first requests download it exactly once.
|
once).
|
||||||
|
|
||||||
## Writing your own backend (the `TLSBackend` protocol)
|
## Writing your own backend (the `TLSBackend` protocol)
|
||||||
|
|
||||||
@ -175,4 +170,4 @@ are separate signals. Use this as one component, not a complete anti-bot solutio
|
|||||||
|
|
||||||
## Versioning
|
## Versioning
|
||||||
|
|
||||||
Releases are tagged `vX.Y.Z`. The install line above pins a release; drop the `@vX.Y.Z` suffix to install the latest unpinned. Pin deliberately for reproducible installs.
|
Tagged `vX.Y.Z`. Pin the tag in `requirements.txt`.
|
||||||
|
|||||||
@ -4,11 +4,11 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "aioweb_tls"
|
name = "aioweb_tls"
|
||||||
version = "0.1.3"
|
version = "0.1.0"
|
||||||
description = "TLS-fingerprinting backends (curl_cffi / noble_tls) for aioweb via one injectable TLSSession, config-free, installable."
|
description = "TLS-fingerprinting backends for aioweb — curl_cffi / noble_tls ExtendedSession subclasses, config-free, installable."
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aioweb @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb.git@v0.1.5",
|
"aioweb @ git+ssh://git@git.rethinkstudios.io/rethink-public/aioweb.git@v0.1.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|||||||
@ -11,27 +11,12 @@ is not installed raises a clear RuntimeError naming the extra to install. import
|
|||||||
this module never fails because an extra is missing.
|
this module never fails because an extra is missing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
import logging
|
||||||
import math
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
from aioweb import Response
|
from aioweb import Response
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def _as_client_error(error: Exception, backend: str) -> aiohttp.ClientError:
|
|
||||||
"""wrap a backend-native network exception as an aiohttp.ClientError
|
|
||||||
|
|
||||||
aioweb's request() only re-wraps aiohttp.ClientError; curl_cffi raises
|
|
||||||
RequestException(OSError) and noble_tls raises TLSClientException(IOError), neither
|
|
||||||
of which is an aiohttp.ClientError. translating here gives TLS backends the same
|
|
||||||
typed failure contract as the aiohttp path on the bare request() route.
|
|
||||||
"""
|
|
||||||
return aiohttp.ClientError(f"{backend} request failed: {error}")
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from curl_cffi import AsyncSession as _CurlAsyncSession
|
from curl_cffi import AsyncSession as _CurlAsyncSession
|
||||||
_CURL_ERROR = None
|
_CURL_ERROR = None
|
||||||
@ -60,12 +45,7 @@ def _coerce_timeout(value):
|
|||||||
|
|
||||||
|
|
||||||
def _jar_to_dict(session):
|
def _jar_to_dict(session):
|
||||||
"""best-effort map of a requests-style cookie jar on session to a plain dict
|
"""best-effort map of a requests-style cookie jar on session to a plain dict"""
|
||||||
|
|
||||||
intentionally broad: this feeds preview() only, the two backends expose differently-
|
|
||||||
shaped jars, and a cookie read must never crash a request — so any jar that doesn't
|
|
||||||
iterate cleanly degrades to {} rather than raising.
|
|
||||||
"""
|
|
||||||
jar = getattr(session, "cookies", None)
|
jar = getattr(session, "cookies", None)
|
||||||
if not jar:
|
if not jar:
|
||||||
return {}
|
return {}
|
||||||
@ -80,10 +60,7 @@ class CurlCffi:
|
|||||||
|
|
||||||
config:
|
config:
|
||||||
impersonate: browser profile to forge (default "chrome"); override per call
|
impersonate: browser profile to forge (default "chrome"); override per call
|
||||||
by passing impersonate= to the low-level request()/_raw_request path,
|
by passing impersonate= to any request method.
|
||||||
which forwards **kwargs to the backend. NOT request_with_retries — its
|
|
||||||
signature is fixed (no **kwargs) and would raise TypeError. for a
|
|
||||||
per-call profile under retries, set it on the CurlCffi instance instead.
|
|
||||||
|
|
||||||
requires the [curl] extra (pip install "aioweb_tls[curl]").
|
requires the [curl] extra (pip install "aioweb_tls[curl]").
|
||||||
"""
|
"""
|
||||||
@ -112,18 +89,10 @@ class CurlCffi:
|
|||||||
if proxy:
|
if proxy:
|
||||||
kwargs["proxy"] = proxy
|
kwargs["proxy"] = proxy
|
||||||
|
|
||||||
try:
|
response = await session.request(method, url, impersonate=impersonate, **kwargs)
|
||||||
response = await session.request(method, url, impersonate=impersonate, **kwargs)
|
content = response.content
|
||||||
except aiohttp.ClientError:
|
if content is None:
|
||||||
raise
|
content = response.text.encode() if response.text else b""
|
||||||
except asyncio.TimeoutError:
|
|
||||||
raise
|
|
||||||
except OSError as error:
|
|
||||||
# curl_cffi's RequestException subclasses OSError; translate the native
|
|
||||||
# network error into aiohttp.ClientError. narrowed from a bare Exception so a
|
|
||||||
# real bug (AttributeError/TypeError) isn't laundered into 'client error'
|
|
||||||
raise _as_client_error(error, "curl_cffi") from error
|
|
||||||
content = response.content if response.content is not None else b""
|
|
||||||
return Response(
|
return Response(
|
||||||
status_code=response.status_code,
|
status_code=response.status_code,
|
||||||
headers=dict(response.headers),
|
headers=dict(response.headers),
|
||||||
@ -134,17 +103,8 @@ class CurlCffi:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def is_closed(self, session) -> bool:
|
def is_closed(self, session) -> bool:
|
||||||
"""whether the curl_cffi session is closed
|
"""whether the curl_cffi session is closed"""
|
||||||
|
return bool(getattr(session, "closed", False))
|
||||||
curl_cffi tracks closed state in the private `_closed` (no public `closed`
|
|
||||||
property), so read that; fall back to a public `closed` if a future version
|
|
||||||
adds one. TLSSession's own `_closed` flag is the primary signal — this is a
|
|
||||||
best-effort backend check for out-of-band closes.
|
|
||||||
"""
|
|
||||||
closed = getattr(session, "_closed", None)
|
|
||||||
if closed is None:
|
|
||||||
closed = getattr(session, "closed", False)
|
|
||||||
return bool(closed)
|
|
||||||
|
|
||||||
def cookies_for_url(self, session, url) -> dict:
|
def cookies_for_url(self, session, url) -> dict:
|
||||||
"""cookies curl_cffi would send for url (best-effort)"""
|
"""cookies curl_cffi would send for url (best-effort)"""
|
||||||
@ -175,7 +135,6 @@ class Noble:
|
|||||||
) from _NOBLE_ERROR
|
) from _NOBLE_ERROR
|
||||||
self.client = self._resolve_client(client)
|
self.client = self._resolve_client(client)
|
||||||
self._updated = False
|
self._updated = False
|
||||||
self._setup_lock = asyncio.Lock()
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _resolve_client(client):
|
def _resolve_client(client):
|
||||||
@ -185,42 +144,24 @@ class Noble:
|
|||||||
return client
|
return client
|
||||||
|
|
||||||
async def setup(self) -> None:
|
async def setup(self) -> None:
|
||||||
"""fetch the noble_tls Go shared library once; idempotent and concurrency-safe
|
"""fetch the noble_tls Go shared library once; idempotent
|
||||||
|
|
||||||
uses noble_tls.download_if_necessary (the current API: it fetches the asset on
|
download_if_necessary handles the first-time fetch (no lib present);
|
||||||
first use and no-ops when it already exists). older noble_tls without that name
|
update_if_necessary refreshes an existing one. try download first so a
|
||||||
is handled via update_if_necessary as a fallback.
|
clean environment works, falling back to update.
|
||||||
|
|
||||||
guarded by an asyncio.Lock with a check-lock-recheck so concurrent first
|
|
||||||
requests don't both run the fetch: the fast path returns once _updated is
|
|
||||||
set, and only the first caller through the lock does the work.
|
|
||||||
"""
|
"""
|
||||||
if self._updated:
|
if self._updated:
|
||||||
return
|
return
|
||||||
async with self._setup_lock:
|
download = getattr(noble_tls, "download_if_necessary", None)
|
||||||
if self._updated:
|
if download is not None:
|
||||||
return
|
await download()
|
||||||
download = getattr(noble_tls, "download_if_necessary", None)
|
else:
|
||||||
if download is not None:
|
await noble_tls.update_if_necessary()
|
||||||
await download()
|
self._updated = True
|
||||||
elif hasattr(noble_tls, "update_if_necessary"):
|
|
||||||
await noble_tls.update_if_necessary()
|
|
||||||
self._updated = True
|
|
||||||
|
|
||||||
def create_session(self, headers, timeout, **kwargs):
|
def create_session(self, headers, timeout, **kwargs):
|
||||||
"""build the noble_tls Session, honoring session-default headers + timeout
|
"""build the noble_tls Session"""
|
||||||
|
return noble_tls.Session(client=self.client, **kwargs)
|
||||||
noble_tls.Session takes neither headers nor timeout in its constructor, so
|
|
||||||
aioweb's session-default headers (and the coerced timeout) are applied after
|
|
||||||
construction — matching CurlCffi, which passes both through. without this a
|
|
||||||
TLSSession(backend=Noble(...), headers=...) would silently drop the headers.
|
|
||||||
"""
|
|
||||||
session = noble_tls.Session(client=self.client, **kwargs)
|
|
||||||
if headers:
|
|
||||||
session.headers.update(headers)
|
|
||||||
if timeout is not None:
|
|
||||||
session.timeout_seconds = timeout
|
|
||||||
return session
|
|
||||||
|
|
||||||
async def raw_request(self, session, method, url, **kwargs) -> Response:
|
async def raw_request(self, session, method, url, **kwargs) -> Response:
|
||||||
"""send via noble_tls and adapt the result into an aioweb.Response"""
|
"""send via noble_tls and adapt the result into an aioweb.Response"""
|
||||||
@ -228,24 +169,13 @@ class Noble:
|
|||||||
|
|
||||||
timeout = _coerce_timeout(kwargs.pop("timeout", None))
|
timeout = _coerce_timeout(kwargs.pop("timeout", None))
|
||||||
if timeout is not None:
|
if timeout is not None:
|
||||||
# noble takes whole seconds; round UP so a sub-second timeout (e.g. 0.5)
|
kwargs["timeout_seconds"] = int(timeout)
|
||||||
# doesn't truncate to 0 (which would mean no/instant timeout)
|
|
||||||
kwargs["timeout_seconds"] = max(1, math.ceil(timeout))
|
|
||||||
|
|
||||||
proxy = kwargs.pop("proxy", None)
|
proxy = kwargs.pop("proxy", None)
|
||||||
if proxy:
|
if proxy:
|
||||||
kwargs["proxy"] = proxy
|
kwargs["proxy"] = proxy
|
||||||
|
|
||||||
try:
|
response = await session.execute_request(method=method.upper(), url=url, **kwargs)
|
||||||
response = await session.execute_request(method=method.upper(), url=url, **kwargs)
|
|
||||||
except aiohttp.ClientError:
|
|
||||||
raise
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
raise
|
|
||||||
except OSError as error:
|
|
||||||
# noble_tls's TLSClientException subclasses IOError (== OSError); translate
|
|
||||||
# the native network error, narrowed from bare Exception so a real bug surfaces
|
|
||||||
raise _as_client_error(error, "noble_tls") from error
|
|
||||||
content = getattr(response, "content", None)
|
content = getattr(response, "content", None)
|
||||||
if content is None:
|
if content is None:
|
||||||
text = getattr(response, "text", "") or ""
|
text = getattr(response, "text", "") or ""
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user