Skip to main content

trillium_client/
dns.rs

1//! DNS resolution over an encrypted transport, including SVCB and HTTPS resource
2//! records ([RFC 9460]). DNS-over-HTTPS (DoH, [RFC 8484]), DNS-over-TLS (DoT,
3//! [RFC 7858]), and DNS-over-QUIC (DoQ, [RFC 9250]) are supported; the [`Resolver`] /
4//! [`DnsTransport`] split keeps the shared query/cache/SVCB core ([`codec`])
5//! independent of the transport, so each transport is just one [`DnsTransport`]
6//! variant plus its own `exchange`.
7//!
8//! `hickory-proto` is used purely as a wire codec — building the query message and
9//! parsing the response (see [`codec`]). The actual IO is performed by [`Client`]
10//! itself (DoH over its own pool, DoT/DoQ over the connector), so this path inherits
11//! the client's runtime and TLS stack and pulls in no resolver of its own.
12//!
13//! Resolutions are kept in a [`DnsCache`] that is independent of the connection
14//! pool: evicting a pooled connection does not invalidate a still-live
15//! resolution, and a single resolution is shared across HTTP/1, HTTP/2, and
16//! HTTP/3 — so an origin reachable by Alt-Svc but without SVCB still need only
17//! be resolved once.
18//!
19//! [RFC 7858]: https://www.rfc-editor.org/rfc/rfc7858
20//! [RFC 8484]: https://www.rfc-editor.org/rfc/rfc8484
21//! [RFC 9250]: https://www.rfc-editor.org/rfc/rfc9250
22//! [RFC 9460]: https://www.rfc-editor.org/rfc/rfc9460
23//!
24//! [`Client`]: crate::Client
25
26mod codec;
27mod doh;
28mod doq;
29mod dot;
30mod framing;
31
32use crate::Client;
33use async_lock::OnceCell;
34pub(crate) use codec::Resolved;
35use codec::{build_query, parse_response};
36use dashmap::DashMap;
37use doh::Doh;
38use doq::Doq;
39use dot::Dot;
40use futures_lite::future;
41use hickory_proto::rr::RecordType;
42use std::{
43    future::Future,
44    io::{self, ErrorKind},
45    sync::Arc,
46    time::{Duration, Instant},
47};
48use trillium_http::Version;
49use trillium_server_common::{Connector, url::Url};
50
/// How long the resolver endpoint's own bootstrap resolution stays cached. That
/// answer comes from the system resolver rather than from a DNS record, so it
/// carries no TTL of its own and a fixed five-minute window is used instead.
const BOOTSTRAP_TTL: Duration = Duration::from_secs(5 * 60);

/// Lower bound on cache lifetime. Records advertising a shorter TTL are still
/// cached for this long, so an origin publishing a near-zero TTL doesn't force a
/// fresh resolution on every request.
const MIN_TTL: Duration = Duration::from_secs(1);

/// Upper bound on cache lifetime (one hour). Limits how stale a cached
/// resolution can get no matter how large the record's advertised TTL is.
const MAX_TTL: Duration = Duration::from_secs(60 * 60);
62
/// Budget for a whole DNS resolution (connect plus the A/AAAA/HTTPS exchanges) when the request
/// itself has no overall timeout. Without it, an unreachable resolver — or one that doesn't speak
/// the configured transport at all, like a DoT host addressed over DoQ — would hang indefinitely
/// instead of surfacing as a clear DNS error.
const DEFAULT_DNS_TIMEOUT: Duration = Duration::from_secs(5);

/// Pick the time limit for one DNS resolution.
///
/// * `Some(timeout)` — the request has an overall budget; DNS may use at most half of it, leaving
///   the remainder for the connection and response. DNS is normally fast, so this only matters
///   when a resolver stalls: the lookup then fails with a clear DNS error about halfway through
///   the budget rather than eating all of it and surfacing as a generic request timeout.
/// * `None` — no overall budget; fall back to [`DEFAULT_DNS_TIMEOUT`].
fn dns_timeout(request_timeout: Option<Duration>) -> Duration {
    match request_timeout {
        Some(budget) => budget / 2,
        None => DEFAULT_DNS_TIMEOUT,
    }
}
77
78/// A host-keyed DNS cache shared across protocols and independent of the
79/// connection pool. Cheaply cloneable (Arc-backed).
80#[derive(Debug, Clone, Default)]
81pub(crate) struct DnsCache {
82    entries: Arc<DashMap<Box<str>, CacheEntry>>,
83    /// Per-host single-flight slots. While one resolution is in flight, concurrent resolves of
84    /// the same host await its result instead of each issuing their own A/AAAA/HTTPS queries.
85    /// Independent of `entries` (the TTL'd result cache) and reaped as resolutions complete.
86    in_flight: Arc<DashMap<Box<str>, Arc<OnceCell<Resolved>>>>,
87}
88
89#[derive(Debug, Clone)]
90struct CacheEntry {
91    resolved: Resolved,
92    expiry: Instant,
93}
94
95impl DnsCache {
96    /// Return the cached resolution for `host` if present and unexpired,
97    /// evicting it if it has expired.
98    pub(crate) fn get(&self, host: &str) -> Option<Resolved> {
99        let expired = {
100            let entry = self.entries.get(host)?;
101            if entry.expiry >= Instant::now() {
102                return Some(entry.resolved.clone());
103            }
104            true
105        };
106        if expired {
107            self.entries.remove(host);
108        }
109        None
110    }
111
112    /// Cache `resolved` for `host`, honoring `ttl` clamped to [`MIN_TTL`,
113    /// `MAX_TTL`].
114    pub(crate) fn insert(&self, host: &str, resolved: Resolved, ttl: Duration) {
115        let expiry = Instant::now() + ttl.clamp(MIN_TTL, MAX_TTL);
116        self.entries
117            .insert(host.into(), CacheEntry { resolved, expiry });
118    }
119
120    /// Resolve `host` through `query`, coalescing concurrent resolutions of the same host so a
121    /// burst issues one set of DNS queries rather than one per caller. The winning caller runs
122    /// `query` and populates the TTL cache; the rest await its result. On error each caller falls
123    /// back to its own attempt, so a transient failure neither poisons the cache nor wedges the
124    /// waiters.
125    pub(crate) async fn resolve_coalesced(
126        &self,
127        host: &str,
128        query: impl Future<Output = io::Result<(Resolved, Duration)>>,
129    ) -> io::Result<Resolved> {
130        if let Some(hit) = self.get(host) {
131            return Ok(hit);
132        }
133
134        let cell = self
135            .in_flight
136            .entry(host.into())
137            .or_insert_with(|| Arc::new(OnceCell::new()))
138            .clone();
139
140        // `get_or_try_init` runs `query` on exactly one caller; the rest await the same cell. If
141        // it errors (or the running future is cancelled), the cell stays uninitialized and another
142        // caller retries — no guard needed to avoid a wedged slot.
143        let resolved = cell
144            .get_or_try_init(|| async {
145                let (resolved, ttl) = query.await?;
146                self.insert(host, resolved.clone(), ttl);
147                Ok::<_, io::Error>(resolved)
148            })
149            .await
150            .cloned();
151
152        // Best-effort reap: holders of a clone of `cell` already have the value, and a leaked
153        // empty cell (every caller cancelled) self-heals on the next resolve.
154        self.in_flight.remove(host);
155        resolved
156    }
157}
158
159/// A configured DNS resolver: the transport that carries queries plus the cache that every
160/// resolution through it populates.
161///
162/// Query construction, caching, SVCB handling, and the A/AAAA/HTTPS orchestration are all
163/// transport-independent and live here; a [`DnsTransport`] variant supplies only the wire
164/// exchange and its own resolver host. Cheaply cloneable — the cache is Arc-backed, so clones
165/// share it.
166#[derive(Debug, Clone)]
167pub(crate) struct Resolver {
168    cache: DnsCache,
169    transport: DnsTransport,
170}
171
172/// The mechanism a [`Resolver`] uses to turn a wire-format query into a wire-format response.
173/// The single seam where a resolution touches the network; everything around it is shared.
174#[derive(Debug, Clone)]
175enum DnsTransport {
176    /// DNS-over-HTTPS: queries POST over the client's own connection pool.
177    Doh(Doh),
178    /// DNS-over-TLS: queries pipeline over a persistent TLS connection to the resolver.
179    Dot(Dot),
180    /// DNS-over-QUIC: each query rides its own bidi stream on a cached QUIC connection.
181    Doq(Doq),
182}
183
184impl Resolver {
185    /// Build a DoH resolver pointing at `resolver`.
186    pub(crate) fn doh(resolver: Url) -> Self {
187        Self {
188            cache: DnsCache::default(),
189            transport: DnsTransport::Doh(Doh::new(resolver, None)),
190        }
191    }
192
193    /// Build a DoH resolver pointing at `resolver`, pinning the connection to it to HTTP/3.
194    pub(crate) fn doh3(resolver: Url) -> Self {
195        Self {
196            cache: DnsCache::default(),
197            transport: DnsTransport::Doh(Doh::new(resolver, Some(Version::Http3))),
198        }
199    }
200
201    /// Build a DoT resolver pointing at `resolver`.
202    pub(crate) fn dot(resolver: Url) -> Self {
203        Self {
204            cache: DnsCache::default(),
205            transport: DnsTransport::Dot(Dot::new(resolver)),
206        }
207    }
208
209    /// Build a DoQ resolver pointing at `resolver`.
210    pub(crate) fn doq(resolver: Url) -> Self {
211        Self {
212            cache: DnsCache::default(),
213            transport: DnsTransport::Doq(Doq::new(resolver)),
214        }
215    }
216
217    /// Resolve `host:port` through the resolver, caching and returning the result.
218    ///
219    /// The resolver endpoint's own host is the single name resolved via the connector's system
220    /// resolver (it can't be looked up over itself); every other name is resolved over the
221    /// configured transport, so once a client opts in, its lookups don't reach the local/system
222    /// resolver at all.
223    pub(crate) async fn resolve(
224        &self,
225        client: &Client,
226        host: &str,
227        port: u16,
228        request_timeout: Option<Duration>,
229    ) -> io::Result<Resolved> {
230        let kind = self.transport.kind();
231        let endpoint = self.transport.resolver_endpoint();
232        let timeout = dns_timeout(request_timeout);
233        log::debug!("resolving {host}:{port} via {kind} ({endpoint})");
234        // Box the query future: resolving a host issues client requests that themselves resolve
235        // DNS (the resolver's own host), so this nests, and the query future holds the resolver
236        // `Conn`s inline. Boxing keeps that frame off the stack so the recursion can't overflow it.
237        let resolved = client
238            .connector()
239            .runtime()
240            .timeout(
241                timeout,
242                self.cache
243                    .resolve_coalesced(host, Box::pin(self.query_host(client, host, port))),
244            )
245            .await
246            .unwrap_or_else(|| {
247                Err(io::Error::new(
248                    ErrorKind::TimedOut,
249                    format!(
250                        "{kind} resolution of {host} via {endpoint} timed out after {timeout:?}; \
251                         the resolver may be unreachable or may not speak {kind}"
252                    ),
253                ))
254            });
255        match &resolved {
256            Ok(r) => log::debug!(
257                "resolved {host} to {} address(es), {} service binding(s)",
258                r.addrs.len(),
259                r.services.len()
260            ),
261            Err(e) => log::debug!("resolution of {host} failed: {e}"),
262        }
263        resolved
264    }
265
266    /// Issue the DNS queries for `host:port` and assemble the resolution plus its cache lifetime.
267    /// Does not touch the cache — coalescing and caching are the caller's concern.
268    async fn query_host(
269        &self,
270        client: &Client,
271        host: &str,
272        port: u16,
273    ) -> io::Result<(Resolved, Duration)> {
274        // The resolver's own host can't be looked up over itself, so it's resolved via the
275        // connector's system resolver instead (or given as an IP, skipping even that). Its TTL is
276        // a fixed bootstrap window since the system answer carries none.
277        if self.transport.resolver_host() == Some(host) {
278            let addrs = client
279                .connector()
280                .resolve(host, port)
281                .await?
282                .into_iter()
283                .map(|addr| addr.ip())
284                .collect();
285            return Ok((
286                Resolved {
287                    addrs,
288                    services: Vec::new(),
289                },
290                BOOTSTRAP_TTL,
291            ));
292        }
293
294        // A, AAAA, and HTTPS are separate questions (resolvers answer only the
295        // first question of a message), issued concurrently over the transport.
296        let (a, (aaaa, https)) = future::try_zip(
297            self.query(client, build_query(host, port, RecordType::A)?),
298            future::try_zip(
299                self.query(client, build_query(host, port, RecordType::AAAA)?),
300                self.query(client, build_query(host, port, RecordType::HTTPS)?),
301            ),
302        )
303        .await?;
304
305        let mut resolved = Resolved::default();
306        let mut min_ttl = MAX_TTL;
307        for (part, ttl) in [a, aaaa, https] {
308            resolved.merge(part);
309            min_ttl = min_ttl.min(ttl);
310        }
311        resolved.services.sort_by_key(|s| s.priority);
312
313        if !resolved.has_addrs() {
314            return Err(io::Error::new(
315                ErrorKind::NotFound,
316                format!("DNS resolver returned no addresses for {host}"),
317            ));
318        }
319
320        Ok((resolved, min_ttl))
321    }
322
323    /// Exchange a single wire-format query for a response over the transport and parse it.
324    async fn query(&self, client: &Client, query: Vec<u8>) -> io::Result<(Resolved, Duration)> {
325        let bytes = self.transport.exchange(client, query).await?;
326        parse_response(&bytes)
327    }
328}
329
330impl DnsTransport {
331    /// A short transport label for diagnostics.
332    fn kind(&self) -> &'static str {
333        match self {
334            DnsTransport::Doh(_) => "DoH",
335            DnsTransport::Dot(_) => "DoT",
336            DnsTransport::Doq(_) => "DoQ",
337        }
338    }
339
340    /// The resolver endpoint URL, for diagnostics.
341    fn resolver_endpoint(&self) -> &Url {
342        match self {
343            DnsTransport::Doh(doh) => doh.resolver(),
344            DnsTransport::Dot(dot) => dot.resolver(),
345            DnsTransport::Doq(doq) => doq.resolver(),
346        }
347    }
348
349    /// The resolver's own host — the one name bootstrapped via the connector rather than resolved
350    /// over the transport. `None` when the resolver is given as a bare IP.
351    fn resolver_host(&self) -> Option<&str> {
352        match self {
353            DnsTransport::Doh(doh) => doh.host(),
354            DnsTransport::Dot(dot) => dot.host(),
355            DnsTransport::Doq(doq) => doq.host(),
356        }
357    }
358
359    /// Carry one wire-format query to the resolver and return the wire-format response bytes.
360    async fn exchange(&self, client: &Client, query: Vec<u8>) -> io::Result<Vec<u8>> {
361        match self {
362            DnsTransport::Doh(doh) => doh.exchange(client, query).await,
363            DnsTransport::Dot(dot) => dot.exchange(client, query).await,
364            DnsTransport::Doq(doq) => doq.exchange(client, query).await,
365        }
366    }
367}
368
369impl Client {
370    /// Build a DoH resolver URL from a full URL or a bare host/IP. A missing scheme defaults to
371    /// `https`, and a missing path defaults to `/dns-query` — the ubiquitous convention, though
372    /// [RFC 8484] leaves the path to out-of-band configuration, so an explicit path is honored.
373    ///
374    /// [RFC 8484]: https://www.rfc-editor.org/rfc/rfc8484
375    fn doh_resolver_url(resolver: &str) -> Url {
376        let mut url = if resolver.contains("://") {
377            Url::parse(resolver)
378        } else {
379            Url::parse(&format!("https://{resolver}"))
380        }
381        .expect("DoH resolver must be a valid URL or host");
382        if matches!(url.path(), "" | "/") {
383            url.set_path("/dns-query");
384        }
385        url
386    }
387
388    /// Assign the client's single encrypted-DNS resolver, warning if one was already configured.
389    /// A client holds one resolver, so a later DNS configurator replaces an earlier one — almost
390    /// always a mistake rather than an intent.
391    fn set_resolver(&mut self, resolver: Resolver) {
392        if self.resolver.is_some() {
393            log::warn!(
394                "replacing an already-configured DNS resolver; encrypted-DNS resolvers are \
395                 mutually exclusive"
396            );
397        }
398        self.resolver = Some(resolver);
399    }
400
401    /// Route all DNS resolution for this client through the given DNS-over-HTTPS
402    /// ([RFC 8484]) resolver, including SVCB/HTTPS records ([RFC 9460]).
403    ///
404    /// `resolver` may be a full URL (`https://1.1.1.1/dns-query`) or a bare host or IP (`1.1.1.1`).
405    /// A missing scheme defaults to `https` and a missing path to `/dns-query`, so `1.1.1.1`,
406    /// `https://1.1.1.1`, and `https://1.1.1.1/dns-query` are equivalent; an explicit path is
407    /// honored, since [RFC 8484] leaves the path to out-of-band configuration. An IP avoids any
408    /// bootstrap lookup; a hostname is resolved once via the underlying connector and then cached
409    /// like any other name.
410    ///
411    /// A client holds a single DNS resolver; calling [`with_doh3`](Client::with_doh3),
412    /// [`with_dot`](Client::with_dot), or [`with_doq`](Client::with_doq) after this replaces it and
413    /// logs a warning.
414    ///
415    /// # Panics
416    ///
417    /// Panics if `resolver` is neither a valid URL nor a valid host.
418    ///
419    /// [RFC 8484]: https://www.rfc-editor.org/rfc/rfc8484
420    /// [RFC 9460]: https://www.rfc-editor.org/rfc/rfc9460
421    #[must_use]
422    pub fn with_doh(mut self, resolver: impl AsRef<str>) -> Self {
423        let url = Self::doh_resolver_url(resolver.as_ref());
424        self.set_resolver(Resolver::doh(url));
425        self
426    }
427
428    /// Route all DNS resolution for this client through the given DNS-over-HTTPS
429    /// ([RFC 8484]) resolver, forcing the connection to the resolver itself onto HTTP/3.
430    ///
431    /// Identical to [`with_doh`](Client::with_doh) except that the connection to the resolver is
432    /// pinned to HTTP/3 rather than negotiated (h1/h2) over ALPN. Use this for resolvers that serve
433    /// DoH over HTTP/3 but don't advertise it via [`Alt-Svc`][altsvc] — which would otherwise leave
434    /// the client on h2 indefinitely. Only the resolver connection is affected; requests to
435    /// resolved hosts pick their protocol from SVCB/Alt-Svc as usual.
436    ///
437    /// A client holds a single DNS resolver; calling [`with_doh`](Client::with_doh),
438    /// [`with_dot`](Client::with_dot), or [`with_doq`](Client::with_doq) after this replaces it and
439    /// logs a warning.
440    ///
441    /// # Panics
442    ///
443    /// Panics if the client is not HTTP/3-capable (build it with [`Client::new_with_quic`]), or if
444    /// `resolver` is neither a valid URL nor a valid host.
445    ///
446    /// [RFC 8484]: https://www.rfc-editor.org/rfc/rfc8484
447    /// [altsvc]: https://www.rfc-editor.org/rfc/rfc7838
448    #[must_use]
449    pub fn with_doh3(mut self, resolver: impl AsRef<str>) -> Self {
450        assert!(
451            self.h3().is_some(),
452            "with_doh3 requires an HTTP/3-capable client; build it with Client::new_with_quic"
453        );
454        let url = Self::doh_resolver_url(resolver.as_ref());
455        self.set_resolver(Resolver::doh3(url));
456        self
457    }
458
459    /// Route all DNS resolution for this client through the given DNS-over-TLS
460    /// ([RFC 7858]) resolver, including SVCB/HTTPS records ([RFC 9460]).
461    ///
462    /// `resolver` may be a full `https://` URL or a bare host or IP (`1.1.1.1`), which expands to
463    /// `https://<host>:853` — the registered DoT port. An IP avoids any bootstrap lookup; a
464    /// hostname is resolved once via the underlying connector and then cached like any other name.
465    ///
466    /// Each lookup opens a one-shot TLS connection to the resolver, so the client must be
467    /// configured with a TLS connector; a plaintext connector fails the lookup (and, because
468    /// resolution is fail-closed, the request) rather than falling back to the system resolver.
469    ///
470    /// A client holds a single DNS resolver; calling [`with_doh`](Client::with_doh),
471    /// [`with_doh3`](Client::with_doh3), or [`with_doq`](Client::with_doq) after this replaces it
472    /// and logs a warning.
473    ///
474    /// # Panics
475    ///
476    /// Panics if `resolver` is neither a valid URL nor a valid host.
477    ///
478    /// [RFC 7858]: https://www.rfc-editor.org/rfc/rfc7858
479    /// [RFC 9460]: https://www.rfc-editor.org/rfc/rfc9460
480    #[must_use]
481    pub fn with_dot(mut self, resolver: impl AsRef<str>) -> Self {
482        let resolver = resolver.as_ref();
483        let url = if resolver.contains("://") {
484            Url::parse(resolver)
485        } else {
486            Url::parse(&format!("https://{resolver}:853"))
487        }
488        .expect("with_dot requires a valid resolver host or URL");
489        self.set_resolver(Resolver::dot(url));
490        self
491    }
492
493    /// Route all DNS resolution for this client through the given DNS-over-QUIC
494    /// ([RFC 9250]) resolver, including SVCB/HTTPS records ([RFC 9460]).
495    ///
496    /// `resolver` may be a full `https://` URL or a bare host or IP (`1.1.1.1`), which expands to
497    /// `https://<host>:853` — the registered DoQ port. An IP avoids any bootstrap lookup; a
498    /// hostname is resolved once via the underlying connector and then cached like any other name.
499    ///
500    /// Queries ride a cached, multiplexed QUIC connection (one bidirectional stream per query)
501    /// established over this client's HTTP/3 UDP endpoint with the `doq` ALPN, independent of the
502    /// HTTP/3 connection pool. Resolution is fail-closed, like [`with_doh`](Client::with_doh).
503    ///
504    /// A client holds a single DNS resolver; calling [`with_doh`](Client::with_doh),
505    /// [`with_doh3`](Client::with_doh3), or [`with_dot`](Client::with_dot) after this replaces it
506    /// and logs a warning.
507    ///
508    /// # Panics
509    ///
510    /// Panics if the client is not HTTP/3-capable (build it with [`Client::new_with_quic`]), or if
511    /// `resolver` is neither a valid URL nor a valid host.
512    ///
513    /// [RFC 9250]: https://www.rfc-editor.org/rfc/rfc9250
514    /// [RFC 9460]: https://www.rfc-editor.org/rfc/rfc9460
515    #[must_use]
516    pub fn with_doq(mut self, resolver: impl AsRef<str>) -> Self {
517        assert!(
518            self.h3().is_some(),
519            "with_doq requires an HTTP/3-capable client; build it with Client::new_with_quic"
520        );
521        let resolver = resolver.as_ref();
522        let url = if resolver.contains("://") {
523            Url::parse(resolver)
524        } else {
525            Url::parse(&format!("https://{resolver}:853"))
526        }
527        .expect("with_doq requires a valid resolver host or URL");
528        self.set_resolver(Resolver::doq(url));
529        self
530    }
531}
532
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};

    /// Exercises the cache's three observable behaviors: a live entry is returned, a zero TTL is
    /// floored rather than expiring immediately, and an expired entry is both hidden and evicted.
    #[test]
    fn cache_round_trips_and_expires() {
        let cache = DnsCache::default();
        let resolved = Resolved {
            addrs: vec![
                IpAddr::V4(Ipv4Addr::new(192, 0, 2, 9)),
                IpAddr::V6(Ipv6Addr::LOCALHOST),
            ],
            services: Vec::new(),
        };

        cache.insert("example.com", resolved.clone(), Duration::from_secs(300));
        assert_eq!(cache.get("example.com").unwrap().addrs.len(), 2);
        assert!(cache.get("absent.example").is_none());

        // A zero TTL is floored to MIN_TTL, so it's briefly live rather than
        // immediately expired.
        cache.insert("floor.example", resolved.clone(), Duration::ZERO);
        assert!(cache.get("floor.example").is_some());

        // Expiry: `insert` can't produce a past expiry (the TTL is floored), so plant an
        // already-stale entry directly instead of sleeping. `checked_sub` guards the unlikely
        // case where the monotonic clock can't represent an instant one second ago.
        if let Some(expiry) = Instant::now().checked_sub(Duration::from_secs(1)) {
            cache
                .entries
                .insert("stale.example".into(), CacheEntry { resolved, expiry });
            assert!(cache.get("stale.example").is_none());
            assert!(!cache.entries.contains_key("stale.example"));
        }
    }
}
558}