Skip to main content

trillium_http/headers/
unknown_header_name.rs

1use super::{HeaderName, HeaderNameInner::UnknownHeader};
2use hashbrown::{Equivalent, HashSet};
3use smartcow::SmartCow;
4use std::{
5    cmp::Ordering,
6    fmt::{self, Debug, Display, Formatter},
7    hash::{Hash, Hasher},
8    ops::Deref,
9    sync::{OnceLock, RwLock},
10};
11
12#[derive(Clone)]
13pub(crate) struct UnknownHeaderName<'a>(SmartCow<'a>);
14
15impl UnknownHeaderName<'_> {
16    pub(crate) fn is_valid_lower(&self) -> bool {
17        // Lowercase tchar per RFC 9110 §5.6.2 — the uppercase-letter branch is dropped
18        // because HTTP/2 and HTTP/3 require field names to be lowercase on the wire
19        // (RFC 9113 §8.2.1, RFC 9114 §4.2). The set otherwise matches `is_tchar`.
20        !self.is_empty()
21            && self.chars().all(|c| {
22                matches!(c,
23                    'a'..='z'
24                    | '0'..='9'
25                    | '!'
26                    | '#'
27                    | '$'
28                    | '%'
29                    | '&'
30                    | '\''
31                    | '*'
32                    | '+'
33                    | '-'
34                    | '.'
35                    | '^'
36                    | '_'
37                    | '`'
38                    | '|'
39                    | '~',
40                )
41            })
42    }
43
44    pub(crate) fn into_lower(self) -> Self {
45        match self.0 {
46            SmartCow::Borrowed(borrowed) => {
47                if let Some(first_upper) = borrowed.chars().position(|c| c.is_ascii_uppercase()) {
48                    Self(SmartCow::Owned(
49                        borrowed[..first_upper]
50                            .chars()
51                            .chain(
52                                borrowed[first_upper..]
53                                    .chars()
54                                    .map(|c| c.to_ascii_lowercase()),
55                            )
56                            .collect(),
57                    ))
58                } else {
59                    Self(SmartCow::Borrowed(borrowed))
60                }
61            }
62            SmartCow::Owned(mut smart_string) => {
63                smart_string.make_ascii_lowercase();
64                Self(SmartCow::Owned(smart_string))
65            }
66        }
67    }
68}
69
70impl PartialOrd for UnknownHeaderName<'_> {
71    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
72        Some(self.cmp(other))
73    }
74}
75
76impl Ord for UnknownHeaderName<'_> {
77    fn cmp(&self, other: &Self) -> Ordering {
78        self.0.cmp(&*other.0)
79    }
80}
81
82impl PartialEq for UnknownHeaderName<'_> {
83    fn eq(&self, other: &Self) -> bool {
84        self.0.eq_ignore_ascii_case(&other.0)
85    }
86}
87
88impl Eq for UnknownHeaderName<'_> {}
89
90impl Hash for UnknownHeaderName<'_> {
91    fn hash<H: Hasher>(&self, state: &mut H) {
92        for c in self.0.as_bytes() {
93            c.to_ascii_lowercase().hash(state);
94        }
95    }
96}
97
98impl Debug for UnknownHeaderName<'_> {
99    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
100        Debug::fmt(&self.0, f)
101    }
102}
103
104impl Display for UnknownHeaderName<'_> {
105    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
106        Display::fmt(&self.0, f)
107    }
108}
109
110impl<'a> From<UnknownHeaderName<'a>> for HeaderName<'a> {
111    fn from(value: UnknownHeaderName<'a>) -> Self {
112        HeaderName(UnknownHeader(value))
113    }
114}
115
116impl<'a> From<&'a UnknownHeaderName<'_>> for HeaderName<'a> {
117    fn from(value: &'a UnknownHeaderName<'_>) -> Self {
118        HeaderName(UnknownHeader(value.reborrow()))
119    }
120}
121
/// Returns `true` when `c` is one of the characters accepted in a header-name token:
/// an ASCII letter, an ASCII digit, or one of ``! # $ % & ' * + - . ^ _ ` | ~``.
fn is_tchar(c: char) -> bool {
    if c.is_ascii_alphanumeric() {
        return true;
    }
    matches!(
        c,
        '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '.' | '^' | '_' | '`' | '|' | '~'
    )
}
145
146impl UnknownHeaderName<'_> {
147    pub(crate) fn is_valid(&self) -> bool {
148        // token per https://www.rfc-editor.org/rfc/rfc9110#section-5.1
149        // tchar per https://www.rfc-editor.org/rfc/rfc9110#section-5.6.2
150        !self.is_empty() && self.0.chars().all(is_tchar)
151    }
152
153    pub(crate) fn into_owned(self) -> UnknownHeaderName<'static> {
154        UnknownHeaderName(self.0.into_owned())
155    }
156}
157
158impl<'a> UnknownHeaderName<'a> {
159    pub(crate) fn reborrow<'b: 'a>(&'b self) -> UnknownHeaderName<'b> {
160        Self(self.0.borrow())
161    }
162}
163
164impl From<String> for UnknownHeaderName<'static> {
165    fn from(value: String) -> Self {
166        Self(value.into())
167    }
168}
169
170impl<'a> From<&'a str> for UnknownHeaderName<'a> {
171    fn from(value: &'a str) -> Self {
172        Self(value.into())
173    }
174}
175
176impl<'a> From<SmartCow<'a>> for UnknownHeaderName<'a> {
177    fn from(value: SmartCow<'a>) -> Self {
178        Self(value)
179    }
180}
181
182impl<'a> From<UnknownHeaderName<'a>> for SmartCow<'a> {
183    fn from(value: UnknownHeaderName<'a>) -> Self {
184        value.0
185    }
186}
187
188impl Deref for UnknownHeaderName<'_> {
189    type Target = str;
190
191    fn deref(&self) -> &Self::Target {
192        &self.0
193    }
194}
195
196impl Equivalent<UnknownHeaderName<'_>> for &UnknownHeaderName<'_> {
197    fn equivalent(&self, key: &UnknownHeaderName<'_>) -> bool {
198        key.eq_ignore_ascii_case(self)
199    }
200}
201
/// Process-global table of canonical lowercased `&'static str`s for literal header
/// names whose source spelling contained uppercase characters. Pure-lowercase
/// literals bypass this table entirely (no need to intern — they're already
/// `&'static`).
///
/// `RwLock` because once the application has exercised each uppercase literal
/// once, the table is steady-state read-only. The stored key type hashes and
/// compares case-insensitively, so the table can be probed with the original
/// uppercase input without first allocating its lowercased form.
///
/// The number of entries is bounded above by the number of distinct lowercased
/// forms of the uppercase-containing literal names in the binary.
static LOWER_INTERN: OnceLock<RwLock<HashSet<InternKey>>> = OnceLock::new();
215
/// Key type for the intern table: a `&'static str` whose `Hash` and `Eq` ignore
/// ASCII case.
///
/// Because hashing and comparison both fold case, the interner can store the
/// canonical lowercased string once and still be probed with the caller's original
/// casing — no lowercased copy has to be built just to perform the lookup.
#[derive(Copy, Clone, Eq)]
struct InternKey(&'static str);

impl PartialEq for InternKey {
    /// Equal when the two strings match ignoring ASCII case.
    fn eq(&self, other: &Self) -> bool {
        str::eq_ignore_ascii_case(self.0, other.0)
    }
}

impl Hash for InternKey {
    /// Writes each byte, ASCII-lowercased, to the hasher so that keys differing only
    /// in ASCII case produce the same hash.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.0
            .bytes()
            .map(|byte| byte.to_ascii_lowercase())
            .for_each(|folded| state.write_u8(folded));
    }
}
236
237fn intern_table() -> &'static RwLock<HashSet<InternKey>> {
238    LOWER_INTERN.get_or_init(|| RwLock::new(HashSet::new()))
239}
240
241/// Return a canonical lowercased `&'static str` for `s`.
242///
243/// - If `s` is already all-lowercase: returns `s` directly. No lock, no alloc. Pure-lowercase
244///   literals are already `&'static`; they do not need to be interned. (The QPACK observer keys by
245///   content equality, not pointer identity, so two distinct `&'static str` pointers with identical
246///   bytes collide in the same observer entry.)
247///
248/// - Otherwise: probes the intern table with a case-insensitive hash. On hit, returns the stored
249///   canonical pointer. On miss, allocates the lowercased form, leaks it to obtain a `&'static
250///   str`, and inserts.
251///
252/// The leak is bounded by the number of distinct uppercase-containing
253/// lowercased literals in the binary — typically zero or single digits for
254/// well-behaved code.
255fn intern_lowercase(s: &'static str) -> &'static str {
256    if !s.bytes().any(|b| b.is_ascii_uppercase()) {
257        return s;
258    }
259    let probe = InternKey(s);
260    let table = intern_table();
261    {
262        let read = table.read().expect("intern table poisoned");
263        if let Some(hit) = read.get(&probe) {
264            return hit.0;
265        }
266    }
267    let lowered: String = s.chars().map(|c| c.to_ascii_lowercase()).collect();
268    let leaked: &'static str = Box::leak(lowered.into_boxed_str());
269    let mut write = table.write().expect("intern table poisoned");
270    if let Some(hit) = write.get(&probe) {
271        return hit.0;
272    }
273    write.insert(InternKey(leaked));
274    leaked
275}
276
277impl UnknownHeaderName<'static> {
278    /// Recover the underlying `&'static str` if this name is backed by a borrowed
279    /// reference into static memory (a literal or an interned lowercased literal).
280    /// Returns `None` for runtime-allocated names (`SmartCow::Owned`), which are
281    /// excluded from cross-connection QPACK observer tracking.
282    pub(crate) fn as_static_str(&self) -> Option<&'static str> {
283        match self.0 {
284            SmartCow::Borrowed(s) => Some(s),
285            SmartCow::Owned(_) => None,
286        }
287    }
288
289    /// Like [`Self::into_lower`], but for the uppercase-borrowed-static case it
290    /// interns the lowercased form via [`intern_lowercase`] instead of allocating
291    /// an Owned copy. The result is therefore *always* `SmartCow::Borrowed` (and
292    /// hence `&'static str`-recoverable via [`as_static_str`]) when the input was
293    /// `SmartCow::Borrowed`. `Owned` inputs fall back to the regular
294    /// [`Self::into_lower`] path and are not interned.
295    ///
296    /// [`as_static_str`]: Self::as_static_str
297    pub(crate) fn into_lower_static(self) -> Self {
298        match self.0 {
299            SmartCow::Borrowed(s) => {
300                if s.bytes().any(|b| b.is_ascii_uppercase()) {
301                    Self(SmartCow::Borrowed(intern_lowercase(s)))
302                } else {
303                    Self(SmartCow::Borrowed(s))
304                }
305            }
306            SmartCow::Owned(_) => self.into_lower(),
307        }
308    }
309}
310
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps a static literal as a borrowed header name.
    fn borrowed(literal: &'static str) -> UnknownHeaderName<'static> {
        UnknownHeaderName(SmartCow::Borrowed(literal))
    }

    #[test]
    fn intern_idempotent() {
        let first = intern_lowercase("X-Idempotent-Header");
        let second = intern_lowercase("X-Idempotent-Header");
        assert_eq!(first, "x-idempotent-header");
        assert!(
            std::ptr::eq(first, second),
            "intern must return identical &'static str on repeat uppercase input",
        );
    }

    #[test]
    fn intern_lowercase_input_is_passthrough() {
        // A pure-lowercase literal never reaches the intern table: the caller's own
        // pointer comes straight back. The observer keys by content, not pointer
        // identity, so cross-casing duplicates collide on content alone.
        let original: &'static str = "x-already-lowercase";
        let returned = intern_lowercase(original);
        assert!(
            std::ptr::eq(returned, original),
            "pure-lowercase literal should bypass interning entirely",
        );
    }

    #[test]
    fn intern_uppercase_then_lowercase_content_equal() {
        let from_upper = intern_lowercase("X-Cross-Casing-Header");
        let from_lower = intern_lowercase("x-cross-casing-header");
        // Only content equality is asserted; pointer identity may or may not hold
        // depending on which call ran first, and observer correctness does not
        // depend on it.
        assert_eq!(from_upper, from_lower);
    }

    #[test]
    fn intern_case_insensitive_hash_collapses_uppercase_variants() {
        // Two different uppercase casings of the same lowercased content must
        // intern to the same pointer.
        let title_case = intern_lowercase("X-Mixed-Casing");
        let shouty_middle = intern_lowercase("x-MIXED-casing");
        assert!(
            std::ptr::eq(title_case, shouty_middle),
            "case-insensitive hash must collapse uppercase variants",
        );
    }

    #[test]
    fn into_lower_static_borrowed_uppercase() {
        let lowered = borrowed("X-Static-Upper").into_lower_static();
        assert_eq!(lowered.as_static_str(), Some("x-static-upper"));
    }

    #[test]
    fn into_lower_static_borrowed_lowercase_passthrough() {
        let original: &'static str = "x-static-lower";
        let lowered = borrowed(original).into_lower_static();
        let recovered = lowered.as_static_str().unwrap();
        assert!(
            std::ptr::eq(recovered, original),
            "already-lowercase literal should pass through without interning",
        );
    }

    #[test]
    fn into_lower_static_owned_stays_owned() {
        let lowered = UnknownHeaderName::from(String::from("X-Owned-Upper")).into_lower_static();
        assert_eq!(lowered.as_static_str(), None);
        assert_eq!(&*lowered, "x-owned-upper");
    }
}
384        let lowered = owned.into_lower_static();
385        assert_eq!(lowered.as_static_str(), None);
386        assert_eq!(&*lowered, "x-owned-upper");
387    }
388}