Skip to main content

trillium_http/headers/
unknown_header_name.rs

1use super::{HeaderName, HeaderNameInner::UnknownHeader};
2use hashbrown::{Equivalent, HashSet};
3use smartcow::SmartCow;
4use std::{
5    cmp::Ordering,
6    fmt::{self, Debug, Display, Formatter},
7    hash::{Hash, Hasher},
8    ops::Deref,
9    sync::{OnceLock, RwLock},
10};
11
12#[derive(Clone)]
13pub(crate) struct UnknownHeaderName<'a>(SmartCow<'a>);
14
15impl UnknownHeaderName<'_> {
16    pub(crate) fn is_valid_lower(&self) -> bool {
17        // Lowercase tchar — the uppercase-letter branch is dropped because HTTP/2 and
18        // HTTP/3 require field names to be lowercase on the wire. Otherwise matches
19        // `is_tchar`.
20        !self.is_empty()
21            && self.chars().all(|c| {
22                matches!(c,
23                    'a'..='z'
24                    | '0'..='9'
25                    | '!'
26                    | '#'
27                    | '$'
28                    | '%'
29                    | '&'
30                    | '\''
31                    | '*'
32                    | '+'
33                    | '-'
34                    | '.'
35                    | '^'
36                    | '_'
37                    | '`'
38                    | '|'
39                    | '~',
40                )
41            })
42    }
43
44    pub(crate) fn into_lower(self) -> Self {
45        match self.0 {
46            SmartCow::Borrowed(borrowed) => {
47                if let Some(first_upper) = borrowed.chars().position(|c| c.is_ascii_uppercase()) {
48                    Self(SmartCow::Owned(
49                        borrowed[..first_upper]
50                            .chars()
51                            .chain(
52                                borrowed[first_upper..]
53                                    .chars()
54                                    .map(|c| c.to_ascii_lowercase()),
55                            )
56                            .collect(),
57                    ))
58                } else {
59                    Self(SmartCow::Borrowed(borrowed))
60                }
61            }
62            SmartCow::Owned(mut smart_string) => {
63                smart_string.make_ascii_lowercase();
64                Self(SmartCow::Owned(smart_string))
65            }
66        }
67    }
68}
69
70impl PartialOrd for UnknownHeaderName<'_> {
71    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
72        Some(self.cmp(other))
73    }
74}
75
76impl Ord for UnknownHeaderName<'_> {
77    fn cmp(&self, other: &Self) -> Ordering {
78        self.0.cmp(&*other.0)
79    }
80}
81
82impl PartialEq for UnknownHeaderName<'_> {
83    fn eq(&self, other: &Self) -> bool {
84        self.0.eq_ignore_ascii_case(&other.0)
85    }
86}
87
88impl Eq for UnknownHeaderName<'_> {}
89
90impl Hash for UnknownHeaderName<'_> {
91    fn hash<H: Hasher>(&self, state: &mut H) {
92        for c in self.0.as_bytes() {
93            c.to_ascii_lowercase().hash(state);
94        }
95    }
96}
97
98impl Debug for UnknownHeaderName<'_> {
99    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
100        Debug::fmt(&self.0, f)
101    }
102}
103
104impl Display for UnknownHeaderName<'_> {
105    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
106        Display::fmt(&self.0, f)
107    }
108}
109
110impl<'a> From<UnknownHeaderName<'a>> for HeaderName<'a> {
111    fn from(value: UnknownHeaderName<'a>) -> Self {
112        HeaderName(UnknownHeader(value))
113    }
114}
115
116impl<'a> From<&'a UnknownHeaderName<'_>> for HeaderName<'a> {
117    fn from(value: &'a UnknownHeaderName<'_>) -> Self {
118        HeaderName(UnknownHeader(value.reborrow()))
119    }
120}
121
/// Returns `true` if `c` is an ASCII letter, an ASCII digit, or one of the
/// fifteen punctuation characters ``! # $ % & ' * + - . ^ _ ` | ~``.
fn is_tchar(c: char) -> bool {
    // The non-alphanumeric members of the accepted set.
    const SYMBOLS: &str = "!#$%&'*+-.^_`|~";
    c.is_ascii_alphanumeric() || SYMBOLS.contains(c)
}
145
146impl UnknownHeaderName<'_> {
147    pub(crate) fn is_valid(&self) -> bool {
148        !self.is_empty() && self.0.chars().all(is_tchar)
149    }
150
151    pub(crate) fn into_owned(self) -> UnknownHeaderName<'static> {
152        UnknownHeaderName(self.0.into_owned())
153    }
154}
155
156impl<'a> UnknownHeaderName<'a> {
157    pub(crate) fn reborrow<'b: 'a>(&'b self) -> UnknownHeaderName<'b> {
158        Self(self.0.borrow())
159    }
160}
161
162impl From<String> for UnknownHeaderName<'static> {
163    fn from(value: String) -> Self {
164        Self(value.into())
165    }
166}
167
168impl<'a> From<&'a str> for UnknownHeaderName<'a> {
169    fn from(value: &'a str) -> Self {
170        Self(value.into())
171    }
172}
173
174impl<'a> From<SmartCow<'a>> for UnknownHeaderName<'a> {
175    fn from(value: SmartCow<'a>) -> Self {
176        Self(value)
177    }
178}
179
180impl<'a> From<UnknownHeaderName<'a>> for SmartCow<'a> {
181    fn from(value: UnknownHeaderName<'a>) -> Self {
182        value.0
183    }
184}
185
186impl Deref for UnknownHeaderName<'_> {
187    type Target = str;
188
189    fn deref(&self) -> &Self::Target {
190        &self.0
191    }
192}
193
194impl Equivalent<UnknownHeaderName<'_>> for &UnknownHeaderName<'_> {
195    fn equivalent(&self, key: &UnknownHeaderName<'_>) -> bool {
196        key.eq_ignore_ascii_case(self)
197    }
198}
199
200/// Process-global table of canonical lowercased `&'static str` for literal
201/// header names that contained uppercase characters in source. Pure-lowercase
202/// literals bypass this table entirely (no need to intern — they're already
203/// `&'static`).
204///
205/// `RwLock` because once the application has exercised each uppercase literal
206/// once, the table is steady-state read-only. The hasher is case-insensitive so
207/// we can probe with the original uppercase input without first allocating its
208/// lowercased form.
209///
210/// Bounded above by distinct uppercase-containing lowercased literal names in
211/// the binary.
212static LOWER_INTERN: OnceLock<RwLock<HashSet<InternKey>>> = OnceLock::new();
213
/// `&'static str` newtype used as the interner's set element. Its `Hash` and
/// `Eq` ignore ASCII case, which lets the set store one canonical lowercased
/// string and still be probed with any casing of the same name — no
/// lowercased temporary has to be built just for the lookup.
#[derive(Clone, Copy, Eq)]
struct InternKey(&'static str);

/// Keys are equal when their text matches ignoring ASCII case.
impl PartialEq for InternKey {
    fn eq(&self, other: &Self) -> bool {
        let (Self(lhs), Self(rhs)) = (self, other);
        lhs.eq_ignore_ascii_case(rhs)
    }
}

/// Feeds each byte, ASCII-lowercased, to the hasher so that keys equal under
/// `PartialEq` also hash identically.
impl Hash for InternKey {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.0
            .bytes()
            .map(|byte| byte.to_ascii_lowercase())
            .for_each(|lowered| state.write_u8(lowered));
    }
}
234
235fn intern_table() -> &'static RwLock<HashSet<InternKey>> {
236    LOWER_INTERN.get_or_init(|| RwLock::new(HashSet::new()))
237}
238
239/// Return a canonical lowercased `&'static str` for `s`.
240///
241/// - If `s` is already all-lowercase: returns `s` directly. No lock, no alloc.
242/// - Otherwise: probes the intern table with a case-insensitive hash. On hit, returns the stored
243///   canonical pointer. On miss, allocates the lowercased form, leaks it to obtain a `&'static
244///   str`, and inserts.
245///
246/// The leak is bounded by the number of distinct uppercase-containing lowercased
247/// literals in the binary — typically zero or single digits for well-behaved code.
248fn intern_lowercase(s: &'static str) -> &'static str {
249    if !s.bytes().any(|b| b.is_ascii_uppercase()) {
250        return s;
251    }
252    let probe = InternKey(s);
253    let table = intern_table();
254    {
255        let read = table.read().expect("intern table poisoned");
256        if let Some(hit) = read.get(&probe) {
257            return hit.0;
258        }
259    }
260    // Allocate-and-leak inside the write lock so two threads racing for the same
261    // uppercase literal don't both leak: the second arrival finds the first's
262    // insert via the post-acquire `get` and bails before allocating.
263    let mut write = table.write().expect("intern table poisoned");
264    if let Some(hit) = write.get(&probe) {
265        return hit.0;
266    }
267    let lowered: String = s.chars().map(|c| c.to_ascii_lowercase()).collect();
268    let leaked: &'static str = Box::leak(lowered.into_boxed_str());
269    write.insert(InternKey(leaked));
270    leaked
271}
272
273impl UnknownHeaderName<'static> {
274    /// Recover the underlying `&'static str` if this name is backed by a borrowed
275    /// reference into static memory (a literal or an interned lowercased literal).
276    /// Returns `None` for runtime-allocated names (`SmartCow::Owned`).
277    pub(crate) fn as_static_str(&self) -> Option<&'static str> {
278        match self.0 {
279            SmartCow::Borrowed(s) => Some(s),
280            SmartCow::Owned(_) => None,
281        }
282    }
283
284    /// Like [`Self::into_lower`], but for the uppercase-borrowed-static case it
285    /// interns the lowercased form via [`intern_lowercase`] instead of allocating
286    /// an Owned copy. The result is therefore *always* `SmartCow::Borrowed` (and
287    /// hence `&'static str`-recoverable via [`as_static_str`]) when the input was
288    /// `SmartCow::Borrowed`. `Owned` inputs fall back to the regular
289    /// [`Self::into_lower`] path and are not interned.
290    ///
291    /// [`as_static_str`]: Self::as_static_str
292    pub(crate) fn into_lower_static(self) -> Self {
293        match self.0 {
294            SmartCow::Borrowed(s) => {
295                if s.bytes().any(|b| b.is_ascii_uppercase()) {
296                    Self(SmartCow::Borrowed(intern_lowercase(s)))
297                } else {
298                    Self(SmartCow::Borrowed(s))
299                }
300            }
301            SmartCow::Owned(_) => self.into_lower(),
302        }
303    }
304}
305
#[cfg(test)]
mod tests {
    use super::*;

    /// Thin alias so the tests read in terms of "ensure this literal is interned".
    fn ensure_interned(s: &'static str) -> &'static str {
        intern_lowercase(s)
    }

    /// Interning the same uppercase literal twice yields the same pointer.
    #[test]
    fn intern_idempotent() {
        let first = ensure_interned("X-Idempotent-Header");
        let second = ensure_interned("X-Idempotent-Header");
        assert_eq!(first, "x-idempotent-header");
        assert!(
            std::ptr::eq(first, second),
            "intern must return identical &'static str on repeat uppercase input",
        );
    }

    /// An all-lowercase literal skips the table and comes back as the very
    /// pointer the caller passed in.
    #[test]
    fn intern_lowercase_input_is_passthrough() {
        let literal: &'static str = "x-already-lowercase";
        let returned = ensure_interned(literal);
        assert!(
            std::ptr::eq(returned, literal),
            "pure-lowercase literal should bypass interning entirely",
        );
    }

    /// Uppercase and lowercase spellings agree on content. Whether they also
    /// share a pointer depends on call order, so that is not asserted.
    #[test]
    fn intern_uppercase_then_lowercase_content_equal() {
        let from_upper = ensure_interned("X-Cross-Casing-Header");
        let from_lower = ensure_interned("x-cross-casing-header");
        assert_eq!(from_upper, from_lower);
    }

    /// Different uppercase spellings of one lowercased name must resolve to a
    /// single interned pointer.
    #[test]
    fn intern_case_insensitive_hash_collapses_uppercase_variants() {
        let title_cased = ensure_interned("X-Mixed-Casing");
        let shouty_middle = ensure_interned("x-MIXED-casing");
        assert!(
            std::ptr::eq(title_cased, shouty_middle),
            "case-insensitive hash must collapse uppercase variants",
        );
    }

    /// Borrowed uppercase input is lowercased and stays recoverable as a
    /// `&'static str`.
    #[test]
    fn into_lower_static_borrowed_uppercase() {
        let name = UnknownHeaderName(SmartCow::Borrowed("X-Static-Upper"));
        let lowered = name.into_lower_static();
        assert_eq!(lowered.as_static_str(), Some("x-static-upper"));
    }

    /// Borrowed lowercase input comes back as the identical pointer.
    #[test]
    fn into_lower_static_borrowed_lowercase_passthrough() {
        let literal: &'static str = "x-static-lower";
        let lowered = UnknownHeaderName(SmartCow::Borrowed(literal)).into_lower_static();
        let recovered = lowered.as_static_str().unwrap();
        assert!(
            std::ptr::eq(recovered, literal),
            "already-lowercase literal should pass through without interning",
        );
    }

    /// Owned input is lowercased but never interned, so it has no static str.
    #[test]
    fn into_lower_static_owned_stays_owned() {
        let name = UnknownHeaderName::from(String::from("X-Owned-Upper"));
        let lowered = name.into_lower_static();
        assert_eq!(lowered.as_static_str(), None);
        assert_eq!(&*lowered, "x-owned-upper");
    }
}
381}