From 3a47ef6aed372e81fc6defc3b02d19879fe8a0fc Mon Sep 17 00:00:00 2001 From: Jose Quintana <1700322+joseluisq@users.noreply.github.com> Date: Sun, 9 Jul 2023 11:41:47 +0200 Subject: [PATCH] feat: replacements support for URL Rewrites destination (#235) * feat: placeholders support for url rewrites destination example: ```toml [advanced] [[advanced.rewrites]] source = "**/{*}.{png,gif}" destination = "/assets/$1.$2" * docs: rewrite destination replacements description [skip ci] --- Cargo.lock | 22 ++++++++++++++++++---- Cargo.toml | 6 ++++-- docs/content/features/url-rewrites.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++------- src/handler.rs | 44 +++++++++++++++++++++++++++++++++++++++----- src/settings/mod.rs | 21 ++++++++++++++++++++- tests/toml/config.toml | 4 ++-- 6 files changed, 135 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c2bf5b6..0d95477 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1035,9 +1035,21 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.4" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + "aho-corasick 1.0.2", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83d3daa6976cffb758ec878f108ba0e062a45b2d6ca3a2cca965338855476caf" dependencies = [ "aho-corasick 1.0.2", "memchr", @@ -1046,9 +1058,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" +checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846" [[package]] name = "ring" @@ -1285,6 
+1297,7 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" name = "static-web-server" version = "2.19.0" dependencies = [ + "aho-corasick 1.0.2", "anyhow", "async-compression", "bcrypt", @@ -1304,6 +1317,7 @@ dependencies = [ "num_cpus", "percent-encoding", "pin-project", + "regex", "rustls-pemfile", "serde", "serde_ignored", diff --git a/Cargo.toml b/Cargo.toml index 3df089f..f96057b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,10 +52,13 @@ basic-auth = ["bcrypt"] fallback-page = [] [dependencies] +aho-corasick = "1.0" anyhow = "1.0" async-compression = { version = "0.4", default-features = false, optional = true, features = ["brotli", "deflate", "gzip", "zstd", "tokio"] } bcrypt = { version = "0.14", optional = true } bytes = "1.4" +chrono = { version = "0.4", default-features = false, features = ["std", "clock"], optional = true } +clap = { version = "4.3", features = ["derive", "env"] } form_urlencoded = "1.2" futures-util = { version = "0.3", default-features = false, features = ["sink"] } globset = { version = "0.4", features = ["serde1"] } @@ -69,12 +72,11 @@ mime_guess = "2.0" num_cpus = { version = "1.15" } percent-encoding = "2.3" pin-project = "1.1" +regex = "1.9" rustls-pemfile = { version = "1.0", optional = true } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_ignored = "0.1" serde_repr = "0.1" -clap = { version = "4.3", features = ["derive", "env"] } -chrono = { version = "0.4", default-features = false, features = ["std", "clock"], optional = true } tokio = { version = "1", default-features = false, features = ["rt-multi-thread", "macros", "fs", "io-util", "signal"] } tokio-rustls = { version = "0.24", optional = true } tokio-util = { version = "0.7", default-features = false, features = ["io"] } diff --git a/docs/content/features/url-rewrites.md b/docs/content/features/url-rewrites.md index 9e23ef1..f653746 100644 --- a/docs/content/features/url-rewrites.md +++ 
b/docs/content/features/url-rewrites.md @@ -6,24 +6,40 @@ URI rewrites are particularly useful with pattern matching ([globs](https://en.w ## Structure -The URL rewrite rules should be defined mainly as an [Array of Tables](https://toml.io/en/v1.0.0#array-of-tables). +URL rewrite rules should be defined mainly as an [Array of Tables](https://toml.io/en/v1.0.0#array-of-tables). Each table entry should have two key/value pairs: -- One `source` key containing a string _glob pattern_. -- One `destination` string containing the local file path. -- Optional `redirect` number containing the HTTP response code. +- `source`: a key containing a string _glob pattern_. +- `destination`: a file path with optional replacements (placeholders). +- `redirect`: an optional number containing the HTTP response code (redirection). !!! info "Note" The incoming request(s) will reach the `destination` only if the request(s) URI matches the `source` pattern. ### Source -The source is a [Glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)) that should match against the URI that is requesting a resource file. +It's a [Glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)) that should match against the URI that is requesting a resource file. + +The glob pattern functionality is powered by the [globset](https://docs.rs/globset/latest/globset/) crate which supports Standard Unix-style glob syntax. + +!!! tip "Glob pattern syntax" + For more details about the Glob pattern syntax check out https://docs.rs/globset/latest/globset/#syntax ### Destination -A local file path must exist. It has to look something like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory. +The value can be either a local file path that maps to an existing file on the system or an external URL. +It could look like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory. 
+ +#### Replacements + +Additionally, a `destination` supports replacements for every Glob pattern group that matches against the `source`. + +Replacements are numbered from `0` to `n` and are written as a dollar sign followed by an index (the Glob pattern group occurrence); `$0` stands for the entire match. + +!!! tip "Group your Glob patterns" + When using replacements, also group your Glob patterns by surrounding them with curly braces so that every group maps to its corresponding replacement.
+ For example: `source = "**/{*}.{png,gif}"` ### Redirect @@ -44,8 +60,37 @@ The values can be: source = "**/*.{png,ico,gif}" destination = "/assets/generic1.png" +# a. Route rewrite example with redirection [[advanced.rewrites]] source = "**/*.{jpg,jpeg}" destination = "/images/generic2.png" -redirect = 302 +## NOTE: `redirect` can be omitted too +redirect = 301 + +# b. Route rewrite example with destination replacements +[[advanced.rewrites]] +## Note that we're using curly braces to group the `*` wildcard. +## See https://docs.rs/globset/latest/globset/#syntax +source = "**/{*}.{png,gif}" +## For example, the destination will result in `/assets/abcdef.png` +destination = "/assets/$1.$2" +``` + +If you request something like: + +```sh +curl -I http://localhost/abcdef.png +``` + +Then the Server logs should look like this: + +```log +2023-07-08T20:31:36.606035Z INFO static_web_server::handler: incoming request: method=HEAD uri=/abcdef.png +2023-07-08T20:31:36.608439Z DEBUG static_web_server::handler: url rewrites glob patterns: ["$0", "$1", "$2"] +2023-07-08T20:31:36.608491Z DEBUG static_web_server::handler: url rewrites regex equivalent: (?-u:\b)(?:/?|.*/)(.*)\.(gif|png)$ +2023-07-08T20:31:36.608525Z DEBUG static_web_server::handler: url rewrites glob pattern captures: ["abcdef.png", "abcdef", "png"] +2023-07-08T20:31:36.608561Z DEBUG static_web_server::handler: url rewrites glob pattern destination: "/assets/$1.$2" +2023-07-08T20:31:36.609655Z DEBUG static_web_server::handler: url rewrites glob patterns destination replaced: "/assets/abcdef.png" +2023-07-08T20:31:36.609735Z TRACE static_web_server::static_files: dir: base="public", route="assets/abcdef.png" +...
``` diff --git a/src/handler.rs b/src/handler.rs index d5abf44..066e1b2 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -99,7 +99,7 @@ impl RequestHandler { let uri = req.uri(); let base_path = &self.opts.root_dir; - let mut uri_path = uri.path(); + let mut uri_path = uri.path().to_owned(); let uri_query = uri.query(); #[cfg(feature = "directory-listing")] let dir_listing = self.opts.dir_listing; @@ -205,7 +205,7 @@ impl RequestHandler { // Advanced options if let Some(advanced) = &self.opts.advanced_opts { // Redirects - if let Some(parts) = redirects::get_redirection(uri_path, &advanced.redirects) { + if let Some(parts) = redirects::get_redirection(&uri_path, &advanced.redirects) { let (uri_dest, status) = parts; match HeaderValue::from_str(uri_dest) { Ok(loc) => { @@ -232,10 +232,42 @@ impl RequestHandler { } // Rewrites - if let Some(rewrite) = rewrites::rewrite_uri_path(uri_path, &advanced.rewrites) { - uri_path = rewrite.destination.as_str(); + if let Some(rewrite) = + rewrites::rewrite_uri_path(&uri_path.clone(), &advanced.rewrites) + { + // Rewrites: Handle replacements (placeholders) + if let Some(regex_caps) = rewrite.source.captures(&uri_path) { + let caps_range = 0..regex_caps.len(); + let caps = caps_range + .clone() + .filter_map(|i| regex_caps.get(i).map(|s| s.as_str())) + .collect::>(); + + let patterns = caps_range + .map(|i| format!("${}", i)) + .collect::>(); + + let dest = rewrite.destination.as_str(); + + tracing::debug!("url rewrites glob pattern: {:?}", patterns); + tracing::debug!("url rewrites regex equivalent: {}", rewrite.source); + tracing::debug!("url rewrites glob pattern captures: {:?}", caps); + tracing::debug!("url rewrites glob pattern destination: {:?}", dest); + + if let Ok(ac) = aho_corasick::AhoCorasick::new(patterns) { + if let Ok(dest) = ac.try_replace_all(dest, &caps) { + tracing::debug!( + "url rewrites glob pattern destination replaced: {:?}", + dest + ); + uri_path = dest; + } + } + } + + // Rewrites: Handle 
redirections if let Some(redirect_type) = &rewrite.redirect { - let loc = match HeaderValue::from_str(uri_path) { + let loc = match HeaderValue::from_str(&uri_path) { Ok(val) => val, Err(err) => { tracing::error!("invalid header value from current uri: {:?}", err); @@ -259,6 +291,8 @@ impl RequestHandler { } } + let uri_path = &uri_path; + // Static files match static_files::handle(&HandleOpts { method, diff --git a/src/settings/mod.rs b/src/settings/mod.rs index b259bf3..03d786a 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -10,6 +10,7 @@ use clap::Parser; use globset::{Glob, GlobMatcher}; use headers::HeaderMap; use hyper::StatusCode; +use regex::Regex; use crate::{Context, Result}; @@ -34,7 +35,7 @@ pub struct Headers { /// The `Rewrites` file options. pub struct Rewrites { /// Source pattern glob matcher - pub source: GlobMatcher, + pub source: Regex, /// A local file that must exist pub destination: String, /// Optional redirect type either 301 (Moved Permanently) or 302 (Found). @@ -328,6 +329,24 @@ impl Settings { })? .compile_matcher(); + // NOTE: we don’t need Unicode-aware word boundary assertions, + // therefore we use (?-u:\b) instead of (?-u) + // so the former uses an ASCII-only definition of a word character. 
+ // https://docs.rs/regex/latest/regex/#unicode-can-impact-memory-usage-and-search-speed + let pattern = source.glob().regex().replace("(?-u)^", "(?-u:\\b)"); + tracing::debug!( + "url rewrites glob pattern: {}", + &rewrites_entry.source + ); + tracing::debug!("url rewrites regex equivalent: {}", pattern); + + let source = Regex::new(&pattern).with_context(|| { + format!( + "can not compile regex pattern equivalent for rewrite source: {}", + &pattern + ) + })?; + rewrites_vec.push(Rewrites { source, destination: rewrites_entry.destination.to_owned(), diff --git a/tests/toml/config.toml b/tests/toml/config.toml index f551404..8935779 100644 --- a/tests/toml/config.toml +++ b/tests/toml/config.toml @@ -109,8 +109,8 @@ kind = 302 ### URL Rewrites [[advanced.rewrites]] -source = "**/*.{png,gif}" -destination = "/assets/favicon.ico" +source = "**/{*}.{png,gif}" +destination = "/assets/$1.$2" # redirect = 301 [[advanced.rewrites]] -- libgit2 1.7.2