From 7c66c5ccb6ec6b1a32cd9662854a6f2d813a8310 Mon Sep 17 00:00:00 2001 From: Jose Quintana <1700322+joseluisq@users.noreply.github.com> Date: Tue, 11 Jul 2023 23:32:53 +0200 Subject: [PATCH] feat: replacements support for URL Redirects destination (#239) example: ```toml [advanced] [[advanced.redirects]] source = "**/{*}.{jpg,jpeg,svg}" destination = "http://localhost/images/$1.$2" kind = 301 ``` --- docs/content/features/url-redirects.md | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ docs/content/features/url-rewrites.md | 19 ++++++++++--------- src/handler.rs | 49 ++++++++++++++++++++++++++++++++++++++++--------- src/redirects.rs | 6 ++---- src/settings/mod.rs | 24 +++++++++++++++++++++--- tests/toml/config.toml | 4 ++-- 6 files changed, 127 insertions(+), 33 deletions(-) diff --git a/docs/content/features/url-redirects.md b/docs/content/features/url-redirects.md index d088315..da32d89 100644 --- a/docs/content/features/url-redirects.md +++ b/docs/content/features/url-redirects.md @@ -1,6 +1,6 @@ # URL Redirects -**SWS** provides the ability to redirect request URLs with pattern matching support. +**SWS** provides the ability to redirect request URLs with Glob pattern-matching support. URI redirects are particularly useful with pattern matching ([globs](https://en.wikipedia.org/wiki/Glob_(programming))). Use them for example to prevent broken links if you've moved a page or to shorten URLs. @@ -10,9 +10,9 @@ The URL redirect rules should be defined mainly as an [Array of Tables](https:// Each table entry should have the following key/value pairs: -- One `source` key containing a string _glob pattern_. -- One `destination` string containing the local file path or a full URL. -- One `kind` number containing the HTTP response code. +- `source`: key containing a string _glob pattern_. +- `destination`: local file path or a full URL with optional replacements (placeholders). +- `kind`: optional number containing the HTTP response code (redirection). 
!!! info "Note" The incoming request(s) will reach the `destination` only if the request(s) URI matches the `source` pattern. @@ -21,13 +21,29 @@ Each table entry should have the following key/value pairs: The source is a [Glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)) that should match against the URI that is requesting a resource file. +The glob pattern functionality is powered by the [globset](https://docs.rs/globset/latest/globset/) crate which supports standard Unix-style glob syntax. + +!!! tip "Glob pattern syntax" + For more details about the Glob pattern syntax check out https://docs.rs/globset/latest/globset/#syntax + ### Destination -A local file path must exist. It can be a local path `/some/directory/file.html` or a full URL. It is worth noting that the `/` at the beginning indicates the server's root directory. +The value can be either a local file path that maps to an existing file on the system or an external URL. +It could look like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory. + +#### Replacements + +Additionally, a `destination` supports replacements for every Glob pattern group that matches against the `source`. + +Replacement indices run from `0` to `n` and are written as a dollar sign followed by the index of the matching Glob pattern group (for example `$1`); `$0` refers to the whole match. + +!!! tip "Group your Glob patterns" + When using replacements, also group the parts of your Glob pattern by surrounding them with curly braces so that every group maps to its corresponding replacement.
+ For example: `source = "**/{*}.{jpg,jpeg,svg}"` ### Kind -It indicates the HTTP response code. +It is a number that indicates the HTTP response code (redirect). The values can be: - `301` for "Moved Permanently" @@ -40,13 +56,43 @@ The values can be: ### URL Redirects +# a. Simple route redirect example (existing file) [[advanced.redirects]] source = "**/*.{jpg,jpeg}" destination = "/images/generic1.png" kind = 301 +# b. Simple route redirect example (external URL) [[advanced.redirects]] source = "/index.html" destination = "https://static-web-server.net" kind = 302 + +# c. Simple route redirect example with destination replacements +[[advanced.redirects]] +## Note that we're using curly braces to group the `*` wildcard. +## See https://docs.rs/globset/latest/globset/#syntax +source = "**/{*}.{jpg,jpeg,svg}" +## For example, the destination will result in `http://localhost/assets/abcdef.jpeg` +destination = "http://localhost/assets/$1.$2" +kind = 301 +``` + +If you request something like: + +```sh +curl -I http://localhost:4433/abcdef.jpeg +``` + +Then the server logs should look something like this: + +```log +2023-07-11T21:11:22.217358Z INFO static_web_server::handler: incoming request: method=HEAD uri=/abcdef.jpeg +2023-07-11T21:11:22.217974Z DEBUG static_web_server::handler: url redirects glob pattern: ["$0", "$1", "$2"] +2023-07-11T21:11:22.217992Z DEBUG static_web_server::handler: url redirects regex equivalent: (?-u:\b)(?:/?|.*/)(.*)\.(jpeg|jpg)$ +2023-07-11T21:11:22.218002Z DEBUG static_web_server::handler: url redirects glob pattern captures: ["abcdef.jpeg", "abcdef", "jpeg"] +2023-07-11T21:11:22.218076Z DEBUG static_web_server::handler: url redirects glob pattern destination: "http://localhost/assets/$1.$2" +2023-07-11T21:11:22.218712Z DEBUG static_web_server::handler: url redirects glob pattern destination replaced: "http://localhost/assets/abcdef.jpeg" +2023-07-11T21:11:22.218739Z TRACE static_web_server::handler: uri matches redirects glob pattern, 
redirecting with status '301 Moved Permanently' +... ``` diff --git a/docs/content/features/url-rewrites.md b/docs/content/features/url-rewrites.md index f653746..3576b7f 100644 --- a/docs/content/features/url-rewrites.md +++ b/docs/content/features/url-rewrites.md @@ -1,6 +1,6 @@ # URL Rewrites -**SWS** provides the ability to rewrite request URLs with pattern-matching support. +**SWS** provides the ability to rewrite request URLs (routes) with Glob pattern-matching support. URI rewrites are particularly useful with pattern matching ([globs](https://en.wikipedia.org/wiki/Glob_(programming))), as the server can accept any URL that matches the pattern and let the client-side code decide what to display. @@ -10,9 +10,9 @@ URL rewrite rules should be defined mainly as an [Array of Tables](https://toml. Each table entry should have two key/value pairs: -- `source`: a key containing a string _glob pattern_. -- `destination` a file path with optional replacements (placeholders). -- `redirect` an optional number containing the HTTP response code (redirection). +- `source`: key containing a string _glob pattern_. +- `destination`: file path with optional replacements (placeholders). +- `redirect`: optional number containing the HTTP response code (redirection). !!! info "Note" The incoming request(s) will reach the `destination` only if the request(s) URI matches the `source` pattern. @@ -28,7 +28,7 @@ The glob pattern functionality is powered by the [globset](https://docs.rs/globs ### Destination -The value can be either a local file path that maps to an existing file on the system or an external URL. +The value can be either a local file path that maps to an existing file on the system or an external URL (URLs only in case of redirection). It could look like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory. #### Replacements @@ -56,23 +56,24 @@ The values can be: ### URL Rewrites +# a. 
Simple route rewrite example [[advanced.rewrites]] source = "**/*.{png,ico,gif}" destination = "/assets/generic1.png" -# a. Route rewrite example with redirection +# b. Route rewrite example with redirection [[advanced.rewrites]] source = "**/*.{jpg,jpeg}" destination = "/images/generic2.png" ## NOTE: `redirect` can be omitted too redirect = 301 -# b. Route rewrite example with destination replacements +# c. Route rewrite example with destination replacements [[advanced.rewrites]] ## Note that we're using curly braces to group the `*` wildcard. ## See https://docs.rs/globset/latest/globset/#syntax source = "**/{*}.{png,gif}" -## For exmaple, the destination will result in `/assets/abcdef.png` +## For example, the destination will result in `/assets/abcdef.png` destination = "/assets/$1.$2" ``` @@ -82,7 +83,7 @@ If you request something like: curl -I http://localhost/abcdef.png ``` -Then the Server logs should look like this: +Then the server logs should look something like this: ```log 2023-07-08T20:31:36.606035Z INFO static_web_server::handler: incoming request: method=HEAD uri=/abcdef.png diff --git a/src/handler.rs b/src/handler.rs index 0e8fea3..7448256 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -232,16 +232,47 @@ impl RequestHandler { // Advanced options if let Some(advanced) = &self.opts.advanced_opts { // Redirects - if let Some(parts) = redirects::get_redirection(&uri_path, &advanced.redirects) { - let (uri_dest, status) = parts; - match HeaderValue::from_str(uri_dest) { + if let Some(redirects) = + redirects::get_redirection(uri_path.clone().as_str(), &advanced.redirects) + { + // Redirects: Handle replacements (placeholders) + if let Some(regex_caps) = redirects.source.captures(uri_path.as_str()) { + let caps_range = 0..regex_caps.len(); + let caps = caps_range + .clone() + .filter_map(|i| regex_caps.get(i).map(|s| s.as_str())) + .collect::>(); + + let patterns = caps_range + .map(|i| format!("${}", i)) + .collect::>(); + + let dest = 
redirects.destination.as_str(); + + tracing::debug!("url redirects glob pattern: {:?}", patterns); + tracing::debug!("url redirects regex equivalent: {}", redirects.source); + tracing::debug!("url redirects glob pattern captures: {:?}", caps); + tracing::debug!("url redirects glob pattern destination: {:?}", dest); + + if let Ok(ac) = aho_corasick::AhoCorasick::new(patterns) { + if let Ok(dest) = ac.try_replace_all(dest, &caps) { + tracing::debug!( + "url redirects glob pattern destination replaced: {:?}", + dest + ); + uri_path = dest; + } + } + } + + match HeaderValue::from_str(uri_path.as_str()) { Ok(loc) => { let mut resp = Response::new(Body::empty()); resp.headers_mut().insert(hyper::header::LOCATION, loc); - *resp.status_mut() = *status; + *resp.status_mut() = redirects.kind; tracing::trace!( - "uri matches redirect pattern, redirecting with status {}", - status.canonical_reason().unwrap_or_default() + "uri matches redirects glob pattern, redirecting with status '{}'", + redirects.kind ); return Ok(resp); } @@ -260,10 +291,10 @@ impl RequestHandler { // Rewrites if let Some(rewrite) = - rewrites::rewrite_uri_path(&uri_path.clone(), &advanced.rewrites) + rewrites::rewrite_uri_path(uri_path.clone().as_str(), &advanced.rewrites) { // Rewrites: Handle replacements (placeholders) - if let Some(regex_caps) = rewrite.source.captures(&uri_path) { + if let Some(regex_caps) = rewrite.source.captures(uri_path.as_str()) { let caps_range = 0..regex_caps.len(); let caps = caps_range .clone() @@ -294,7 +325,7 @@ impl RequestHandler { // Rewrites: Handle redirections if let Some(redirect_type) = &rewrite.redirect { - let loc = match HeaderValue::from_str(&uri_path) { + let loc = match HeaderValue::from_str(uri_path.as_str()) { Ok(val) => val, Err(err) => { tracing::error!("invalid header value from current uri: {:?}", err); diff --git a/src/redirects.rs b/src/redirects.rs index c056180..76a9729 100644 --- a/src/redirects.rs +++ b/src/redirects.rs @@ -6,8 +6,6 @@ //! 
Redirection module to handle config redirect URLs with pattern matching support. //! -use hyper::StatusCode; - use crate::settings::Redirects; /// It returns a redirect's destination path and status code if the current request uri @@ -15,12 +13,12 @@ use crate::settings::Redirects; pub fn get_redirection<'a>( uri_path: &'a str, redirects_opts_vec: &'a Option>, -) -> Option<(&'a str, &'a StatusCode)> { +) -> Option<&'a Redirects> { if let Some(redirects_vec) = redirects_opts_vec { for redirect_entry in redirects_vec.iter() { // Match source glob pattern against the request uri path if redirect_entry.source.is_match(uri_path) { - return Some((redirect_entry.destination.as_str(), &redirect_entry.kind)); + return Some(redirect_entry); } } } diff --git a/src/settings/mod.rs b/src/settings/mod.rs index d0f3757..9b165f2 100644 --- a/src/settings/mod.rs +++ b/src/settings/mod.rs @@ -34,7 +34,7 @@ pub struct Headers { /// The `Rewrites` file options. pub struct Rewrites { - /// Source pattern glob matcher + /// Source pattern Regex matcher pub source: Regex, /// A local file that must exist pub destination: String, @@ -44,8 +44,8 @@ pub struct Rewrites { /// The `Redirects` file options. pub struct Redirects { - /// Source pattern glob matcher - pub source: GlobMatcher, + /// Source pattern Regex matcher + pub source: Regex, /// A local file that must exist pub destination: String, /// Redirection type either 301 (Moved Permanently) or 302 (Found) @@ -378,6 +378,24 @@ impl Settings { })? .compile_matcher(); + // NOTE: we don’t need Unicode-aware word boundary assertions, + // therefore we use (?-u:\b) instead of (?-u) + // so the former uses an ASCII-only definition of a word character. 
+ // https://docs.rs/regex/latest/regex/#unicode-can-impact-memory-usage-and-search-speed + let pattern = source.glob().regex().replace("(?-u)^", "(?-u:\\b)"); + tracing::debug!( + "url rewrites glob pattern: {}", + &redirects_entry.source + ); + tracing::debug!("url rewrites regex equivalent: {}", pattern); + + let source = Regex::new(&pattern).with_context(|| { + format!( + "can not compile regex pattern equivalent for rewrite source: {}", + &pattern + ) + })?; + let status_code = redirects_entry.kind.to_owned() as u16; redirects_vec.push(Redirects { source, diff --git a/tests/toml/config.toml b/tests/toml/config.toml index 8935779..fe9849b 100644 --- a/tests/toml/config.toml +++ b/tests/toml/config.toml @@ -97,8 +97,8 @@ headers.Strict-Transport-Security = "max-age=63072000; includeSubDomains; preloa ### URL Redirects [[advanced.redirects]] -source = "**/*.{jpg,jpeg}" -destination = "/images/generic1.png" +source = "**/{*}.{jpg,jpeg}" +destination = "http://localhost/$1.$2" kind = 301 [[advanced.redirects]] -- libgit2 1.7.2