From 7c66c5ccb6ec6b1a32cd9662854a6f2d813a8310 Mon Sep 17 00:00:00 2001
From: Jose Quintana <1700322+joseluisq@users.noreply.github.com>
Date: Tue, 11 Jul 2023 23:32:53 +0200
Subject: [PATCH] feat: replacements support for URL Redirects destination (#239)
example:
```toml
[advanced]
[[advanced.redirects]]
source = "**/{*}.{jpg,jpeg,svg}"
destination = "http://localhost/images/$1.$2"
kind = 301
```
---
docs/content/features/url-redirects.md | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
docs/content/features/url-rewrites.md | 19 ++++++++++---------
src/handler.rs | 49 ++++++++++++++++++++++++++++++++++++++++---------
src/redirects.rs | 6 ++----
src/settings/mod.rs | 24 +++++++++++++++++++++---
tests/toml/config.toml | 4 ++--
6 files changed, 127 insertions(+), 33 deletions(-)
diff --git a/docs/content/features/url-redirects.md b/docs/content/features/url-redirects.md
index d088315..da32d89 100644
--- a/docs/content/features/url-redirects.md
+++ b/docs/content/features/url-redirects.md
@@ -1,6 +1,6 @@
# URL Redirects
-**SWS** provides the ability to redirect request URLs with pattern matching support.
+**SWS** provides the ability to redirect request URLs with Glob pattern-matching support.
URI redirects are particularly useful with pattern matching ([globs](https://en.wikipedia.org/wiki/Glob_(programming))). Use them for example to prevent broken links if you've moved a page or to shorten URLs.
@@ -10,9 +10,9 @@ The URL redirect rules should be defined mainly as an [Array of Tables](https://
Each table entry should have the following key/value pairs:
-- One `source` key containing a string _glob pattern_.
-- One `destination` string containing the local file path or a full URL.
-- One `kind` number containing the HTTP response code.
+- `source`: string containing a _glob pattern_.
+- `destination`: local file path or a full URL with optional replacements (placeholders).
+- `kind`: number containing the HTTP response code (redirection).
!!! info "Note"
The incoming request(s) will reach the `destination` only if the request(s) URI matches the `source` pattern.
@@ -21,13 +21,29 @@ Each table entry should have the following key/value pairs:
The source is a [Glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)) that should match against the URI that is requesting a resource file.
+The glob pattern functionality is powered by the [globset](https://docs.rs/globset/latest/globset/) crate which supports Standard Unix-style glob syntax.
+
+!!! tip "Glob pattern syntax"
+ For more details about the Glob pattern syntax check out https://docs.rs/globset/latest/globset/#syntax
+
### Destination
-A local file path must exist. It can be a local path `/some/directory/file.html` or a full URL. It is worth noting that the `/` at the beginning indicates the server's root directory.
+The value can be either a local file path that maps to an existing file on the system or an external URL.
+It could look like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory.
+
+#### Replacements
+
+Additionally, a `destination` supports replacements for every Glob pattern group that matches against the `source`.
+
+Replacements are indexed from `0` to `n` and are written as a dollar sign followed by the index of the corresponding Glob pattern group occurrence (for example, `$1`).
+
+!!! tip "Group your Glob patterns"
+ When using replacements, group the parts of your Glob pattern by surrounding each of them with curly braces so that every group maps to its corresponding replacement.
+ For example: `source = "**/{*}.{jpg,jpeg,svg}"`
### Kind
-It indicates the HTTP response code.
+It is a number that indicates the HTTP response code (redirect).
The values can be:
- `301` for "Moved Permanently"
@@ -40,13 +56,43 @@ The values can be:
### URL Redirects
+# a. Simple route redirect example (existing file)
[[advanced.redirects]]
source = "**/*.{jpg,jpeg}"
destination = "/images/generic1.png"
kind = 301
+# b. Simple route redirect example (external URL)
[[advanced.redirects]]
source = "/index.html"
destination = "https://static-web-server.net"
kind = 302
+
+# c. Simple route redirect example with destination replacements
+[[advanced.redirects]]
+## Note that we're using curly braces to group the `*` wildcard.
+## See https://docs.rs/globset/latest/globset/#syntax
+source = "**/{*}.{jpg,jpeg,svg}"
+## For example, the destination will result in `http://localhost/assets/abcdef.jpeg`
+destination = "http://localhost/assets/$1.$2"
+kind = 301
+```
+
+If you request something like:
+
+```sh
+curl -I http://localhost:4433/abcdef.jpeg
+```
+
+Then the server logs should look something like this:
+
+```log
+2023-07-11T21:11:22.217358Z INFO static_web_server::handler: incoming request: method=HEAD uri=/abcdef.jpeg
+2023-07-11T21:11:22.217974Z DEBUG static_web_server::handler: url redirects glob pattern: ["$0", "$1", "$2"]
+2023-07-11T21:11:22.217992Z DEBUG static_web_server::handler: url redirects regex equivalent: (?-u:\b)(?:/?|.*/)(.*)\.(jpeg|jpg)$
+2023-07-11T21:11:22.218002Z DEBUG static_web_server::handler: url redirects glob pattern captures: ["abcdef.jpeg", "abcdef", "jpeg"]
+2023-07-11T21:11:22.218076Z DEBUG static_web_server::handler: url redirects glob pattern destination: "http://localhost/assets/$1.$2"
+2023-07-11T21:11:22.218712Z DEBUG static_web_server::handler: url redirects glob pattern destination replaced: "http://localhost/assets/abcdef.jpeg"
+2023-07-11T21:11:22.218739Z TRACE static_web_server::handler: uri matches redirects glob pattern, redirecting with status '301 Moved Permanently'
+...
```
diff --git a/docs/content/features/url-rewrites.md b/docs/content/features/url-rewrites.md
index f653746..3576b7f 100644
--- a/docs/content/features/url-rewrites.md
+++ b/docs/content/features/url-rewrites.md
@@ -1,6 +1,6 @@
# URL Rewrites
-**SWS** provides the ability to rewrite request URLs with pattern-matching support.
+**SWS** provides the ability to rewrite request URLs (routes) with Glob pattern-matching support.
URI rewrites are particularly useful with pattern matching ([globs](https://en.wikipedia.org/wiki/Glob_(programming))), as the server can accept any URL that matches the pattern and let the client-side code decide what to display.
@@ -10,9 +10,9 @@ URL rewrite rules should be defined mainly as an [Array of Tables](https://toml.
Each table entry should have two key/value pairs:
-- `source`: a key containing a string _glob pattern_.
-- `destination` a file path with optional replacements (placeholders).
-- `redirect` an optional number containing the HTTP response code (redirection).
+- `source`: string containing a _glob pattern_.
+- `destination`: file path with optional replacements (placeholders).
+- `redirect`: optional number containing the HTTP response code (redirection).
!!! info "Note"
The incoming request(s) will reach the `destination` only if the request(s) URI matches the `source` pattern.
@@ -28,7 +28,7 @@ The glob pattern functionality is powered by the [globset](https://docs.rs/globs
### Destination
-The value can be either a local file path that maps to an existing file on the system or an external URL.
+The value can be either a local file path that maps to an existing file on the system or an external URL (URLs only in case of redirection).
It could look like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory.
#### Replacements
@@ -56,23 +56,24 @@ The values can be:
### URL Rewrites
+# a. Simple route rewrite example
[[advanced.rewrites]]
source = "**/*.{png,ico,gif}"
destination = "/assets/generic1.png"
-# a. Route rewrite example with redirection
+# b. Route rewrite example with redirection
[[advanced.rewrites]]
source = "**/*.{jpg,jpeg}"
destination = "/images/generic2.png"
## NOTE: `redirect` can be omitted too
redirect = 301
-# b. Route rewrite example with destination replacements
+# c. Route rewrite example with destination replacements
[[advanced.rewrites]]
## Note that we're using curly braces to group the `*` wildcard.
## See https://docs.rs/globset/latest/globset/#syntax
source = "**/{*}.{png,gif}"
-## For exmaple, the destination will result in `/assets/abcdef.png`
+## For example, the destination will result in `/assets/abcdef.png`
destination = "/assets/$1.$2"
```
@@ -82,7 +83,7 @@ If you request something like:
curl -I http://localhost/abcdef.png
```
-Then the Server logs should look like this:
+Then the server logs should look something like this:
```log
2023-07-08T20:31:36.606035Z INFO static_web_server::handler: incoming request: method=HEAD uri=/abcdef.png
diff --git a/src/handler.rs b/src/handler.rs
index 0e8fea3..7448256 100644
--- a/src/handler.rs
+++ b/src/handler.rs
@@ -232,16 +232,47 @@ impl RequestHandler {
// Advanced options
if let Some(advanced) = &self.opts.advanced_opts {
// Redirects
- if let Some(parts) = redirects::get_redirection(&uri_path, &advanced.redirects) {
- let (uri_dest, status) = parts;
- match HeaderValue::from_str(uri_dest) {
+ if let Some(redirects) =
+ redirects::get_redirection(uri_path.clone().as_str(), &advanced.redirects)
+ {
+ // Redirects: Handle replacements (placeholders)
+ if let Some(regex_caps) = redirects.source.captures(uri_path.as_str()) {
+ let caps_range = 0..regex_caps.len();
+ let caps = caps_range
+ .clone()
+ .filter_map(|i| regex_caps.get(i).map(|s| s.as_str()))
+ .collect::>();
+
+ let patterns = caps_range
+ .map(|i| format!("${}", i))
+ .collect::>();
+
+ let dest = redirects.destination.as_str();
+
+ tracing::debug!("url redirects glob pattern: {:?}", patterns);
+ tracing::debug!("url redirects regex equivalent: {}", redirects.source);
+ tracing::debug!("url redirects glob pattern captures: {:?}", caps);
+ tracing::debug!("url redirects glob pattern destination: {:?}", dest);
+
+ if let Ok(ac) = aho_corasick::AhoCorasick::new(patterns) {
+ if let Ok(dest) = ac.try_replace_all(dest, &caps) {
+ tracing::debug!(
+ "url redirects glob pattern destination replaced: {:?}",
+ dest
+ );
+ uri_path = dest;
+ }
+ }
+ }
+
+ match HeaderValue::from_str(uri_path.as_str()) {
Ok(loc) => {
let mut resp = Response::new(Body::empty());
resp.headers_mut().insert(hyper::header::LOCATION, loc);
- *resp.status_mut() = *status;
+ *resp.status_mut() = redirects.kind;
tracing::trace!(
- "uri matches redirect pattern, redirecting with status {}",
- status.canonical_reason().unwrap_or_default()
+ "uri matches redirects glob pattern, redirecting with status '{}'",
+ redirects.kind
);
return Ok(resp);
}
@@ -260,10 +291,10 @@ impl RequestHandler {
// Rewrites
if let Some(rewrite) =
- rewrites::rewrite_uri_path(&uri_path.clone(), &advanced.rewrites)
+ rewrites::rewrite_uri_path(uri_path.clone().as_str(), &advanced.rewrites)
{
// Rewrites: Handle replacements (placeholders)
- if let Some(regex_caps) = rewrite.source.captures(&uri_path) {
+ if let Some(regex_caps) = rewrite.source.captures(uri_path.as_str()) {
let caps_range = 0..regex_caps.len();
let caps = caps_range
.clone()
@@ -294,7 +325,7 @@ impl RequestHandler {
// Rewrites: Handle redirections
if let Some(redirect_type) = &rewrite.redirect {
- let loc = match HeaderValue::from_str(&uri_path) {
+ let loc = match HeaderValue::from_str(uri_path.as_str()) {
Ok(val) => val,
Err(err) => {
tracing::error!("invalid header value from current uri: {:?}", err);
diff --git a/src/redirects.rs b/src/redirects.rs
index c056180..76a9729 100644
--- a/src/redirects.rs
+++ b/src/redirects.rs
@@ -6,8 +6,6 @@
//! Redirection module to handle config redirect URLs with pattern matching support.
//!
-use hyper::StatusCode;
-
use crate::settings::Redirects;
/// It returns a redirect's destination path and status code if the current request uri
@@ -15,12 +13,12 @@ use crate::settings::Redirects;
pub fn get_redirection<'a>(
uri_path: &'a str,
redirects_opts_vec: &'a Option>,
-) -> Option<(&'a str, &'a StatusCode)> {
+) -> Option<&'a Redirects> {
if let Some(redirects_vec) = redirects_opts_vec {
for redirect_entry in redirects_vec.iter() {
// Match source glob pattern against the request uri path
if redirect_entry.source.is_match(uri_path) {
- return Some((redirect_entry.destination.as_str(), &redirect_entry.kind));
+ return Some(redirect_entry);
}
}
}
diff --git a/src/settings/mod.rs b/src/settings/mod.rs
index d0f3757..9b165f2 100644
--- a/src/settings/mod.rs
+++ b/src/settings/mod.rs
@@ -34,7 +34,7 @@ pub struct Headers {
/// The `Rewrites` file options.
pub struct Rewrites {
- /// Source pattern glob matcher
+ /// Source pattern Regex matcher
pub source: Regex,
/// A local file that must exist
pub destination: String,
@@ -44,8 +44,8 @@ pub struct Rewrites {
/// The `Redirects` file options.
pub struct Redirects {
- /// Source pattern glob matcher
- pub source: GlobMatcher,
+ /// Source pattern Regex matcher
+ pub source: Regex,
/// A local file that must exist
pub destination: String,
/// Redirection type either 301 (Moved Permanently) or 302 (Found)
@@ -378,6 +378,24 @@ impl Settings {
})?
.compile_matcher();
+ // NOTE: we don’t need Unicode-aware word boundary assertions,
+ // therefore we replace the leading `(?-u)^` anchor with `(?-u:\b)`,
+ // which uses an ASCII-only definition of a word character.
+ // https://docs.rs/regex/latest/regex/#unicode-can-impact-memory-usage-and-search-speed
+ let pattern = source.glob().regex().replace("(?-u)^", "(?-u:\\b)");
+ tracing::debug!(
+ "url rewrites glob pattern: {}",
+ &redirects_entry.source
+ );
+ tracing::debug!("url rewrites regex equivalent: {}", pattern);
+
+ let source = Regex::new(&pattern).with_context(|| {
+ format!(
+ "can not compile regex pattern equivalent for rewrite source: {}",
+ &pattern
+ )
+ })?;
+
let status_code = redirects_entry.kind.to_owned() as u16;
redirects_vec.push(Redirects {
source,
diff --git a/tests/toml/config.toml b/tests/toml/config.toml
index 8935779..fe9849b 100644
--- a/tests/toml/config.toml
+++ b/tests/toml/config.toml
@@ -97,8 +97,8 @@ headers.Strict-Transport-Security = "max-age=63072000; includeSubDomains; preloa
### URL Redirects
[[advanced.redirects]]
-source = "**/*.{jpg,jpeg}"
-destination = "/images/generic1.png"
+source = "**/{*}.{jpg,jpeg}"
+destination = "http://localhost/$1.$2"
kind = 301
[[advanced.redirects]]
--
libgit2 1.7.2