From 3a47ef6aed372e81fc6defc3b02d19879fe8a0fc Mon Sep 17 00:00:00 2001
From: Jose Quintana <1700322+joseluisq@users.noreply.github.com>
Date: Sun, 9 Jul 2023 11:41:47 +0200
Subject: [PATCH] feat: replacements support for URL Rewrites destination (#235)
* feat: placeholders support for url rewrites destination
example:
```toml
[advanced]
[[advanced.rewrites]]
source = "**/{*}.{png,gif}"
destination = "/assets/$1.$2"
```
* docs: rewrite destination replacements description [skip ci]
---
Cargo.lock | 22 ++++++++++++++++++----
Cargo.toml | 6 ++++--
docs/content/features/url-rewrites.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------
src/handler.rs | 44 +++++++++++++++++++++++++++++++++++++++-----
src/settings/mod.rs | 21 ++++++++++++++++++++-
tests/toml/config.toml | 4 ++--
6 files changed, 135 insertions(+), 21 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index c2bf5b6..0d95477 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1035,9 +1035,21 @@ dependencies = [
[[package]]
name = "regex"
-version = "1.8.4"
+version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
+checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
+dependencies = [
+ "aho-corasick 1.0.2",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83d3daa6976cffb758ec878f108ba0e062a45b2d6ca3a2cca965338855476caf"
dependencies = [
"aho-corasick 1.0.2",
"memchr",
@@ -1046,9 +1058,9 @@ dependencies = [
[[package]]
name = "regex-syntax"
-version = "0.7.2"
+version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
+checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
[[package]]
name = "ring"
@@ -1285,6 +1297,7 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
name = "static-web-server"
version = "2.19.0"
dependencies = [
+ "aho-corasick 1.0.2",
"anyhow",
"async-compression",
"bcrypt",
@@ -1304,6 +1317,7 @@ dependencies = [
"num_cpus",
"percent-encoding",
"pin-project",
+ "regex",
"rustls-pemfile",
"serde",
"serde_ignored",
diff --git a/Cargo.toml b/Cargo.toml
index 3df089f..f96057b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -52,10 +52,13 @@ basic-auth = ["bcrypt"]
fallback-page = []
[dependencies]
+aho-corasick = "1.0"
anyhow = "1.0"
async-compression = { version = "0.4", default-features = false, optional = true, features = ["brotli", "deflate", "gzip", "zstd", "tokio"] }
bcrypt = { version = "0.14", optional = true }
bytes = "1.4"
+chrono = { version = "0.4", default-features = false, features = ["std", "clock"], optional = true }
+clap = { version = "4.3", features = ["derive", "env"] }
form_urlencoded = "1.2"
futures-util = { version = "0.3", default-features = false, features = ["sink"] }
globset = { version = "0.4", features = ["serde1"] }
@@ -69,12 +72,11 @@ mime_guess = "2.0"
num_cpus = { version = "1.15" }
percent-encoding = "2.3"
pin-project = "1.1"
+regex = "1.9"
rustls-pemfile = { version = "1.0", optional = true }
serde = { version = "1.0", default-features = false, features = ["derive"] }
serde_ignored = "0.1"
serde_repr = "0.1"
-clap = { version = "4.3", features = ["derive", "env"] }
-chrono = { version = "0.4", default-features = false, features = ["std", "clock"], optional = true }
tokio = { version = "1", default-features = false, features = ["rt-multi-thread", "macros", "fs", "io-util", "signal"] }
tokio-rustls = { version = "0.24", optional = true }
tokio-util = { version = "0.7", default-features = false, features = ["io"] }
diff --git a/docs/content/features/url-rewrites.md b/docs/content/features/url-rewrites.md
index 9e23ef1..f653746 100644
--- a/docs/content/features/url-rewrites.md
+++ b/docs/content/features/url-rewrites.md
@@ -6,24 +6,40 @@ URI rewrites are particularly useful with pattern matching ([globs](https://en.w
## Structure
-The URL rewrite rules should be defined mainly as an [Array of Tables](https://toml.io/en/v1.0.0#array-of-tables).
+URL rewrite rules should be defined mainly as an [Array of Tables](https://toml.io/en/v1.0.0#array-of-tables).
Each table entry should have two key/value pairs:
-- One `source` key containing a string _glob pattern_.
-- One `destination` string containing the local file path.
-- Optional `redirect` number containing the HTTP response code.
+- `source`: a key containing a string _glob pattern_.
+- `destination`: a file path with optional replacements (placeholders).
+- `redirect`: an optional number containing the HTTP response code (redirection).
!!! info "Note"
The incoming request(s) will reach the `destination` only if the request(s) URI matches the `source` pattern.
### Source
-The source is a [Glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)) that should match against the URI that is requesting a resource file.
+It's a [Glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)) that should match against the URI that is requesting a resource file.
+
+The glob pattern functionality is powered by the [globset](https://docs.rs/globset/latest/globset/) crate which supports Standard Unix-style glob syntax.
+
+!!! tip "Glob pattern syntax"
+ For more details about the Glob pattern syntax check out https://docs.rs/globset/latest/globset/#syntax
### Destination
-A local file path must exist. It has to look something like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory.
+The value can be either a local file path that maps to an existing file on the system or an external URL.
+It could look like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory.
+
+#### Replacements
+
+Additionally, a `destination` supports replacements for every Glob pattern group that matches against the `source`.
+
+Replacements are numbered from `0` to `n` and are written as a dollar sign followed by an index (the Glob pattern group occurrence).
+
+!!! tip "Group your Glob patterns"
+ When using replacements, also group your Glob patterns by surrounding them with curly braces so that every group maps to its corresponding replacement.
+ For example: `source = "**/{*}.{png,gif}"`
### Redirect
@@ -44,8 +60,37 @@ The values can be:
source = "**/*.{png,ico,gif}"
destination = "/assets/generic1.png"
+# a. Route rewrite example with redirection
[[advanced.rewrites]]
source = "**/*.{jpg,jpeg}"
destination = "/images/generic2.png"
-redirect = 302
+## NOTE: `redirect` can be omitted too
+redirect = 301
+
+# b. Route rewrite example with destination replacements
+[[advanced.rewrites]]
+## Note that we're using curly braces to group the `*` wildcard.
+## See https://docs.rs/globset/latest/globset/#syntax
+source = "**/{*}.{png,gif}"
+## For example, the destination will result in `/assets/abcdef.png`
+destination = "/assets/$1.$2"
+```
+
+If you request something like:
+
+```sh
+curl -I http://localhost/abcdef.png
+```
+
+Then the Server logs should look like this:
+
+```log
+2023-07-08T20:31:36.606035Z INFO static_web_server::handler: incoming request: method=HEAD uri=/abcdef.png
+2023-07-08T20:31:36.608439Z DEBUG static_web_server::handler: url rewrites glob pattern: ["$0", "$1", "$2"]
+2023-07-08T20:31:36.608491Z DEBUG static_web_server::handler: url rewrites regex equivalent: (?-u:\b)(?:/?|.*/)(.*)\.(gif|png)$
+2023-07-08T20:31:36.608525Z DEBUG static_web_server::handler: url rewrites glob pattern captures: ["abcdef.png", "abcdef", "png"]
+2023-07-08T20:31:36.608561Z DEBUG static_web_server::handler: url rewrites glob pattern destination: "/assets/$1.$2"
+2023-07-08T20:31:36.609655Z DEBUG static_web_server::handler: url rewrites glob pattern destination replaced: "/assets/abcdef.png"
+2023-07-08T20:31:36.609735Z TRACE static_web_server::static_files: dir: base="public", route="assets/abcdef.png"
+...
```
diff --git a/src/handler.rs b/src/handler.rs
index d5abf44..066e1b2 100644
--- a/src/handler.rs
+++ b/src/handler.rs
@@ -99,7 +99,7 @@ impl RequestHandler {
let uri = req.uri();
let base_path = &self.opts.root_dir;
- let mut uri_path = uri.path();
+ let mut uri_path = uri.path().to_owned();
let uri_query = uri.query();
#[cfg(feature = "directory-listing")]
let dir_listing = self.opts.dir_listing;
@@ -205,7 +205,7 @@ impl RequestHandler {
// Advanced options
if let Some(advanced) = &self.opts.advanced_opts {
// Redirects
- if let Some(parts) = redirects::get_redirection(uri_path, &advanced.redirects) {
+ if let Some(parts) = redirects::get_redirection(&uri_path, &advanced.redirects) {
let (uri_dest, status) = parts;
match HeaderValue::from_str(uri_dest) {
Ok(loc) => {
@@ -232,10 +232,42 @@ impl RequestHandler {
}
// Rewrites
- if let Some(rewrite) = rewrites::rewrite_uri_path(uri_path, &advanced.rewrites) {
- uri_path = rewrite.destination.as_str();
+ if let Some(rewrite) =
+ rewrites::rewrite_uri_path(&uri_path.clone(), &advanced.rewrites)
+ {
+ // Rewrites: Handle replacements (placeholders)
+ if let Some(regex_caps) = rewrite.source.captures(&uri_path) {
+ let caps_range = 0..regex_caps.len();
+ let caps = caps_range
+ .clone()
+ .filter_map(|i| regex_caps.get(i).map(|s| s.as_str()))
+ .collect::<Vec<_>>();
+
+ let patterns = caps_range
+ .map(|i| format!("${}", i))
+ .collect::<Vec<_>>();
+
+ let dest = rewrite.destination.as_str();
+
+ tracing::debug!("url rewrites glob pattern: {:?}", patterns);
+ tracing::debug!("url rewrites regex equivalent: {}", rewrite.source);
+ tracing::debug!("url rewrites glob pattern captures: {:?}", caps);
+ tracing::debug!("url rewrites glob pattern destination: {:?}", dest);
+
+ if let Ok(ac) = aho_corasick::AhoCorasick::new(patterns) {
+ if let Ok(dest) = ac.try_replace_all(dest, &caps) {
+ tracing::debug!(
+ "url rewrites glob pattern destination replaced: {:?}",
+ dest
+ );
+ uri_path = dest;
+ }
+ }
+ }
+
+ // Rewrites: Handle redirections
if let Some(redirect_type) = &rewrite.redirect {
- let loc = match HeaderValue::from_str(uri_path) {
+ let loc = match HeaderValue::from_str(&uri_path) {
Ok(val) => val,
Err(err) => {
tracing::error!("invalid header value from current uri: {:?}", err);
@@ -259,6 +291,8 @@ impl RequestHandler {
}
}
+ let uri_path = &uri_path;
+
// Static files
match static_files::handle(&HandleOpts {
method,
diff --git a/src/settings/mod.rs b/src/settings/mod.rs
index b259bf3..03d786a 100644
--- a/src/settings/mod.rs
+++ b/src/settings/mod.rs
@@ -10,6 +10,7 @@ use clap::Parser;
use globset::{Glob, GlobMatcher};
use headers::HeaderMap;
use hyper::StatusCode;
+use regex::Regex;
use crate::{Context, Result};
@@ -34,7 +35,7 @@ pub struct Headers {
/// The `Rewrites` file options.
pub struct Rewrites {
/// Source pattern glob matcher
- pub source: GlobMatcher,
+ pub source: Regex,
/// A local file that must exist
pub destination: String,
/// Optional redirect type either 301 (Moved Permanently) or 302 (Found).
@@ -328,6 +329,24 @@ impl Settings {
})?
.compile_matcher();
+ // NOTE: we don’t need Unicode-aware word boundary assertions,
+ // therefore we replace the leading `(?-u)^` of the glob's regex with `(?-u:\b)`,
+ // which uses an ASCII-only definition of a word character.
+ // https://docs.rs/regex/latest/regex/#unicode-can-impact-memory-usage-and-search-speed
+ let pattern = source.glob().regex().replace("(?-u)^", "(?-u:\\b)");
+ tracing::debug!(
+ "url rewrites glob pattern: {}",
+ &rewrites_entry.source
+ );
+ tracing::debug!("url rewrites regex equivalent: {}", pattern);
+
+ let source = Regex::new(&pattern).with_context(|| {
+ format!(
+ "can not compile regex pattern equivalent for rewrite source: {}",
+ &pattern
+ )
+ })?;
+
rewrites_vec.push(Rewrites {
source,
destination: rewrites_entry.destination.to_owned(),
diff --git a/tests/toml/config.toml b/tests/toml/config.toml
index f551404..8935779 100644
--- a/tests/toml/config.toml
+++ b/tests/toml/config.toml
@@ -109,8 +109,8 @@ kind = 302
### URL Rewrites
[[advanced.rewrites]]
-source = "**/*.{png,gif}"
-destination = "/assets/favicon.ico"
+source = "**/{*}.{png,gif}"
+destination = "/assets/$1.$2"
# redirect = 301
[[advanced.rewrites]]
--
libgit2 1.7.2