index : static-web-server.git

ascending towards madness

author Jose Quintana <1700322+joseluisq@users.noreply.github.com> 2023-07-09 9:41:47.0 +00:00:00
committer GitHub <noreply@github.com> 2023-07-09 9:41:47.0 +00:00:00
commit
3a47ef6aed372e81fc6defc3b02d19879fe8a0fc [patch]
tree
a27e079dc5455a84253fb8ea2d65bf8cd0043ebd
parent
e23a06def06426fc185d1ff4d758deeb2faca743
download
3a47ef6aed372e81fc6defc3b02d19879fe8a0fc.tar.gz

feat: replacements support for URL Rewrites destination (#235)

* feat: placeholders support for url rewrites destination

example:

```toml
[advanced]

[[advanced.rewrites]]
source = "**/{*}.{png,gif}"
destination = "/assets/$1.$2"
```

* docs: rewrite destination replacements description [skip ci]

Diff

 Cargo.lock                            | 22 ++++++++++---
 Cargo.toml                            |  6 ++--
 docs/content/features/url-rewrites.md | 59 +++++++++++++++++++++++++++++++-----
 src/handler.rs                        | 44 +++++++++++++++++++++++----
 src/settings/mod.rs                   | 21 ++++++++++++-
 tests/toml/config.toml                |  4 +-
 6 files changed, 135 insertions(+), 21 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index c2bf5b6..0d95477 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1035,9 +1035,21 @@ dependencies = [

[[package]]
name = "regex"
version = "1.8.4"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
 "aho-corasick 1.0.2",
 "memchr",
 "regex-automata",
 "regex-syntax",
]

[[package]]
name = "regex-automata"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83d3daa6976cffb758ec878f108ba0e062a45b2d6ca3a2cca965338855476caf"
dependencies = [
 "aho-corasick 1.0.2",
 "memchr",
@@ -1046,9 +1058,9 @@ dependencies = [

[[package]]
name = "regex-syntax"
version = "0.7.2"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"

[[package]]
name = "ring"
@@ -1285,6 +1297,7 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
name = "static-web-server"
version = "2.19.0"
dependencies = [
 "aho-corasick 1.0.2",
 "anyhow",
 "async-compression",
 "bcrypt",
@@ -1304,6 +1317,7 @@ dependencies = [
 "num_cpus",
 "percent-encoding",
 "pin-project",
 "regex",
 "rustls-pemfile",
 "serde",
 "serde_ignored",
diff --git a/Cargo.toml b/Cargo.toml
index 3df089f..f96057b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -52,10 +52,13 @@ basic-auth = ["bcrypt"]
fallback-page = []

[dependencies]
aho-corasick = "1.0"
anyhow = "1.0"
async-compression = { version = "0.4", default-features = false, optional = true, features = ["brotli", "deflate", "gzip", "zstd", "tokio"] }
bcrypt = { version = "0.14", optional = true }
bytes = "1.4"
chrono = { version = "0.4", default-features = false, features = ["std", "clock"], optional = true }
clap = { version = "4.3", features = ["derive", "env"] }
form_urlencoded = "1.2"
futures-util = { version = "0.3", default-features = false, features = ["sink"] }
globset = { version = "0.4", features = ["serde1"] }
@@ -69,12 +72,11 @@ mime_guess = "2.0"
num_cpus = { version = "1.15" }
percent-encoding = "2.3"
pin-project = "1.1"
regex = "1.9"
rustls-pemfile = { version = "1.0", optional = true }
serde = { version = "1.0", default-features = false, features = ["derive"] }
serde_ignored = "0.1"
serde_repr = "0.1"
clap = { version = "4.3", features = ["derive", "env"] }
chrono = { version = "0.4", default-features = false, features = ["std", "clock"], optional = true }
tokio = { version = "1", default-features = false, features = ["rt-multi-thread", "macros", "fs", "io-util", "signal"] }
tokio-rustls = { version = "0.24", optional = true }
tokio-util = { version = "0.7", default-features = false, features = ["io"] }
diff --git a/docs/content/features/url-rewrites.md b/docs/content/features/url-rewrites.md
index 9e23ef1..f653746 100644
--- a/docs/content/features/url-rewrites.md
+++ b/docs/content/features/url-rewrites.md
@@ -6,24 +6,40 @@ URI rewrites are particularly useful with pattern matching ([globs](https://en.w

## Structure

The URL rewrite rules should be defined mainly as an [Array of Tables](https://toml.io/en/v1.0.0#array-of-tables).
URL rewrite rules should be defined mainly as an [Array of Tables](https://toml.io/en/v1.0.0#array-of-tables).

Each table entry should have the following key/value pairs:

- One `source` key containing a string _glob pattern_.
- One `destination` string containing the local file path.
- Optional `redirect` number containing the HTTP response code.
- `source`: a key containing a string _glob pattern_.
- `destination`: a file path with optional replacements (placeholders).
- `redirect`: an optional number containing the HTTP response code (redirection).

!!! info "Note"
    The incoming request(s) will reach the `destination` only if the request(s) URI matches the `source` pattern.

### Source

The source is a [Glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)) that should match against the URI that is requesting a resource file.
It's a [Glob pattern](https://en.wikipedia.org/wiki/Glob_(programming)) that should match against the URI that is requesting a resource file.

The glob pattern functionality is powered by the [globset](https://docs.rs/globset/latest/globset/) crate which supports Standard Unix-style glob syntax.

!!! tip "Glob pattern syntax"
    For more details about the Glob pattern syntax check out https://docs.rs/globset/latest/globset/#syntax

### Destination

A local file path must exist. It has to look something like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory.
The value can be either a local file path that maps to an existing file on the system or an external URL.
It could look like `/some/directory/file.html`. It is worth noting that the `/` at the beginning indicates the server's root directory.

#### Replacements

Additionally, a `destination` supports replacements for every Glob pattern group that matches against the `source`.

Replacements are indexed from `0` to `n` and are written as a dollar sign followed by the index of the corresponding Glob pattern group occurrence (for example `$1`).

!!! tip "Group your Glob patterns"
    When using replacements, also group your Glob patterns by surrounding them with curly braces so that every group maps to its corresponding replacement.<br>
    For example: `source = "**/{*}.{png,gif}"`

### Redirect

@@ -44,8 +60,37 @@ The values can be:
source = "**/*.{png,ico,gif}"
destination = "/assets/generic1.png"

# a. Route rewrite example with redirection
[[advanced.rewrites]]
source = "**/*.{jpg,jpeg}"
destination = "/images/generic2.png"
redirect = 302
## NOTE: `redirect` can be omitted too
redirect = 301

# b. Route rewrite example with destination replacements
[[advanced.rewrites]]
## Note that we're using curly braces to group the `*` wildcard.
## See https://docs.rs/globset/latest/globset/#syntax
source = "**/{*}.{png,gif}"
## For example, the destination will result in `/assets/abcdef.png`
destination = "/assets/$1.$2"
```

If you request something like:

```sh
curl -I http://localhost/abcdef.png
```

Then the Server logs should look like this:

```log
2023-07-08T20:31:36.606035Z  INFO static_web_server::handler: incoming request: method=HEAD uri=/abcdef.png
2023-07-08T20:31:36.608439Z DEBUG static_web_server::handler: url rewrites glob patterns: ["$0", "$1", "$2"]
2023-07-08T20:31:36.608491Z DEBUG static_web_server::handler: url rewrites regex equivalent: (?-u:\b)(?:/?|.*/)(.*)\.(gif|png)$
2023-07-08T20:31:36.608525Z DEBUG static_web_server::handler: url rewrites glob pattern captures: ["abcdef.png", "abcdef", "png"]
2023-07-08T20:31:36.608561Z DEBUG static_web_server::handler: url rewrites glob pattern destination: "/assets/$1.$2"
2023-07-08T20:31:36.609655Z DEBUG static_web_server::handler: url rewrites glob patterns destination replaced: "/assets/abcdef.png"
2023-07-08T20:31:36.609735Z TRACE static_web_server::static_files: dir: base="public", route="assets/abcdef.png"
...
```
diff --git a/src/handler.rs b/src/handler.rs
index d5abf44..066e1b2 100644
--- a/src/handler.rs
+++ b/src/handler.rs
@@ -99,7 +99,7 @@ impl RequestHandler {
        let uri = req.uri();

        let base_path = &self.opts.root_dir;
        let mut uri_path = uri.path();
        let mut uri_path = uri.path().to_owned();
        let uri_query = uri.query();
        #[cfg(feature = "directory-listing")]
        let dir_listing = self.opts.dir_listing;
@@ -205,7 +205,7 @@ impl RequestHandler {
            // Advanced options
            if let Some(advanced) = &self.opts.advanced_opts {
                // Redirects
                if let Some(parts) = redirects::get_redirection(uri_path, &advanced.redirects) {
                if let Some(parts) = redirects::get_redirection(&uri_path, &advanced.redirects) {
                    let (uri_dest, status) = parts;
                    match HeaderValue::from_str(uri_dest) {
                        Ok(loc) => {
@@ -232,10 +232,42 @@ impl RequestHandler {
                }

                // Rewrites
                if let Some(rewrite) = rewrites::rewrite_uri_path(uri_path, &advanced.rewrites) {
                    uri_path = rewrite.destination.as_str();
                if let Some(rewrite) =
                    rewrites::rewrite_uri_path(&uri_path.clone(), &advanced.rewrites)
                {
                    // Rewrites: Handle replacements (placeholders)
                    if let Some(regex_caps) = rewrite.source.captures(&uri_path) {
                        let caps_range = 0..regex_caps.len();
                        let caps = caps_range
                            .clone()
                            .filter_map(|i| regex_caps.get(i).map(|s| s.as_str()))
                            .collect::<Vec<&str>>();

                        let patterns = caps_range
                            .map(|i| format!("${}", i))
                            .collect::<Vec<String>>();

                        let dest = rewrite.destination.as_str();

                        tracing::debug!("url rewrites glob pattern: {:?}", patterns);
                        tracing::debug!("url rewrites regex equivalent: {}", rewrite.source);
                        tracing::debug!("url rewrites glob pattern captures: {:?}", caps);
                        tracing::debug!("url rewrites glob pattern destination: {:?}", dest);

                        if let Ok(ac) = aho_corasick::AhoCorasick::new(patterns) {
                            if let Ok(dest) = ac.try_replace_all(dest, &caps) {
                                tracing::debug!(
                                    "url rewrites glob pattern destination replaced: {:?}",
                                    dest
                                );
                                uri_path = dest;
                            }
                        }
                    }

                    // Rewrites: Handle redirections
                    if let Some(redirect_type) = &rewrite.redirect {
                        let loc = match HeaderValue::from_str(uri_path) {
                        let loc = match HeaderValue::from_str(&uri_path) {
                            Ok(val) => val,
                            Err(err) => {
                                tracing::error!("invalid header value from current uri: {:?}", err);
@@ -259,6 +291,8 @@ impl RequestHandler {
                }
            }

            let uri_path = &uri_path;

            // Static files
            match static_files::handle(&HandleOpts {
                method,
diff --git a/src/settings/mod.rs b/src/settings/mod.rs
index b259bf3..03d786a 100644
--- a/src/settings/mod.rs
+++ b/src/settings/mod.rs
@@ -10,6 +10,7 @@ use clap::Parser;
use globset::{Glob, GlobMatcher};
use headers::HeaderMap;
use hyper::StatusCode;
use regex::Regex;

use crate::{Context, Result};

@@ -34,7 +35,7 @@ pub struct Headers {
/// The `Rewrites` file options.
pub struct Rewrites {
    /// Source pattern glob matcher
    pub source: GlobMatcher,
    pub source: Regex,
    /// A local file that must exist
    pub destination: String,
    /// Optional redirect type either 301 (Moved Permanently) or 302 (Found).
@@ -328,6 +329,24 @@ impl Settings {
                                    })?
                                    .compile_matcher();

                                // NOTE: we don’t need Unicode-aware word boundary assertions,
                                // therefore we use (?-u:\b) instead of (?-u)
                                // so the former uses an ASCII-only definition of a word character.
                                // https://docs.rs/regex/latest/regex/#unicode-can-impact-memory-usage-and-search-speed
                                let pattern = source.glob().regex().replace("(?-u)^", "(?-u:\\b)");
                                tracing::debug!(
                                    "url rewrites glob pattern: {}",
                                    &rewrites_entry.source
                                );
                                tracing::debug!("url rewrites regex equivalent: {}", pattern);

                                let source = Regex::new(&pattern).with_context(|| {
                                    format!(
                                        "can not compile regex pattern equivalent for rewrite source: {}",
                                        &pattern
                                    )
                                })?;

                                rewrites_vec.push(Rewrites {
                                    source,
                                    destination: rewrites_entry.destination.to_owned(),
diff --git a/tests/toml/config.toml b/tests/toml/config.toml
index f551404..8935779 100644
--- a/tests/toml/config.toml
+++ b/tests/toml/config.toml
@@ -109,8 +109,8 @@ kind = 302
### URL Rewrites

[[advanced.rewrites]]
source = "**/*.{png,gif}"
destination = "/assets/favicon.ico"
source = "**/{*}.{png,gif}"
destination = "/assets/$1.$2"
# redirect = 301

[[advanced.rewrites]]