index : static-web-server.git

ascending towards madness

author Jose Quintana <joseluisquintana20@gmail.com> 2021-05-01 13:50:05.0 +00:00:00
committer Jose Quintana <joseluisquintana20@gmail.com> 2021-05-01 13:50:05.0 +00:00:00
commit
d453567468cb15e2c8b4213c875497ce0d7e53e8 [patch]
tree
9f3a83e1b8e2a2f1c409e6f30fbcd4bac1ef1c9f
parent
04ec1b1fd65383ba787e008670ff065085d820bd
download
d453567468cb15e2c8b4213c875497ce0d7e53e8.tar.gz

refactor: default and custom 404-50x error pages



Diff

 Cargo.lock          |  20 +-
 src/controller.rs   |  38 +----
 src/error_page.rs   |  84 ++++++++++-
 src/fs.rs           | 451 +-----------------------------------------------------
 src/handler.rs      |  13 ++-
 src/lib.rs          |   4 +-
 src/server.rs       |   4 +-
 src/static_files.rs | 452 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 8 files changed, 563 insertions(+), 503 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 79aaa67..7ac142d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -352,9 +352,9 @@ dependencies = [

[[package]]
name = "memchr"
version = "2.3.4"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"

[[package]]
name = "mime"
@@ -539,9 +539,9 @@ dependencies = [

[[package]]
name = "regex"
version = "1.4.6"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a26af418b574bd56588335b3a3659a65725d4e636eb1016c2f9e3b38c7cc759"
checksum = "1efb2352a0f4d4b128f734b5c44c79ff80117351138733f12f982fe3e2b13343"
dependencies = [
 "regex-syntax",
]
@@ -558,9 +558,9 @@ dependencies = [

[[package]]
name = "regex-syntax"
version = "0.6.23"
version = "0.6.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"
checksum = "00efb87459ba4f6fb2169d20f68565555688e1250ee6825cdf6254f8b48fafb2"

[[package]]
name = "ryu"
@@ -769,9 +769,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"

[[package]]
name = "tracing"
version = "0.1.25"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01ebdc2bb4498ab1ab5f5b73c5803825e60199229ccba0698170e3be0e7f959f"
checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d"
dependencies = [
 "cfg-if 1.0.0",
 "pin-project-lite",
@@ -792,9 +792,9 @@ dependencies = [

[[package]]
name = "tracing-core"
version = "0.1.17"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f50de3927f93d202783f4513cda820ab47ef17f624b03c096e86ef00c67e6b5f"
checksum = "a9ff14f98b1a4b289c6248a023c1c2fa1491062964e9fed67ab29c4e4da4a052"
dependencies = [
 "lazy_static",
]
diff --git a/src/controller.rs b/src/controller.rs
deleted file mode 100644
index bb44d2f..0000000
--- a/src/controller.rs
+++ /dev/null
@@ -1,38 +0,0 @@
use headers::{AcceptRanges, ContentLength, ContentType, HeaderMapExt};
use hyper::{Body, Request, Response};
use std::path::Path;

use crate::error::Result;
use crate::fs;

/// Main server request entry point.
pub async fn handle(base: &Path, req: Request<Body>) -> Result<Response<Body>> {
    let path = req.uri().path();
    let resp = fs::handle_request(base, req.headers(), path).await;
    match resp {
        Ok(resp) => Ok(resp),
        Err(status) => {
            let method = req.method();
            tracing::warn!(method = ?method, status = status.as_u16(), error = ?status.to_string());

            let mut body = Body::empty();
            let mut len = 0_u64;
            if method == hyper::Method::GET {
                let content = format!(
                    "<html><head><title>{}</title></head><body><center><h1>{}</h1></center></body></html>",
                    status, status
                );
                len = content.len() as u64;
                body = Body::from(content)
            }

            let mut resp = Response::new(body);
            *resp.status_mut() = status;
            resp.headers_mut().typed_insert(ContentLength(len));
            resp.headers_mut().typed_insert(ContentType::html());
            resp.headers_mut().typed_insert(AcceptRanges::bytes());

            Ok(resp)
        }
    }
}
diff --git a/src/error_page.rs b/src/error_page.rs
index db455f2..c9088ed 100644
--- a/src/error_page.rs
+++ b/src/error_page.rs
@@ -1,4 +1,88 @@
use headers::{AcceptRanges, ContentLength, ContentType, HeaderMapExt};
use hyper::{Body, Method, Response, StatusCode};
use once_cell::sync::OnceCell;

use crate::error::Result;

pub static PAGE_404: OnceCell<String> = OnceCell::new();
pub static PAGE_50X: OnceCell<String> = OnceCell::new();

pub fn get_error_response(method: &Method, status_code: &StatusCode) -> Result<Response<Body>> {
    tracing::warn!(method = ?method, status = status_code.as_u16(), error = ?status_code.to_string());

    // Check for 4xx and 50x status codes
    let mut error_page_content = String::new();
    let status_code = match status_code {
        // 4xx
        &StatusCode::BAD_REQUEST
        | &StatusCode::UNAUTHORIZED
        | &StatusCode::PAYMENT_REQUIRED
        | &StatusCode::FORBIDDEN
        | &StatusCode::NOT_FOUND
        | &StatusCode::METHOD_NOT_ALLOWED
        | &StatusCode::NOT_ACCEPTABLE
        | &StatusCode::PROXY_AUTHENTICATION_REQUIRED
        | &StatusCode::REQUEST_TIMEOUT
        | &StatusCode::CONFLICT
        | &StatusCode::GONE
        | &StatusCode::LENGTH_REQUIRED
        | &StatusCode::PRECONDITION_FAILED
        | &StatusCode::PAYLOAD_TOO_LARGE
        | &StatusCode::URI_TOO_LONG
        | &StatusCode::UNSUPPORTED_MEDIA_TYPE
        | &StatusCode::RANGE_NOT_SATISFIABLE
        | &StatusCode::EXPECTATION_FAILED => {
            // Extra check for 404 status code and HTML content
            if status_code == &StatusCode::NOT_FOUND {
                // TODO: handle this potential panic properly
                error_page_content = PAGE_404
                    .get()
                    .expect("PAGE_404 contant is not initialized")
                    .to_string();
            }
            status_code
        }
        // 50x
        &StatusCode::INTERNAL_SERVER_ERROR
        | &StatusCode::NOT_IMPLEMENTED
        | &StatusCode::BAD_GATEWAY
        | &StatusCode::SERVICE_UNAVAILABLE
        | &StatusCode::GATEWAY_TIMEOUT
        | &StatusCode::HTTP_VERSION_NOT_SUPPORTED
        | &StatusCode::VARIANT_ALSO_NEGOTIATES
        | &StatusCode::INSUFFICIENT_STORAGE
        | &StatusCode::LOOP_DETECTED => {
            // HTML content for error status codes 50x
            // TODO: handle this potential panic properly
            error_page_content = PAGE_50X
                .get()
                .expect("PAGE_50X contant is not initialized")
                .to_string();
            status_code
        }
        // other status codes
        _ => status_code,
    };

    if error_page_content.is_empty() {
        error_page_content = format!(
            "<html><head><title>{}</title></head><body><center><h1>{}</h1></center></body></html>",
            status_code, status_code
        );
    }

    let mut body = Body::empty();
    let len = error_page_content.len() as u64;

    if method == Method::GET {
        body = Body::from(error_page_content)
    }

    let mut resp = Response::new(body);
    *resp.status_mut() = *status_code;
    resp.headers_mut().typed_insert(ContentLength(len));
    resp.headers_mut().typed_insert(ContentType::html());
    resp.headers_mut().typed_insert(AcceptRanges::bytes());

    Ok(resp)
}
diff --git a/src/fs.rs b/src/fs.rs
deleted file mode 100644
index c0b3cc5..0000000
--- a/src/fs.rs
+++ /dev/null
@@ -1,451 +0,0 @@
// Most of the this file is borrowed from https://github.com/seanmonstar/warp/blob/master/src/filters/fs.rs

use bytes::{Bytes, BytesMut};
use futures::future::Either;
use futures::{future, ready, stream, FutureExt, Stream, StreamExt, TryFutureExt};
use headers::{
    AcceptRanges, ContentLength, ContentRange, ContentType, HeaderMap, HeaderMapExt, HeaderValue,
    IfModifiedSince, IfRange, IfUnmodifiedSince, LastModified, Range,
};
use hyper::{Body, Response, StatusCode};
use percent_encoding::percent_decode_str;
use std::fs::Metadata;
use std::future::Future;
use std::io;
use std::ops::Bound;
use std::path::PathBuf;
use std::pin::Pin;
use std::sync::Arc;
use std::task::Poll;
use std::{cmp, path::Path};
use tokio::fs::File as TkFile;
use tokio::io::AsyncSeekExt;
use tokio_util::io::poll_read_buf;

/// A small Arch `PathBuf` wrapper since Arc<PathBuf> doesn't implement AsRef<Path>.
#[derive(Clone, Debug)]
pub struct ArcPath(pub Arc<PathBuf>);

impl AsRef<Path> for ArcPath {
    fn as_ref(&self) -> &Path {
        (*self.0).as_ref()
    }
}

/// Entry point to handle web server requests which map to specific files
/// on file system and return a file response.
pub async fn handle_request(
    base: &Path,
    headers: &HeaderMap<HeaderValue>,
    path: &str,
) -> Result<Response<Body>, StatusCode> {
    let base = Arc::new(base.into());
    let res = path_from_tail(base, path).await?;
    file_reply(headers, res).await
}

fn path_from_tail(
    base: Arc<PathBuf>,
    tail: &str,
) -> impl Future<Output = Result<(ArcPath, Metadata, bool), StatusCode>> + Send {
    future::ready(sanitize_path(base.as_ref(), tail)).and_then(|mut buf| async {
        match tokio::fs::metadata(&buf).await {
            Ok(meta) => {
                let mut auto_index = false;
                if meta.is_dir() {
                    tracing::debug!("dir: appending index.html to directory path");
                    buf.push("index.html");
                    auto_index = true;
                }
                tracing::trace!("dir: {:?}", buf);
                Ok((ArcPath(Arc::new(buf)), meta, auto_index))
            }
            Err(err) => {
                tracing::debug!("file not found: {:?}", err);
                Err(StatusCode::NOT_FOUND)
            }
        }
    })
}

/// Reply with a file content.
fn file_reply(
    headers: &HeaderMap<HeaderValue>,
    res: (ArcPath, Metadata, bool),
) -> impl Future<Output = Result<Response<Body>, StatusCode>> + Send {
    // TODO: directory listing

    let (path, meta, auto_index) = res;
    let conditionals = get_conditional_headers(headers);
    TkFile::open(path.clone()).then(move |res| match res {
        Ok(f) => Either::Left(file_conditional(f, path, meta, auto_index, conditionals)),
        Err(err) => {
            let status = match err.kind() {
                io::ErrorKind::NotFound => {
                    tracing::debug!("file not found: {:?}", path.as_ref().display());
                    StatusCode::NOT_FOUND
                }
                io::ErrorKind::PermissionDenied => {
                    tracing::warn!("file permission denied: {:?}", path.as_ref().display());
                    StatusCode::FORBIDDEN
                }
                _ => {
                    tracing::error!(
                        "file open error (path={:?}): {} ",
                        path.as_ref().display(),
                        err
                    );
                    StatusCode::INTERNAL_SERVER_ERROR
                }
            };
            Either::Right(future::err(status))
        }
    })
}

fn get_conditional_headers(header_list: &HeaderMap<HeaderValue>) -> Conditionals {
    let if_modified_since = header_list.typed_get::<IfModifiedSince>();
    let if_unmodified_since = header_list.typed_get::<IfUnmodifiedSince>();
    let if_range = header_list.typed_get::<IfRange>();
    let range = header_list.typed_get::<Range>();

    Conditionals {
        if_modified_since,
        if_unmodified_since,
        if_range,
        range,
    }
}

fn sanitize_path(base: impl AsRef<Path>, tail: &str) -> Result<PathBuf, StatusCode> {
    let mut buf = PathBuf::from(base.as_ref());
    let p = match percent_decode_str(tail).decode_utf8() {
        Ok(p) => p,
        Err(err) => {
            tracing::debug!("dir: failed to decode route={:?}: {:?}", tail, err);
            return Err(StatusCode::UNSUPPORTED_MEDIA_TYPE);
        }
    };
    tracing::trace!("dir? base={:?}, route={:?}", base.as_ref(), p);
    for seg in p.split('/') {
        if seg.starts_with("..") {
            tracing::warn!("dir: rejecting segment starting with '..'");
            return Err(StatusCode::NOT_FOUND);
        } else if seg.contains('\\') {
            tracing::warn!("dir: rejecting segment containing with backslash (\\)");
            return Err(StatusCode::NOT_FOUND);
        } else {
            buf.push(seg);
        }
    }
    Ok(buf)
}

#[derive(Debug)]
struct Conditionals {
    if_modified_since: Option<IfModifiedSince>,
    if_unmodified_since: Option<IfUnmodifiedSince>,
    if_range: Option<IfRange>,
    range: Option<Range>,
}

enum Cond {
    NoBody(Response<Body>),
    WithBody(Option<Range>),
}

impl Conditionals {
    fn check(self, last_modified: Option<LastModified>) -> Cond {
        if let Some(since) = self.if_unmodified_since {
            let precondition = last_modified
                .map(|time| since.precondition_passes(time.into()))
                .unwrap_or(false);

            tracing::trace!(
                "if-unmodified-since? {:?} vs {:?} = {}",
                since,
                last_modified,
                precondition
            );
            if !precondition {
                let mut res = Response::new(Body::empty());
                *res.status_mut() = StatusCode::PRECONDITION_FAILED;
                return Cond::NoBody(res);
            }
        }

        if let Some(since) = self.if_modified_since {
            tracing::trace!(
                "if-modified-since? header = {:?}, file = {:?}",
                since,
                last_modified
            );
            let unmodified = last_modified
                .map(|time| !since.is_modified(time.into()))
                // no last_modified means its always modified
                .unwrap_or(false);
            if unmodified {
                let mut res = Response::new(Body::empty());
                *res.status_mut() = StatusCode::NOT_MODIFIED;
                return Cond::NoBody(res);
            }
        }

        if let Some(if_range) = self.if_range {
            tracing::trace!("if-range? {:?} vs {:?}", if_range, last_modified);
            let can_range = !if_range.is_modified(None, last_modified.as_ref());
            if !can_range {
                return Cond::WithBody(None);
            }
        }

        Cond::WithBody(self.range)
    }
}

fn file_conditional(
    f: TkFile,
    path: ArcPath,
    meta: Metadata,
    auto_index: bool,
    conditionals: Conditionals,
) -> impl Future<Output = Result<Response<Body>, StatusCode>> + Send {
    file_metadata(f, meta, auto_index)
        .map_ok(|(file, meta)| response_body(file, meta, path, conditionals))
}

async fn file_metadata(
    f: TkFile,
    meta: Metadata,
    auto_index: bool,
) -> Result<(TkFile, Metadata), StatusCode> {
    if !auto_index {
        return Ok((f, meta));
    }
    match f.metadata().await {
        Ok(meta) => Ok((f, meta)),
        Err(err) => {
            tracing::debug!("file metadata error: {}", err);
            Err(StatusCode::INTERNAL_SERVER_ERROR)
        }
    }
}

fn response_body(
    file: TkFile,
    meta: Metadata,
    path: ArcPath,
    conditionals: Conditionals,
) -> Response<Body> {
    let mut len = meta.len();
    let modified = meta.modified().ok().map(LastModified::from);
    match conditionals.check(modified) {
        Cond::NoBody(resp) => resp,
        Cond::WithBody(range) => {
            bytes_range(range, len)
                .map(|(start, end)| {
                    let sub_len = end - start;
                    let buf_size = optimal_buf_size(&meta);
                    let stream = file_stream(file, buf_size, (start, end));
                    let body = Body::wrap_stream(stream);

                    let mut resp = Response::new(body);

                    if sub_len != len {
                        *resp.status_mut() = StatusCode::PARTIAL_CONTENT;
                        resp.headers_mut().typed_insert(
                            ContentRange::bytes(start..end, len).expect("valid ContentRange"),
                        );

                        len = sub_len;
                    }

                    let mime = mime_guess::from_path(path.as_ref()).first_or_octet_stream();

                    resp.headers_mut().typed_insert(ContentLength(len));
                    resp.headers_mut().typed_insert(ContentType::from(mime));
                    resp.headers_mut().typed_insert(AcceptRanges::bytes());

                    if let Some(last_modified) = modified {
                        resp.headers_mut().typed_insert(last_modified);
                    }

                    resp
                })
                .unwrap_or_else(|BadRange| {
                    // bad byte range
                    let mut resp = Response::new(Body::empty());
                    *resp.status_mut() = StatusCode::RANGE_NOT_SATISFIABLE;
                    resp.headers_mut()
                        .typed_insert(ContentRange::unsatisfied_bytes(len));
                    resp
                })
        }
    }
}

struct BadRange;

fn bytes_range(range: Option<Range>, max_len: u64) -> Result<(u64, u64), BadRange> {
    let range = if let Some(range) = range {
        range
    } else {
        return Ok((0, max_len));
    };

    let ret = range
        .iter()
        .map(|(start, end)| {
            let start = match start {
                Bound::Unbounded => 0,
                Bound::Included(s) => s,
                Bound::Excluded(s) => s + 1,
            };

            let end = match end {
                Bound::Unbounded => max_len,
                Bound::Included(s) => {
                    // For the special case where s == the file size
                    if s == max_len {
                        s
                    } else {
                        s + 1
                    }
                }
                Bound::Excluded(s) => s,
            };

            if start < end && end <= max_len {
                Ok((start, end))
            } else {
                tracing::trace!("unsatisfiable byte range: {}-{}/{}", start, end, max_len);
                Err(BadRange)
            }
        })
        .next()
        .unwrap_or(Ok((0, max_len)));
    ret
}

fn file_stream(
    mut file: TkFile,
    buf_size: usize,
    (start, end): (u64, u64),
) -> impl Stream<Item = Result<Bytes, io::Error>> + Send {
    let seek = async move {
        if start != 0 {
            file.seek(io::SeekFrom::Start(start)).await?;
        }
        Ok(file)
    };

    seek.into_stream()
        .map(move |result| {
            let mut buf = BytesMut::new();
            let mut len = end - start;
            let mut f = match result {
                Ok(f) => f,
                Err(f) => return Either::Left(stream::once(future::err(f))),
            };

            Either::Right(stream::poll_fn(move |cx| {
                if len == 0 {
                    return Poll::Ready(None);
                }
                reserve_at_least(&mut buf, buf_size);

                let n = match ready!(poll_read_buf(Pin::new(&mut f), cx, &mut buf)) {
                    Ok(n) => n as u64,
                    Err(err) => {
                        tracing::debug!("file read error: {}", err);
                        return Poll::Ready(Some(Err(err)));
                    }
                };

                if n == 0 {
                    tracing::debug!("file read found EOF before expected length");
                    return Poll::Ready(None);
                }

                let mut chunk = buf.split().freeze();
                if n > len {
                    chunk = chunk.split_to(len as usize);
                    len = 0;
                } else {
                    len -= n;
                }

                Poll::Ready(Some(Ok(chunk)))
            }))
        })
        .flatten()
}

fn reserve_at_least(buf: &mut BytesMut, cap: usize) {
    if buf.capacity() - buf.len() < cap {
        buf.reserve(cap);
    }
}

const DEFAULT_READ_BUF_SIZE: usize = 8_192;

fn optimal_buf_size(metadata: &Metadata) -> usize {
    let block_size = get_block_size(metadata);

    // If file length is smaller than block size, don't waste space
    // reserving a bigger-than-needed buffer.
    cmp::min(block_size as u64, metadata.len()) as usize
}

#[cfg(unix)]
fn get_block_size(metadata: &Metadata) -> usize {
    use std::os::unix::fs::MetadataExt;
    //TODO: blksize() returns u64, should handle bad cast...
    //(really, a block size bigger than 4gb?)

    // Use device blocksize unless it's really small.
    cmp::max(metadata.blksize() as usize, DEFAULT_READ_BUF_SIZE)
}

#[cfg(not(unix))]
fn get_block_size(_metadata: &Metadata) -> usize {
    DEFAULT_READ_BUF_SIZE
}

#[cfg(test)]
mod tests {
    use super::sanitize_path;
    use bytes::BytesMut;

    #[test]
    fn test_sanitize_path() {
        let base = "/var/www";

        fn p(s: &str) -> &::std::path::Path {
            s.as_ref()
        }

        assert_eq!(
            sanitize_path(base, "/foo.html").unwrap(),
            p("/var/www/foo.html")
        );

        // bad paths
        sanitize_path(base, "/../foo.html").expect_err("dot dot");

        sanitize_path(base, "/C:\\/foo.html").expect_err("C:\\");
    }

    #[test]
    fn test_reserve_at_least() {
        let mut buf = BytesMut::new();
        let cap = 8_192;

        assert_eq!(buf.len(), 0);
        assert_eq!(buf.capacity(), 0);

        super::reserve_at_least(&mut buf, cap);
        assert_eq!(buf.len(), 0);
        assert_eq!(buf.capacity(), cap);
    }
}
diff --git a/src/handler.rs b/src/handler.rs
new file mode 100644
index 0000000..5dfe141
--- /dev/null
+++ b/src/handler.rs
@@ -0,0 +1,13 @@
use hyper::{Body, Request, Response};
use std::path::Path;

use crate::static_files;
use crate::{error::Result, error_page};

/// Main server request handler.
pub async fn handle_request(base: &Path, req: Request<Body>) -> Result<Response<Body>> {
    match static_files::handle_request(base, req.headers(), req.uri().path()).await {
        Ok(resp) => Ok(resp),
        Err(status) => error_page::get_error_response(req.method(), &status),
    }
}
diff --git a/src/lib.rs b/src/lib.rs
index 21caaec..add858b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,13 +4,13 @@
extern crate anyhow;

pub mod config;
pub mod controller;
pub mod error_page;
pub mod fs;
pub mod handler;
pub mod helpers;
pub mod logger;
pub mod server;
pub mod signals;
pub mod static_files;

#[macro_use]
pub mod error;
diff --git a/src/server.rs b/src/server.rs
index bdb9eb8..0fa9876 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -8,8 +8,8 @@ use crate::{
    config::{Config, CONFIG},
    error_page,
};
use crate::{controller::handle, fs::ArcPath};
use crate::{error, helpers, logger, Result};
use crate::{handler, static_files::ArcPath};

/// Define a multi-thread HTTP or HTTP/2 web server.
pub struct Server {
@@ -88,7 +88,7 @@ impl Server {
                async move {
                    Ok::<_, error::Error>(service_fn(move |req| {
                        let root_dir = root_dir.clone();
                        async move { handle(root_dir.as_ref(), req).await }
                        async move { handler::handle_request(root_dir.as_ref(), req).await }
                    }))
                }
            });
diff --git a/src/static_files.rs b/src/static_files.rs
new file mode 100644
index 0000000..bf38060
--- /dev/null
+++ b/src/static_files.rs
@@ -0,0 +1,452 @@
// Static File handler
// -> Most of the file is borrowed from https://github.com/seanmonstar/warp/blob/master/src/filters/fs.rs

use bytes::{Bytes, BytesMut};
use futures::future::Either;
use futures::{future, ready, stream, FutureExt, Stream, StreamExt, TryFutureExt};
use headers::{
    AcceptRanges, ContentLength, ContentRange, ContentType, HeaderMap, HeaderMapExt, HeaderValue,
    IfModifiedSince, IfRange, IfUnmodifiedSince, LastModified, Range,
};
use hyper::{Body, Response, StatusCode};
use percent_encoding::percent_decode_str;
use std::fs::Metadata;
use std::future::Future;
use std::io;
use std::ops::Bound;
use std::path::PathBuf;
use std::pin::Pin;
use std::sync::Arc;
use std::task::Poll;
use std::{cmp, path::Path};
use tokio::fs::File as TkFile;
use tokio::io::AsyncSeekExt;
use tokio_util::io::poll_read_buf;

/// A small Arch `PathBuf` wrapper since Arc<PathBuf> doesn't implement AsRef<Path>.
#[derive(Clone, Debug)]
pub struct ArcPath(pub Arc<PathBuf>);

impl AsRef<Path> for ArcPath {
    fn as_ref(&self) -> &Path {
        (*self.0).as_ref()
    }
}

/// Entry point to handle web server requests which map to specific files
/// on file system and return a file response.
pub async fn handle_request(
    base: &Path,
    headers: &HeaderMap<HeaderValue>,
    uri_path: &str,
) -> Result<Response<Body>, StatusCode> {
    let base = Arc::new(base.into());
    let res = path_from_tail(base, uri_path).await?;
    file_reply(headers, res).await
}

fn path_from_tail(
    base: Arc<PathBuf>,
    tail: &str,
) -> impl Future<Output = Result<(ArcPath, Metadata, bool), StatusCode>> + Send {
    future::ready(sanitize_path(base.as_ref(), tail)).and_then(|mut buf| async {
        match tokio::fs::metadata(&buf).await {
            Ok(meta) => {
                let mut auto_index = false;
                if meta.is_dir() {
                    tracing::debug!("dir: appending index.html to directory path");
                    buf.push("index.html");
                    auto_index = true;
                }
                tracing::trace!("dir: {:?}", buf);
                Ok((ArcPath(Arc::new(buf)), meta, auto_index))
            }
            Err(err) => {
                tracing::debug!("file not found: {:?}", err);
                Err(StatusCode::NOT_FOUND)
            }
        }
    })
}

/// Reply with a file content.
fn file_reply(
    headers: &HeaderMap<HeaderValue>,
    res: (ArcPath, Metadata, bool),
) -> impl Future<Output = Result<Response<Body>, StatusCode>> + Send {
    // TODO: directory listing

    let (path, meta, auto_index) = res;
    let conditionals = get_conditional_headers(headers);
    TkFile::open(path.clone()).then(move |res| match res {
        Ok(f) => Either::Left(file_conditional(f, path, meta, auto_index, conditionals)),
        Err(err) => {
            let status = match err.kind() {
                io::ErrorKind::NotFound => {
                    tracing::debug!("file not found: {:?}", path.as_ref().display());
                    StatusCode::NOT_FOUND
                }
                io::ErrorKind::PermissionDenied => {
                    tracing::warn!("file permission denied: {:?}", path.as_ref().display());
                    StatusCode::FORBIDDEN
                }
                _ => {
                    tracing::error!(
                        "file open error (path={:?}): {} ",
                        path.as_ref().display(),
                        err
                    );
                    StatusCode::INTERNAL_SERVER_ERROR
                }
            };
            Either::Right(future::err(status))
        }
    })
}

fn get_conditional_headers(header_list: &HeaderMap<HeaderValue>) -> Conditionals {
    let if_modified_since = header_list.typed_get::<IfModifiedSince>();
    let if_unmodified_since = header_list.typed_get::<IfUnmodifiedSince>();
    let if_range = header_list.typed_get::<IfRange>();
    let range = header_list.typed_get::<Range>();

    Conditionals {
        if_modified_since,
        if_unmodified_since,
        if_range,
        range,
    }
}

fn sanitize_path(base: impl AsRef<Path>, tail: &str) -> Result<PathBuf, StatusCode> {
    let mut buf = PathBuf::from(base.as_ref());
    let p = match percent_decode_str(tail).decode_utf8() {
        Ok(p) => p,
        Err(err) => {
            tracing::debug!("dir: failed to decode route={:?}: {:?}", tail, err);
            return Err(StatusCode::UNSUPPORTED_MEDIA_TYPE);
        }
    };
    tracing::trace!("dir? base={:?}, route={:?}", base.as_ref(), p);
    for seg in p.split('/') {
        if seg.starts_with("..") {
            tracing::warn!("dir: rejecting segment starting with '..'");
            return Err(StatusCode::NOT_FOUND);
        } else if seg.contains('\\') {
            tracing::warn!("dir: rejecting segment containing with backslash (\\)");
            return Err(StatusCode::NOT_FOUND);
        } else {
            buf.push(seg);
        }
    }
    Ok(buf)
}

#[derive(Debug)]
struct Conditionals {
    if_modified_since: Option<IfModifiedSince>,
    if_unmodified_since: Option<IfUnmodifiedSince>,
    if_range: Option<IfRange>,
    range: Option<Range>,
}

enum Cond {
    NoBody(Response<Body>),
    WithBody(Option<Range>),
}

impl Conditionals {
    fn check(self, last_modified: Option<LastModified>) -> Cond {
        if let Some(since) = self.if_unmodified_since {
            let precondition = last_modified
                .map(|time| since.precondition_passes(time.into()))
                .unwrap_or(false);

            tracing::trace!(
                "if-unmodified-since? {:?} vs {:?} = {}",
                since,
                last_modified,
                precondition
            );
            if !precondition {
                let mut res = Response::new(Body::empty());
                *res.status_mut() = StatusCode::PRECONDITION_FAILED;
                return Cond::NoBody(res);
            }
        }

        if let Some(since) = self.if_modified_since {
            tracing::trace!(
                "if-modified-since? header = {:?}, file = {:?}",
                since,
                last_modified
            );
            let unmodified = last_modified
                .map(|time| !since.is_modified(time.into()))
                // no last_modified means its always modified
                .unwrap_or(false);
            if unmodified {
                let mut res = Response::new(Body::empty());
                *res.status_mut() = StatusCode::NOT_MODIFIED;
                return Cond::NoBody(res);
            }
        }

        if let Some(if_range) = self.if_range {
            tracing::trace!("if-range? {:?} vs {:?}", if_range, last_modified);
            let can_range = !if_range.is_modified(None, last_modified.as_ref());
            if !can_range {
                return Cond::WithBody(None);
            }
        }

        Cond::WithBody(self.range)
    }
}

fn file_conditional(
    f: TkFile,
    path: ArcPath,
    meta: Metadata,
    auto_index: bool,
    conditionals: Conditionals,
) -> impl Future<Output = Result<Response<Body>, StatusCode>> + Send {
    file_metadata(f, meta, auto_index)
        .map_ok(|(file, meta)| response_body(file, meta, path, conditionals))
}

async fn file_metadata(
    f: TkFile,
    meta: Metadata,
    auto_index: bool,
) -> Result<(TkFile, Metadata), StatusCode> {
    if !auto_index {
        return Ok((f, meta));
    }
    match f.metadata().await {
        Ok(meta) => Ok((f, meta)),
        Err(err) => {
            tracing::debug!("file metadata error: {}", err);
            Err(StatusCode::INTERNAL_SERVER_ERROR)
        }
    }
}

fn response_body(
    file: TkFile,
    meta: Metadata,
    path: ArcPath,
    conditionals: Conditionals,
) -> Response<Body> {
    let mut len = meta.len();
    let modified = meta.modified().ok().map(LastModified::from);
    match conditionals.check(modified) {
        Cond::NoBody(resp) => resp,
        Cond::WithBody(range) => {
            bytes_range(range, len)
                .map(|(start, end)| {
                    let sub_len = end - start;
                    let buf_size = optimal_buf_size(&meta);
                    let stream = file_stream(file, buf_size, (start, end));
                    let body = Body::wrap_stream(stream);

                    let mut resp = Response::new(body);

                    if sub_len != len {
                        *resp.status_mut() = StatusCode::PARTIAL_CONTENT;
                        resp.headers_mut().typed_insert(
                            ContentRange::bytes(start..end, len).expect("valid ContentRange"),
                        );

                        len = sub_len;
                    }

                    let mime = mime_guess::from_path(path.as_ref()).first_or_octet_stream();

                    resp.headers_mut().typed_insert(ContentLength(len));
                    resp.headers_mut().typed_insert(ContentType::from(mime));
                    resp.headers_mut().typed_insert(AcceptRanges::bytes());

                    if let Some(last_modified) = modified {
                        resp.headers_mut().typed_insert(last_modified);
                    }

                    resp
                })
                .unwrap_or_else(|BadRange| {
                    // bad byte range
                    let mut resp = Response::new(Body::empty());
                    *resp.status_mut() = StatusCode::RANGE_NOT_SATISFIABLE;
                    resp.headers_mut()
                        .typed_insert(ContentRange::unsatisfied_bytes(len));
                    resp
                })
        }
    }
}

struct BadRange;

fn bytes_range(range: Option<Range>, max_len: u64) -> Result<(u64, u64), BadRange> {
    let range = if let Some(range) = range {
        range
    } else {
        return Ok((0, max_len));
    };

    let ret = range
        .iter()
        .map(|(start, end)| {
            let start = match start {
                Bound::Unbounded => 0,
                Bound::Included(s) => s,
                Bound::Excluded(s) => s + 1,
            };

            let end = match end {
                Bound::Unbounded => max_len,
                Bound::Included(s) => {
                    // For the special case where s == the file size
                    if s == max_len {
                        s
                    } else {
                        s + 1
                    }
                }
                Bound::Excluded(s) => s,
            };

            if start < end && end <= max_len {
                Ok((start, end))
            } else {
                tracing::trace!("unsatisfiable byte range: {}-{}/{}", start, end, max_len);
                Err(BadRange)
            }
        })
        .next()
        .unwrap_or(Ok((0, max_len)));
    ret
}

fn file_stream(
    mut file: TkFile,
    buf_size: usize,
    (start, end): (u64, u64),
) -> impl Stream<Item = Result<Bytes, io::Error>> + Send {
    let seek = async move {
        if start != 0 {
            file.seek(io::SeekFrom::Start(start)).await?;
        }
        Ok(file)
    };

    seek.into_stream()
        .map(move |result| {
            let mut buf = BytesMut::new();
            let mut len = end - start;
            let mut f = match result {
                Ok(f) => f,
                Err(f) => return Either::Left(stream::once(future::err(f))),
            };

            Either::Right(stream::poll_fn(move |cx| {
                if len == 0 {
                    return Poll::Ready(None);
                }
                reserve_at_least(&mut buf, buf_size);

                let n = match ready!(poll_read_buf(Pin::new(&mut f), cx, &mut buf)) {
                    Ok(n) => n as u64,
                    Err(err) => {
                        tracing::debug!("file read error: {}", err);
                        return Poll::Ready(Some(Err(err)));
                    }
                };

                if n == 0 {
                    tracing::debug!("file read found EOF before expected length");
                    return Poll::Ready(None);
                }

                let mut chunk = buf.split().freeze();
                if n > len {
                    chunk = chunk.split_to(len as usize);
                    len = 0;
                } else {
                    len -= n;
                }

                Poll::Ready(Some(Ok(chunk)))
            }))
        })
        .flatten()
}

fn reserve_at_least(buf: &mut BytesMut, cap: usize) {
    if buf.capacity() - buf.len() < cap {
        buf.reserve(cap);
    }
}

const DEFAULT_READ_BUF_SIZE: usize = 8_192;

fn optimal_buf_size(metadata: &Metadata) -> usize {
    let block_size = get_block_size(metadata);

    // If file length is smaller than block size, don't waste space
    // reserving a bigger-than-needed buffer.
    cmp::min(block_size as u64, metadata.len()) as usize
}

#[cfg(unix)]
fn get_block_size(metadata: &Metadata) -> usize {
    use std::os::unix::fs::MetadataExt;
    //TODO: blksize() returns u64, should handle bad cast...
    //(really, a block size bigger than 4gb?)

    // Use device blocksize unless it's really small.
    cmp::max(metadata.blksize() as usize, DEFAULT_READ_BUF_SIZE)
}

#[cfg(not(unix))]
fn get_block_size(_metadata: &Metadata) -> usize {
    DEFAULT_READ_BUF_SIZE
}

#[cfg(test)]
mod tests {
    use super::sanitize_path;
    use bytes::BytesMut;

    #[test]
    fn test_sanitize_path() {
        let base = "/var/www";

        fn p(s: &str) -> &::std::path::Path {
            s.as_ref()
        }

        assert_eq!(
            sanitize_path(base, "/foo.html").unwrap(),
            p("/var/www/foo.html")
        );

        // bad paths
        sanitize_path(base, "/../foo.html").expect_err("dot dot");

        sanitize_path(base, "/C:\\/foo.html").expect_err("C:\\");
    }

    #[test]
    fn test_reserve_at_least() {
        let mut buf = BytesMut::new();
        let cap = 8_192;

        assert_eq!(buf.len(), 0);
        assert_eq!(buf.capacity(), 0);

        super::reserve_at_least(&mut buf, cap);
        assert_eq!(buf.len(), 0);
        assert_eq!(buf.capacity(), cap);
    }
}