From d453567468cb15e2c8b4213c875497ce0d7e53e8 Mon Sep 17 00:00:00 2001 From: Jose Quintana Date: Sat, 1 May 2021 15:50:05 +0200 Subject: [PATCH] refactor: default and custom 404-50x error pages --- Cargo.lock | 20 ++++++++++---------- src/controller.rs | 38 -------------------------------------- src/error_page.rs | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/fs.rs | 451 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/handler.rs | 13 +++++++++++++ src/lib.rs | 4 ++-- src/server.rs | 4 ++-- src/static_files.rs | 452 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 563 insertions(+), 503 deletions(-) delete mode 100644 src/controller.rs delete mode 100644 src/fs.rs create mode 100644 src/handler.rs create mode 100644 src/static_files.rs diff --git a/Cargo.lock b/Cargo.lock index 79aaa67..7ac142d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -352,9 +352,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.3.4" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" [[package]] name = "mime" @@ -539,9 +539,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.4.6" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26af418b574bd56588335b3a3659a65725d4e636eb1016c2f9e3b38c7cc759" +checksum = "1efb2352a0f4d4b128f734b5c44c79ff80117351138733f12f982fe3e2b13343" dependencies = [ "regex-syntax", ] @@ -558,9 +558,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.23" +version = "0.6.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" +checksum = "00efb87459ba4f6fb2169d20f68565555688e1250ee6825cdf6254f8b48fafb2" [[package]] name = "ryu" @@ -769,9 +769,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" [[package]] name = "tracing" -version = "0.1.25" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ebdc2bb4498ab1ab5f5b73c5803825e60199229ccba0698170e3be0e7f959f" +checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d" dependencies = [ "cfg-if 1.0.0", "pin-project-lite", @@ -792,9 +792,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f50de3927f93d202783f4513cda820ab47ef17f624b03c096e86ef00c67e6b5f" +checksum = "a9ff14f98b1a4b289c6248a023c1c2fa1491062964e9fed67ab29c4e4da4a052" dependencies = [ "lazy_static", ] diff --git a/src/controller.rs b/src/controller.rs deleted file mode 100644 index bb44d2f..0000000 --- a/src/controller.rs +++ /dev/null @@ -1,38 +0,0 @@ -use headers::{AcceptRanges, ContentLength, ContentType, HeaderMapExt}; -use hyper::{Body, Request, Response}; -use std::path::Path; - -use crate::error::Result; -use crate::fs; - -/// Main server request entry point. -pub async fn handle(base: &Path, req: Request) -> Result> { - let path = req.uri().path(); - let resp = fs::handle_request(base, req.headers(), path).await; - match resp { - Ok(resp) => Ok(resp), - Err(status) => { - let method = req.method(); - tracing::warn!(method = ?method, status = status.as_u16(), error = ?status.to_string()); - - let mut body = Body::empty(); - let mut len = 0_u64; - if method == hyper::Method::GET { - let content = format!( - "{}

{}

", - status, status - ); - len = content.len() as u64; - body = Body::from(content) - } - - let mut resp = Response::new(body); - *resp.status_mut() = status; - resp.headers_mut().typed_insert(ContentLength(len)); - resp.headers_mut().typed_insert(ContentType::html()); - resp.headers_mut().typed_insert(AcceptRanges::bytes()); - - Ok(resp) - } - } -} diff --git a/src/error_page.rs b/src/error_page.rs index db455f2..c9088ed 100644 --- a/src/error_page.rs +++ b/src/error_page.rs @@ -1,4 +1,88 @@ +use headers::{AcceptRanges, ContentLength, ContentType, HeaderMapExt}; +use hyper::{Body, Method, Response, StatusCode}; use once_cell::sync::OnceCell; +use crate::error::Result; + pub static PAGE_404: OnceCell = OnceCell::new(); pub static PAGE_50X: OnceCell = OnceCell::new(); + +pub fn get_error_response(method: &Method, status_code: &StatusCode) -> Result> { + tracing::warn!(method = ?method, status = status_code.as_u16(), error = ?status_code.to_string()); + + // Check for 4xx and 50x status codes + let mut error_page_content = String::new(); + let status_code = match status_code { + // 4xx + &StatusCode::BAD_REQUEST + | &StatusCode::UNAUTHORIZED + | &StatusCode::PAYMENT_REQUIRED + | &StatusCode::FORBIDDEN + | &StatusCode::NOT_FOUND + | &StatusCode::METHOD_NOT_ALLOWED + | &StatusCode::NOT_ACCEPTABLE + | &StatusCode::PROXY_AUTHENTICATION_REQUIRED + | &StatusCode::REQUEST_TIMEOUT + | &StatusCode::CONFLICT + | &StatusCode::GONE + | &StatusCode::LENGTH_REQUIRED + | &StatusCode::PRECONDITION_FAILED + | &StatusCode::PAYLOAD_TOO_LARGE + | &StatusCode::URI_TOO_LONG + | &StatusCode::UNSUPPORTED_MEDIA_TYPE + | &StatusCode::RANGE_NOT_SATISFIABLE + | &StatusCode::EXPECTATION_FAILED => { + // Extra check for 404 status code and HTML content + if status_code == &StatusCode::NOT_FOUND { + // TODO: handle this potential panic properly + error_page_content = PAGE_404 + .get() + .expect("PAGE_404 contant is not initialized") + .to_string(); + } + status_code + } + // 50x + &StatusCode::INTERNAL_SERVER_ERROR + | &StatusCode::NOT_IMPLEMENTED + | &StatusCode::BAD_GATEWAY + | &StatusCode::SERVICE_UNAVAILABLE + | &StatusCode::GATEWAY_TIMEOUT + | &StatusCode::HTTP_VERSION_NOT_SUPPORTED + | &StatusCode::VARIANT_ALSO_NEGOTIATES + | &StatusCode::INSUFFICIENT_STORAGE + | &StatusCode::LOOP_DETECTED => { + // HTML content for error status codes 50x + // TODO: handle this potential panic properly + error_page_content = PAGE_50X + .get() + .expect("PAGE_50X contant is not initialized") + .to_string(); + status_code + } + // other status codes + _ => status_code, + }; + + if error_page_content.is_empty() { + error_page_content = format!( + "{}

{}

", + status_code, status_code + ); + } + + let mut body = Body::empty(); + let len = error_page_content.len() as u64; + + if method == Method::GET { + body = Body::from(error_page_content) + } + + let mut resp = Response::new(body); + *resp.status_mut() = *status_code; + resp.headers_mut().typed_insert(ContentLength(len)); + resp.headers_mut().typed_insert(ContentType::html()); + resp.headers_mut().typed_insert(AcceptRanges::bytes()); + + Ok(resp) +} diff --git a/src/fs.rs b/src/fs.rs deleted file mode 100644 index c0b3cc5..0000000 --- a/src/fs.rs +++ /dev/null @@ -1,451 +0,0 @@ -// Most of the this file is borrowed from https://github.com/seanmonstar/warp/blob/master/src/filters/fs.rs - -use bytes::{Bytes, BytesMut}; -use futures::future::Either; -use futures::{future, ready, stream, FutureExt, Stream, StreamExt, TryFutureExt}; -use headers::{ - AcceptRanges, ContentLength, ContentRange, ContentType, HeaderMap, HeaderMapExt, HeaderValue, - IfModifiedSince, IfRange, IfUnmodifiedSince, LastModified, Range, -}; -use hyper::{Body, Response, StatusCode}; -use percent_encoding::percent_decode_str; -use std::fs::Metadata; -use std::future::Future; -use std::io; -use std::ops::Bound; -use std::path::PathBuf; -use std::pin::Pin; -use std::sync::Arc; -use std::task::Poll; -use std::{cmp, path::Path}; -use tokio::fs::File as TkFile; -use tokio::io::AsyncSeekExt; -use tokio_util::io::poll_read_buf; - -/// A small Arch `PathBuf` wrapper since Arc doesn't implement AsRef. -#[derive(Clone, Debug)] -pub struct ArcPath(pub Arc); - -impl AsRef for ArcPath { - fn as_ref(&self) -> &Path { - (*self.0).as_ref() - } -} - -/// Entry point to handle web server requests which map to specific files -/// on file system and return a file response. -pub async fn handle_request( - base: &Path, - headers: &HeaderMap, - path: &str, -) -> Result, StatusCode> { - let base = Arc::new(base.into()); - let res = path_from_tail(base, path).await?; - file_reply(headers, res).await -} - -fn path_from_tail( - base: Arc, - tail: &str, -) -> impl Future> + Send { - future::ready(sanitize_path(base.as_ref(), tail)).and_then(|mut buf| async { - match tokio::fs::metadata(&buf).await { - Ok(meta) => { - let mut auto_index = false; - if meta.is_dir() { - tracing::debug!("dir: appending index.html to directory path"); - buf.push("index.html"); - auto_index = true; - } - tracing::trace!("dir: {:?}", buf); - Ok((ArcPath(Arc::new(buf)), meta, auto_index)) - } - Err(err) => { - tracing::debug!("file not found: {:?}", err); - Err(StatusCode::NOT_FOUND) - } - } - }) -} - -/// Reply with a file content. -fn file_reply( - headers: &HeaderMap, - res: (ArcPath, Metadata, bool), -) -> impl Future, StatusCode>> + Send { - // TODO: directory listing - - let (path, meta, auto_index) = res; - let conditionals = get_conditional_headers(headers); - TkFile::open(path.clone()).then(move |res| match res { - Ok(f) => Either::Left(file_conditional(f, path, meta, auto_index, conditionals)), - Err(err) => { - let status = match err.kind() { - io::ErrorKind::NotFound => { - tracing::debug!("file not found: {:?}", path.as_ref().display()); - StatusCode::NOT_FOUND - } - io::ErrorKind::PermissionDenied => { - tracing::warn!("file permission denied: {:?}", path.as_ref().display()); - StatusCode::FORBIDDEN - } - _ => { - tracing::error!( - "file open error (path={:?}): {} ", - path.as_ref().display(), - err - ); - StatusCode::INTERNAL_SERVER_ERROR - } - }; - Either::Right(future::err(status)) - } - }) -} - -fn get_conditional_headers(header_list: &HeaderMap) -> Conditionals { - let if_modified_since = header_list.typed_get::(); - let if_unmodified_since = header_list.typed_get::(); - let if_range = header_list.typed_get::(); - let range = header_list.typed_get::(); - - Conditionals { - if_modified_since, - if_unmodified_since, - if_range, - range, - } -} - -fn sanitize_path(base: impl AsRef, tail: &str) -> Result { - let mut buf = PathBuf::from(base.as_ref()); - let p = match percent_decode_str(tail).decode_utf8() { - Ok(p) => p, - Err(err) => { - tracing::debug!("dir: failed to decode route={:?}: {:?}", tail, err); - return Err(StatusCode::UNSUPPORTED_MEDIA_TYPE); - } - }; - tracing::trace!("dir? base={:?}, route={:?}", base.as_ref(), p); - for seg in p.split('/') { - if seg.starts_with("..") { - tracing::warn!("dir: rejecting segment starting with '..'"); - return Err(StatusCode::NOT_FOUND); - } else if seg.contains('\\') { - tracing::warn!("dir: rejecting segment containing with backslash (\\)"); - return Err(StatusCode::NOT_FOUND); - } else { - buf.push(seg); - } - } - Ok(buf) -} - -#[derive(Debug)] -struct Conditionals { - if_modified_since: Option, - if_unmodified_since: Option, - if_range: Option, - range: Option, -} - -enum Cond { - NoBody(Response), - WithBody(Option), -} - -impl Conditionals { - fn check(self, last_modified: Option) -> Cond { - if let Some(since) = self.if_unmodified_since { - let precondition = last_modified - .map(|time| since.precondition_passes(time.into())) - .unwrap_or(false); - - tracing::trace!( - "if-unmodified-since? {:?} vs {:?} = {}", - since, - last_modified, - precondition - ); - if !precondition { - let mut res = Response::new(Body::empty()); - *res.status_mut() = StatusCode::PRECONDITION_FAILED; - return Cond::NoBody(res); - } - } - - if let Some(since) = self.if_modified_since { - tracing::trace!( - "if-modified-since? header = {:?}, file = {:?}", - since, - last_modified - ); - let unmodified = last_modified - .map(|time| !since.is_modified(time.into())) - // no last_modified means its always modified - .unwrap_or(false); - if unmodified { - let mut res = Response::new(Body::empty()); - *res.status_mut() = StatusCode::NOT_MODIFIED; - return Cond::NoBody(res); - } - } - - if let Some(if_range) = self.if_range { - tracing::trace!("if-range? {:?} vs {:?}", if_range, last_modified); - let can_range = !if_range.is_modified(None, last_modified.as_ref()); - if !can_range { - return Cond::WithBody(None); - } - } - - Cond::WithBody(self.range) - } -} - -fn file_conditional( - f: TkFile, - path: ArcPath, - meta: Metadata, - auto_index: bool, - conditionals: Conditionals, -) -> impl Future, StatusCode>> + Send { - file_metadata(f, meta, auto_index) - .map_ok(|(file, meta)| response_body(file, meta, path, conditionals)) -} - -async fn file_metadata( - f: TkFile, - meta: Metadata, - auto_index: bool, -) -> Result<(TkFile, Metadata), StatusCode> { - if !auto_index { - return Ok((f, meta)); - } - match f.metadata().await { - Ok(meta) => Ok((f, meta)), - Err(err) => { - tracing::debug!("file metadata error: {}", err); - Err(StatusCode::INTERNAL_SERVER_ERROR) - } - } -} - -fn response_body( - file: TkFile, - meta: Metadata, - path: ArcPath, - conditionals: Conditionals, -) -> Response { - let mut len = meta.len(); - let modified = meta.modified().ok().map(LastModified::from); - match conditionals.check(modified) { - Cond::NoBody(resp) => resp, - Cond::WithBody(range) => { - bytes_range(range, len) - .map(|(start, end)| { - let sub_len = end - start; - let buf_size = optimal_buf_size(&meta); - let stream = file_stream(file, buf_size, (start, end)); - let body = Body::wrap_stream(stream); - - let mut resp = Response::new(body); - - if sub_len != len { - *resp.status_mut() = StatusCode::PARTIAL_CONTENT; - resp.headers_mut().typed_insert( - ContentRange::bytes(start..end, len).expect("valid ContentRange"), - ); - - len = sub_len; - } - - let mime = mime_guess::from_path(path.as_ref()).first_or_octet_stream(); - - resp.headers_mut().typed_insert(ContentLength(len)); - resp.headers_mut().typed_insert(ContentType::from(mime)); - resp.headers_mut().typed_insert(AcceptRanges::bytes()); - - if let Some(last_modified) = modified { - resp.headers_mut().typed_insert(last_modified); - } - - resp - }) - .unwrap_or_else(|BadRange| { - // bad byte range - let mut resp = Response::new(Body::empty()); - *resp.status_mut() = StatusCode::RANGE_NOT_SATISFIABLE; - resp.headers_mut() - .typed_insert(ContentRange::unsatisfied_bytes(len)); - resp - }) - } - } -} - -struct BadRange; - -fn bytes_range(range: Option, max_len: u64) -> Result<(u64, u64), BadRange> { - let range = if let Some(range) = range { - range - } else { - return Ok((0, max_len)); - }; - - let ret = range - .iter() - .map(|(start, end)| { - let start = match start { - Bound::Unbounded => 0, - Bound::Included(s) => s, - Bound::Excluded(s) => s + 1, - }; - - let end = match end { - Bound::Unbounded => max_len, - Bound::Included(s) => { - // For the special case where s == the file size - if s == max_len { - s - } else { - s + 1 - } - } - Bound::Excluded(s) => s, - }; - - if start < end && end <= max_len { - Ok((start, end)) - } else { - tracing::trace!("unsatisfiable byte range: {}-{}/{}", start, end, max_len); - Err(BadRange) - } - }) - .next() - .unwrap_or(Ok((0, max_len))); - ret -} - -fn file_stream( - mut file: TkFile, - buf_size: usize, - (start, end): (u64, u64), -) -> impl Stream> + Send { - let seek = async move { - if start != 0 { - file.seek(io::SeekFrom::Start(start)).await?; - } - Ok(file) - }; - - seek.into_stream() - .map(move |result| { - let mut buf = BytesMut::new(); - let mut len = end - start; - let mut f = match result { - Ok(f) => f, - Err(f) => return Either::Left(stream::once(future::err(f))), - }; - - Either::Right(stream::poll_fn(move |cx| { - if len == 0 { - return Poll::Ready(None); - } - reserve_at_least(&mut buf, buf_size); - - let n = match ready!(poll_read_buf(Pin::new(&mut f), cx, &mut buf)) { - Ok(n) => n as u64, - Err(err) => { - tracing::debug!("file read error: {}", err); - return Poll::Ready(Some(Err(err))); - } - }; - - if n == 0 { - tracing::debug!("file read found EOF before expected length"); - return Poll::Ready(None); - } - - let mut chunk = buf.split().freeze(); - if n > len { - chunk = chunk.split_to(len as usize); - len = 0; - } else { - len -= n; - } - - Poll::Ready(Some(Ok(chunk))) - })) - }) - .flatten() -} - -fn reserve_at_least(buf: &mut BytesMut, cap: usize) { - if buf.capacity() - buf.len() < cap { - buf.reserve(cap); - } -} - -const DEFAULT_READ_BUF_SIZE: usize = 8_192; - -fn optimal_buf_size(metadata: &Metadata) -> usize { - let block_size = get_block_size(metadata); - - // If file length is smaller than block size, don't waste space - // reserving a bigger-than-needed buffer. - cmp::min(block_size as u64, metadata.len()) as usize -} - -#[cfg(unix)] -fn get_block_size(metadata: &Metadata) -> usize { - use std::os::unix::fs::MetadataExt; - //TODO: blksize() returns u64, should handle bad cast... - //(really, a block size bigger than 4gb?) - - // Use device blocksize unless it's really small. - cmp::max(metadata.blksize() as usize, DEFAULT_READ_BUF_SIZE) -} - -#[cfg(not(unix))] -fn get_block_size(_metadata: &Metadata) -> usize { - DEFAULT_READ_BUF_SIZE -} - -#[cfg(test)] -mod tests { - use super::sanitize_path; - use bytes::BytesMut; - - #[test] - fn test_sanitize_path() { - let base = "/var/www"; - - fn p(s: &str) -> &::std::path::Path { - s.as_ref() - } - - assert_eq!( - sanitize_path(base, "/foo.html").unwrap(), - p("/var/www/foo.html") - ); - - // bad paths - sanitize_path(base, "/../foo.html").expect_err("dot dot"); - - sanitize_path(base, "/C:\\/foo.html").expect_err("C:\\"); - } - - #[test] - fn test_reserve_at_least() { - let mut buf = BytesMut::new(); - let cap = 8_192; - - assert_eq!(buf.len(), 0); - assert_eq!(buf.capacity(), 0); - - super::reserve_at_least(&mut buf, cap); - assert_eq!(buf.len(), 0); - assert_eq!(buf.capacity(), cap); - } -} diff --git a/src/handler.rs b/src/handler.rs new file mode 100644 index 0000000..5dfe141 --- /dev/null +++ b/src/handler.rs @@ -0,0 +1,13 @@ +use hyper::{Body, Request, Response}; +use std::path::Path; + +use crate::static_files; +use crate::{error::Result, error_page}; + +/// Main server request handler. +pub async fn handle_request(base: &Path, req: Request) -> Result> { + match static_files::handle_request(base, req.headers(), req.uri().path()).await { + Ok(resp) => Ok(resp), + Err(status) => error_page::get_error_response(req.method(), &status), + } +} diff --git a/src/lib.rs b/src/lib.rs index 21caaec..add858b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,13 +4,13 @@ extern crate anyhow; pub mod config; -pub mod controller; pub mod error_page; -pub mod fs; +pub mod handler; pub mod helpers; pub mod logger; pub mod server; pub mod signals; +pub mod static_files; #[macro_use] pub mod error; diff --git a/src/server.rs b/src/server.rs index bdb9eb8..0fa9876 100644 --- a/src/server.rs +++ b/src/server.rs @@ -8,8 +8,8 @@ use crate::{ config::{Config, CONFIG}, error_page, }; -use crate::{controller::handle, fs::ArcPath}; use crate::{error, helpers, logger, Result}; +use crate::{handler, static_files::ArcPath}; /// Define a multi-thread HTTP or HTTP/2 web server. pub struct Server { @@ -88,7 +88,7 @@ impl Server { async move { Ok::<_, error::Error>(service_fn(move |req| { let root_dir = root_dir.clone(); - async move { handle(root_dir.as_ref(), req).await } + async move { handler::handle_request(root_dir.as_ref(), req).await } })) } }); diff --git a/src/static_files.rs b/src/static_files.rs new file mode 100644 index 0000000..bf38060 --- /dev/null +++ b/src/static_files.rs @@ -0,0 +1,452 @@ +// Static File handler +// -> Most of the file is borrowed from https://github.com/seanmonstar/warp/blob/master/src/filters/fs.rs + +use bytes::{Bytes, BytesMut}; +use futures::future::Either; +use futures::{future, ready, stream, FutureExt, Stream, StreamExt, TryFutureExt}; +use headers::{ + AcceptRanges, ContentLength, ContentRange, ContentType, HeaderMap, HeaderMapExt, HeaderValue, + IfModifiedSince, IfRange, IfUnmodifiedSince, LastModified, Range, +}; +use hyper::{Body, Response, StatusCode}; +use percent_encoding::percent_decode_str; +use std::fs::Metadata; +use std::future::Future; +use std::io; +use std::ops::Bound; +use std::path::PathBuf; +use std::pin::Pin; +use std::sync::Arc; +use std::task::Poll; +use std::{cmp, path::Path}; +use tokio::fs::File as TkFile; +use tokio::io::AsyncSeekExt; +use tokio_util::io::poll_read_buf; + +/// A small Arch `PathBuf` wrapper since Arc doesn't implement AsRef. +#[derive(Clone, Debug)] +pub struct ArcPath(pub Arc); + +impl AsRef for ArcPath { + fn as_ref(&self) -> &Path { + (*self.0).as_ref() + } +} + +/// Entry point to handle web server requests which map to specific files +/// on file system and return a file response. +pub async fn handle_request( + base: &Path, + headers: &HeaderMap, + uri_path: &str, +) -> Result, StatusCode> { + let base = Arc::new(base.into()); + let res = path_from_tail(base, uri_path).await?; + file_reply(headers, res).await +} + +fn path_from_tail( + base: Arc, + tail: &str, +) -> impl Future> + Send { + future::ready(sanitize_path(base.as_ref(), tail)).and_then(|mut buf| async { + match tokio::fs::metadata(&buf).await { + Ok(meta) => { + let mut auto_index = false; + if meta.is_dir() { + tracing::debug!("dir: appending index.html to directory path"); + buf.push("index.html"); + auto_index = true; + } + tracing::trace!("dir: {:?}", buf); + Ok((ArcPath(Arc::new(buf)), meta, auto_index)) + } + Err(err) => { + tracing::debug!("file not found: {:?}", err); + Err(StatusCode::NOT_FOUND) + } + } + }) +} + +/// Reply with a file content. +fn file_reply( + headers: &HeaderMap, + res: (ArcPath, Metadata, bool), +) -> impl Future, StatusCode>> + Send { + // TODO: directory listing + + let (path, meta, auto_index) = res; + let conditionals = get_conditional_headers(headers); + TkFile::open(path.clone()).then(move |res| match res { + Ok(f) => Either::Left(file_conditional(f, path, meta, auto_index, conditionals)), + Err(err) => { + let status = match err.kind() { + io::ErrorKind::NotFound => { + tracing::debug!("file not found: {:?}", path.as_ref().display()); + StatusCode::NOT_FOUND + } + io::ErrorKind::PermissionDenied => { + tracing::warn!("file permission denied: {:?}", path.as_ref().display()); + StatusCode::FORBIDDEN + } + _ => { + tracing::error!( + "file open error (path={:?}): {} ", + path.as_ref().display(), + err + ); + StatusCode::INTERNAL_SERVER_ERROR + } + }; + Either::Right(future::err(status)) + } + }) +} + +fn get_conditional_headers(header_list: &HeaderMap) -> Conditionals { + let if_modified_since = header_list.typed_get::(); + let if_unmodified_since = header_list.typed_get::(); + let if_range = header_list.typed_get::(); + let range = header_list.typed_get::(); + + Conditionals { + if_modified_since, + if_unmodified_since, + if_range, + range, + } +} + +fn sanitize_path(base: impl AsRef, tail: &str) -> Result { + let mut buf = PathBuf::from(base.as_ref()); + let p = match percent_decode_str(tail).decode_utf8() { + Ok(p) => p, + Err(err) => { + tracing::debug!("dir: failed to decode route={:?}: {:?}", tail, err); + return Err(StatusCode::UNSUPPORTED_MEDIA_TYPE); + } + }; + tracing::trace!("dir? base={:?}, route={:?}", base.as_ref(), p); + for seg in p.split('/') { + if seg.starts_with("..") { + tracing::warn!("dir: rejecting segment starting with '..'"); + return Err(StatusCode::NOT_FOUND); + } else if seg.contains('\\') { + tracing::warn!("dir: rejecting segment containing with backslash (\\)"); + return Err(StatusCode::NOT_FOUND); + } else { + buf.push(seg); + } + } + Ok(buf) +} + +#[derive(Debug)] +struct Conditionals { + if_modified_since: Option, + if_unmodified_since: Option, + if_range: Option, + range: Option, +} + +enum Cond { + NoBody(Response), + WithBody(Option), +} + +impl Conditionals { + fn check(self, last_modified: Option) -> Cond { + if let Some(since) = self.if_unmodified_since { + let precondition = last_modified + .map(|time| since.precondition_passes(time.into())) + .unwrap_or(false); + + tracing::trace!( + "if-unmodified-since? {:?} vs {:?} = {}", + since, + last_modified, + precondition + ); + if !precondition { + let mut res = Response::new(Body::empty()); + *res.status_mut() = StatusCode::PRECONDITION_FAILED; + return Cond::NoBody(res); + } + } + + if let Some(since) = self.if_modified_since { + tracing::trace!( + "if-modified-since? header = {:?}, file = {:?}", + since, + last_modified + ); + let unmodified = last_modified + .map(|time| !since.is_modified(time.into())) + // no last_modified means its always modified + .unwrap_or(false); + if unmodified { + let mut res = Response::new(Body::empty()); + *res.status_mut() = StatusCode::NOT_MODIFIED; + return Cond::NoBody(res); + } + } + + if let Some(if_range) = self.if_range { + tracing::trace!("if-range? {:?} vs {:?}", if_range, last_modified); + let can_range = !if_range.is_modified(None, last_modified.as_ref()); + if !can_range { + return Cond::WithBody(None); + } + } + + Cond::WithBody(self.range) + } +} + +fn file_conditional( + f: TkFile, + path: ArcPath, + meta: Metadata, + auto_index: bool, + conditionals: Conditionals, +) -> impl Future, StatusCode>> + Send { + file_metadata(f, meta, auto_index) + .map_ok(|(file, meta)| response_body(file, meta, path, conditionals)) +} + +async fn file_metadata( + f: TkFile, + meta: Metadata, + auto_index: bool, +) -> Result<(TkFile, Metadata), StatusCode> { + if !auto_index { + return Ok((f, meta)); + } + match f.metadata().await { + Ok(meta) => Ok((f, meta)), + Err(err) => { + tracing::debug!("file metadata error: {}", err); + Err(StatusCode::INTERNAL_SERVER_ERROR) + } + } +} + +fn response_body( + file: TkFile, + meta: Metadata, + path: ArcPath, + conditionals: Conditionals, +) -> Response { + let mut len = meta.len(); + let modified = meta.modified().ok().map(LastModified::from); + match conditionals.check(modified) { + Cond::NoBody(resp) => resp, + Cond::WithBody(range) => { + bytes_range(range, len) + .map(|(start, end)| { + let sub_len = end - start; + let buf_size = optimal_buf_size(&meta); + let stream = file_stream(file, buf_size, (start, end)); + let body = Body::wrap_stream(stream); + + let mut resp = Response::new(body); + + if sub_len != len { + *resp.status_mut() = StatusCode::PARTIAL_CONTENT; + resp.headers_mut().typed_insert( + ContentRange::bytes(start..end, len).expect("valid ContentRange"), + ); + + len = sub_len; + } + + let mime = mime_guess::from_path(path.as_ref()).first_or_octet_stream(); + + resp.headers_mut().typed_insert(ContentLength(len)); + resp.headers_mut().typed_insert(ContentType::from(mime)); + resp.headers_mut().typed_insert(AcceptRanges::bytes()); + + if let Some(last_modified) = modified { + resp.headers_mut().typed_insert(last_modified); + } + + resp + }) + .unwrap_or_else(|BadRange| { + // bad byte range + let mut resp = Response::new(Body::empty()); + *resp.status_mut() = StatusCode::RANGE_NOT_SATISFIABLE; + resp.headers_mut() + .typed_insert(ContentRange::unsatisfied_bytes(len)); + resp + }) + } + } +} + +struct BadRange; + +fn bytes_range(range: Option, max_len: u64) -> Result<(u64, u64), BadRange> { + let range = if let Some(range) = range { + range + } else { + return Ok((0, max_len)); + }; + + let ret = range + .iter() + .map(|(start, end)| { + let start = match start { + Bound::Unbounded => 0, + Bound::Included(s) => s, + Bound::Excluded(s) => s + 1, + }; + + let end = match end { + Bound::Unbounded => max_len, + Bound::Included(s) => { + // For the special case where s == the file size + if s == max_len { + s + } else { + s + 1 + } + } + Bound::Excluded(s) => s, + }; + + if start < end && end <= max_len { + Ok((start, end)) + } else { + tracing::trace!("unsatisfiable byte range: {}-{}/{}", start, end, max_len); + Err(BadRange) + } + }) + .next() + .unwrap_or(Ok((0, max_len))); + ret +} + +fn file_stream( + mut file: TkFile, + buf_size: usize, + (start, end): (u64, u64), +) -> impl Stream> + Send { + let seek = async move { + if start != 0 { + file.seek(io::SeekFrom::Start(start)).await?; + } + Ok(file) + }; + + seek.into_stream() + .map(move |result| { + let mut buf = BytesMut::new(); + let mut len = end - start; + let mut f = match result { + Ok(f) => f, + Err(f) => return Either::Left(stream::once(future::err(f))), + }; + + Either::Right(stream::poll_fn(move |cx| { + if len == 0 { + return Poll::Ready(None); + } + reserve_at_least(&mut buf, buf_size); + + let n = match ready!(poll_read_buf(Pin::new(&mut f), cx, &mut buf)) { + Ok(n) => n as u64, + Err(err) => { + tracing::debug!("file read error: {}", err); + return Poll::Ready(Some(Err(err))); + } + }; + + if n == 0 { + tracing::debug!("file read found EOF before expected length"); + return Poll::Ready(None); + } + + let mut chunk = buf.split().freeze(); + if n > len { + chunk = chunk.split_to(len as usize); + len = 0; + } else { + len -= n; + } + + Poll::Ready(Some(Ok(chunk))) + })) + }) + .flatten() +} + +fn reserve_at_least(buf: &mut BytesMut, cap: usize) { + if buf.capacity() - buf.len() < cap { + buf.reserve(cap); + } +} + +const DEFAULT_READ_BUF_SIZE: usize = 8_192; + +fn optimal_buf_size(metadata: &Metadata) -> usize { + let block_size = get_block_size(metadata); + + // If file length is smaller than block size, don't waste space + // reserving a bigger-than-needed buffer. + cmp::min(block_size as u64, metadata.len()) as usize +} + +#[cfg(unix)] +fn get_block_size(metadata: &Metadata) -> usize { + use std::os::unix::fs::MetadataExt; + //TODO: blksize() returns u64, should handle bad cast... + //(really, a block size bigger than 4gb?) + + // Use device blocksize unless it's really small. + cmp::max(metadata.blksize() as usize, DEFAULT_READ_BUF_SIZE) +} + +#[cfg(not(unix))] +fn get_block_size(_metadata: &Metadata) -> usize { + DEFAULT_READ_BUF_SIZE +} + +#[cfg(test)] +mod tests { + use super::sanitize_path; + use bytes::BytesMut; + + #[test] + fn test_sanitize_path() { + let base = "/var/www"; + + fn p(s: &str) -> &::std::path::Path { + s.as_ref() + } + + assert_eq!( + sanitize_path(base, "/foo.html").unwrap(), + p("/var/www/foo.html") + ); + + // bad paths + sanitize_path(base, "/../foo.html").expect_err("dot dot"); + + sanitize_path(base, "/C:\\/foo.html").expect_err("C:\\"); + } + + #[test] + fn test_reserve_at_least() { + let mut buf = BytesMut::new(); + let cap = 8_192; + + assert_eq!(buf.len(), 0); + assert_eq!(buf.capacity(), 0); + + super::reserve_at_least(&mut buf, cap); + assert_eq!(buf.len(), 0); + assert_eq!(buf.capacity(), cap); + } +} -- libgit2 1.7.2