index : smol-guess.git

ascending towards madness

use std::path::PathBuf;

/// An ordered collection of programming language definitions.
///
/// `LanguageDefinitions` holds a list of [`LanguageDefinition`] values, each pairing a language
/// name with the file extension used to recognise it. The order of `items` matters: lookups
/// return the first definition whose extension matches.
///
/// # Examples
///
/// ```
/// use smolguess::core::{LanguageDefinition, LanguageDefinitions};
///
/// let languages = LanguageDefinitions {
///     items: vec![
///         LanguageDefinition { name: "Rust".to_string(), extension: "rs".to_string() },
///         LanguageDefinition::new("Markdown", "md"),
///     ],
/// };
///
/// assert_eq!(languages.items.len(), 2);
/// assert_eq!((languages.items[0].name.as_str(), languages.items[0].extension.as_str()), ("Rust", "rs"));
/// assert_eq!((languages.items[1].name.as_str(), languages.items[1].extension.as_str()), ("Markdown", "md"));
/// ```
#[derive(Debug, Default)]
pub struct LanguageDefinitions {
    /// The known definitions, in insertion order. `Default` yields an empty list.
    pub items: Vec<LanguageDefinition>,
}

impl LanguageDefinitions {
    /// Identifies the programming language of a file based on its extension.
    ///
    /// The `identify_file` method takes a `PathBuf` representing the file and searches
    /// the collection of language definitions for a matching file extension. If a match
    /// is found, it returns the corresponding `LanguageDefinition`. If no match is found,
    /// it returns `None`.
    ///
    /// # Parameters
    ///
    /// - `file`: The path to the file whose language is to be identified.
    ///
    /// # Returns
    ///
    /// An `Option<LanguageDefinition>` representing the identified language if found,
    /// or `None` if no matching language is found.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    /// use smolguess::core::{LanguageDefinition, LanguageDefinitions};
    ///
    /// let languages = LanguageDefinitions::default()
    ///     .insert(("Rust", "rs"))
    ///     .insert(("Markdown", "md"));
    ///
    /// let file_path = PathBuf::from("example.rs");
    /// let identified_language = languages.identify_file(file_path);
    ///
    /// match identified_language {
    ///     Some(language) => assert_eq!(language.name, "Rust"),
    ///     None => panic!("Language not identified."),
    /// }
    /// ```
    pub fn identify_file(&self, file: PathBuf) -> Option<LanguageDefinition> {
        self.items
            .iter()
            .find(|lang| {
                lang.extension.eq(&file
                    .extension()
                    .unwrap_or_else(|| file.file_name().expect("Unable to get file name"))
                    .to_string_lossy()
                    .to_string()
                    .to_lowercase())
            })
            .cloned()
    }

    /// Identifies the programming languages of a list of files based on their extensions.
    ///
    /// The `identify_files` function takes a vector of `PathBuf` representing files and
    /// identifies the programming languages for each file based on their extensions. It returns
    /// a vector of `LanguageMatch` instances containing information about each identified file
    /// and its corresponding programming language.
    ///
    /// # Parameters
    ///
    /// - `files`: A vector of `PathBuf` representing the paths to the files to be identified.
    ///
    /// # Returns
    ///
    /// A vector of `LanguageMatch` instances, each containing information about an identified file
    /// and its corresponding programming language.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    /// use smolguess::core::{LanguageDefinition, LanguageDefinitions, LanguageMatch};
    ///
    /// let languages = LanguageDefinitions::default()
    ///     .insert(("Rust", "rs"))
    ///     .insert(("Markdown", "md"));
    ///
    /// let file_paths = vec![
    ///     PathBuf::from("example.rs"),
    ///     PathBuf::from("README.md"),
    ///     PathBuf::from("unknown_file.txt"),
    /// ];
    ///
    /// let identified_files = languages.identify_files(file_paths);
    ///
    /// let test = identified_files
    /// 	.iter()
    /// 	.find(|e| e.language.name.eq(&String::from("Rust")))
    /// 	.unwrap();
    ///
    /// assert_eq!(identified_files.len(), 3);
    /// assert_eq!(test.language.name, "Rust");
    /// ```
    pub fn identify_files(self, files: Vec<PathBuf>) -> Vec<LanguageMatch> {
        let mut matches: Vec<LanguageMatch> = Vec::new();

        for file in files {
            matches.push(LanguageMatch {
                file: file.clone(),
                language: self
                    .identify_file(file.clone())
                    .unwrap_or(LanguageDefinition {
                        name: String::from("Unknown"),
                        extension: file
                            .extension()
                            .unwrap_or(file.clone().file_name().unwrap_or_default())
                            .to_string_lossy()
                            .to_string()
                            .to_lowercase(),
                    }),
            });
        }

        matches.sort_by(|first, second| first.language.name.cmp(&second.language.name));
        matches
    }

    /// Insert a language definition into the collection.
    ///
    /// This method takes a generic parameter `T` that can be converted into a `LanguageDefinition`.
    /// The language definition is then added to the collection, and the modified collection is
    /// returned.
    ///
    /// # Parameters
    ///
    /// - `self`: The current `LanguageDefinitions` instance.
    /// - `definition`: The language definition to insert into the collection.
    ///
    /// # Returns
    ///
    /// A new `LanguageDefinitions` instance with the added language definition.
    ///
    /// # Examples
    ///
    /// ```
    /// use smolguess::core::{LanguageDefinition, LanguageDefinitions};
    ///
    /// let languages = LanguageDefinitions::default()
    ///     .insert(("Rust", "rs"))
    ///     .insert(LanguageDefinition::new("Markdown", "md"));
    ///
    /// assert_eq!(languages.items.len(), 2);
    /// assert_eq!((languages.items[0].name.as_str(), languages.items[0].extension.as_str()), ("Rust", "rs"));
    /// assert_eq!((languages.items[1].name.as_str(), languages.items[1].extension.as_str()), ("Markdown", "md"));
    /// ```
    pub fn insert<T>(mut self, definition: T) -> Self
    where
        T: Into<LanguageDefinition>,
    {
        self.items.push(definition.into());

        self
    }

    /// Loads a set of built-in language definitions into the collection.
    ///
    /// The `load_builtins` function is used to add predefined language definitions to the
    /// `LanguageDefinitions` collection. This can be convenient for initializing the collection
    /// with common languages and extensions.
    ///
    /// # Parameters
    ///
    /// - `self`: The current `LanguageDefinitions` instance.
    ///
    /// # Returns
    ///
    /// A new `LanguageDefinitions` instance with the built-in language definitions added.
    ///
    /// # Examples
    ///
    /// ```
    /// use smolguess::core::{LanguageDefinition, LanguageDefinitions};
    ///
    /// // Load built-in language definitions
    /// let languages = LanguageDefinitions::default().load_builtins();
    ///
    /// assert_eq!(languages.items.iter().any(|lang| lang.name.eq("Rust")), true);
    /// ```
    pub fn load_builtins(self) -> Self {
        self.insert(("Assembly", "asm"))
            .insert(("Batchfile", "bat"))
            .insert(("C", "c"))
            .insert(("C#", "cs"))
            .insert(("C++", "cpp"))
            .insert(("Clojure", "clj"))
            .insert(("CMake", "cmake"))
            .insert(("COBOL", "cbl"))
            .insert(("CoffeeScript", "coffee"))
            .insert(("CSS", "css"))
            .insert(("CSV", "csv"))
            .insert(("Dart", "dart"))
            .insert(("DM", "dm"))
            .insert(("Dockerfile", "dockerfile"))
            .insert(("Elixir", "ex"))
            .insert(("Erlang", "erl"))
            .insert(("Fortran", "f90"))
            .insert(("GDScript", "gd"))
            .insert(("Go", "go"))
            .insert(("Groovy", "groovy"))
            .insert(("Haskell", "hs"))
            .insert(("HTML", "html"))
            .insert(("INI", "ini"))
            .insert(("Java", "java"))
            .insert(("JavaScript", "js"))
            .insert(("JSON", "json"))
            .insert(("Julia", "jl"))
            .insert(("Kotlin", "kt"))
            .insert(("Lisp", "lisp"))
            .insert(("Lua", "lua"))
            .insert(("Makefile", "makefile"))
            .insert(("Markdown", "md"))
            .insert(("Matlab", "matlab"))
            .insert(("Objective-C", "mm"))
            .insert(("OCaml", "ml"))
            .insert(("Pascal", "pas"))
            .insert(("Perl", "pm"))
            .insert(("PHP", "php"))
            .insert(("PowerShell", "ps1"))
            .insert(("Prolog", "prolog"))
            .insert(("Python", "py"))
            .insert(("R", "r"))
            .insert(("Ruby", "rb"))
            .insert(("Rust", "rs"))
            .insert(("Scala", "scala"))
            .insert(("SCSS", "scss"))
            .insert(("Shell", "sh"))
            .insert(("SQL", "sql"))
            .insert(("Swift", "swift"))
            .insert(("TeX", "tex"))
            .insert(("TOML", "toml"))
            .insert(("TypeScrpit", "ts"))
            .insert(("Verilog", "v"))
            .insert(("Visual Basic", "vba"))
            .insert(("XML", "xml"))
            .insert(("YAML", "yaml"))
    }
}

/// A single language definition: a language name paired with the file extension that
/// identifies it.
///
/// When used for lookups through `LanguageDefinitions`, the file's extension is lowercased
/// before being compared with `extension`, so `extension` should be stored in lowercase.
///
/// # Fields
///
/// - `name`: The name of the programming language.
/// - `extension`: The file extension associated with the programming language, without a
///   leading dot.
///
/// # Examples
///
/// Creating a new `LanguageDefinition` instance using the `new` method:
///
/// ```
/// use smolguess::core::LanguageDefinition;
///
/// let definition = LanguageDefinition::new("Rust", "rs");
///
/// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs"));
/// ```
///
/// Creating a new `LanguageDefinition` instance using struct initialization:
///
/// ```
/// use smolguess::core::LanguageDefinition;
///
/// let definition = LanguageDefinition { name: "Rust".to_string(), extension: "rs".to_string() };
///
/// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs"));
/// ```
#[derive(Debug, Clone)]
pub struct LanguageDefinition {
    /// Human-readable language name, e.g. `"Rust"`.
    pub name: String,
    /// File extension (or lowercased file name for extension-less files), e.g. `"rs"`.
    pub extension: String,
}

impl LanguageDefinition {
    /// Creates a new `LanguageDefinition` instance with the specified name and file extension.
    ///
    /// # Parameters
    ///
    /// - `name`: The name of the programming language.
    /// - `extension`: The file extension associated with the programming language.
    ///
    /// # Returns
    ///
    /// A new `LanguageDefinition` instance.
    ///
    /// # Examples
    ///
    /// ```
    /// use smolguess::core::LanguageDefinition;
    ///
    /// let definition = LanguageDefinition::new("Rust", "rs");
    ///
    /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs"));
    /// ```
    pub fn new(name: impl Into<String>, extension: impl Into<String>) -> Self {
        LanguageDefinition {
            name: name.into(),
            extension: extension.into(),
        }
    }
}

impl<T> From<(T, T)> for LanguageDefinition
where
    T: Into<String>,
{
    /// Converts a tuple of two values into a `LanguageDefinition` instance.
    ///
    /// The first value in the tuple is used as the name, and the second value is used as the extension.
    ///
    /// # Parameters
    ///
    /// - `value`: A tuple containing the name and extension values.
    ///
    /// # Returns
    ///
    /// A new `LanguageDefinition` instance.
    ///
    /// # Examples
    ///
    /// ```
    /// use smolguess::core::LanguageDefinition;
    ///
    /// let definition = LanguageDefinition::from(("Rust", "rs"));
    ///
    /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs"));
    /// ```
    fn from((name, extension): (T, T)) -> Self {
        LanguageDefinition::new(name, extension)
    }
}

/// Pairs a file with the programming language identified for it.
///
/// `LanguageMatch` associates a `PathBuf` with a `LanguageDefinition`. It is the element type
/// returned by `LanguageDefinitions::identify_files`, which reports files that match no
/// known definition with a placeholder definition named `"Unknown"`.
///
/// # Fields
///
/// - `file`: The path to the identified file.
/// - `language`: The identified programming language associated with the file.
///
/// # Examples
///
/// ```
/// use std::path::PathBuf;
/// use smolguess::core::{LanguageDefinition, LanguageMatch};
///
/// let file_path = PathBuf::from("example.rs");
/// let language = LanguageDefinition::new("Rust", "rs");
///
/// let language_match = LanguageMatch { file: file_path.clone(), language };
///
/// assert_eq!(language_match.file, file_path);
/// assert_eq!(language_match.language.name, "Rust");
/// assert_eq!(language_match.language.extension, "rs");
/// ```
#[derive(Debug)]
pub struct LanguageMatch {
    /// The path that was examined.
    pub file: PathBuf,
    /// The language definition associated with `file`.
    pub language: LanguageDefinition,
}