use std::path::PathBuf; use serde::Serialize; /// A collection of programming language definitions. /// /// `LanguageDefinitions` is used to manage a list of programming language definitions, which consist /// of pairs of a language name and its corresponding file extension. /// /// # Examples /// /// ``` /// use smolguess::core::{LanguageDefinition, LanguageDefinitions}; /// /// let languages = LanguageDefinitions { /// items: vec![ /// LanguageDefinition { name: "Rust".into(), extension: "rs".into() }, /// LanguageDefinition::new("Markdown", "md"), /// ], /// }; /// /// assert_eq!(languages.items.len(), 2); /// assert_eq!((languages.items[0].name.as_str(), languages.items[0].extension.as_str()), ("Rust", "rs")); /// assert_eq!((languages.items[1].name.as_str(), languages.items[1].extension.as_str()), ("Markdown", "md")); /// ``` #[derive(Debug, Default)] pub struct LanguageDefinitions { pub items: Vec, } impl LanguageDefinitions { /// Identifies the programming language of a file based on its extension. /// /// The `identify_file` method takes a `PathBuf` representing the file and searches /// the collection of language definitions for a matching file extension. If a match /// is found, it returns the corresponding `LanguageDefinition`. If no match is found, /// it returns `None`. /// /// # Parameters /// /// - `file`: The path to the file whose language is to be identified. /// /// # Returns /// /// An `Option` representing the identified language if found, /// or `None` if no matching language is found. /// /// # Examples /// /// ``` /// use std::path::PathBuf; /// use smolguess::core::{LanguageDefinition, LanguageDefinitions}; /// /// let languages = LanguageDefinitions::default() /// .insert(("Rust", "rs")) /// .insert(("Markdown", "md")); /// /// let file_path = PathBuf::from("example.rs"); /// let identified_language = languages.identify_file(file_path); /// /// match identified_language { /// Some(language) => assert_eq!(language.name, "Rust"), /// None => panic!("Language not identified."), /// } /// ``` pub fn identify_file(&self, file: PathBuf) -> Option { self.items .iter() .find(|lang| { lang.extension.eq(&file .extension() .unwrap_or_else(|| file.file_name().expect("Unable to get file name")) .to_string_lossy() .to_string() .to_lowercase()) }) .cloned() } /// Identifies the programming languages of a list of files based on their extensions. /// /// The `identify_files` function takes a vector of `PathBuf` representing files and /// identifies the programming languages for each file based on their extensions. It returns /// a vector of `LanguageMatch` instances containing information about each identified file /// and its corresponding programming language. /// /// # Parameters /// /// - `files`: A vector of `PathBuf` representing the paths to the files to be identified. /// /// # Returns /// /// A vector of `LanguageMatch` instances, each containing information about an identified file /// and its corresponding programming language. /// /// # Examples /// /// ``` /// use std::path::PathBuf; /// use smolguess::core::{LanguageDefinition, LanguageDefinitions, LanguageMatch}; /// /// let languages = LanguageDefinitions::default() /// .insert(("Rust", "rs")) /// .insert(("Markdown", "md")); /// /// let file_paths = vec![ /// PathBuf::from("example.rs"), /// PathBuf::from("README.md"), /// PathBuf::from("unknown_file.txt"), /// ]; /// /// let identified_files = languages.identify_files(file_paths); /// /// let test = identified_files /// .iter() /// .find(|e| e.language.name.eq(&String::from("Rust"))) /// .unwrap(); /// /// assert_eq!(identified_files.len(), 3); /// assert_eq!(test.language.name, "Rust"); /// ``` pub fn identify_files(self, files: Vec) -> Vec { let mut matches: Vec = Vec::new(); for file in files { matches.push(LanguageMatch { file: file.clone(), language: self .identify_file(file.clone()) .unwrap_or(LanguageDefinition { name: String::from("Unknown"), extension: file .extension() .unwrap_or(file.clone().file_name().unwrap_or_default()) .to_string_lossy() .to_string() .to_lowercase(), }), }); } matches.sort_by(|first, second| first.language.name.cmp(&second.language.name)); matches } /// Insert a language definition into the collection. /// /// This method takes a generic parameter `T` that can be converted into a `LanguageDefinition`. /// The language definition is then added to the collection, and the modified collection is /// returned. /// /// # Parameters /// /// - `self`: The current `LanguageDefinitions` instance. /// - `definition`: The language definition to insert into the collection. /// /// # Returns /// /// A new `LanguageDefinitions` instance with the added language definition. /// /// # Examples /// /// ``` /// use smolguess::core::{LanguageDefinition, LanguageDefinitions}; /// /// let languages = LanguageDefinitions::default() /// .insert(("Rust", "rs")) /// .insert(LanguageDefinition::new("Markdown", "md")); /// /// assert_eq!(languages.items.len(), 2); /// assert_eq!((languages.items[0].name.as_str(), languages.items[0].extension.as_str()), ("Rust", "rs")); /// assert_eq!((languages.items[1].name.as_str(), languages.items[1].extension.as_str()), ("Markdown", "md")); /// ``` pub fn insert(mut self, definition: T) -> Self where T: Into, { self.items.push(definition.into()); self } /// Loads a set of built-in language definitions into the collection. /// /// The `load_builtins` function is used to add predefined language definitions to the /// `LanguageDefinitions` collection. This can be convenient for initializing the collection /// with common languages and extensions. /// /// # Parameters /// /// - `self`: The current `LanguageDefinitions` instance. /// /// # Returns /// /// A new `LanguageDefinitions` instance with the built-in language definitions added. /// /// # Examples /// /// ``` /// use smolguess::core::{LanguageDefinition, LanguageDefinitions}; /// /// // Load built-in language definitions /// let languages = LanguageDefinitions::default().load_builtins(); /// /// assert_eq!(languages.items.iter().any(|lang| lang.name.eq("Rust")), true); /// ``` pub fn load_builtins(self) -> Self { self.insert(("Assembly", "asm")) .insert(("Batchfile", "bat")) .insert(("C", "c")) .insert(("C#", "cs")) .insert(("C++", "cpp")) .insert(("Clojure", "clj")) .insert(("CMake", "cmake")) .insert(("COBOL", "cbl")) .insert(("CoffeeScript", "coffee")) .insert(("CSS", "css")) .insert(("CSV", "csv")) .insert(("Dart", "dart")) .insert(("DM", "dm")) .insert(("Dockerfile", "dockerfile")) .insert(("Elixir", "ex")) .insert(("Erlang", "erl")) .insert(("Fortran", "f90")) .insert(("GDScript", "gd")) .insert(("Go", "go")) .insert(("Groovy", "groovy")) .insert(("Haskell", "hs")) .insert(("HTML", "html")) .insert(("INI", "ini")) .insert(("Java", "java")) .insert(("JavaScript", "js")) .insert(("JSON", "json")) .insert(("Julia", "jl")) .insert(("Kotlin", "kt")) .insert(("Lisp", "lisp")) .insert(("Lua", "lua")) .insert(("Makefile", "makefile")) .insert(("Markdown", "md")) .insert(("Matlab", "matlab")) .insert(("Objective-C", "mm")) .insert(("OCaml", "ml")) .insert(("Pascal", "pas")) .insert(("Perl", "pm")) .insert(("PHP", "php")) .insert(("PowerShell", "ps1")) .insert(("Prolog", "prolog")) .insert(("Python", "py")) .insert(("R", "r")) .insert(("Ruby", "rb")) .insert(("Rust", "rs")) .insert(("Scala", "scala")) .insert(("SCSS", "scss")) .insert(("Shell", "sh")) .insert(("SQL", "sql")) .insert(("Swift", "swift")) .insert(("TeX", "tex")) .insert(("TOML", "toml")) .insert(("TypeScrpit", "ts")) .insert(("Verilog", "v")) .insert(("Visual Basic", "vba")) .insert(("XML", "xml")) .insert(("YAML", "yaml")) .insert(("C", "h")) .insert(("YAML", "yml")) .insert(("Documentation", "txt")) .insert(("Documentation", "rst")) .insert(("Documentation", "readme")) .insert(("Configuration", "config")) .insert(("Git", ".gitignore")) .insert(("Config", "kconfig")) } } /// Represents a language definition, consisting of a name and its corresponding file extension. /// /// # Fields /// /// - `name`: The name of the programming language. /// - `extension`: The file extension associated with the programming language. /// /// # Examples /// /// Creating a new `LanguageDefinition` instance using the `new` method: /// /// ``` /// use smolguess::core::LanguageDefinition; /// /// let definition = LanguageDefinition::new("Rust", "rs"); /// /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs")); /// ``` /// /// Creating a new `LanguageDefinition` instance using struct initialization: /// /// ``` /// use smolguess::core::LanguageDefinition; /// /// let definition = LanguageDefinition { name: "Rust".into(), extension: "rs".into() }; /// /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs")); /// ``` #[derive(Debug, Clone, Serialize)] pub struct LanguageDefinition { pub name: String, pub extension: String, } impl LanguageDefinition { /// Creates a new `LanguageDefinition` instance with the specified name and file extension. /// /// # Parameters /// /// - `name`: The name of the programming language. /// - `extension`: The file extension associated with the programming language. /// /// # Returns /// /// A new `LanguageDefinition` instance. /// /// # Examples /// /// ``` /// use smolguess::core::LanguageDefinition; /// /// let definition = LanguageDefinition::new("Rust", "rs"); /// /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs")); /// ``` pub fn new(name: impl Into, extension: impl Into) -> Self { LanguageDefinition { name: name.into(), extension: extension.into(), } } } impl From<(T, T)> for LanguageDefinition where T: Into, { /// Converts a tuple of two values into a `LanguageDefinition` instance. /// /// The first value in the tuple is used as the name, and the second value is used as the extension. /// /// # Parameters /// /// - `value`: A tuple containing the name and extension values. /// /// # Returns /// /// A new `LanguageDefinition` instance. /// /// # Examples /// /// ``` /// use smolguess::core::LanguageDefinition; /// /// let definition = LanguageDefinition::from(("Rust", "rs")); /// /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs")); /// ``` fn from((name, extension): (T, T)) -> Self { LanguageDefinition::new(name, extension) } } /// Represents a pairing of a file and its identified programming language. /// /// The `LanguageMatch` struct is used to associate a `PathBuf` representing a file /// with its corresponding `LanguageDefinition`. This pairing provides information about /// a file and its identified programming language. /// /// # Fields /// /// - `file`: The path to the identified file. /// - `language`: The identified programming language associated with the file. /// /// # Examples /// /// ``` /// use std::path::PathBuf; /// use smolguess::core::{LanguageDefinition, LanguageMatch}; /// /// let file_path = PathBuf::from("example.rs"); /// let language = LanguageDefinition::new("Rust", "rs"); /// /// let language_match = LanguageMatch { file: file_path.clone(), language }; /// /// assert_eq!(language_match.file, file_path); /// assert_eq!(language_match.language.name, "Rust"); /// assert_eq!(language_match.language.extension, "rs"); /// ``` #[derive(Debug, Serialize, Clone)] pub struct LanguageMatch { pub file: PathBuf, pub language: LanguageDefinition, }