From a00fc5493ef4b504ed1c87b3ffae60907cc9e16e Mon Sep 17 00:00:00 2001 From: holly sparkles Date: Mon, 8 Jan 2024 14:16:39 +0100 Subject: [PATCH] feat: add repo language grouping and counting --- Cargo.lock | 16 ++++++++++++++++ Cargo.toml | 1 + src/core/mod.rs | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ src/main.rs | 41 ++++++++++++++++++++++------------------- 4 files changed, 143 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44b4fd4..a0adbe7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,6 +19,12 @@ dependencies = [ ] [[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] name = "form_urlencoded" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -53,6 +59,15 @@ dependencies = [ ] [[package]] +name = "itertools" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + +[[package]] name = "jobserver" version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -148,6 +163,7 @@ name = "smolguess" version = "0.1.0" dependencies = [ "git2", + "itertools", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index a64a203..a89c501 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,4 @@ edition = "2021" [dependencies] git2 = "0.18.1" +itertools = "0.12.0" diff --git a/src/core/mod.rs b/src/core/mod.rs index 0b23564..694447b 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -75,6 +75,72 @@ impl LanguageDefinitions { .cloned() } + /// Identifies the programming languages of a list of files based on their extensions. 
+ /// + /// The `identify_files` function takes a vector of `PathBuf` representing files and + /// identifies the programming languages for each file based on their extensions. It returns + /// a vector of `LanguageMatch` instances containing information about each identified file + /// and its corresponding programming language. + /// + /// # Parameters + /// + /// - `files`: A vector of `PathBuf` representing the paths to the files to be identified. + /// + /// # Returns + /// + /// A vector of `LanguageMatch` instances, each containing information about an identified file + /// and its corresponding programming language. + /// + /// # Examples + /// + /// ``` + /// use std::path::PathBuf; + /// use smolguess::core::{LanguageDefinition, LanguageDefinitions, LanguageMatch}; + /// + /// let languages = LanguageDefinitions::default() + /// .insert(("Rust", "rs")) + /// .insert(("Markdown", "md")); + /// + /// let file_paths = vec![ + /// PathBuf::from("example.rs"), + /// PathBuf::from("README.md"), + /// PathBuf::from("unknown_file.txt"), + /// ]; + /// + /// let identified_files = languages.identify_files(file_paths); + /// + /// let test = identified_files + /// .iter() + /// .find(|e| e.language.name.eq(&String::from("Rust"))) + /// .unwrap(); + /// + /// assert_eq!(identified_files.len(), 3); + /// assert_eq!(test.language.name, "Rust"); + /// ``` + pub fn identify_files(self, files: Vec<PathBuf>) -> Vec<LanguageMatch> { + let mut matches: Vec<LanguageMatch> = Vec::new(); + + for file in files { + matches.push(LanguageMatch { + file: file.clone(), + language: self + .identify_file(file.clone()) + .unwrap_or(LanguageDefinition { + name: String::from("Unknown"), + extension: file + .extension() + .unwrap_or(file.clone().file_name().unwrap_or_default()) + .to_string_lossy() + .to_string() + .to_lowercase(), + }), + }); + } + + matches.sort_by(|first, second| first.language.name.cmp(&second.language.name)); + matches + } + /// Insert a language definition into the collection. 
/// /// This method takes a generic parameter `T` that can be converted into a `LanguageDefinition`. @@ -251,17 +317,17 @@ impl LanguageDefinition { /// /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs")); /// ``` - pub fn new(name: &str, extension: &str) -> Self { + pub fn new(name: impl Into<String>, extension: impl Into<String>) -> Self { LanguageDefinition { - name: name.to_string(), - extension: extension.to_string(), + name: name.into(), + extension: extension.into(), } } } impl<T> From<(T, T)> for LanguageDefinition where - T: AsRef<str>, + T: Into<String>, { /// Converts a tuple of two values into a `LanguageDefinition` instance. /// @@ -284,7 +350,39 @@ where /// /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs")); /// ``` - fn from(value: (T, T)) -> Self { - LanguageDefinition::new(value.0.as_ref(), value.1.as_ref()) + fn from((name, extension): (T, T)) -> Self { + LanguageDefinition::new(name, extension) } } + +/// Represents a pairing of a file and its identified programming language. +/// +/// The `LanguageMatch` struct is used to associate a `PathBuf` representing a file +/// with its corresponding `LanguageDefinition`. This pairing provides information about +/// a file and its identified programming language. +/// +/// # Fields +/// +/// - `file`: The path to the identified file. +/// - `language`: The identified programming language associated with the file. 
+/// +/// # Examples +/// +/// ``` +/// use std::path::PathBuf; +/// use smolguess::core::{LanguageDefinition, LanguageMatch}; +/// +/// let file_path = PathBuf::from("example.rs"); +/// let language = LanguageDefinition::new("Rust", "rs"); +/// +/// let language_match = LanguageMatch { file: file_path.clone(), language }; +/// +/// assert_eq!(language_match.file, file_path); +/// assert_eq!(language_match.language.name, "Rust"); +/// assert_eq!(language_match.language.extension, "rs"); +/// ``` +#[derive(Debug)] +pub struct LanguageMatch { + pub file: PathBuf, + pub language: LanguageDefinition, +} diff --git a/src/main.rs b/src/main.rs index ad51318..6b41103 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,8 @@ -use std::{env, path::PathBuf}; +use std::{collections::HashMap, env, path::PathBuf}; +use itertools::Itertools; use smolguess::{ - core::{LanguageDefinition, LanguageDefinitions}, + core::{LanguageDefinitions, LanguageMatch}, repository, }; @@ -12,24 +13,26 @@ fn main() { std::process::exit(1); } - let files = repository::get_bare_repository_files(PathBuf::from(&args[1])); + let repo_languages: Vec<LanguageMatch> = LanguageDefinitions::default() + .load_builtins() + .identify_files(repository::get_bare_repository_files(PathBuf::from( + &args[1], + ))); - println!("{:?}", files); - let definitions = LanguageDefinitions::default().load_builtins(); + let grouped_matches: HashMap<_, _> = repo_languages + .into_iter() + .group_by(|matched| matched.language.name.clone()) + .into_iter() + .map(|(group, items)| (group, items.collect_vec())) + .collect(); - for file in files { - println!( - "{:?}", - definitions.identify_file(file.clone()).unwrap_or_else(|| { - let extension = file - .extension() - .unwrap_or(file.file_name().unwrap_or_default()) - .to_string_lossy() - .to_string() - .to_lowercase(); + let mut sorted_groups: Vec<_> = grouped_matches.into_iter().collect(); + sorted_groups.sort_by(|(_, first), (_, second)| second.len().cmp(&first.len())); - 
LanguageDefinition::new("Unknown", &extension) - }) - ); - } + println!("{:#?}", sorted_groups); + println!(); + + sorted_groups + .iter() + .for_each(|item| println!("{}: {}", item.0, item.1.len())) } -- libgit2 1.7.2