index : smol-guess.git

ascending towards madness

author holly sparkles <sparkles@holly.sh> 2024-01-08 13:16:39.0 +00:00:00
committer holly sparkles <sparkles@holly.sh> 2024-01-08 14:21:02.0 +00:00:00
commit
a00fc5493ef4b504ed1c87b3ffae60907cc9e16e [patch]
tree
7fe5404fa9f7c7e7b93f3c14567408cadd101e9b
parent
b92cb4e3db802e045aa019c29b7f80088a632193
download
a00fc5493ef4b504ed1c87b3ffae60907cc9e16e.tar.gz

feat: add repo language grouping and counting



Diff

 Cargo.lock      |  16 ++++++++-
 Cargo.toml      |   1 +-
 src/core/mod.rs | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 src/main.rs     |  41 +++++++++++----------
 4 files changed, 143 insertions(+), 25 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 44b4fd4..a0adbe7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -19,6 +19,12 @@ dependencies = [
]

[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"

[[package]]
name = "form_urlencoded"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -53,6 +59,15 @@ dependencies = [
]

[[package]]
name = "itertools"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0"
dependencies = [
 "either",
]

[[package]]
name = "jobserver"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -148,6 +163,7 @@ name = "smolguess"
version = "0.1.0"
dependencies = [
 "git2",
 "itertools",
]

[[package]]
diff --git a/Cargo.toml b/Cargo.toml
index a64a203..a89c501 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,3 +7,4 @@ edition = "2021"

[dependencies]
git2 = "0.18.1"
itertools = "0.12.0"
diff --git a/src/core/mod.rs b/src/core/mod.rs
index 0b23564..694447b 100644
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -75,6 +75,72 @@ impl LanguageDefinitions {
            .cloned()
    }

    /// Identifies the programming language of every file in a list, based on file extension.
    ///
    /// Each input path yields exactly one [`LanguageMatch`]. Paths for which `identify_file`
    /// returns no definition are reported under the placeholder language `"Unknown"`; the
    /// placeholder's `extension` is the lowercased file extension, or the lowercased file name
    /// when the path has no extension (and an empty string when it has neither).
    ///
    /// The returned vector is ordered by language name. The sort is stable, so files that share
    /// a language keep the relative order they had in `files`.
    ///
    /// # Parameters
    ///
    /// - `files`: A vector of `PathBuf` representing the paths to the files to be identified.
    ///
    /// # Returns
    ///
    /// A vector of `LanguageMatch` instances, one per input path, each pairing the path with its
    /// identified (or placeholder) language, sorted by language name.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::path::PathBuf;
    /// use smolguess::core::{LanguageDefinition, LanguageDefinitions, LanguageMatch};
    ///
    /// let languages = LanguageDefinitions::default()
    ///     .insert(("Rust", "rs"))
    ///     .insert(("Markdown", "md"));
    ///
    /// let file_paths = vec![
    ///     PathBuf::from("example.rs"),
    ///     PathBuf::from("README.md"),
    ///     PathBuf::from("unknown_file.txt"),
    /// ];
    ///
    /// let identified_files = languages.identify_files(file_paths);
    ///
    /// let test = identified_files
    ///     .iter()
    ///     .find(|e| e.language.name == "Rust")
    ///     .unwrap();
    ///
    /// assert_eq!(identified_files.len(), 3);
    /// assert_eq!(test.language.name, "Rust");
    /// ```
    pub fn identify_files(self, files: Vec<PathBuf>) -> Vec<LanguageMatch> {
        let mut matches: Vec<LanguageMatch> = files
            .into_iter()
            .map(|file| {
                // `identify_file` is handed an owned copy, as in the original call site; `file`
                // itself is kept so it can be moved into the result below.
                let language = self.identify_file(file.clone()).unwrap_or_else(|| {
                    // Built lazily: only unmatched files pay for the placeholder allocation.
                    LanguageDefinition {
                        name: String::from("Unknown"),
                        extension: file
                            .extension()
                            // No extension (e.g. `Makefile`): fall back to the file name.
                            .or_else(|| file.file_name())
                            .unwrap_or_default()
                            .to_string_lossy()
                            .to_lowercase(),
                    }
                });

                LanguageMatch { file, language }
            })
            .collect();

        // Stable sort keeps input order among files of the same language.
        matches.sort_by(|first, second| first.language.name.cmp(&second.language.name));
        matches
    }

    /// Insert a language definition into the collection.
    ///
    /// This method takes a generic parameter `T` that can be converted into a `LanguageDefinition`.
@@ -251,17 +317,17 @@ impl LanguageDefinition {
    ///
    /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs"));
    /// ```
    pub fn new(name: &str, extension: &str) -> Self {
    pub fn new(name: impl Into<String>, extension: impl Into<String>) -> Self {
        LanguageDefinition {
            name: name.to_string(),
            extension: extension.to_string(),
            name: name.into(),
            extension: extension.into(),
        }
    }
}

impl<T> From<(T, T)> for LanguageDefinition
where
    T: AsRef<str>,
    T: Into<String>,
{
    /// Converts a tuple of two values into a `LanguageDefinition` instance.
    ///
@@ -284,7 +350,39 @@ where
    ///
    /// assert_eq!((definition.name.as_str(), definition.extension.as_str()), ("Rust", "rs"));
    /// ```
    fn from(value: (T, T)) -> Self {
        LanguageDefinition::new(value.0.as_ref(), value.1.as_ref())
    fn from((name, extension): (T, T)) -> Self {
        LanguageDefinition::new(name, extension)
    }
}

/// Represents a pairing of a file and its identified programming language.
///
/// The `LanguageMatch` struct is used to associate a `PathBuf` representing a file
/// with its corresponding `LanguageDefinition`. This pairing provides information about
/// a file and its identified programming language.
///
/// # Fields
///
/// - `file`: The path to the identified file.
/// - `language`: The identified programming language associated with the file.
///
/// # Examples
///
/// ```
/// use std::path::PathBuf;
/// use smolguess::core::{LanguageDefinition, LanguageMatch};
///
/// let file_path = PathBuf::from("example.rs");
/// let language = LanguageDefinition::new("Rust", "rs");
///
/// let language_match = LanguageMatch { file: file_path.clone(), language };
///
/// assert_eq!(language_match.file, file_path);
/// assert_eq!(language_match.language.name, "Rust");
/// assert_eq!(language_match.language.extension, "rs");
/// ```
#[derive(Debug)]
pub struct LanguageMatch {
    /// Path of the file this match describes.
    pub file: PathBuf,
    /// Language definition associated with `file`.
    pub language: LanguageDefinition,
}
diff --git a/src/main.rs b/src/main.rs
index ad51318..6b41103 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,8 @@
use std::{env, path::PathBuf};
use std::{collections::HashMap, env, path::PathBuf};

use itertools::Itertools;
use smolguess::{
    core::{LanguageDefinition, LanguageDefinitions},
    core::{LanguageDefinitions, LanguageMatch},
    repository,
};

@@ -12,24 +13,26 @@ fn main() {
        std::process::exit(1);
    }

    let files = repository::get_bare_repository_files(PathBuf::from(&args[1]));
    let repo_languages: Vec<LanguageMatch> = LanguageDefinitions::default()
        .load_builtins()
        .identify_files(repository::get_bare_repository_files(PathBuf::from(
            &args[1],
        )));

    println!("{:?}", files);
    let definitions = LanguageDefinitions::default().load_builtins();
    let grouped_matches: HashMap<_, _> = repo_languages
        .into_iter()
        .group_by(|matched| matched.language.name.clone())
        .into_iter()
        .map(|(group, items)| (group, items.collect_vec()))
        .collect();

    for file in files {
        println!(
            "{:?}",
            definitions.identify_file(file.clone()).unwrap_or_else(|| {
                let extension = file
                    .extension()
                    .unwrap_or(file.file_name().unwrap_or_default())
                    .to_string_lossy()
                    .to_string()
                    .to_lowercase();
    let mut sorted_groups: Vec<_> = grouped_matches.into_iter().collect();
    sorted_groups.sort_by(|(_, first), (_, second)| second.len().cmp(&first.len()));

                LanguageDefinition::new("Unknown", &extension)
            })
        );
    }
    println!("{:#?}", sorted_groups);
    println!();

    sorted_groups
        .iter()
        .for_each(|item| println!("{}: {}", item.0, item.1.len()))
}