feat: add repo language grouping and counting
Diff
Cargo.lock | 16 ++++++++-
Cargo.toml | 1 +-
src/core/mod.rs | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
src/main.rs | 41 +++++++++++----------
4 files changed, 143 insertions(+), 25 deletions(-)
@@ -19,6 +19,12 @@ dependencies = [
]
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "form_urlencoded"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -53,6 +59,15 @@ dependencies = [
]
[[package]]
name = "itertools"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0"
dependencies = [
"either",
]
[[package]]
name = "jobserver"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -148,6 +163,7 @@ name = "smolguess"
version = "0.1.0"
dependencies = [
"git2",
"itertools",
]
[[package]]
@@ -7,3 +7,4 @@ edition = "2021"
[dependencies]
git2 = "0.18.1"
itertools = "0.12.0"
@@ -75,6 +75,72 @@ impl LanguageDefinitions {
.cloned()
}
/// Identifies the language of every path in `files`.
///
/// Each path is looked up with `identify_file`. A path with no matching
/// definition is reported under the name `"Unknown"`, with `extension` set
/// to the lowercased file extension, or to the lowercased file name when
/// the path has no extension (empty if it has neither).
///
/// Consumes `self` and returns one `LanguageMatch` per input path, sorted
/// by language name.
pub fn identify_files(self, files: Vec<PathBuf>) -> Vec<LanguageMatch> {
    let mut matches: Vec<LanguageMatch> = files
        .into_iter()
        .map(|file| {
            // Build the fallback lazily: the common case is a successful
            // lookup, where the "Unknown" definition is never needed.
            let language = self.identify_file(file.clone()).unwrap_or_else(|| {
                let extension = file
                    .extension()
                    .or_else(|| file.file_name())
                    .unwrap_or_default()
                    .to_string_lossy()
                    .to_lowercase();
                LanguageDefinition {
                    name: String::from("Unknown"),
                    extension,
                }
            });
            LanguageMatch { file, language }
        })
        .collect();

    // `sort_by` is stable, so files of the same language keep their input
    // order and end up adjacent to one another.
    matches.sort_by(|first, second| first.language.name.cmp(&second.language.name));
    matches
}
@@ -251,17 +317,17 @@ impl LanguageDefinition {
pub fn new(name: &str, extension: &str) -> Self {
pub fn new(name: impl Into<String>, extension: impl Into<String>) -> Self {
LanguageDefinition {
name: name.to_string(),
extension: extension.to_string(),
name: name.into(),
extension: extension.into(),
}
}
}
impl<T> From<(T, T)> for LanguageDefinition
where
T: AsRef<str>,
T: Into<String>,
{
@@ -284,7 +350,39 @@ where
fn from(value: (T, T)) -> Self {
LanguageDefinition::new(value.0.as_ref(), value.1.as_ref())
fn from((name, extension): (T, T)) -> Self {
LanguageDefinition::new(name, extension)
}
}
/// A file path paired with the language definition assigned to it.
#[derive(Debug)]
pub struct LanguageMatch {
/// Path of the file that was classified.
pub file: PathBuf,
/// Language definition assigned to `file`.
pub language: LanguageDefinition,
}
@@ -1,7 +1,8 @@
use std::{env, path::PathBuf};
use std::{collections::HashMap, env, path::PathBuf};
use itertools::Itertools;
use smolguess::{
core::{LanguageDefinition, LanguageDefinitions},
core::{LanguageDefinitions, LanguageMatch},
repository,
};
@@ -12,24 +13,26 @@ fn main() {
std::process::exit(1);
}
let files = repository::get_bare_repository_files(PathBuf::from(&args[1]));
let repo_languages: Vec<LanguageMatch> = LanguageDefinitions::default()
.load_builtins()
.identify_files(repository::get_bare_repository_files(PathBuf::from(
&args[1],
)));
println!("{:?}", files);
let definitions = LanguageDefinitions::default().load_builtins();
let grouped_matches: HashMap<_, _> = repo_languages
.into_iter()
.group_by(|matched| matched.language.name.clone())
.into_iter()
.map(|(group, items)| (group, items.collect_vec()))
.collect();
for file in files {
println!(
"{:?}",
definitions.identify_file(file.clone()).unwrap_or_else(|| {
let extension = file
.extension()
.unwrap_or(file.file_name().unwrap_or_default())
.to_string_lossy()
.to_string()
.to_lowercase();
let mut sorted_groups: Vec<_> = grouped_matches.into_iter().collect();
sorted_groups.sort_by(|(_, first), (_, second)| second.len().cmp(&first.len()));
LanguageDefinition::new("Unknown", &extension)
})
);
}
println!("{:#?}", sorted_groups);
println!();
sorted_groups
.iter()
.for_each(|item| println!("{}: {}", item.0, item.1.len()))
}