converted to rust

This commit is contained in:
Charlie Crossley
2025-08-20 14:04:17 +01:00
parent cac0876769
commit 2c80cc135b
2 changed files with 283 additions and 0 deletions

272
src/main.rs Normal file
View File

@@ -0,0 +1,272 @@
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use clap::Parser;
use atty::Stream;
use std::path::Path;
use fuzzy_matcher::skim::SkimMatcherV2;
use fuzzy_matcher::FuzzyMatcher;
use ansi_term::Style;
use ansi_term::Colour::Red;
use regex::RegexBuilder;
use std::collections::HashSet;
// Command-line options for the search tool, parsed with clap's derive API.
// NOTE(review): plain `//` comments are used here on purpose; clap's derive
// picks up `///` doc comments as help text, which could alter `--help` output.
#[derive(Parser, Debug)]
#[command(author, version, about = "Context-aware markdown file search")]
struct Args {
// Positional search terms; `required = true` makes clap reject an empty list.
#[arg(required = true, help = "One or more keywords to search for in the markdown text.")]
keywords: Vec<String>,
// `--file <FILE>`: input path. `open_input` falls back to piped stdin when this is `None`.
#[arg(long, value_name = "FILE", value_hint = clap::ValueHint::FilePath, help = "Optional input file path. Reads from stdin if omitted.")]
file: Option<String>,
// `--all`: a line must contain every keyword rather than at least one.
#[arg(long, help = "Enable matching all keywords instead of any.")]
all: bool,
// `--fuzzy`: use the skim fuzzy matcher instead of case-insensitive substring search.
#[arg(long, help = "Enable fuzzy matching to allow approximate keyword matches.")]
fuzzy: bool,
}
/// Resolves the input source for the search.
///
/// * `Some(path)`: opens that file, buffered.
/// * `None` with stdin piped or redirected: reads stdin, buffered.
/// * `None` with stdin attached to a terminal: there is nothing to read.
///
/// Any failure prints a message to stderr and terminates the process with
/// exit status 1, so this function only ever returns a usable reader.
fn open_input(file: &Option<String>) -> Box<dyn BufRead> {
    match file {
        Some(path) => {
            let handle = File::open(path).unwrap_or_else(|err| {
                eprintln!("Failed to open file '{}': {}", path, err);
                std::process::exit(1)
            });
            Box::new(BufReader::new(handle))
        }
        // Interactive terminal and no file: bail out instead of blocking on input.
        None if atty::is(Stream::Stdin) => {
            eprintln!("Error: No input file provided and no piped stdin detected.");
            std::process::exit(1)
        }
        None => Box::new(BufReader::new(io::stdin())),
    }
}
/// Reads every line from `reader` into memory (line terminators removed).
///
/// Error handling is best-effort:
/// * a line that is not valid UTF-8 (`ErrorKind::InvalidData`) is reported on
///   stderr and skipped; the offending bytes have already been consumed, so
///   reading continues with the next line;
/// * any other I/O error is reported and reading stops, returning the lines
///   collected so far. Such errors tend to repeat on every call (for example a
///   directory opened as a file), and retrying them would never terminate.
fn lines_from_reader<R: BufRead>(reader: R) -> Vec<String> {
    let mut lines = Vec::new();
    for line in reader.lines() {
        match line {
            Ok(text) => lines.push(text),
            Err(e) => {
                eprintln!("Error reading line: {}", e);
                if e.kind() != io::ErrorKind::InvalidData {
                    // Not a per-line decoding problem: the reader itself is broken.
                    break;
                }
            }
        }
    }
    lines
}
/// One input line tagged with its position and its markdown nesting level.
#[derive(Debug, Clone)]
struct IndexedMDLine {
text: String, // Line contents, copied verbatim from the input
index: usize, // Zero-based position of the line in the input
level: i32, // Hierarchy level as computed by `markdown_level` (-1 for blank lines)
}
/// Classifies a line of markdown into a numeric hierarchy level; a smaller
/// number means "closer to the document root".
///
/// * `1..=10`: heading; the level is the number of leading `#` characters.
/// * `100 + indent / 2`: list item, where `indent` is the number of bytes of
///   leading whitespace (so two spaces per nesting step).
/// * `100`: any other non-blank line (body text).
/// * `-1`: blank or whitespace-only line.
fn markdown_level(line: &str) -> i32 {
    let trimmed = line.trim_start();
    if trimmed.is_empty() {
        return -1;
    }

    // Headings (1-10). A longer run of '#' falls through and is classified
    // like any other line.
    let hashes = trimmed.chars().take_while(|&c| c == '#').count();
    if (1..=10).contains(&hashes) {
        return hashes as i32;
    }

    // Lists (100+): deeper indentation gives a higher level.
    if starts_with_list_marker(trimmed) {
        let indent = line.len() - trimmed.len();
        return 100 + (indent / 2) as i32;
    }

    // Body text
    100
}

/// Returns true when `trimmed` (leading whitespace already removed) begins
/// with a bullet marker (`- `, `* `, `+ `) or an ordered-list marker: one or
/// more ASCII digits immediately followed by `. ` or `) `.
///
/// The delimiter must directly follow the digits, so prose such as
/// `3 apples. Or 4 pears` is not mistaken for a list item.
fn starts_with_list_marker(trimmed: &str) -> bool {
    if ["- ", "* ", "+ "].iter().any(|marker| trimmed.starts_with(marker)) {
        return true;
    }
    let after_digits = trimmed.trim_start_matches(|c: char| c.is_ascii_digit());
    let has_digits = after_digits.len() < trimmed.len();
    has_digits && (after_digits.starts_with(". ") || after_digits.starts_with(") "))
}
/// Wraps each raw line in an [`IndexedMDLine`], recording its zero-based
/// position in `lines` and the hierarchy level reported by `markdown_level`.
///
/// Accepts any slice of strings; a `&Vec<String>` argument coerces
/// automatically, so existing callers are unaffected.
fn lines_to_imdl(lines: &[String]) -> Vec<IndexedMDLine> {
    lines
        .iter()
        .enumerate()
        .map(|(index, text)| IndexedMDLine {
            text: text.clone(),
            index,
            level: markdown_level(text),
        })
        .collect()
}
/// A non-blank line together with its full ancestry in the document hierarchy.
#[derive(Debug, Clone)]
struct FullyQualifiedMDLine {
// Line contents, copied from the corresponding `IndexedMDLine`.
text: String,
// Line indices from the outermost ancestor down to this line itself
// (the last element is this line's own index), as built by `imdls_to_fqmdls`.
index: Vec<usize>,
}
/// Computes, for every non-blank line, the chain of line indices leading from
/// its outermost ancestor down to the line itself.
///
/// A stack of `(level, index)` pairs holds the currently open ancestors. Before
/// a line is pushed, every entry whose level is greater than or equal to the
/// line's level is popped, so only strictly shallower lines remain as its
/// ancestors. Lines whose text is empty after trimming are skipped and never
/// become ancestors.
///
/// Accepts any slice; a `&Vec<IndexedMDLine>` argument coerces automatically.
fn imdls_to_fqmdls(imdls: &[IndexedMDLine]) -> Vec<FullyQualifiedMDLine> {
    let mut ancestors: Vec<(i32, usize)> = Vec::new();
    let mut fqmdls = Vec::with_capacity(imdls.len());

    for line in imdls {
        if line.text.trim().is_empty() {
            continue;
        }

        // Close every open entry that is at the same depth or deeper.
        while matches!(ancestors.last(), Some(&(level, _)) if level >= line.level) {
            ancestors.pop();
        }
        ancestors.push((line.level, line.index));

        fqmdls.push(FullyQualifiedMDLine {
            text: line.text.clone(),
            index: ancestors.iter().map(|&(_, index)| index).collect(),
        });
    }

    fqmdls
}
/// Returns the `index` of every line that matches the keywords.
///
/// * `match_all`: when true a line must match every keyword, otherwise a
///   single matching keyword is enough.
/// * `fuzzy`: when true a keyword matches if the skim matcher finds any fuzzy
///   match in the line; otherwise matching is a case-insensitive substring
///   test (both sides lowercased).
///
/// Results are in the same order as `imdls`.
fn imdl_search(imdls: &[IndexedMDLine], kwords: &[String], match_all: bool, fuzzy: bool) -> Vec<usize> {
    let matcher = SkimMatcherV2::default();

    // Lowercase the keywords once up front instead of once per line.
    let lowered: Vec<String> = if fuzzy {
        Vec::new()
    } else {
        kwords.iter().map(|term| term.to_lowercase()).collect()
    };

    let line_matches = |text: &str| -> bool {
        if fuzzy {
            let hit = |term: &String| matcher.fuzzy_match(text, term).is_some();
            if match_all {
                kwords.iter().all(hit)
            } else {
                kwords.iter().any(hit)
            }
        } else {
            let haystack = text.to_lowercase();
            let hit = |term: &String| haystack.contains(term.as_str());
            if match_all {
                lowered.iter().all(hit)
            } else {
                lowered.iter().any(hit)
            }
        }
    };

    imdls
        .iter()
        .filter(|line| line_matches(&line.text))
        .map(|line| line.index)
        .collect()
}
/// Expands the directly matching line indices to everything that shares a
/// hierarchy path with them.
///
/// Any line whose ancestry path contains a matched index contributes its whole
/// path. This pulls in the ancestors of a match and the lines nested beneath
/// it. The matched indices themselves are always included. The result is
/// sorted ascending and free of duplicates.
fn associated_indices(fqmdls: &[FullyQualifiedMDLine], matches: &[usize]) -> Vec<usize> {
    let hits: HashSet<usize> = matches.iter().copied().collect();

    let mut collected: Vec<usize> = matches.to_vec();
    for entry in fqmdls {
        let touches_hit = entry.index.iter().any(|idx| hits.contains(idx));
        if touches_hit {
            collected.extend_from_slice(&entry.index);
        }
    }

    collected.sort_unstable();
    collected.dedup();
    collected
}
/// Returns clones of the lines whose `index` appears in `matches`, keeping the
/// order of `imdls`. Duplicate or unknown indices in `matches` are harmless.
fn filter_imdls(imdls: &[IndexedMDLine], matches: &[usize]) -> Vec<IndexedMDLine> {
    let wanted: HashSet<usize> = matches.iter().copied().collect();

    let mut kept = Vec::new();
    for candidate in imdls {
        if wanted.contains(&candidate.index) {
            kept.push(candidate.clone());
        }
    }
    kept
}
fn highlight_terms(text: &str, terms: &[String], fuzzy: bool) -> String {
if fuzzy {
highlight_fuzzy(text, terms)
} else {
highlight_exact(text, terms)
}
}
/// Wraps every case-insensitive occurrence of any term in bold red ANSI codes.
///
/// All terms are combined into one alternation and applied in a single pass
/// over the original text. Highlighting term by term would let a later term
/// (for example `m`, `1` or `31`) match inside the escape sequences inserted
/// for an earlier one and corrupt the output.
///
/// * Empty terms are ignored; an empty pattern would match at every position.
/// * Longer terms are tried first, so `foobar` wins over its prefix `foo`.
/// * If the combined pattern cannot be compiled (for example it exceeds the
///   regex size limit) the text is returned without highlighting, because
///   highlighting is purely cosmetic.
fn highlight_exact(text: &str, terms: &[String]) -> String {
    let mut needles: Vec<&str> = terms
        .iter()
        .map(String::as_str)
        .filter(|term| !term.is_empty())
        .collect();
    if needles.is_empty() {
        return text.to_string();
    }

    // Alternation is leftmost-first: put the longest literals first.
    needles.sort_by(|a, b| b.len().cmp(&a.len()));
    let pattern = needles
        .iter()
        .map(|term| regex::escape(term))
        .collect::<Vec<_>>()
        .join("|");

    let re = match RegexBuilder::new(&pattern).case_insensitive(true).build() {
        Ok(re) => re,
        Err(_) => return text.to_string(),
    };

    re.replace_all(text, |caps: &regex::Captures| {
        Style::new().bold().fg(Red).paint(&caps[0]).to_string()
    })
    .into_owned()
}
/// Highlights, in bold red, every character of `text` that takes part in a
/// fuzzy match of any term. Each highlighted character is styled on its own.
///
/// NOTE(review): the positions returned by `fuzzy_indices` are consumed as
/// character positions (they are compared against `chars().enumerate()`);
/// confirm against the fuzzy_matcher documentation.
fn highlight_fuzzy(text: &str, terms: &[String]) -> String {
    let matcher = SkimMatcherV2::default();

    // One flag per byte of `text`; the character count never exceeds the byte
    // count, so every character position has a slot.
    let mut hit = vec![false; text.len()];
    let positions = terms
        .iter()
        .filter_map(|term| matcher.fuzzy_indices(text, term))
        .flat_map(|(_score, indices)| indices);
    for pos in positions {
        if let Some(flag) = hit.get_mut(pos) {
            *flag = true;
        }
    }

    // Rebuild the line, styling the flagged characters.
    let accent = Style::new().bold().fg(Red);
    let mut out = String::new();
    for (pos, ch) in text.chars().enumerate() {
        if hit[pos] {
            out.push_str(&accent.paint(ch.to_string()).to_string());
        } else {
            out.push(ch);
        }
    }
    out
}
fn main() {
let args = Args::parse();
let reader = open_input(&args.file);
// All lines
let lines = lines_from_reader(reader);
// Lines as struct with index and level
let imdls = lines_to_imdl(&lines);
// Lines with index lists as fully qualified heirarchy
let fqmdls = imdls_to_fqmdls(&imdls);
// for line in &fqmdls {
// println!("{:?}", line);
// }
// Line indexes containing keyword matching
let matches = imdl_search(&imdls, &args.keywords, args.all, args.fuzzy);
// println!("Matches: {:?}", matches);
// Line indexes that share a heirarchy with matching lines
let associated_matches = associated_indices(&fqmdls, &matches);
// println!("Associated Matches: {:?}", associated_matches);
// The lines from the associated matching indices
let matching_imdls = filter_imdls(&imdls, &associated_matches);
for line in matching_imdls {
let highlighted = highlight_terms(&line.text, &args.keywords, args.fuzzy);
println!("{}", highlighted);
}
}