diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..0f58f87 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "mdq" +version = "0.1.0" +edition = "2024" + +[dependencies] +ansi_term = "0.12.1" +atty = "0.2.14" +clap = { version = "4.5.45", features = ["derive"] } +fuzzy-matcher = "0.3.7" +regex = "1.11.1" diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..998f937 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,272 @@ +use std::fs::File; +use std::io::{self, BufRead, BufReader}; +use clap::Parser; +use atty::Stream; +use std::path::Path; +use fuzzy_matcher::skim::SkimMatcherV2; +use fuzzy_matcher::FuzzyMatcher; +use ansi_term::Style; +use ansi_term::Colour::Red; +use regex::RegexBuilder; +use std::collections::HashSet; + +#[derive(Parser, Debug)] +#[command(author, version, about = "Context-aware markdown file search")] +struct Args { + #[arg(required = true, help = "One or more keywords to search for in the markdown text.")] + keywords: Vec, + #[arg(long, value_name = "FILE", value_hint = clap::ValueHint::FilePath, help = "Optional input file path. Reads from stdin if omitted.")] + file: Option, + #[arg(long, help = "Enable matching all keywords instead of any.")] + all: bool, + #[arg(long, help = "Enable fuzzy matching to allow approximate keyword matches.")] + fuzzy: bool, +} + +fn open_input(file: &Option) -> Box { + if let Some(f) = file { + match File::open(f) { + Ok(file) => Box::new(BufReader::new(file)), + Err(e) => { + eprintln!("Failed to open file '{}': {}", f, e); + std::process::exit(1); + } + } + } else if !atty::is(Stream::Stdin) { + Box::new(BufReader::new(io::stdin())) + } else { + eprintln!("Error: No input file provided and no piped stdin detected."); + std::process::exit(1); + } +} + +fn lines_from_reader(reader: R) -> Vec { + reader + .lines() + .filter_map(|line| { + match line { + Ok(text) => Some(text), + Err(e) => { + eprintln!("Error reading line: {}", e); + None + } + } + }) + .collect() +} + +#[derive(Debug, Clone)] +struct IndexedMDLine { + text: String, // Line contents + index: usize, // Line number + level: i32, // Heirarchy Level +} + +fn markdown_level(line: &str) -> i32 { + let trimmed = line.trim_start(); + + // Headings (1-10) + if trimmed.starts_with('#') { + let level = trimmed.chars().take_while(|&c| c == '#').count(); + if level >= 1 && level <= 10 { + return level as i32; + } + } + + // Lists (100+) + let indent = line.len() - trimmed.len(); + if trimmed.starts_with("- ") + || trimmed.starts_with("* ") + || trimmed.starts_with("+ ") + || trimmed.chars().next().map(|c| c.is_digit(10)).unwrap_or(false) + && trimmed.contains(". ") + { + return 100 + (indent / 2) as i32; + } + + // Body text + if !trimmed.is_empty() { + return 100; + } + + -1 +} + +fn lines_to_imdl(lines: &Vec) -> Vec { + lines + .into_iter() + .enumerate() + .map(|(i, line)| IndexedMDLine { + text: line.clone(), + index: i, + level: markdown_level(&line), + }) + .collect() +} + +#[derive(Debug, Clone)] +struct FullyQualifiedMDLine { + text: String, + index: Vec, +} + +fn imdls_to_fqmdls(imdls: &Vec) -> Vec { + let mut stack: Vec<(i32, usize)> = Vec::new(); + let mut fqmdls = Vec::new(); + + for line in imdls { + if line.text.trim().is_empty() { + continue; + } + + while let Some(&(prev_level, _)) = stack.last() { + if prev_level < line.level { + break; + } + stack.pop(); + } + + stack.push((line.level, line.index)); + let fqindex = stack.iter().map(|&(_, i)| i).collect(); + + fqmdls.push(FullyQualifiedMDLine { + text: line.text.clone(), + index: fqindex, + }); + } + fqmdls +} + +fn imdl_search(imdls: &[IndexedMDLine], kwords: &[String], match_all: bool, fuzzy: bool) -> Vec { + let matcher = SkimMatcherV2::default(); + imdls + .iter() + .filter(|line| { + let text = &line.text; + + if fuzzy { + if match_all { + kwords.iter().all(|term| matcher.fuzzy_match(text, term).is_some()) + } else { + kwords.iter().any(|term| matcher.fuzzy_match(text, term).is_some()) + } + } else { + let text_lower = text.to_lowercase(); + if match_all { + kwords.iter().all(|term| text_lower.contains(&term.to_lowercase())) + } else { + kwords.iter().any(|term| text_lower.contains(&term.to_lowercase())) + } + } + }) + .map(|line| line.index) + .collect() + +} + +fn associated_indices(fqmdls: &[FullyQualifiedMDLine], matches: &[usize]) -> Vec { + let matched: HashSet = matches.iter().copied().collect(); + let mut result: HashSet = matched.clone(); + + for line in fqmdls { + if line.index.iter().any(|i| matched.contains(i)) { + result.extend(&line.index); + } + } + + let mut result_vec: Vec = result.into_iter().collect(); + result_vec.sort_unstable(); + result_vec +} + +fn filter_imdls(imdls: &[IndexedMDLine], matches: &[usize]) -> Vec { + let index_set: HashSet = matches.iter().copied().collect(); + + imdls + .iter() + .filter(|line| index_set.contains(&line.index)) + .cloned() + .collect() +} + +fn highlight_terms(text: &str, terms: &[String], fuzzy: bool) -> String { + if fuzzy { + highlight_fuzzy(text, terms) + } else { + highlight_exact(text, terms) + } +} + +fn highlight_exact(text: &str, terms: &[String]) -> String { + let mut highlighted = text.to_string(); + for term in terms { + let pattern = regex::escape(term); + let re = RegexBuilder::new(&pattern) + .case_insensitive(true) + .build() + .unwrap(); + highlighted = re + .replace_all(&highlighted, |caps: ®ex::Captures| { + Style::new().bold().fg(Red).paint(&caps[0]).to_string() + }) + .to_string(); + } + highlighted +} + +fn highlight_fuzzy(text: &str, terms: &[String]) -> String { + let matcher = SkimMatcherV2::default(); + let mut matched_indices = vec![false; text.len()]; + + for term in terms { + if let Some((_score, indices)) = matcher.fuzzy_indices(text, term) { + for i in indices { + if i < matched_indices.len() { + matched_indices[i] = true; + } + } + } + } + + // Apply highlighting to matched characters + let mut result = String::new(); + let mut chars = text.chars().enumerate().peekable(); + + while let Some((i, c)) = chars.next() { + if matched_indices[i] { + result.push_str(&Style::new().bold().fg(Red).paint(c.to_string()).to_string()); + } else { + result.push(c); + } + } + + result +} + +fn main() { + let args = Args::parse(); + + let reader = open_input(&args.file); + // All lines + let lines = lines_from_reader(reader); + // Lines as struct with index and level + let imdls = lines_to_imdl(&lines); + // Lines with index lists as fully qualified heirarchy + let fqmdls = imdls_to_fqmdls(&imdls); + // for line in &fqmdls { + // println!("{:?}", line); + // } + // Line indexes containing keyword matching + let matches = imdl_search(&imdls, &args.keywords, args.all, args.fuzzy); + // println!("Matches: {:?}", matches); + // Line indexes that share a heirarchy with matching lines + let associated_matches = associated_indices(&fqmdls, &matches); + // println!("Associated Matches: {:?}", associated_matches); + // The lines from the associated matching indices + let matching_imdls = filter_imdls(&imdls, &associated_matches); + for line in matching_imdls { + let highlighted = highlight_terms(&line.text, &args.keywords, args.fuzzy); + println!("{}", highlighted); + } + +}