Compare commits

...

3 Commits

Author SHA1 Message Date
Charlie Crossley
3793c69cec updated README 2025-08-20 14:07:12 +01:00
Charlie Crossley
5353b0e27f updated README 2025-08-20 14:05:42 +01:00
Charlie Crossley
2c80cc135b converted to rust 2025-08-20 14:04:17 +01:00
3 changed files with 297 additions and 0 deletions

11
Cargo.toml Normal file
View File

@@ -0,0 +1,11 @@
[package]
name = "mdq"
version = "0.1.0"
edition = "2024"
[dependencies]
ansi_term = "0.12.1"
atty = "0.2.14"
clap = { version = "4.5.45", features = ["derive"] }
fuzzy-matcher = "0.3.7"
regex = "1.11.1"

View File

@@ -8,6 +8,8 @@ Features
- Highlight results: Search results are highlighted for better visibility using ANSI color codes.
- Contextual results: Display surrounding context around matches, such as parent headings or list items.
- Support for multiple search terms: Search for more than one term at once.
<<<<<<< HEAD
=======
- No dependencies: `mdq` is built using Python's standard libraries, so it's easy to set up and use.
Installation
@@ -58,6 +60,7 @@ sudo pip install . --break-system-packages
```
Note: This method is not recommended, as it may interfere with system-managed Python packages.
>>>>>>> 38b7a94f22720c5ce085c217edbc49a93f326c9c
Usage
@@ -66,7 +69,11 @@ Once installed, you can use the `mdq` command to search through markdown files.
Basic Search
```bash
<<<<<<< HEAD
mdq "search term" --file /path/to/your/markdown/files/*.md
=======
cat /path/to/your/markdown/files/*.md | mdq search_term
>>>>>>> 38b7a94f22720c5ce085c217edbc49a93f326c9c
```
This will search for the term `"search_term"` in all `.md` files in the specified directory.
@@ -74,7 +81,11 @@ This will search for the term `"search_term"` in all `.md` files in the specifie
Multiple Search Terms
```bash
<<<<<<< HEAD
mdq "term1" "term2" --file /path/to/your/markdown/files/*.md
=======
cat /path/to/your/markdown/files/*.md | mdq search_term_1 search_term_2
>>>>>>> 38b7a94f22720c5ce085c217edbc49a93f326c9c
```
This will search for both `"search_term_1"` and `"search_term_2"` in the specified markdown files and show the results for both.
@@ -83,6 +94,8 @@ Search with Context
By default, the results will show the keyword matches with surrounding context (headings and list items). You can adjust the context size and highlighting options through various flags.
<<<<<<< HEAD
=======
Flags
- `--raw`: Output results without any highlighting (for raw output).
@@ -98,6 +111,7 @@ Flags
```bash
mdq --help
```
>>>>>>> 38b7a94f22720c5ce085c217edbc49a93f326c9c
Example

272
src/main.rs Normal file
View File

@@ -0,0 +1,272 @@
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use clap::Parser;
use atty::Stream;
use std::path::Path;
use fuzzy_matcher::skim::SkimMatcherV2;
use fuzzy_matcher::FuzzyMatcher;
use ansi_term::Style;
use ansi_term::Colour::Red;
use regex::RegexBuilder;
use std::collections::HashSet;
// Command-line arguments for `mdq`, parsed through clap's derive API.
// NOTE(review): plain `//` comments are used here on purpose; clap's derive
// macros pick up `///` doc comments as help/about text, which could change
// the CLI output.
#[derive(Parser, Debug)]
#[command(author, version, about = "Context-aware markdown file search")]
struct Args {
// Positional search terms; `required = true` means at least one must be given.
#[arg(required = true, help = "One or more keywords to search for in the markdown text.")]
keywords: Vec<String>,
// `--file <FILE>`: input path. When absent, `open_input` falls back to piped stdin.
#[arg(long, value_name = "FILE", value_hint = clap::ValueHint::FilePath, help = "Optional input file path. Reads from stdin if omitted.")]
file: Option<String>,
// `--all`: a line must match every keyword; without it, any single keyword suffices.
#[arg(long, help = "Enable matching all keywords instead of any.")]
all: bool,
// `--fuzzy`: use the skim fuzzy matcher instead of case-insensitive substring search.
#[arg(long, help = "Enable fuzzy matching to allow approximate keyword matches.")]
fuzzy: bool,
}
/// Opens the markdown input as a buffered reader.
///
/// * `Some(path)`: opens that file and wraps it in a `BufReader`.
/// * `None`: reads from stdin, but only when stdin is not an interactive
///   terminal (i.e. something was piped or redirected into the process).
///
/// This function does not return on failure: if the file cannot be opened, or
/// if no file was given and stdin is a terminal, it prints a message to stderr
/// and exits the process with status 1.
fn open_input(file: &Option<String>) -> Box<dyn BufRead> {
    if let Some(path) = file {
        match File::open(path) {
            Ok(handle) => Box::new(BufReader::new(handle)),
            Err(e) => {
                eprintln!("Failed to open file '{}': {}", path, e);
                std::process::exit(1);
            }
        }
    // The standard library's `IsTerminal` replaces the unmaintained `atty`
    // crate for this check. It is called through its full path so that no
    // additional `use` line is required.
    } else if !io::IsTerminal::is_terminal(&io::stdin()) {
        // Stdin is already buffered internally; locking it yields a `BufRead`
        // directly, so wrapping it in another `BufReader` is unnecessary.
        Box::new(io::stdin().lock())
    } else {
        eprintln!("Error: No input file provided and no piped stdin detected.");
        std::process::exit(1);
    }
}
/// Reads every line from `reader` into a vector (line terminators removed).
///
/// Error handling:
/// * A line that is not valid UTF-8 (`ErrorKind::InvalidData`) is reported on
///   stderr and skipped; the offending bytes have already been consumed, so
///   reading continues with the next line.
/// * Any other I/O error is reported and reading stops, returning the lines
///   collected so far. Such errors can repeat indefinitely without the reader
///   making progress (for example when the input path is a directory), so
///   skipping them would never terminate.
fn lines_from_reader<R: BufRead>(reader: R) -> Vec<String> {
    let mut collected = Vec::new();
    for line in reader.lines() {
        match line {
            Ok(text) => collected.push(text),
            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
                eprintln!("Error reading line: {}", e);
            }
            Err(e) => {
                eprintln!("Error reading line: {}", e);
                break;
            }
        }
    }
    collected
}
/// One input line paired with its position and its markdown nesting level.
#[derive(Debug, Clone)]
struct IndexedMDLine {
text: String, // Line contents (a copy of the input line)
index: usize, // Zero-based position of the line in the input, assigned via `enumerate`
level: i32, // Hierarchy level from `markdown_level`: 1-10 heading, 100+ list/body text, -1 blank
}
/// Classifies a line by its place in the markdown hierarchy.
///
/// Returns:
/// * `1..=10` for a line whose first non-whitespace characters are that many
///   `#` characters;
/// * `100 + indent / 2` for a list item (`- `, `* `, `+ `, or a leading digit
///   with `". "` somewhere in the line), where `indent` is the number of
///   leading whitespace bytes;
/// * `100` for any other non-blank line (including a run of more than ten `#`);
/// * `-1` for an empty or whitespace-only line.
fn markdown_level(line: &str) -> i32 {
    let content = line.trim_start();

    // Nothing left after trimming: blank line.
    let first = match content.chars().next() {
        Some(c) => c,
        None => return -1,
    };

    if first == '#' {
        let hashes = content.chars().take_while(|&c| c == '#').count();
        // More than ten hashes is not treated as a heading; such a line can
        // never look like a list item, so it is ordinary body text.
        return if hashes <= 10 { hashes as i32 } else { 100 };
    }

    let is_bullet = ["- ", "* ", "+ "]
        .iter()
        .any(|&marker| content.starts_with(marker));
    let is_ordered = first.is_ascii_digit() && content.contains(". ");

    if is_bullet || is_ordered {
        // Two bytes of leading whitespace count as one nesting step.
        let indent_bytes = line.len() - content.len();
        100 + (indent_bytes / 2) as i32
    } else {
        100
    }
}
/// Converts raw input lines into `IndexedMDLine` records.
///
/// Each record holds a copy of the line text, the line's zero-based position
/// in `lines`, and the hierarchy level computed by `markdown_level`.
///
/// Accepts any slice of strings; a `&Vec<String>` argument still works
/// through deref coercion.
fn lines_to_imdl(lines: &[String]) -> Vec<IndexedMDLine> {
    lines
        .iter()
        .enumerate()
        .map(|(index, text)| IndexedMDLine {
            text: text.clone(),
            index,
            level: markdown_level(text),
        })
        .collect()
}
/// A non-blank line together with the chain of line indices leading to it.
#[derive(Debug, Clone)]
struct FullyQualifiedMDLine {
// Line contents.
text: String,
// Line indices from the outermost enclosing line down to this line itself
// (this line's own index is the last element), as built by `imdls_to_fqmdls`.
index: Vec<usize>,
}
/// Builds, for every non-blank line, the chain of line indices from its
/// outermost ancestor down to the line itself.
///
/// A stack of `(level, index)` pairs tracks the currently open ancestors. For
/// each line, every stack entry whose level is greater than or equal to the
/// line's level is closed (popped); the line is then pushed, and the indices
/// remaining on the stack form its path. Lines whose text is empty or
/// whitespace-only are skipped and produce no output entry.
///
/// Accepts any slice; a `&Vec<IndexedMDLine>` argument still works through
/// deref coercion.
fn imdls_to_fqmdls(imdls: &[IndexedMDLine]) -> Vec<FullyQualifiedMDLine> {
    let mut open: Vec<(i32, usize)> = Vec::new();
    let mut fqmdls = Vec::with_capacity(imdls.len());

    for line in imdls {
        if line.text.trim().is_empty() {
            continue;
        }

        // Close every ancestor that is at the same depth or deeper.
        while matches!(open.last(), Some(&(level, _)) if level >= line.level) {
            open.pop();
        }
        open.push((line.level, line.index));

        fqmdls.push(FullyQualifiedMDLine {
            text: line.text.clone(),
            index: open.iter().map(|&(_, i)| i).collect(),
        });
    }

    fqmdls
}
/// Returns the `index` of every line that matches the keywords.
///
/// * `match_all`: when true a line must match every keyword, otherwise a
///   match on any single keyword is enough.
/// * `fuzzy`: when true each keyword is tested with `SkimMatcherV2::fuzzy_match`;
///   otherwise a case-insensitive substring test is used (both the line and
///   the keyword are lower-cased).
///
/// Results are in the same order as `imdls`.
fn imdl_search(imdls: &[IndexedMDLine], kwords: &[String], match_all: bool, fuzzy: bool) -> Vec<usize> {
    let matcher = SkimMatcherV2::default();

    // Lower-case the keywords once up front instead of once per line per
    // keyword. Only the substring path needs them.
    let lowered: Vec<String> = if fuzzy {
        Vec::new()
    } else {
        kwords.iter().map(|term| term.to_lowercase()).collect()
    };

    imdls
        .iter()
        .filter(|line| {
            let text = &line.text;
            if fuzzy {
                if match_all {
                    kwords.iter().all(|term| matcher.fuzzy_match(text, term).is_some())
                } else {
                    kwords.iter().any(|term| matcher.fuzzy_match(text, term).is_some())
                }
            } else {
                let text_lower = text.to_lowercase();
                if match_all {
                    lowered.iter().all(|term| text_lower.contains(term.as_str()))
                } else {
                    lowered.iter().any(|term| text_lower.contains(term.as_str()))
                }
            }
        })
        .map(|line| line.index)
        .collect()
}
/// Expands the matched line indices to every line that shares a hierarchy
/// path with a match.
///
/// For each entry in `fqmdls` whose path contains at least one matched index,
/// the whole path is added to the result. The matched indices themselves are
/// always included. The returned vector is sorted ascending with no duplicates.
fn associated_indices(fqmdls: &[FullyQualifiedMDLine], matches: &[usize]) -> Vec<usize> {
    let hits: HashSet<usize> = matches.iter().copied().collect();

    // Start from the matches, then append every path that touches one of them.
    let mut related: Vec<usize> = matches.to_vec();
    related.extend(
        fqmdls
            .iter()
            .filter(|entry| entry.index.iter().any(|i| hits.contains(i)))
            .flat_map(|entry| entry.index.iter().copied()),
    );

    related.sort_unstable();
    related.dedup();
    related
}
/// Returns copies of the lines in `imdls` whose `index` appears in `matches`.
///
/// The output keeps the order of `imdls`; duplicate or unknown entries in
/// `matches` have no effect.
fn filter_imdls(imdls: &[IndexedMDLine], matches: &[usize]) -> Vec<IndexedMDLine> {
    let wanted: HashSet<usize> = matches.iter().copied().collect();

    let mut kept = Vec::new();
    for entry in imdls {
        if wanted.contains(&entry.index) {
            kept.push(entry.clone());
        }
    }
    kept
}
fn highlight_terms(text: &str, terms: &[String], fuzzy: bool) -> String {
if fuzzy {
highlight_fuzzy(text, terms)
} else {
highlight_exact(text, terms)
}
}
/// Wraps every case-insensitive occurrence of any term in bold red ANSI
/// styling and returns the resulting string.
///
/// All terms are escaped, joined into a single alternation and applied in one
/// pass over the original text. Replacing one term at a time would let a later
/// term match inside the escape sequences inserted for an earlier one (for
/// example the term "m" or "31"), corrupting the output.
///
/// Empty terms are ignored, because an empty pattern matches at every
/// position. Longer terms are tried first so that, where two terms start at
/// the same position, the longer one is highlighted in full.
///
/// If the combined pattern cannot be compiled, the text is returned without
/// highlighting rather than aborting the program.
fn highlight_exact(text: &str, terms: &[String]) -> String {
    let mut needles: Vec<&str> = terms
        .iter()
        .map(String::as_str)
        .filter(|term| !term.is_empty())
        .collect();
    if needles.is_empty() {
        return text.to_string();
    }

    // The regex alternation prefers the leftmost alternative that matches, so
    // put longer terms first.
    needles.sort_by(|a, b| b.len().cmp(&a.len()));

    let pattern = needles
        .iter()
        .map(|term| regex::escape(term))
        .collect::<Vec<String>>()
        .join("|");

    let re = match RegexBuilder::new(&pattern).case_insensitive(true).build() {
        Ok(re) => re,
        // Highlighting is cosmetic; fall back to the plain text.
        Err(_) => return text.to_string(),
    };

    let emphasis = Style::new().bold().fg(Red);
    re.replace_all(text, |caps: &regex::Captures| {
        emphasis.paint(&caps[0]).to_string()
    })
    .to_string()
}
fn highlight_fuzzy(text: &str, terms: &[String]) -> String {
let matcher = SkimMatcherV2::default();
let mut matched_indices = vec![false; text.len()];
for term in terms {
if let Some((_score, indices)) = matcher.fuzzy_indices(text, term) {
for i in indices {
if i < matched_indices.len() {
matched_indices[i] = true;
}
}
}
}
// Apply highlighting to matched characters
let mut result = String::new();
let mut chars = text.chars().enumerate().peekable();
while let Some((i, c)) = chars.next() {
if matched_indices[i] {
result.push_str(&Style::new().bold().fg(Red).paint(c.to_string()).to_string());
} else {
result.push(c);
}
}
result
}
/// Entry point: parses the arguments, reads the input, finds the lines that
/// match the keywords, expands them to the lines sharing their hierarchy, and
/// prints those lines with the keywords highlighted.
fn main() {
    let args = Args::parse();

    // Raw input lines, from the given file or from piped stdin.
    let lines = lines_from_reader(open_input(&args.file));

    // Each line annotated with its index and hierarchy level.
    let imdls = lines_to_imdl(&lines);

    // Each non-blank line with its full chain of ancestor indices.
    let fqmdls = imdls_to_fqmdls(&imdls);

    // Indices of lines that match the keywords directly.
    let matches = imdl_search(&imdls, &args.keywords, args.all, args.fuzzy);

    // Indices of lines that share a hierarchy path with a matching line.
    let associated_matches = associated_indices(&fqmdls, &matches);

    // Print the selected lines in input order.
    for line in filter_imdls(&imdls, &associated_matches) {
        println!("{}", highlight_terms(&line.text, &args.keywords, args.fuzzy));
    }
}