commit f9f7558d74a230f3ddbf18088332f12b6f967955 Author: Charlie Crossley Date: Tue Aug 19 14:28:10 2025 +0100 Initial Commit - Basic app diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/build/lib/mdq/__init__.py b/build/lib/mdq/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/build/lib/mdq/mdq.py b/build/lib/mdq/mdq.py new file mode 100644 index 0000000..c9b5255 --- /dev/null +++ b/build/lib/mdq/mdq.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 + +import sys +import re +import argparse + +# ANSI colors for terminal highlighting +ANSI_RED = '\033[91m' +ANSI_BOLD = '\033[1m' +ANSI_RESET = '\033[0m' + +# Offset for list items +LIST_OFFSET = 100 + +def classify_line(line): + """Classify line into heading, list, or text.""" + raw = line.rstrip('\n') + stripped = line.lstrip() + indent = len(line) - len(stripped) + + heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped) + list_match = re.match(r'^([-*+]|\d+\.)\s+(.*)', stripped) + + if heading_match: + return { + 'type': 'heading', + 'level': len(heading_match.group(1)), # Level based on number of # + 'text': heading_match.group(2), + 'indent': 0, + 'raw': raw + } + elif list_match: + return { + 'type': 'list', + 'level': LIST_OFFSET + indent, # List level offset by 100 + 'text': list_match.group(2), + 'indent': indent, + 'raw': raw + } + else: + return { + 'type': 'text', + 'level': None, + 'text': stripped, + 'indent': indent, + 'raw': raw + } + +def build_structure(lines): + """Builds a structure with parent-child relationships based on headings and indents.""" + structure = [] + stack = [] + + for line in lines: + entry = classify_line(line) + entry['children'] = [] + entry['parent'] = None + + if entry['type'] == 'list' or entry['type'] == 'heading': + while stack and entry['level'] <= stack[-1]['level']: + stack.pop() # Pop if new level is less than or equal to parent + + if stack: + entry['parent'] = stack[-1] + stack[-1]['children'].append(entry) + else: + structure.append(entry) + + stack.append(entry) + + return structure + +def collect_ancestors(entry): + """Collect all ancestor headings of a given entry.""" + ancestors = [] + current = entry['parent'] + while current: + ancestors.append(current) + current = current['parent'] + return list(reversed(ancestors)) + +def collect_descendants(entry): + """Collect all descendants (children and deeper) of a given entry.""" + result = [] + + def recurse(e): + for child in e['children']: + result.append(child) + recurse(child) + + recurse(entry) + return result + +def deduplicate_entries(entries): + """Ensure no duplicate entries in the results.""" + seen = set() + result = [] + for e in entries: + if id(e) not in seen: + seen.add(id(e)) + result.append(e) + return result + +def highlight_text(text, keywords): + """Highlight the keyword(s) in the text using ANSI color (red) and bold.""" + for keyword in keywords: + text = re.sub( + re.escape(keyword), + lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}", + text, + flags=re.IGNORECASE + ) + return text + +def extract_markdown(structure, keywords, raw_output=False, match_all=False): + """Extract matched entries and include their ancestors and descendants.""" + matched_entries = [] + + def search(entries): + for entry in entries: + # Check if the entry matches the search criteria (AND/OR) + entry_text = entry['text'].lower() + + # Match if it's AND search or OR search + matches = [keyword.lower() in entry_text for keyword in keywords] + + # If it's AND search (all terms must match) + if match_all: + if all(matches): + # Match found for all terms + if not raw_output: + entry['raw'] = highlight_text(entry['raw'], keywords) + matched_entries.append(entry) + # If it's OR search (any term matches) + else: + if any(matches): + # Match found for any term + if not raw_output: + entry['raw'] = highlight_text(entry['raw'], keywords) + matched_entries.append(entry) + + # Collect and include only ancestors and descendants that match + ancestors = collect_ancestors(entry) + descendants = collect_descendants(entry) + + # Include ancestors only if they haven't been added yet + for ancestor in ancestors: + if ancestor not in matched_entries: + matched_entries.append(ancestor) + + # Include descendants only if they haven't been added yet + for descendant in descendants: + if descendant not in matched_entries: + matched_entries.append(descendant) + + search(entry['children']) + + search(structure) + + # Deduplicate and sort by original order + all_entries = deduplicate_entries(matched_entries) + all_entries.sort(key=lambda e: e.get('line_num', 0)) + return [e['raw'] for e in all_entries] + +def main(): + """Main function that reads input and runs the search.""" + + # Set up argument parser + parser = argparse.ArgumentParser( + description="Search through markdown files, highlighting matched terms." + ) + + parser.add_argument( + 'keywords', + nargs='+', + help="The search keywords (at least one required)." + ) + + parser.add_argument( + '--raw', + action='store_true', + help="Output without highlighting." + ) + + parser.add_argument( + '--all', + action='store_true', + help="Match all keywords (AND search). By default, it's OR search." + ) + + # Parse arguments + args = parser.parse_args() + + input_lines = sys.stdin.readlines() + structure = build_structure(input_lines) + + # Annotate line numbers for sorting + for idx, entry in enumerate(flatten_structure(structure)): + entry['line_num'] = idx + + results = extract_markdown(structure, args.keywords, args.raw, args.all) + + if results: + print('\n'.join(results)) + else: + print(f"No matches found for keywords: {', '.join(args.keywords)}.") + +def flatten_structure(entries): + """Flattens a nested structure to list, preserving original order.""" + result = [] + for e in entries: + result.append(e) + result.extend(flatten_structure(e['children'])) + return result + +if __name__ == '__main__': + main() + diff --git a/mdq.egg-info/PKG-INFO b/mdq.egg-info/PKG-INFO new file mode 100644 index 0000000..c542602 --- /dev/null +++ b/mdq.egg-info/PKG-INFO @@ -0,0 +1,11 @@ +Metadata-Version: 2.1 +Name: mdq +Version: 0.1 +Summary: Markdown Query: Search and highlight content in markdown files. +Home-page: https://example.com +Author: Charlie Crossley +Author-email: charlie.crossley@iceelectronics.net +Classifier: Programming Language :: Python :: 3 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent +Description-Content-Type: text/markdown diff --git a/mdq.egg-info/SOURCES.txt b/mdq.egg-info/SOURCES.txt new file mode 100644 index 0000000..b385e4e --- /dev/null +++ b/mdq.egg-info/SOURCES.txt @@ -0,0 +1,9 @@ +README.md +setup.py +mdq/__init__.py +mdq/mdq.py +mdq.egg-info/PKG-INFO +mdq.egg-info/SOURCES.txt +mdq.egg-info/dependency_links.txt +mdq.egg-info/entry_points.txt +mdq.egg-info/top_level.txt \ No newline at end of file diff --git a/mdq.egg-info/dependency_links.txt b/mdq.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/mdq.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/mdq.egg-info/entry_points.txt b/mdq.egg-info/entry_points.txt new file mode 100644 index 0000000..c93b868 --- /dev/null +++ b/mdq.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +mdq = mdq.mdq:main diff --git a/mdq.egg-info/top_level.txt b/mdq.egg-info/top_level.txt new file mode 100644 index 0000000..35ac98a --- /dev/null +++ b/mdq.egg-info/top_level.txt @@ -0,0 +1 @@ +mdq diff --git a/mdq/__init__.py b/mdq/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mdq/mdq.py b/mdq/mdq.py new file mode 100755 index 0000000..c9b5255 --- /dev/null +++ b/mdq/mdq.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 + +import sys +import re +import argparse + +# ANSI colors for terminal highlighting +ANSI_RED = '\033[91m' +ANSI_BOLD = '\033[1m' +ANSI_RESET = '\033[0m' + +# Offset for list items +LIST_OFFSET = 100 + +def classify_line(line): + """Classify line into heading, list, or text.""" + raw = line.rstrip('\n') + stripped = line.lstrip() + indent = len(line) - len(stripped) + + heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped) + list_match = re.match(r'^([-*+]|\d+\.)\s+(.*)', stripped) + + if heading_match: + return { + 'type': 'heading', + 'level': len(heading_match.group(1)), # Level based on number of # + 'text': heading_match.group(2), + 'indent': 0, + 'raw': raw + } + elif list_match: + return { + 'type': 'list', + 'level': LIST_OFFSET + indent, # List level offset by 100 + 'text': list_match.group(2), + 'indent': indent, + 'raw': raw + } + else: + return { + 'type': 'text', + 'level': None, + 'text': stripped, + 'indent': indent, + 'raw': raw + } + +def build_structure(lines): + """Builds a structure with parent-child relationships based on headings and indents.""" + structure = [] + stack = [] + + for line in lines: + entry = classify_line(line) + entry['children'] = [] + entry['parent'] = None + + if entry['type'] == 'list' or entry['type'] == 'heading': + while stack and entry['level'] <= stack[-1]['level']: + stack.pop() # Pop if new level is less than or equal to parent + + if stack: + entry['parent'] = stack[-1] + stack[-1]['children'].append(entry) + else: + structure.append(entry) + + stack.append(entry) + + return structure + +def collect_ancestors(entry): + """Collect all ancestor headings of a given entry.""" + ancestors = [] + current = entry['parent'] + while current: + ancestors.append(current) + current = current['parent'] + return list(reversed(ancestors)) + +def collect_descendants(entry): + """Collect all descendants (children and deeper) of a given entry.""" + result = [] + + def recurse(e): + for child in e['children']: + result.append(child) + recurse(child) + + recurse(entry) + return result + +def deduplicate_entries(entries): + """Ensure no duplicate entries in the results.""" + seen = set() + result = [] + for e in entries: + if id(e) not in seen: + seen.add(id(e)) + result.append(e) + return result + +def highlight_text(text, keywords): + """Highlight the keyword(s) in the text using ANSI color (red) and bold.""" + for keyword in keywords: + text = re.sub( + re.escape(keyword), + lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}", + text, + flags=re.IGNORECASE + ) + return text + +def extract_markdown(structure, keywords, raw_output=False, match_all=False): + """Extract matched entries and include their ancestors and descendants.""" + matched_entries = [] + + def search(entries): + for entry in entries: + # Check if the entry matches the search criteria (AND/OR) + entry_text = entry['text'].lower() + + # Match if it's AND search or OR search + matches = [keyword.lower() in entry_text for keyword in keywords] + + # If it's AND search (all terms must match) + if match_all: + if all(matches): + # Match found for all terms + if not raw_output: + entry['raw'] = highlight_text(entry['raw'], keywords) + matched_entries.append(entry) + # If it's OR search (any term matches) + else: + if any(matches): + # Match found for any term + if not raw_output: + entry['raw'] = highlight_text(entry['raw'], keywords) + matched_entries.append(entry) + + # Collect and include only ancestors and descendants that match + ancestors = collect_ancestors(entry) + descendants = collect_descendants(entry) + + # Include ancestors only if they haven't been added yet + for ancestor in ancestors: + if ancestor not in matched_entries: + matched_entries.append(ancestor) + + # Include descendants only if they haven't been added yet + for descendant in descendants: + if descendant not in matched_entries: + matched_entries.append(descendant) + + search(entry['children']) + + search(structure) + + # Deduplicate and sort by original order + all_entries = deduplicate_entries(matched_entries) + all_entries.sort(key=lambda e: e.get('line_num', 0)) + return [e['raw'] for e in all_entries] + +def main(): + """Main function that reads input and runs the search.""" + + # Set up argument parser + parser = argparse.ArgumentParser( + description="Search through markdown files, highlighting matched terms." + ) + + parser.add_argument( + 'keywords', + nargs='+', + help="The search keywords (at least one required)." + ) + + parser.add_argument( + '--raw', + action='store_true', + help="Output without highlighting." + ) + + parser.add_argument( + '--all', + action='store_true', + help="Match all keywords (AND search). By default, it's OR search." + ) + + # Parse arguments + args = parser.parse_args() + + input_lines = sys.stdin.readlines() + structure = build_structure(input_lines) + + # Annotate line numbers for sorting + for idx, entry in enumerate(flatten_structure(structure)): + entry['line_num'] = idx + + results = extract_markdown(structure, args.keywords, args.raw, args.all) + + if results: + print('\n'.join(results)) + else: + print(f"No matches found for keywords: {', '.join(args.keywords)}.") + +def flatten_structure(entries): + """Flattens a nested structure to list, preserving original order.""" + result = [] + for e in entries: + result.append(e) + result.extend(flatten_structure(e['children'])) + return result + +if __name__ == '__main__': + main() + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..56bae91 --- /dev/null +++ b/setup.py @@ -0,0 +1,26 @@ +from setuptools import setup, find_packages + +setup( + name='mdq', # The name of your package + version='0.1', # Package version + packages=find_packages(), # Find all packages + install_requires=[], # Add dependencies here if needed + entry_points={ + 'console_scripts': [ + 'mdq = mdq.mdq:main', # Command 'mdq' runs the 'main' function in searchmd.py + ], + }, + # Other metadata (optional) + description="Markdown Query: Search and highlight content in markdown files.", + long_description=open('README.md').read(), + long_description_content_type='text/markdown', + author='Charlie Crossley', + author_email='charlie.crossley@iceelectronics.net', + url='https://example.com', # Replace with your repo URL + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], +) +