Initial Commit - Basic app

2025-08-19 14:28:10 +01:00
commit f9f7558d74
11 changed files with 486 additions and 0 deletions
--- a/README.md
+++ b/README.md
--- a/build/lib/mdq/__init__.py
+++ b/build/lib/mdq/__init__.py
--- a/build/lib/mdq/mdq.py
+++ b/build/lib/mdq/mdq.py
@@ -0,0 +1,218 @@
 #!/usr/bin/env python3
 import sys
 import re
 import argparse
 # ANSI escape sequences used to highlight matched keywords in terminal output
 ANSI_RED = '\033[91m'  # bright red foreground
 ANSI_BOLD = '\033[1m'  # bold
 ANSI_RESET = '\033[0m'  # reset all attributes
 # Added to a list item's indent to form its level, so that every list item
 # ranks deeper than any heading (heading levels are 1-6)
 LIST_OFFSET = 100
 def classify_line(line):
    """Classify one Markdown line as a heading, a list item, or plain text.

    Args:
        line: A single input line, with or without its line terminator.

    Returns:
        A dict with the keys:
            'type':   'heading', 'list' or 'text'.
            'level':  number of leading '#' characters for a heading,
                      LIST_OFFSET plus the indent for a list item,
                      None for plain text.
            'text':   the content after the marker (the whole de-indented
                      line for plain text); never contains the terminator.
            'indent': number of leading whitespace characters
                      (always 0 for headings).
            'raw':    the line without its trailing line terminator.
    """
    # Drop the terminator first so it can neither leak into 'text' nor be
    # counted as indentation on blank lines.
    raw = line.rstrip('\r\n')
    content = raw.lstrip()
    indent = len(raw) - len(content)
    # The text after a marker is optional: a bare '#' or '-' is a marker
    # whether or not the line happened to end with a newline.
    heading_match = re.match(r'^(#{1,6})(?:\s+(.*)|$)', content)
    list_match = re.match(r'^([-*+]|\d+\.)(?:\s+(.*)|$)', content)
    if heading_match:
        return {
            'type': 'heading',
            'level': len(heading_match.group(1)),  # Level based on number of #
            'text': heading_match.group(2) or '',
            'indent': 0,
            'raw': raw
        }
    if list_match:
        return {
            'type': 'list',
            'level': LIST_OFFSET + indent,  # Deeper indent => deeper level
            'text': list_match.group(2) or '',
            'indent': indent,
            'raw': raw
        }
    return {
        'type': 'text',
        'level': None,
        'text': content,
        'indent': indent,
        'raw': raw
    }
 def build_structure(lines):
    """Arrange heading and list lines into a forest of nested entries.

    Every line is classified; headings and list items become entries that
    carry a 'children' list and a 'parent' reference.  An entry is nested
    under the nearest preceding entry whose level is strictly smaller.
    Plain text lines are classified but not placed in the result.

    Args:
        lines: Iterable of input lines.

    Returns:
        The list of top-level entries.
    """
    roots = []
    open_entries = []  # chain of still-open ancestors, outermost first
    for text_line in lines:
        node = classify_line(text_line)
        node.update(children=[], parent=None)
        if node['type'] not in ('list', 'heading'):
            continue
        # Close every open entry that is not strictly shallower than this one.
        while open_entries and open_entries[-1]['level'] >= node['level']:
            open_entries.pop()
        if not open_entries:
            roots.append(node)
        else:
            owner = open_entries[-1]
            node['parent'] = owner
            owner['children'].append(node)
        open_entries.append(node)
    return roots
 def collect_ancestors(entry):
    """Return the chain of parents of *entry*, outermost ancestor first.

    Follows the 'parent' reference until it reaches an entry without one.
    """
    chain = []
    node = entry['parent']
    while node:
        chain.append(node)
        node = node['parent']
    # The walk runs from the nearest parent outwards; flip it root-first.
    chain.reverse()
    return chain
 def collect_descendants(entry):
    """Return every entry below *entry*, depth-first in document order.

    Each child is listed immediately before its own descendants; *entry*
    itself is not part of the result.
    """
    found = []
    # Explicit stack; children are pushed reversed so they pop in order.
    pending = list(entry['children'])[::-1]
    while pending:
        node = pending.pop()
        found.append(node)
        pending.extend(list(node['children'])[::-1])
    return found
 def deduplicate_entries(entries):
    """Drop repeated occurrences of the same entry object, keeping order.

    Entries are told apart by identity (id()), not by equality, so two
    distinct entries with equal contents are both kept.
    """
    first_seen = {}
    for item in entries:
        # setdefault keeps the position of the first occurrence.
        first_seen.setdefault(id(item), item)
    return list(first_seen.values())
 def highlight_text(text, keywords):
    """Wrap every keyword occurrence in bold red ANSI escape codes.

    Matching is case-insensitive and keywords are treated literally (regex
    metacharacters are escaped).  The matched text keeps its original case.

    Args:
        text: The string to decorate.
        keywords: Iterable of search terms; empty terms are ignored.

    Returns:
        The decorated string, or *text* unchanged when there is nothing
        to search for.
    """
    # An empty keyword would match between every character; skip those.
    terms = [keyword for keyword in keywords if keyword]
    if not terms:
        return text
    # All keywords go into ONE pattern applied in a single pass, so a later
    # keyword (e.g. 'm' or '1') can never match inside the escape sequences
    # inserted for an earlier one.  Longest first, so overlapping keywords
    # prefer the longer match.
    terms.sort(key=len, reverse=True)
    pattern = '|'.join(re.escape(term) for term in terms)
    return re.sub(
        pattern,
        lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}",
        text,
        flags=re.IGNORECASE
    )
 def extract_markdown(structure, keywords, raw_output=False, match_all=False):
    """Return the lines of all entries selected by a keyword search.

    An entry matches when its 'text' contains the keywords as
    case-insensitive substrings.

    Args:
        structure: List of root entries; each entry is a dict with 'text',
            'raw', 'children' and 'parent' keys.
        keywords: Search terms.
        raw_output: When False, the line of every matching entry is passed
            through highlight_text; when True, lines are returned as-is.
        match_all: When True an entry must contain every keyword (AND) and
            only the matching entries themselves are returned.  When False
            one keyword is enough (OR) and every match additionally pulls
            in all of its ancestors and all of its descendants.

    Returns:
        A list of strings, one per selected entry, without duplicates.
        Entries are ordered by their 'line_num' when present and by
        document order otherwise.  The entries themselves are not modified.
    """
    needles = [keyword.lower() for keyword in keywords]
    # Selection is tracked by identity.  Entries reference each other via
    # 'parent'/'children', so comparing them with == (which is what list
    # membership does) can recurse without end on repeated sections.
    selected = set()
    rendered = {}  # id(entry) -> highlighted line, kept off the entry itself
    document_order = []

    def search(entries, inherited):
        # 'inherited' is True below an OR-match: such entries are included
        # as context even when they do not match themselves.
        for entry in entries:
            document_order.append(entry)
            haystack = entry['text'].lower()
            hits = [needle in haystack for needle in needles]
            matched = all(hits) if match_all else any(hits)
            if matched:
                selected.add(id(entry))
                if not raw_output:
                    rendered[id(entry)] = highlight_text(entry['raw'], keywords)
                if not match_all:
                    # OR search: include the whole chain of parents.
                    ancestor = entry['parent']
                    while ancestor:
                        selected.add(id(ancestor))
                        ancestor = ancestor['parent']
            elif inherited:
                selected.add(id(entry))
            search(entry['children'], inherited or (matched and not match_all))

    search(structure, False)
    chosen = [entry for entry in document_order if id(entry) in selected]
    # Stable sort: without 'line_num' the document order above is kept.
    chosen.sort(key=lambda entry: entry.get('line_num', 0))
    return [rendered.get(id(entry), entry['raw']) for entry in chosen]
 def main():
    """Command-line entry point: search Markdown read from standard input.

    Parses the keywords and the --raw / --all switches, builds the entry
    structure from stdin, and prints either the selected lines or a
    "no matches" message.
    """
    cli = argparse.ArgumentParser(
        description="Search through markdown files, highlighting matched terms."
    )
    cli.add_argument(
        'keywords', nargs='+',
        help="The search keywords (at least one required)."
    )
    cli.add_argument(
        '--raw', action='store_true',
        help="Output without highlighting."
    )
    cli.add_argument(
        '--all', action='store_true',
        help="Match all keywords (AND search). By default, it's OR search."
    )
    options = cli.parse_args()
    structure = build_structure(sys.stdin.readlines())
    # Number the entries in document order; the search sorts its output
    # by this position.
    for position, node in enumerate(flatten_structure(structure)):
        node['line_num'] = position
    results = extract_markdown(
        structure, options.keywords, options.raw, options.all
    )
    if not results:
        print(f"No matches found for keywords: {', '.join(options.keywords)}.")
        return
    print('\n'.join(results))
 def flatten_structure(entries):
    """Return all entries of a nested structure as one flat list.

    The order is depth-first: each entry is followed directly by the
    flattened contents of its 'children'.
    """
    def walk(nodes):
        for node in nodes:
            yield node
            yield from walk(node['children'])

    return list(walk(entries))
 # Run the command-line interface only when executed as a script,
 # not when the module is imported.
 if __name__ == '__main__':
    main()
--- a/mdq.egg-info/PKG-INFO
+++ b/mdq.egg-info/PKG-INFO
@@ -0,0 +1,11 @@
 Metadata-Version: 2.1
 Name: mdq
 Version: 0.1
 Summary: Markdown Query: Search and highlight content in markdown files.
 Home-page: https://example.com
 Author: Charlie Crossley
 Author-email: charlie.crossley@iceelectronics.net
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Description-Content-Type: text/markdown
--- a/mdq.egg-info/SOURCES.txt
+++ b/mdq.egg-info/SOURCES.txt
@@ -0,0 +1,9 @@
 README.md
 setup.py
 mdq/__init__.py
 mdq/mdq.py
 mdq.egg-info/PKG-INFO
 mdq.egg-info/SOURCES.txt
 mdq.egg-info/dependency_links.txt
 mdq.egg-info/entry_points.txt
 mdq.egg-info/top_level.txt
--- a/mdq.egg-info/dependency_links.txt
+++ b/mdq.egg-info/dependency_links.txt
@@ -0,0 +1 @@
--- a/mdq.egg-info/entry_points.txt
+++ b/mdq.egg-info/entry_points.txt
@@ -0,0 +1,2 @@
 [console_scripts]
 mdq = mdq.mdq:main
--- a/mdq.egg-info/top_level.txt
+++ b/mdq.egg-info/top_level.txt
@@ -0,0 +1 @@
 mdq
--- a/mdq/__init__.py
+++ b/mdq/__init__.py
--- a/mdq/mdq.py
+++ b/mdq/mdq.py
@@ -0,0 +1,218 @@
 #!/usr/bin/env python3
 import sys
 import re
 import argparse
 # ANSI escape sequences used to highlight matched keywords in terminal output
 ANSI_RED = '\033[91m'  # bright red foreground
 ANSI_BOLD = '\033[1m'  # bold
 ANSI_RESET = '\033[0m'  # reset all attributes
 # Added to a list item's indent to form its level, so that every list item
 # ranks deeper than any heading (heading levels are 1-6)
 LIST_OFFSET = 100
 def classify_line(line):
    """Classify one Markdown line as a heading, a list item, or plain text.

    Args:
        line: A single input line, with or without its line terminator.

    Returns:
        A dict with the keys:
            'type':   'heading', 'list' or 'text'.
            'level':  number of leading '#' characters for a heading,
                      LIST_OFFSET plus the indent for a list item,
                      None for plain text.
            'text':   the content after the marker (the whole de-indented
                      line for plain text); never contains the terminator.
            'indent': number of leading whitespace characters
                      (always 0 for headings).
            'raw':    the line without its trailing line terminator.
    """
    # Drop the terminator first so it can neither leak into 'text' nor be
    # counted as indentation on blank lines.
    raw = line.rstrip('\r\n')
    content = raw.lstrip()
    indent = len(raw) - len(content)
    # The text after a marker is optional: a bare '#' or '-' is a marker
    # whether or not the line happened to end with a newline.
    heading_match = re.match(r'^(#{1,6})(?:\s+(.*)|$)', content)
    list_match = re.match(r'^([-*+]|\d+\.)(?:\s+(.*)|$)', content)
    if heading_match:
        return {
            'type': 'heading',
            'level': len(heading_match.group(1)),  # Level based on number of #
            'text': heading_match.group(2) or '',
            'indent': 0,
            'raw': raw
        }
    if list_match:
        return {
            'type': 'list',
            'level': LIST_OFFSET + indent,  # Deeper indent => deeper level
            'text': list_match.group(2) or '',
            'indent': indent,
            'raw': raw
        }
    return {
        'type': 'text',
        'level': None,
        'text': content,
        'indent': indent,
        'raw': raw
    }
 def build_structure(lines):
    """Arrange heading and list lines into a forest of nested entries.

    Every line is classified; headings and list items become entries that
    carry a 'children' list and a 'parent' reference.  An entry is nested
    under the nearest preceding entry whose level is strictly smaller.
    Plain text lines are classified but not placed in the result.

    Args:
        lines: Iterable of input lines.

    Returns:
        The list of top-level entries.
    """
    roots = []
    open_entries = []  # chain of still-open ancestors, outermost first
    for text_line in lines:
        node = classify_line(text_line)
        node.update(children=[], parent=None)
        if node['type'] not in ('list', 'heading'):
            continue
        # Close every open entry that is not strictly shallower than this one.
        while open_entries and open_entries[-1]['level'] >= node['level']:
            open_entries.pop()
        if not open_entries:
            roots.append(node)
        else:
            owner = open_entries[-1]
            node['parent'] = owner
            owner['children'].append(node)
        open_entries.append(node)
    return roots
 def collect_ancestors(entry):
    """Return the chain of parents of *entry*, outermost ancestor first.

    Follows the 'parent' reference until it reaches an entry without one.
    """
    chain = []
    node = entry['parent']
    while node:
        chain.append(node)
        node = node['parent']
    # The walk runs from the nearest parent outwards; flip it root-first.
    chain.reverse()
    return chain
 def collect_descendants(entry):
    """Return every entry below *entry*, depth-first in document order.

    Each child is listed immediately before its own descendants; *entry*
    itself is not part of the result.
    """
    found = []
    # Explicit stack; children are pushed reversed so they pop in order.
    pending = list(entry['children'])[::-1]
    while pending:
        node = pending.pop()
        found.append(node)
        pending.extend(list(node['children'])[::-1])
    return found
 def deduplicate_entries(entries):
    """Drop repeated occurrences of the same entry object, keeping order.

    Entries are told apart by identity (id()), not by equality, so two
    distinct entries with equal contents are both kept.
    """
    first_seen = {}
    for item in entries:
        # setdefault keeps the position of the first occurrence.
        first_seen.setdefault(id(item), item)
    return list(first_seen.values())
 def highlight_text(text, keywords):
    """Wrap every keyword occurrence in bold red ANSI escape codes.

    Matching is case-insensitive and keywords are treated literally (regex
    metacharacters are escaped).  The matched text keeps its original case.

    Args:
        text: The string to decorate.
        keywords: Iterable of search terms; empty terms are ignored.

    Returns:
        The decorated string, or *text* unchanged when there is nothing
        to search for.
    """
    # An empty keyword would match between every character; skip those.
    terms = [keyword for keyword in keywords if keyword]
    if not terms:
        return text
    # All keywords go into ONE pattern applied in a single pass, so a later
    # keyword (e.g. 'm' or '1') can never match inside the escape sequences
    # inserted for an earlier one.  Longest first, so overlapping keywords
    # prefer the longer match.
    terms.sort(key=len, reverse=True)
    pattern = '|'.join(re.escape(term) for term in terms)
    return re.sub(
        pattern,
        lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}",
        text,
        flags=re.IGNORECASE
    )
 def extract_markdown(structure, keywords, raw_output=False, match_all=False):
    """Return the lines of all entries selected by a keyword search.

    An entry matches when its 'text' contains the keywords as
    case-insensitive substrings.

    Args:
        structure: List of root entries; each entry is a dict with 'text',
            'raw', 'children' and 'parent' keys.
        keywords: Search terms.
        raw_output: When False, the line of every matching entry is passed
            through highlight_text; when True, lines are returned as-is.
        match_all: When True an entry must contain every keyword (AND) and
            only the matching entries themselves are returned.  When False
            one keyword is enough (OR) and every match additionally pulls
            in all of its ancestors and all of its descendants.

    Returns:
        A list of strings, one per selected entry, without duplicates.
        Entries are ordered by their 'line_num' when present and by
        document order otherwise.  The entries themselves are not modified.
    """
    needles = [keyword.lower() for keyword in keywords]
    # Selection is tracked by identity.  Entries reference each other via
    # 'parent'/'children', so comparing them with == (which is what list
    # membership does) can recurse without end on repeated sections.
    selected = set()
    rendered = {}  # id(entry) -> highlighted line, kept off the entry itself
    document_order = []

    def search(entries, inherited):
        # 'inherited' is True below an OR-match: such entries are included
        # as context even when they do not match themselves.
        for entry in entries:
            document_order.append(entry)
            haystack = entry['text'].lower()
            hits = [needle in haystack for needle in needles]
            matched = all(hits) if match_all else any(hits)
            if matched:
                selected.add(id(entry))
                if not raw_output:
                    rendered[id(entry)] = highlight_text(entry['raw'], keywords)
                if not match_all:
                    # OR search: include the whole chain of parents.
                    ancestor = entry['parent']
                    while ancestor:
                        selected.add(id(ancestor))
                        ancestor = ancestor['parent']
            elif inherited:
                selected.add(id(entry))
            search(entry['children'], inherited or (matched and not match_all))

    search(structure, False)
    chosen = [entry for entry in document_order if id(entry) in selected]
    # Stable sort: without 'line_num' the document order above is kept.
    chosen.sort(key=lambda entry: entry.get('line_num', 0))
    return [rendered.get(id(entry), entry['raw']) for entry in chosen]
 def main():
    """Command-line entry point: search Markdown read from standard input.

    Parses the keywords and the --raw / --all switches, builds the entry
    structure from stdin, and prints either the selected lines or a
    "no matches" message.
    """
    cli = argparse.ArgumentParser(
        description="Search through markdown files, highlighting matched terms."
    )
    cli.add_argument(
        'keywords', nargs='+',
        help="The search keywords (at least one required)."
    )
    cli.add_argument(
        '--raw', action='store_true',
        help="Output without highlighting."
    )
    cli.add_argument(
        '--all', action='store_true',
        help="Match all keywords (AND search). By default, it's OR search."
    )
    options = cli.parse_args()
    structure = build_structure(sys.stdin.readlines())
    # Number the entries in document order; the search sorts its output
    # by this position.
    for position, node in enumerate(flatten_structure(structure)):
        node['line_num'] = position
    results = extract_markdown(
        structure, options.keywords, options.raw, options.all
    )
    if not results:
        print(f"No matches found for keywords: {', '.join(options.keywords)}.")
        return
    print('\n'.join(results))
 def flatten_structure(entries):
    """Return all entries of a nested structure as one flat list.

    The order is depth-first: each entry is followed directly by the
    flattened contents of its 'children'.
    """
    def walk(nodes):
        for node in nodes:
            yield node
            yield from walk(node['children'])

    return list(walk(entries))
 # Run the command-line interface only when executed as a script,
 # not when the module is imported.
 if __name__ == '__main__':
    main()
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,26 @@
 from setuptools import setup, find_packages
 # Read the long description up front with an explicit encoding and a context
 # manager, so the file handle is closed and the result does not depend on
 # the platform's default encoding.
 with open('README.md', encoding='utf-8') as readme_file:
    readme_text = readme_file.read()
 setup(
    name='mdq',  # The name of the package
    version='0.1',  # Package version
    packages=find_packages(),  # Find all packages
    install_requires=[],  # No runtime dependencies
    entry_points={
        'console_scripts': [
            'mdq = mdq.mdq:main',  # Command 'mdq' runs main() in mdq/mdq.py
        ],
    },
    # Other metadata (optional)
    description="Markdown Query: Search and highlight content in markdown files.",
    long_description=readme_text,
    long_description_content_type='text/markdown',
    author='Charlie Crossley',
    author_email='charlie.crossley@iceelectronics.net',
    url='https://example.com',  # Replace with your repo URL
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
 )