Initial Commit - Basic app

2025-08-19 14:28:10 +01:00
commit f9f7558d74
11 changed files with 486 additions and 0 deletions
--- a/README.md
+++ b/README.md
--- a/build/lib/mdq/init.py
+++ b/build/lib/mdq/init.py
--- a/build/lib/mdq/mdq.py
+++ b/build/lib/mdq/mdq.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+
+import sys
+import re
+import argparse
+
+# ANSI escape sequences used to highlight matched keywords in terminal output
+ANSI_RED = '\033[91m'
+ANSI_BOLD = '\033[1m'
+ANSI_RESET = '\033[0m'
+
+# Added to a list item's indentation to form its nesting level, so that every
+# list item ranks deeper than any heading (heading levels are 1-6).
+LIST_OFFSET = 100
+
+def classify_line(line):
+    """Classify one line of markdown as a heading, a list item, or plain text.
+
+    Args:
+        line: A single input line, with or without its trailing newline.
+
+    Returns:
+        A dict with the keys:
+            'type':   'heading', 'list' or 'text'.
+            'level':  number of leading '#' for headings, LIST_OFFSET plus the
+                      indentation for list items, None for plain text.
+            'text':   the content after the heading/list marker, or the
+                      left-stripped line for plain text.
+            'indent': count of leading whitespace characters (always 0 for
+                      headings).
+            'raw':    the line without its trailing newline.
+    """
+    raw = line.rstrip('\n')
+    # Measure against the newline-stripped line: otherwise lstrip() swallows
+    # the newline of a blank line (inflating its indent) and plain-text
+    # entries keep a trailing newline in 'text'.
+    stripped = raw.lstrip()
+    indent = len(raw) - len(stripped)
+
+    # The text after the marker is optional so that a bare marker ("#", "-")
+    # is classified the same way whether or not a newline follows it.
+    heading_match = re.match(r'^(#{1,6})(?:\s+(.*))?$', stripped)
+    list_match = re.match(r'^([-*+]|\d+\.)(?:\s+(.*))?$', stripped)
+
+    if heading_match:
+        return {
+            'type': 'heading',
+            'level': len(heading_match.group(1)),  # Level based on number of #
+            'text': heading_match.group(2) or '',
+            'indent': 0,
+            'raw': raw,
+        }
+    if list_match:
+        return {
+            'type': 'list',
+            'level': LIST_OFFSET + indent,  # Deeper indent means deeper nesting
+            'text': list_match.group(2) or '',
+            'indent': indent,
+            'raw': raw,
+        }
+    return {
+        'type': 'text',
+        'level': None,
+        'text': stripped,
+        'indent': indent,
+        'raw': raw,
+    }
+
+def build_structure(lines):
+    """Arrange heading and list lines into a tree of parent/child entries.
+
+    Every line is classified; headings and list items are nested by comparing
+    their 'level', and plain-text lines are left out of the tree. Returns the
+    list of top-level entries.
+    """
+    roots = []
+    open_entries = []  # chain of entries from a root down to the latest one
+
+    for current_line in lines:
+        node = classify_line(current_line)
+        node['children'] = []
+        node['parent'] = None
+
+        if node['type'] not in ('list', 'heading'):
+            continue
+
+        # Close every open entry that is not strictly shallower than this one.
+        while open_entries and open_entries[-1]['level'] >= node['level']:
+            open_entries.pop()
+
+        if not open_entries:
+            roots.append(node)
+        else:
+            owner = open_entries[-1]
+            node['parent'] = owner
+            owner['children'].append(node)
+
+        open_entries.append(node)
+
+    return roots
+
+def collect_ancestors(entry):
+    """Return the chain of parents above an entry, outermost first.
+
+    Follows the 'parent' links upward until a falsy parent is reached; the
+    entry itself is not part of the result.
+    """
+    chain = []
+    node = entry['parent']
+    while node:
+        chain.insert(0, node)
+        node = node['parent']
+    return chain
+
+def collect_descendants(entry):
+    """Return every entry below the given one, in depth-first order.
+
+    Each child is followed immediately by its own descendants; the entry
+    itself is not included.
+    """
+    found = []
+    pending = entry['children'][::-1]
+    while pending:
+        node = pending.pop()
+        found.append(node)
+        # Reversed so the first child is the next one popped.
+        pending.extend(node['children'][::-1])
+    return found
+
+def deduplicate_entries(entries):
+    """Drop repeated occurrences of the same entry object, keeping first-seen order.
+
+    Entries are compared by identity (id()), not by value, so two distinct
+    dicts with equal contents are both kept.
+    """
+    unique = {}
+    for item in entries:
+        unique.setdefault(id(item), item)
+    return list(unique.values())
+
+def highlight_text(text, keywords):
+    """Wrap every occurrence of the keywords in ANSI bold red.
+
+    Matching is case-insensitive and the matched text keeps its original
+    casing.
+
+    Args:
+        text: The string to decorate.
+        keywords: Iterable of literal search terms; empty terms are ignored.
+
+    Returns:
+        The text with each match surrounded by ANSI_RED + ANSI_BOLD and
+        ANSI_RESET, or the text unchanged when there is nothing to search for.
+    """
+    # Longest first so that, when one term is a prefix of another, the
+    # alternation prefers the longer match.
+    terms = sorted((kw for kw in keywords if kw), key=len, reverse=True)
+    if not terms:
+        return text
+
+    # One pass over the original text: substituting one keyword at a time
+    # would let later keywords (e.g. "m" or "1") match inside the escape
+    # sequences inserted for earlier ones and corrupt the output.
+    pattern = re.compile(
+        '|'.join(re.escape(term) for term in terms),
+        re.IGNORECASE,
+    )
+    return pattern.sub(
+        lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}",
+        text,
+    )
+
+def extract_markdown(structure, keywords, raw_output=False, match_all=False):
+    """Return the raw lines of entries matching the keywords, in tree order.
+
+    An entry matches when its 'text' contains the keywords, compared
+    case-insensitively: any keyword by default, every keyword when match_all
+    is set. In the default (OR) mode each match also brings in all of its
+    ancestors and descendants as context; in AND mode only the matching
+    entries themselves are returned.
+
+    Args:
+        structure: Top-level entries of the tree; each entry needs the keys
+            'text', 'raw' and 'children'.
+        keywords: Search terms.
+        raw_output: When true, matched lines are returned without highlighting.
+        match_all: When true, require every keyword (AND) instead of any (OR).
+
+    Returns:
+        A list of strings, one per selected entry, in depth-first order of
+        the tree. Matched lines are highlighted unless raw_output is set;
+        context lines are returned as-is. The entries are not modified.
+    """
+    needles = [keyword.lower() for keyword in keywords]
+    combine = all if match_all else any
+    with_context = not match_all
+
+    ordered = []      # every entry, in depth-first order
+    # Selection is tracked by id(): entries reference each other through
+    # 'parent'/'children', so comparing them with == (as list membership
+    # does) can recurse without end on repeated identical sections.
+    selected = set()
+    rendered = {}     # id() -> highlighted line, for matched entries only
+
+    def walk(entries, ancestors, inside_match):
+        for entry in entries:
+            ordered.append(entry)
+            haystack = entry['text'].lower()
+            is_match = combine(needle in haystack for needle in needles)
+
+            if is_match:
+                if not raw_output:
+                    # Kept aside rather than written back to entry['raw'],
+                    # so repeated calls never highlight twice.
+                    rendered[id(entry)] = highlight_text(entry['raw'], keywords)
+                if with_context:
+                    selected.update(id(ancestor) for ancestor in ancestors)
+            if is_match or inside_match:
+                selected.add(id(entry))
+
+            walk(
+                entry['children'],
+                ancestors + [entry],
+                inside_match or (is_match and with_context),
+            )
+
+    walk(structure, [], False)
+
+    return [
+        rendered.get(id(entry), entry['raw'])
+        for entry in ordered
+        if id(entry) in selected
+    ]
+
+def main():
+    """Command-line entry point: search markdown read from standard input.
+
+    Parses the keywords and the --raw / --all flags, builds the entry tree
+    from stdin, and prints either the selected lines or a "no matches"
+    message.
+    """
+    cli = argparse.ArgumentParser(
+        description="Search through markdown files, highlighting matched terms."
+    )
+    cli.add_argument(
+        'keywords', nargs='+',
+        help="The search keywords (at least one required).",
+    )
+    cli.add_argument(
+        '--raw', action='store_true',
+        help="Output without highlighting.",
+    )
+    cli.add_argument(
+        '--all', action='store_true',
+        help="Match all keywords (AND search). By default, it's OR search.",
+    )
+    options = cli.parse_args()
+
+    tree = build_structure(sys.stdin.readlines())
+
+    # Stamp each entry with its position in depth-first order so the results
+    # can be put back into document order.
+    for position, node in enumerate(flatten_structure(tree)):
+        node['line_num'] = position
+
+    lines_out = extract_markdown(tree, options.keywords, options.raw, options.all)
+
+    if not lines_out:
+        print(f"No matches found for keywords: {', '.join(options.keywords)}.")
+        return
+    print('\n'.join(lines_out))
+
+def flatten_structure(entries):
+    """Return all entries of a tree as one flat list, depth-first.
+
+    Each entry is followed directly by its flattened children, so the list
+    keeps the order in which the entries were added to the tree.
+    """
+    flat = []
+    pending = list(entries)[::-1]
+    while pending:
+        node = pending.pop()
+        flat.append(node)
+        # Reversed so the first child is the next one popped.
+        pending.extend(list(node['children'])[::-1])
+    return flat
+
+# Run the command-line interface only when executed as a script, not on import.
+if __name__ == '__main__':
+    main()
+
--- a/mdq.egg-info/PKG-INFO
+++ b/mdq.egg-info/PKG-INFO
@@ -0,0 +1,11 @@
+Metadata-Version: 2.1
+Name: mdq
+Version: 0.1
+Summary: Markdown Query: Search and highlight content in markdown files.
+Home-page: https://example.com
+Author: Charlie Crossley
+Author-email: charlie.crossley@iceelectronics.net
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Description-Content-Type: text/markdown
--- a/mdq.egg-info/SOURCES.txt
+++ b/mdq.egg-info/SOURCES.txt
@@ -0,0 +1,9 @@
+README.md
+setup.py
+mdq/__init__.py
+mdq/mdq.py
+mdq.egg-info/PKG-INFO
+mdq.egg-info/SOURCES.txt
+mdq.egg-info/dependency_links.txt
+mdq.egg-info/entry_points.txt
+mdq.egg-info/top_level.txt
--- a/mdq.egg-info/dependency_links.txt
+++ b/mdq.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
--- a/mdq.egg-info/entry_points.txt
+++ b/mdq.egg-info/entry_points.txt
@@ -0,0 +1,2 @@
+[console_scripts]
+mdq = mdq.mdq:main
--- a/mdq.egg-info/top_level.txt
+++ b/mdq.egg-info/top_level.txt
@@ -0,0 +1 @@
+mdq
--- a/mdq/init.py
+++ b/mdq/init.py
--- a/mdq/mdq.py
+++ b/mdq/mdq.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+
+import sys
+import re
+import argparse
+
+# ANSI escape sequences used to highlight matched keywords in terminal output
+ANSI_RED = '\033[91m'
+ANSI_BOLD = '\033[1m'
+ANSI_RESET = '\033[0m'
+
+# Added to a list item's indentation to form its nesting level, so that every
+# list item ranks deeper than any heading (heading levels are 1-6).
+LIST_OFFSET = 100
+
+def classify_line(line):
+    """Classify one line of markdown as a heading, a list item, or plain text.
+
+    Args:
+        line: A single input line, with or without its trailing newline.
+
+    Returns:
+        A dict with the keys:
+            'type':   'heading', 'list' or 'text'.
+            'level':  number of leading '#' for headings, LIST_OFFSET plus the
+                      indentation for list items, None for plain text.
+            'text':   the content after the heading/list marker, or the
+                      left-stripped line for plain text.
+            'indent': count of leading whitespace characters (always 0 for
+                      headings).
+            'raw':    the line without its trailing newline.
+    """
+    raw = line.rstrip('\n')
+    # Measure against the newline-stripped line: otherwise lstrip() swallows
+    # the newline of a blank line (inflating its indent) and plain-text
+    # entries keep a trailing newline in 'text'.
+    stripped = raw.lstrip()
+    indent = len(raw) - len(stripped)
+
+    # The text after the marker is optional so that a bare marker ("#", "-")
+    # is classified the same way whether or not a newline follows it.
+    heading_match = re.match(r'^(#{1,6})(?:\s+(.*))?$', stripped)
+    list_match = re.match(r'^([-*+]|\d+\.)(?:\s+(.*))?$', stripped)
+
+    if heading_match:
+        return {
+            'type': 'heading',
+            'level': len(heading_match.group(1)),  # Level based on number of #
+            'text': heading_match.group(2) or '',
+            'indent': 0,
+            'raw': raw,
+        }
+    if list_match:
+        return {
+            'type': 'list',
+            'level': LIST_OFFSET + indent,  # Deeper indent means deeper nesting
+            'text': list_match.group(2) or '',
+            'indent': indent,
+            'raw': raw,
+        }
+    return {
+        'type': 'text',
+        'level': None,
+        'text': stripped,
+        'indent': indent,
+        'raw': raw,
+    }
+
+def build_structure(lines):
+    """Arrange heading and list lines into a tree of parent/child entries.
+
+    Every line is classified; headings and list items are nested by comparing
+    their 'level', and plain-text lines are left out of the tree. Returns the
+    list of top-level entries.
+    """
+    roots = []
+    open_entries = []  # chain of entries from a root down to the latest one
+
+    for current_line in lines:
+        node = classify_line(current_line)
+        node['children'] = []
+        node['parent'] = None
+
+        if node['type'] not in ('list', 'heading'):
+            continue
+
+        # Close every open entry that is not strictly shallower than this one.
+        while open_entries and open_entries[-1]['level'] >= node['level']:
+            open_entries.pop()
+
+        if not open_entries:
+            roots.append(node)
+        else:
+            owner = open_entries[-1]
+            node['parent'] = owner
+            owner['children'].append(node)
+
+        open_entries.append(node)
+
+    return roots
+
+def collect_ancestors(entry):
+    """Return the chain of parents above an entry, outermost first.
+
+    Follows the 'parent' links upward until a falsy parent is reached; the
+    entry itself is not part of the result.
+    """
+    chain = []
+    node = entry['parent']
+    while node:
+        chain.insert(0, node)
+        node = node['parent']
+    return chain
+
+def collect_descendants(entry):
+    """Return every entry below the given one, in depth-first order.
+
+    Each child is followed immediately by its own descendants; the entry
+    itself is not included.
+    """
+    found = []
+    pending = entry['children'][::-1]
+    while pending:
+        node = pending.pop()
+        found.append(node)
+        # Reversed so the first child is the next one popped.
+        pending.extend(node['children'][::-1])
+    return found
+
+def deduplicate_entries(entries):
+    """Drop repeated occurrences of the same entry object, keeping first-seen order.
+
+    Entries are compared by identity (id()), not by value, so two distinct
+    dicts with equal contents are both kept.
+    """
+    unique = {}
+    for item in entries:
+        unique.setdefault(id(item), item)
+    return list(unique.values())
+
+def highlight_text(text, keywords):
+    """Wrap every occurrence of the keywords in ANSI bold red.
+
+    Matching is case-insensitive and the matched text keeps its original
+    casing.
+
+    Args:
+        text: The string to decorate.
+        keywords: Iterable of literal search terms; empty terms are ignored.
+
+    Returns:
+        The text with each match surrounded by ANSI_RED + ANSI_BOLD and
+        ANSI_RESET, or the text unchanged when there is nothing to search for.
+    """
+    # Longest first so that, when one term is a prefix of another, the
+    # alternation prefers the longer match.
+    terms = sorted((kw for kw in keywords if kw), key=len, reverse=True)
+    if not terms:
+        return text
+
+    # One pass over the original text: substituting one keyword at a time
+    # would let later keywords (e.g. "m" or "1") match inside the escape
+    # sequences inserted for earlier ones and corrupt the output.
+    pattern = re.compile(
+        '|'.join(re.escape(term) for term in terms),
+        re.IGNORECASE,
+    )
+    return pattern.sub(
+        lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}",
+        text,
+    )
+
+def extract_markdown(structure, keywords, raw_output=False, match_all=False):
+    """Return the raw lines of entries matching the keywords, in tree order.
+
+    An entry matches when its 'text' contains the keywords, compared
+    case-insensitively: any keyword by default, every keyword when match_all
+    is set. In the default (OR) mode each match also brings in all of its
+    ancestors and descendants as context; in AND mode only the matching
+    entries themselves are returned.
+
+    Args:
+        structure: Top-level entries of the tree; each entry needs the keys
+            'text', 'raw' and 'children'.
+        keywords: Search terms.
+        raw_output: When true, matched lines are returned without highlighting.
+        match_all: When true, require every keyword (AND) instead of any (OR).
+
+    Returns:
+        A list of strings, one per selected entry, in depth-first order of
+        the tree. Matched lines are highlighted unless raw_output is set;
+        context lines are returned as-is. The entries are not modified.
+    """
+    needles = [keyword.lower() for keyword in keywords]
+    combine = all if match_all else any
+    with_context = not match_all
+
+    ordered = []      # every entry, in depth-first order
+    # Selection is tracked by id(): entries reference each other through
+    # 'parent'/'children', so comparing them with == (as list membership
+    # does) can recurse without end on repeated identical sections.
+    selected = set()
+    rendered = {}     # id() -> highlighted line, for matched entries only
+
+    def walk(entries, ancestors, inside_match):
+        for entry in entries:
+            ordered.append(entry)
+            haystack = entry['text'].lower()
+            is_match = combine(needle in haystack for needle in needles)
+
+            if is_match:
+                if not raw_output:
+                    # Kept aside rather than written back to entry['raw'],
+                    # so repeated calls never highlight twice.
+                    rendered[id(entry)] = highlight_text(entry['raw'], keywords)
+                if with_context:
+                    selected.update(id(ancestor) for ancestor in ancestors)
+            if is_match or inside_match:
+                selected.add(id(entry))
+
+            walk(
+                entry['children'],
+                ancestors + [entry],
+                inside_match or (is_match and with_context),
+            )
+
+    walk(structure, [], False)
+
+    return [
+        rendered.get(id(entry), entry['raw'])
+        for entry in ordered
+        if id(entry) in selected
+    ]
+
+def main():
+    """Command-line entry point: search markdown read from standard input.
+
+    Parses the keywords and the --raw / --all flags, builds the entry tree
+    from stdin, and prints either the selected lines or a "no matches"
+    message.
+    """
+    cli = argparse.ArgumentParser(
+        description="Search through markdown files, highlighting matched terms."
+    )
+    cli.add_argument(
+        'keywords', nargs='+',
+        help="The search keywords (at least one required).",
+    )
+    cli.add_argument(
+        '--raw', action='store_true',
+        help="Output without highlighting.",
+    )
+    cli.add_argument(
+        '--all', action='store_true',
+        help="Match all keywords (AND search). By default, it's OR search.",
+    )
+    options = cli.parse_args()
+
+    tree = build_structure(sys.stdin.readlines())
+
+    # Stamp each entry with its position in depth-first order so the results
+    # can be put back into document order.
+    for position, node in enumerate(flatten_structure(tree)):
+        node['line_num'] = position
+
+    lines_out = extract_markdown(tree, options.keywords, options.raw, options.all)
+
+    if not lines_out:
+        print(f"No matches found for keywords: {', '.join(options.keywords)}.")
+        return
+    print('\n'.join(lines_out))
+
+def flatten_structure(entries):
+    """Return all entries of a tree as one flat list, depth-first.
+
+    Each entry is followed directly by its flattened children, so the list
+    keeps the order in which the entries were added to the tree.
+    """
+    flat = []
+    pending = list(entries)[::-1]
+    while pending:
+        node = pending.pop()
+        flat.append(node)
+        # Reversed so the first child is the next one popped.
+        pending.extend(list(node['children'])[::-1])
+    return flat
+
+# Run the command-line interface only when executed as a script, not on import.
+if __name__ == '__main__':
+    main()
+
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,26 @@
+from pathlib import Path
+
+from setuptools import setup, find_packages
+
+# Locate the README next to this file rather than in the current working
+# directory; read_text() closes the file and decodes it explicitly as UTF-8.
+README = Path(__file__).resolve().parent / 'README.md'
+
+setup(
+    name='mdq',  # The name of your package
+    version='0.1',  # Package version
+    packages=find_packages(),  # Find all packages
+    install_requires=[],  # Add dependencies here if needed
+    entry_points={
+        'console_scripts': [
+            'mdq = mdq.mdq:main',  # Command 'mdq' runs main() in mdq/mdq.py
+        ],
+    },
+    # Other metadata (optional)
+    description="Markdown Query: Search and highlight content in markdown files.",
+    long_description=README.read_text(encoding='utf-8'),
+    long_description_content_type='text/markdown',
+    author='Charlie Crossley',
+    author_email='charlie.crossley@iceelectronics.net',
+    url='https://example.com',  # Replace with your repo URL
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+)
+