Initial Commit - Basic app

This commit is contained in:
Charlie Crossley
2025-08-19 14:28:10 +01:00
commit f9f7558d74
11 changed files with 486 additions and 0 deletions

0
README.md Normal file
View File

View File

218
build/lib/mdq/mdq.py Normal file
View File

@@ -0,0 +1,218 @@
#!/usr/bin/env python3
import sys
import re
import argparse
# ANSI escape sequences wrapped around matched keywords in terminal output
ANSI_RED = '\033[91m'
ANSI_BOLD = '\033[1m'
ANSI_RESET = '\033[0m'
# Added to a list item's indent to form its level, so that every list level
# (>= 100) compares greater than every heading level (1-6)
LIST_OFFSET = 100
def classify_line(line):
    """Classify a single markdown line as a heading, a list item, or plain text.

    Args:
        line: One line of input, with or without its trailing newline.

    Returns:
        A dict with these keys:
            'type':   'heading', 'list' or 'text'.
            'level':  number of leading '#' for a heading, LIST_OFFSET + indent
                      for a list item, None for text.
            'text':   content after the heading/list marker, or the line with
                      its leading whitespace removed for plain text.
            'indent': number of leading whitespace characters (0 for headings).
            'raw':    the line with trailing newlines removed.
    """
    raw = line.rstrip('\n')
    # Work from the newline-free text: lstrip() on the original line would also
    # eat the newline of a blank line (inflating its indent) and would leave the
    # newline inside the 'text' of plain-text entries.
    stripped = raw.lstrip()
    indent = len(raw) - len(stripped)

    # The content after the marker is optional so that a bare "#" or "-" is
    # still recognised now that the trailing newline is gone.
    heading_match = re.match(r'^(#{1,6})(?:\s+(.*)|$)', stripped)
    if heading_match:
        return {
            'type': 'heading',
            'level': len(heading_match.group(1)),  # Level based on number of #
            'text': heading_match.group(2) or '',
            'indent': 0,
            'raw': raw,
        }

    list_match = re.match(r'^([-*+]|\d+\.)(?:\s+(.*)|$)', stripped)
    if list_match:
        return {
            'type': 'list',
            'level': LIST_OFFSET + indent,  # List level offset by 100
            'text': list_match.group(2) or '',
            'indent': indent,
            'raw': raw,
        }

    return {
        'type': 'text',
        'level': None,
        'text': stripped,
        'indent': indent,
        'raw': raw,
    }
def build_structure(lines):
    """Builds a structure with parent-child relationships based on headings and indents.

    Every line is classified with classify_line() and given an empty
    'children' list and a 'parent' of None. Headings and list items are then
    linked into a tree by comparing their 'level' values against a stack of
    currently open entries.

    Args:
        lines: Iterable of markdown lines.

    Returns:
        The list of top-level entries (those without a parent).
    """
    structure = []
    stack = []  # Currently open headings/list items, outermost first
    for line in lines:
        entry = classify_line(line)
        entry['children'] = []
        entry['parent'] = None
        # NOTE(review): the nesting below is reconstructed. Under this reading
        # 'text' entries (whose level is None) never reach the level comparison
        # and are never pushed on the stack, so they are not added to the
        # structure at all -- confirm this is the intended behaviour.
        if entry['type'] == 'list' or entry['type'] == 'heading':
            while stack and entry['level'] <= stack[-1]['level']:
                stack.pop()  # Pop if new level is less than or equal to parent
            if stack:
                # Whatever is left on top of the stack has a smaller level
                entry['parent'] = stack[-1]
                stack[-1]['children'].append(entry)
            else:
                structure.append(entry)
            stack.append(entry)
    return structure
def collect_ancestors(entry):
    """Return the chain of parents above ``entry``, outermost first.

    The 'parent' links are followed upward until a falsy parent is reached.
    """
    lineage = []
    node = entry['parent']
    while node:
        # Prepend so the list ends up ordered from the root down
        lineage.insert(0, node)
        node = node['parent']
    return lineage
def collect_descendants(entry):
    """Return every entry nested below ``entry``, depth-first in pre-order.

    Each child is listed immediately before its own descendants; ``entry``
    itself is not part of the result.
    """
    found = []
    for child in entry['children']:
        found.append(child)
        found.extend(collect_descendants(child))
    return found
def deduplicate_entries(entries):
    """Drop repeated occurrences of the same entry object, keeping first-seen order.

    Entries are compared by identity (``id``), not by content, so two distinct
    entries with equal fields are both kept.
    """
    first_seen = {}
    for item in entries:
        # setdefault leaves the first occurrence in place for repeated objects
        first_seen.setdefault(id(item), item)
    return list(first_seen.values())
def highlight_text(text, keywords):
    """Highlight the keyword(s) in the text using ANSI color (red) and bold.

    All keywords are matched case-insensitively in a single pass over the
    original text.

    Args:
        text: The string to decorate.
        keywords: Iterable of literal search terms; empty terms are ignored.

    Returns:
        ``text`` with every keyword occurrence wrapped in
        ANSI_RED + ANSI_BOLD ... ANSI_RESET.
    """
    # Skip empty terms (they would match between every character) and try
    # longer terms first so "foobar" wins over "foo" at the same position.
    needles = sorted(
        (keyword for keyword in dict.fromkeys(keywords) if keyword),
        key=len,
        reverse=True,
    )
    if not needles:
        return text
    # One combined pattern instead of one substitution per keyword: a later
    # keyword can then never match inside the escape codes inserted for an
    # earlier one (e.g. "m" or "91" inside '\033[91m').
    pattern = re.compile(
        '|'.join(re.escape(needle) for needle in needles),
        flags=re.IGNORECASE,
    )
    return pattern.sub(
        lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}",
        text,
    )
def extract_markdown(structure, keywords, raw_output=False, match_all=False):
    """Extract matched entries and include their ancestors and descendants.

    An entry matches when its 'text' contains the keywords, compared
    case-insensitively: all of them when ``match_all`` is true, otherwise at
    least one. Each match pulls in its ancestors and descendants as context.

    Args:
        structure: List of top-level entries as built by build_structure().
        keywords: Search terms.
        raw_output: When true, matched lines are returned without highlighting.
        match_all: When true perform an AND search, otherwise an OR search.

    Returns:
        The 'raw' lines of all selected entries, ordered by their 'line_num'
        (discovery order when entries carry no 'line_num'). Matched lines are
        highlighted unless ``raw_output`` is set. The entries themselves are
        not modified.
    """
    needles = [keyword.lower() for keyword in keywords]
    combine = all if match_all else any

    selected = []         # Entries to output, in discovery order
    selected_ids = set()  # Identity-based membership: entries reference each
                          # other cyclically, so comparing them by value is
                          # slow and can recurse without bound
    rendered = {}         # id(entry) -> highlighted line for matched entries

    def include(entry):
        if id(entry) not in selected_ids:
            selected_ids.add(id(entry))
            selected.append(entry)

    def search(entries):
        for entry in entries:
            haystack = entry['text'].lower()
            if combine(needle in haystack for needle in needles):
                if not raw_output:
                    # Keep the highlighted text aside instead of overwriting
                    # entry['raw'], so repeated calls never double-highlight
                    rendered[id(entry)] = highlight_text(entry['raw'], keywords)
                include(entry)
                for ancestor in collect_ancestors(entry):
                    include(ancestor)
                for descendant in collect_descendants(entry):
                    include(descendant)
            search(entry['children'])

    search(structure)
    # Stable sort: restores document order when 'line_num' has been annotated
    selected.sort(key=lambda e: e.get('line_num', 0))
    return [rendered.get(id(e), e['raw']) for e in selected]
def main():
    """Command-line entry point: search markdown read from stdin.

    Parses the keywords and the --raw / --all flags, builds the entry tree
    from standard input, and prints the matching lines to standard output.
    When nothing matches, a message is written to standard error so that
    piped output stays free of diagnostics.
    """
    parser = argparse.ArgumentParser(
        description="Search through markdown files, highlighting matched terms."
    )
    parser.add_argument(
        'keywords',
        nargs='+',
        help="The search keywords (at least one required)."
    )
    parser.add_argument(
        '--raw',
        action='store_true',
        help="Output without highlighting."
    )
    parser.add_argument(
        '--all',
        action='store_true',
        help="Match all keywords (AND search). By default, it's OR search."
    )
    args = parser.parse_args()

    structure = build_structure(sys.stdin.readlines())
    # Number the entries in document order so results can be sorted back
    for idx, entry in enumerate(flatten_structure(structure)):
        entry['line_num'] = idx

    results = extract_markdown(structure, args.keywords, args.raw, args.all)
    if results:
        print('\n'.join(results))
    else:
        print(
            f"No matches found for keywords: {', '.join(args.keywords)}.",
            file=sys.stderr,
        )
def flatten_structure(entries):
    """Return all entries of a nested structure as one flat, pre-order list.

    Every entry is followed directly by its own subtree, and siblings keep
    their relative order.
    """
    ordered = []
    # Explicit stack; items are pushed reversed so they pop in original order
    pending = list(entries)[::-1]
    while pending:
        node = pending.pop()
        ordered.append(node)
        pending.extend(node['children'][::-1])
    return ordered
# Run the command-line interface only when executed as a script
if __name__ == '__main__':
    main()

11
mdq.egg-info/PKG-INFO Normal file
View File

@@ -0,0 +1,11 @@
Metadata-Version: 2.1
Name: mdq
Version: 0.1
Summary: Markdown Query: Search and highlight content in markdown files.
Home-page: https://example.com
Author: Charlie Crossley
Author-email: charlie.crossley@iceelectronics.net
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Description-Content-Type: text/markdown

9
mdq.egg-info/SOURCES.txt Normal file
View File

@@ -0,0 +1,9 @@
README.md
setup.py
mdq/__init__.py
mdq/mdq.py
mdq.egg-info/PKG-INFO
mdq.egg-info/SOURCES.txt
mdq.egg-info/dependency_links.txt
mdq.egg-info/entry_points.txt
mdq.egg-info/top_level.txt

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,2 @@
[console_scripts]
mdq = mdq.mdq:main

View File

@@ -0,0 +1 @@
mdq

0
mdq/__init__.py Normal file
View File

218
mdq/mdq.py Executable file
View File

@@ -0,0 +1,218 @@
#!/usr/bin/env python3
import sys
import re
import argparse
# ANSI escape sequences wrapped around matched keywords in terminal output
ANSI_RED = '\033[91m'
ANSI_BOLD = '\033[1m'
ANSI_RESET = '\033[0m'
# Added to a list item's indent to form its level, so that every list level
# (>= 100) compares greater than every heading level (1-6)
LIST_OFFSET = 100
def classify_line(line):
    """Classify a single markdown line as a heading, a list item, or plain text.

    Args:
        line: One line of input, with or without its trailing newline.

    Returns:
        A dict with these keys:
            'type':   'heading', 'list' or 'text'.
            'level':  number of leading '#' for a heading, LIST_OFFSET + indent
                      for a list item, None for text.
            'text':   content after the heading/list marker, or the line with
                      its leading whitespace removed for plain text.
            'indent': number of leading whitespace characters (0 for headings).
            'raw':    the line with trailing newlines removed.
    """
    raw = line.rstrip('\n')
    # Work from the newline-free text: lstrip() on the original line would also
    # eat the newline of a blank line (inflating its indent) and would leave the
    # newline inside the 'text' of plain-text entries.
    stripped = raw.lstrip()
    indent = len(raw) - len(stripped)

    # The content after the marker is optional so that a bare "#" or "-" is
    # still recognised now that the trailing newline is gone.
    heading_match = re.match(r'^(#{1,6})(?:\s+(.*)|$)', stripped)
    if heading_match:
        return {
            'type': 'heading',
            'level': len(heading_match.group(1)),  # Level based on number of #
            'text': heading_match.group(2) or '',
            'indent': 0,
            'raw': raw,
        }

    list_match = re.match(r'^([-*+]|\d+\.)(?:\s+(.*)|$)', stripped)
    if list_match:
        return {
            'type': 'list',
            'level': LIST_OFFSET + indent,  # List level offset by 100
            'text': list_match.group(2) or '',
            'indent': indent,
            'raw': raw,
        }

    return {
        'type': 'text',
        'level': None,
        'text': stripped,
        'indent': indent,
        'raw': raw,
    }
def build_structure(lines):
    """Builds a structure with parent-child relationships based on headings and indents.

    Every line is classified with classify_line() and given an empty
    'children' list and a 'parent' of None. Headings and list items are then
    linked into a tree by comparing their 'level' values against a stack of
    currently open entries.

    Args:
        lines: Iterable of markdown lines.

    Returns:
        The list of top-level entries (those without a parent).
    """
    structure = []
    stack = []  # Currently open headings/list items, outermost first
    for line in lines:
        entry = classify_line(line)
        entry['children'] = []
        entry['parent'] = None
        # NOTE(review): the nesting below is reconstructed. Under this reading
        # 'text' entries (whose level is None) never reach the level comparison
        # and are never pushed on the stack, so they are not added to the
        # structure at all -- confirm this is the intended behaviour.
        if entry['type'] == 'list' or entry['type'] == 'heading':
            while stack and entry['level'] <= stack[-1]['level']:
                stack.pop()  # Pop if new level is less than or equal to parent
            if stack:
                # Whatever is left on top of the stack has a smaller level
                entry['parent'] = stack[-1]
                stack[-1]['children'].append(entry)
            else:
                structure.append(entry)
            stack.append(entry)
    return structure
def collect_ancestors(entry):
    """Return the chain of parents above ``entry``, outermost first.

    The 'parent' links are followed upward until a falsy parent is reached.
    """
    lineage = []
    node = entry['parent']
    while node:
        # Prepend so the list ends up ordered from the root down
        lineage.insert(0, node)
        node = node['parent']
    return lineage
def collect_descendants(entry):
    """Return every entry nested below ``entry``, depth-first in pre-order.

    Each child is listed immediately before its own descendants; ``entry``
    itself is not part of the result.
    """
    found = []
    for child in entry['children']:
        found.append(child)
        found.extend(collect_descendants(child))
    return found
def deduplicate_entries(entries):
    """Drop repeated occurrences of the same entry object, keeping first-seen order.

    Entries are compared by identity (``id``), not by content, so two distinct
    entries with equal fields are both kept.
    """
    first_seen = {}
    for item in entries:
        # setdefault leaves the first occurrence in place for repeated objects
        first_seen.setdefault(id(item), item)
    return list(first_seen.values())
def highlight_text(text, keywords):
    """Highlight the keyword(s) in the text using ANSI color (red) and bold.

    All keywords are matched case-insensitively in a single pass over the
    original text.

    Args:
        text: The string to decorate.
        keywords: Iterable of literal search terms; empty terms are ignored.

    Returns:
        ``text`` with every keyword occurrence wrapped in
        ANSI_RED + ANSI_BOLD ... ANSI_RESET.
    """
    # Skip empty terms (they would match between every character) and try
    # longer terms first so "foobar" wins over "foo" at the same position.
    needles = sorted(
        (keyword for keyword in dict.fromkeys(keywords) if keyword),
        key=len,
        reverse=True,
    )
    if not needles:
        return text
    # One combined pattern instead of one substitution per keyword: a later
    # keyword can then never match inside the escape codes inserted for an
    # earlier one (e.g. "m" or "91" inside '\033[91m').
    pattern = re.compile(
        '|'.join(re.escape(needle) for needle in needles),
        flags=re.IGNORECASE,
    )
    return pattern.sub(
        lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}",
        text,
    )
def extract_markdown(structure, keywords, raw_output=False, match_all=False):
    """Extract matched entries and include their ancestors and descendants.

    An entry matches when its 'text' contains the keywords, compared
    case-insensitively: all of them when ``match_all`` is true, otherwise at
    least one. Each match pulls in its ancestors and descendants as context.

    Args:
        structure: List of top-level entries as built by build_structure().
        keywords: Search terms.
        raw_output: When true, matched lines are returned without highlighting.
        match_all: When true perform an AND search, otherwise an OR search.

    Returns:
        The 'raw' lines of all selected entries, ordered by their 'line_num'
        (discovery order when entries carry no 'line_num'). Matched lines are
        highlighted unless ``raw_output`` is set. The entries themselves are
        not modified.
    """
    needles = [keyword.lower() for keyword in keywords]
    combine = all if match_all else any

    selected = []         # Entries to output, in discovery order
    selected_ids = set()  # Identity-based membership: entries reference each
                          # other cyclically, so comparing them by value is
                          # slow and can recurse without bound
    rendered = {}         # id(entry) -> highlighted line for matched entries

    def include(entry):
        if id(entry) not in selected_ids:
            selected_ids.add(id(entry))
            selected.append(entry)

    def search(entries):
        for entry in entries:
            haystack = entry['text'].lower()
            if combine(needle in haystack for needle in needles):
                if not raw_output:
                    # Keep the highlighted text aside instead of overwriting
                    # entry['raw'], so repeated calls never double-highlight
                    rendered[id(entry)] = highlight_text(entry['raw'], keywords)
                include(entry)
                for ancestor in collect_ancestors(entry):
                    include(ancestor)
                for descendant in collect_descendants(entry):
                    include(descendant)
            search(entry['children'])

    search(structure)
    # Stable sort: restores document order when 'line_num' has been annotated
    selected.sort(key=lambda e: e.get('line_num', 0))
    return [rendered.get(id(e), e['raw']) for e in selected]
def main():
    """Command-line entry point: search markdown read from stdin.

    Parses the keywords and the --raw / --all flags, builds the entry tree
    from standard input, and prints the matching lines to standard output.
    When nothing matches, a message is written to standard error so that
    piped output stays free of diagnostics.
    """
    parser = argparse.ArgumentParser(
        description="Search through markdown files, highlighting matched terms."
    )
    parser.add_argument(
        'keywords',
        nargs='+',
        help="The search keywords (at least one required)."
    )
    parser.add_argument(
        '--raw',
        action='store_true',
        help="Output without highlighting."
    )
    parser.add_argument(
        '--all',
        action='store_true',
        help="Match all keywords (AND search). By default, it's OR search."
    )
    args = parser.parse_args()

    structure = build_structure(sys.stdin.readlines())
    # Number the entries in document order so results can be sorted back
    for idx, entry in enumerate(flatten_structure(structure)):
        entry['line_num'] = idx

    results = extract_markdown(structure, args.keywords, args.raw, args.all)
    if results:
        print('\n'.join(results))
    else:
        print(
            f"No matches found for keywords: {', '.join(args.keywords)}.",
            file=sys.stderr,
        )
def flatten_structure(entries):
    """Return all entries of a nested structure as one flat, pre-order list.

    Every entry is followed directly by its own subtree, and siblings keep
    their relative order.
    """
    ordered = []
    # Explicit stack; items are pushed reversed so they pop in original order
    pending = list(entries)[::-1]
    while pending:
        node = pending.pop()
        ordered.append(node)
        pending.extend(node['children'][::-1])
    return ordered
# Run the command-line interface only when executed as a script
if __name__ == '__main__':
    main()

26
setup.py Normal file
View File

@@ -0,0 +1,26 @@
from pathlib import Path

from setuptools import setup, find_packages

# Locate README.md next to this file so the build does not depend on the
# current working directory.
README_PATH = Path(__file__).resolve().parent / 'README.md'

setup(
    name='mdq',  # The name of your package
    version='0.1',  # Package version
    packages=find_packages(),  # Find all packages
    install_requires=[],  # Add dependencies here if needed
    entry_points={
        'console_scripts': [
            'mdq = mdq.mdq:main',  # Command 'mdq' runs the 'main' function in mdq/mdq.py
        ],
    },
    # Other metadata (optional)
    description="Markdown Query: Search and highlight content in markdown files.",
    # read_text() closes the file and decodes it as UTF-8 on every platform
    long_description=README_PATH.read_text(encoding='utf-8'),
    long_description_content_type='text/markdown',
    author='Charlie Crossley',
    author_email='charlie.crossley@iceelectronics.net',
    url='https://example.com',  # Replace with your repo URL
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)