Initial Commit - Basic app
build/lib/mdq/__init__.py (Normal file, 0 lines)
build/lib/mdq/mdq.py (Normal file, 218 lines; contents identical to mdq/mdq.py below)
mdq.egg-info/PKG-INFO (Normal file, 11 lines)
@@ -0,0 +1,11 @@
Metadata-Version: 2.1
Name: mdq
Version: 0.1
Summary: Markdown Query: Search and highlight content in markdown files.
Home-page: https://example.com
Author: Charlie Crossley
Author-email: charlie.crossley@iceelectronics.net
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Description-Content-Type: text/markdown
mdq.egg-info/SOURCES.txt (Normal file, 9 lines)
@@ -0,0 +1,9 @@
README.md
setup.py
mdq/__init__.py
mdq/mdq.py
mdq.egg-info/PKG-INFO
mdq.egg-info/SOURCES.txt
mdq.egg-info/dependency_links.txt
mdq.egg-info/entry_points.txt
mdq.egg-info/top_level.txt
mdq.egg-info/dependency_links.txt (Normal file, 1 blank line)
@@ -0,0 +1 @@
mdq.egg-info/entry_points.txt (Normal file, 2 lines)
@@ -0,0 +1,2 @@
[console_scripts]
mdq = mdq.mdq:main
mdq.egg-info/top_level.txt (Normal file, 1 line)
@@ -0,0 +1 @@
mdq
mdq/__init__.py (Normal file, 0 lines)
mdq/mdq.py (Executable file, 218 lines)
@@ -0,0 +1,218 @@
#!/usr/bin/env python3

import sys
import re
import argparse

# ANSI colors for terminal highlighting
ANSI_RED = '\033[91m'
ANSI_BOLD = '\033[1m'
ANSI_RESET = '\033[0m'

# Offset for list items
LIST_OFFSET = 100


def classify_line(line):
    """Classify line into heading, list, or text."""
    raw = line.rstrip('\n')
    stripped = line.lstrip()
    indent = len(line) - len(stripped)

    heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
    list_match = re.match(r'^([-*+]|\d+\.)\s+(.*)', stripped)

    if heading_match:
        return {
            'type': 'heading',
            'level': len(heading_match.group(1)),  # Level based on number of #
            'text': heading_match.group(2),
            'indent': 0,
            'raw': raw
        }
    elif list_match:
        return {
            'type': 'list',
            'level': LIST_OFFSET + indent,  # List level offset by 100
            'text': list_match.group(2),
            'indent': indent,
            'raw': raw
        }
    else:
        return {
            'type': 'text',
            'level': None,
            'text': stripped,
            'indent': indent,
            'raw': raw
        }


def build_structure(lines):
    """Builds a structure with parent-child relationships based on headings and indents."""
    structure = []
    stack = []

    for line in lines:
        entry = classify_line(line)
        entry['children'] = []
        entry['parent'] = None

        # Only headings and list items open or close nesting levels
        if entry['type'] == 'list' or entry['type'] == 'heading':
            while stack and entry['level'] <= stack[-1]['level']:
                stack.pop()  # Pop if new level is less than or equal to parent

        # Attach the entry to the current parent, or to the top level
        if stack:
            entry['parent'] = stack[-1]
            stack[-1]['children'].append(entry)
        else:
            structure.append(entry)

        # Headings and list items become the parent for the lines that follow
        if entry['type'] == 'list' or entry['type'] == 'heading':
            stack.append(entry)

    return structure


def collect_ancestors(entry):
    """Collect all ancestor headings of a given entry."""
    ancestors = []
    current = entry['parent']
    while current:
        ancestors.append(current)
        current = current['parent']
    return list(reversed(ancestors))


def collect_descendants(entry):
    """Collect all descendants (children and deeper) of a given entry."""
    result = []

    def recurse(e):
        for child in e['children']:
            result.append(child)
            recurse(child)

    recurse(entry)
    return result


def deduplicate_entries(entries):
    """Ensure no duplicate entries in the results."""
    seen = set()
    result = []
    for e in entries:
        if id(e) not in seen:
            seen.add(id(e))
            result.append(e)
    return result


def highlight_text(text, keywords):
    """Highlight the keyword(s) in the text using ANSI color (red) and bold."""
    for keyword in keywords:
        text = re.sub(
            re.escape(keyword),
            lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}",
            text,
            flags=re.IGNORECASE
        )
    return text


def extract_markdown(structure, keywords, raw_output=False, match_all=False):
    """Extract matched entries and include their ancestors and descendants."""
    matched_entries = []

    def search(entries):
        for entry in entries:
            # Check if the entry matches the search criteria
            entry_text = entry['text'].lower()
            matches = [keyword.lower() in entry_text for keyword in keywords]

            # AND search: all terms must match; OR search (the default): any term matches
            matched = all(matches) if match_all else any(matches)

            if matched:
                if not raw_output:
                    entry['raw'] = highlight_text(entry['raw'], keywords)
                matched_entries.append(entry)

                # Include the ancestors and descendants of the match for context
                ancestors = collect_ancestors(entry)
                descendants = collect_descendants(entry)

                # Include ancestors only if they haven't been added yet
                for ancestor in ancestors:
                    if ancestor not in matched_entries:
                        matched_entries.append(ancestor)

                # Include descendants only if they haven't been added yet
                for descendant in descendants:
                    if descendant not in matched_entries:
                        matched_entries.append(descendant)

            search(entry['children'])

    search(structure)

    # Deduplicate and sort by original order
    all_entries = deduplicate_entries(matched_entries)
    all_entries.sort(key=lambda e: e.get('line_num', 0))
    return [e['raw'] for e in all_entries]


def main():
    """Main function that reads input and runs the search."""

    # Set up argument parser
    parser = argparse.ArgumentParser(
        description="Search through markdown files, highlighting matched terms."
    )

    parser.add_argument(
        'keywords',
        nargs='+',
        help="The search keywords (at least one required)."
    )

    parser.add_argument(
        '--raw',
        action='store_true',
        help="Output without highlighting."
    )

    parser.add_argument(
        '--all',
        action='store_true',
        help="Match all keywords (AND search). By default, it's OR search."
    )

    # Parse arguments
    args = parser.parse_args()

    input_lines = sys.stdin.readlines()
    structure = build_structure(input_lines)

    # Annotate line numbers for sorting
    for idx, entry in enumerate(flatten_structure(structure)):
        entry['line_num'] = idx

    results = extract_markdown(structure, args.keywords, args.raw, args.all)

    if results:
        print('\n'.join(results))
    else:
        print(f"No matches found for keywords: {', '.join(args.keywords)}.")


def flatten_structure(entries):
    """Flattens a nested structure to a list, preserving original order."""
    result = []
    for e in entries:
        result.append(e)
        result.extend(flatten_structure(e['children']))
    return result


if __name__ == '__main__':
    main()
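For orientation, a minimal sketch of driving these functions directly from Python rather than through the console script. This is not part of the committed files: it assumes the package is importable (for example after pip install -e .), and the sample markdown lines are invented purely for illustration.

# Minimal usage sketch (not from the commit): call the mdq internals from Python.
# Assumes the package is importable, e.g. after: pip install -e .
# The sample markdown below is invented for illustration.
from mdq.mdq import build_structure, extract_markdown, flatten_structure

sample = [
    "# Setup\n",
    "- install the tool\n",
    "- run the tests\n",
    "# Usage\n",
    "- pass keywords on the command line\n",
]

# Build the heading/list tree, then number the entries so results keep
# document order, mirroring what main() does before searching.
structure = build_structure(sample)
for idx, entry in enumerate(flatten_structure(structure)):
    entry['line_num'] = idx

# OR search for a single keyword with raw (uncolored) output:
# prints "# Setup" followed by "- run the tests".
for line in extract_markdown(structure, ['tests'], raw_output=True):
    print(line)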
setup.py (Normal file, 26 lines)
@@ -0,0 +1,26 @@
from setuptools import setup, find_packages

setup(
    name='mdq',  # The name of your package
    version='0.1',  # Package version
    packages=find_packages(),  # Find all packages
    install_requires=[],  # Add dependencies here if needed
    entry_points={
        'console_scripts': [
            'mdq = mdq.mdq:main',  # Command 'mdq' runs the 'main' function in mdq/mdq.py
        ],
    },
    # Other metadata (optional)
    description="Markdown Query: Search and highlight content in markdown files.",
    long_description=open('README.md').read(),
    long_description_content_type='text/markdown',
    author='Charlie Crossley',
    author_email='charlie.crossley@iceelectronics.net',
    url='https://example.com',  # Replace with your repo URL
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
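With the console_scripts entry above, installing the package (for example with pip install . or pip install -e .) should expose an mdq command that reads markdown on standard input, so an invocation along the lines of cat README.md | mdq setup install --all would print the matching headings and list items with the keywords highlighted; the keyword values here are only illustrative.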