Initial Commit - Basic app
This commit is contained in:
0
build/lib/mdq/__init__.py
Normal file
0
build/lib/mdq/__init__.py
Normal file
218
build/lib/mdq/mdq.py
Normal file
218
build/lib/mdq/mdq.py
Normal file
@@ -0,0 +1,218 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import re
|
||||
import argparse
|
||||
|
||||
# ANSI escape sequences wrapped around matched keywords by highlight_text().
ANSI_RED = '\033[91m'
ANSI_BOLD = '\033[1m'
ANSI_RESET = '\033[0m'

# Base level for list items: classify_line() adds the item's indent to this
# value, which keeps every list level above the heading levels (1-6).
LIST_OFFSET = 100
|
||||
|
||||
def classify_line(line):
    """Classify one markdown line as a heading, a list item, or plain text.

    Args:
        line: A single input line, with or without its line terminator.

    Returns:
        A dict with these keys:
            'type':   'heading', 'list' or 'text'.
            'level':  number of leading '#' characters for a heading,
                      LIST_OFFSET plus the indent for a list item,
                      None for plain text.
            'text':   content after the heading/list marker, or the line
                      without leading whitespace for plain text.
            'indent': number of leading whitespace characters
                      (always 0 for headings).
            'raw':    the line without its terminator.
    """
    # Remove the terminator (LF or CRLF) before measuring anything, so that
    # it never leaks into 'text' and never counts as indentation.
    raw = line.rstrip('\r\n')
    stripped = raw.lstrip()
    indent = len(raw) - len(stripped)

    heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
    if heading_match:
        return {
            'type': 'heading',
            'level': len(heading_match.group(1)),  # one level per '#'
            'text': heading_match.group(2),
            'indent': 0,
            'raw': raw,
        }

    list_match = re.match(r'^([-*+]|\d+\.)\s+(.*)', stripped)
    if list_match:
        return {
            'type': 'list',
            # Deeper indentation means a deeper list level.
            'level': LIST_OFFSET + indent,
            'text': list_match.group(2),
            'indent': indent,
            'raw': raw,
        }

    return {
        'type': 'text',
        'level': None,
        'text': stripped,
        'indent': indent,
        'raw': raw,
    }
|
||||
|
||||
def build_structure(lines):
    """Arrange headings and list items into a parent/child tree.

    Each line is classified with classify_line(). Headings and list items
    become nodes carrying a 'children' list and a 'parent' reference; a node
    is attached to the nearest preceding node with a strictly lower level.
    Lines classified as plain text are not attached to the tree.

    Args:
        lines: Iterable of input lines.

    Returns:
        The list of top-level nodes.
    """
    roots = []
    open_nodes = []  # currently open ancestors, outermost first

    for text_line in lines:
        node = classify_line(text_line)
        node['children'] = []
        node['parent'] = None

        if node['type'] not in ('list', 'heading'):
            continue

        # Close every open node that is not strictly shallower than this one.
        while open_nodes and open_nodes[-1]['level'] >= node['level']:
            open_nodes.pop()

        if not open_nodes:
            roots.append(node)
        else:
            owner = open_nodes[-1]
            node['parent'] = owner
            owner['children'].append(node)

        open_nodes.append(node)

    return roots
|
||||
|
||||
def collect_ancestors(entry):
    """Return the chain of parents above *entry*, outermost ancestor first.

    Follows the 'parent' links until a falsy parent is reached. Ancestors of
    any node type are included.
    """
    nearest_first = []
    node = entry['parent']
    while node:
        nearest_first.append(node)
        node = node['parent']
    return nearest_first[::-1]
|
||||
|
||||
def collect_descendants(entry):
    """Return every node below *entry*, parents before their own children.

    The walk is depth-first and follows each 'children' list in order, so the
    result lists a child, then that child's whole subtree, then the next
    sibling.
    """
    found = []
    # Stack of nodes still to visit; reversed so the first child pops first.
    pending = entry['children'][::-1]
    while pending:
        node = pending.pop()
        found.append(node)
        pending.extend(node['children'][::-1])
    return found
|
||||
|
||||
def deduplicate_entries(entries):
    """Drop repeated occurrences of the same object, keeping first-seen order.

    Objects are told apart by identity (id()), not by equality, so two
    distinct entries with equal content are both kept.
    """
    first_seen = {}
    for item in entries:
        first_seen.setdefault(id(item), item)
    return list(first_seen.values())
|
||||
|
||||
def highlight_text(text, keywords):
    """Wrap each case-insensitive keyword occurrence in bold red ANSI codes.

    All keywords are matched in one pass over the original text. Substituting
    them one after another would let a later keyword (for example 'm', '1' or
    '[') match inside the escape sequences inserted for an earlier keyword
    and corrupt the output.

    Args:
        text: The line to decorate.
        keywords: Iterable of literal search terms (not regular expressions).

    Returns:
        The text with every match surrounded by ANSI_RED + ANSI_BOLD and
        ANSI_RESET. The text is returned unchanged when there is nothing to
        search for.
    """
    # An empty keyword would match at every position, so it is skipped.
    # Longer terms come first so that 'foobar' wins over 'foo' at the same
    # position; the secondary sort key only makes the pattern deterministic.
    terms = sorted({k for k in keywords if k}, key=lambda k: (-len(k), k))
    if not terms:
        return text

    pattern = '|'.join(re.escape(term) for term in terms)
    return re.sub(
        pattern,
        lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}",
        text,
        flags=re.IGNORECASE,
    )
|
||||
|
||||
def extract_markdown(structure, keywords, raw_output=False, match_all=False):
    """Return the lines of matching entries with their ancestors and descendants.

    Args:
        structure: Top-level entries of the tree built by build_structure().
        keywords: Search terms, compared case-insensitively with each
            entry's 'text'.
        raw_output: When True, matched lines are returned without ANSI
            highlighting.
        match_all: When True an entry must contain every keyword (AND
            search); otherwise one keyword is enough (OR search).

    Returns:
        A list of line strings sorted by each entry's 'line_num' (entries
        without that key sort as 0 and keep their discovery order). Each
        entry appears once. The entries themselves are not modified.
    """
    needles = [keyword.lower() for keyword in keywords]
    combine = all if match_all else any

    # Entries are tracked by identity. They reference each other through
    # 'parent' and 'children', so comparing two of them with == (which is
    # what a list membership test does) can recurse without bound.
    selected = {}     # id(entry) -> entry, in discovery order
    highlighted = {}  # id(entry) -> highlighted copy of the entry's raw line

    def search(entries):
        for entry in entries:
            haystack = entry['text'].lower()
            if combine(needle in haystack for needle in needles):
                if not raw_output:
                    # Keep the decorated line separately so that the tree
                    # stays clean and repeated calls do not stack codes.
                    highlighted[id(entry)] = highlight_text(entry['raw'], keywords)
                related = [entry]
                related.extend(collect_ancestors(entry))
                related.extend(collect_descendants(entry))
                for item in related:
                    selected.setdefault(id(item), item)
            search(entry['children'])

    search(structure)

    # sorted() is stable, so entries sharing a 'line_num' keep discovery order.
    ordered = sorted(selected.values(), key=lambda e: e.get('line_num', 0))
    return [highlighted.get(id(e), e['raw']) for e in ordered]
|
||||
|
||||
def main():
    """Command-line entry point: filter markdown read from stdin by keyword.

    Parses the keywords and the --raw / --all switches, builds the tree from
    standard input, and prints the matching lines, or a notice when nothing
    matched.
    """
    cli = argparse.ArgumentParser(
        description="Search through markdown files, highlighting matched terms."
    )
    argument_specs = (
        ('keywords', {
            'nargs': '+',
            'help': "The search keywords (at least one required).",
        }),
        ('--raw', {
            'action': 'store_true',
            'help': "Output without highlighting.",
        }),
        ('--all', {
            'action': 'store_true',
            'help': "Match all keywords (AND search). By default, it's OR search.",
        }),
    )
    for flag, options in argument_specs:
        cli.add_argument(flag, **options)
    parsed = cli.parse_args()

    tree = build_structure(sys.stdin.readlines())

    # Number the nodes in document order; extract_markdown() sorts on this.
    for position, node in enumerate(flatten_structure(tree)):
        node['line_num'] = position

    output_lines = extract_markdown(tree, parsed.keywords, parsed.raw, parsed.all)

    if not output_lines:
        print(f"No matches found for keywords: {', '.join(parsed.keywords)}.")
        return
    print('\n'.join(output_lines))
|
||||
|
||||
def flatten_structure(entries):
    """Return every node of the tree as a flat list in document order.

    Each node is followed by its whole subtree before the next sibling
    appears.
    """
    flat = []
    # Stack of nodes still to emit; reversed so the first entry pops first.
    pending = list(entries)[::-1]
    while pending:
        node = pending.pop()
        flat.append(node)
        pending.extend(node['children'][::-1])
    return flat
|
||||
|
||||
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
|
||||
11
mdq.egg-info/PKG-INFO
Normal file
11
mdq.egg-info/PKG-INFO
Normal file
@@ -0,0 +1,11 @@
|
||||
Metadata-Version: 2.1
|
||||
Name: mdq
|
||||
Version: 0.1
|
||||
Summary: Markdown Query: Search and highlight content in markdown files.
|
||||
Home-page: https://example.com
|
||||
Author: Charlie Crossley
|
||||
Author-email: charlie.crossley@iceelectronics.net
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Operating System :: OS Independent
|
||||
Description-Content-Type: text/markdown
|
||||
9
mdq.egg-info/SOURCES.txt
Normal file
9
mdq.egg-info/SOURCES.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
README.md
|
||||
setup.py
|
||||
mdq/__init__.py
|
||||
mdq/mdq.py
|
||||
mdq.egg-info/PKG-INFO
|
||||
mdq.egg-info/SOURCES.txt
|
||||
mdq.egg-info/dependency_links.txt
|
||||
mdq.egg-info/entry_points.txt
|
||||
mdq.egg-info/top_level.txt
|
||||
1
mdq.egg-info/dependency_links.txt
Normal file
1
mdq.egg-info/dependency_links.txt
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
2
mdq.egg-info/entry_points.txt
Normal file
2
mdq.egg-info/entry_points.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
[console_scripts]
|
||||
mdq = mdq.mdq:main
|
||||
1
mdq.egg-info/top_level.txt
Normal file
1
mdq.egg-info/top_level.txt
Normal file
@@ -0,0 +1 @@
|
||||
mdq
|
||||
0
mdq/__init__.py
Normal file
0
mdq/__init__.py
Normal file
218
mdq/mdq.py
Executable file
218
mdq/mdq.py
Executable file
@@ -0,0 +1,218 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import re
|
||||
import argparse
|
||||
|
||||
# ANSI escape sequences wrapped around matched keywords by highlight_text().
ANSI_RED = '\033[91m'
ANSI_BOLD = '\033[1m'
ANSI_RESET = '\033[0m'

# Base level for list items: classify_line() adds the item's indent to this
# value, which keeps every list level above the heading levels (1-6).
LIST_OFFSET = 100
|
||||
|
||||
def classify_line(line):
    """Classify one markdown line as a heading, a list item, or plain text.

    Args:
        line: A single input line, with or without its line terminator.

    Returns:
        A dict with these keys:
            'type':   'heading', 'list' or 'text'.
            'level':  number of leading '#' characters for a heading,
                      LIST_OFFSET plus the indent for a list item,
                      None for plain text.
            'text':   content after the heading/list marker, or the line
                      without leading whitespace for plain text.
            'indent': number of leading whitespace characters
                      (always 0 for headings).
            'raw':    the line without its terminator.
    """
    # Remove the terminator (LF or CRLF) before measuring anything, so that
    # it never leaks into 'text' and never counts as indentation.
    raw = line.rstrip('\r\n')
    stripped = raw.lstrip()
    indent = len(raw) - len(stripped)

    heading_match = re.match(r'^(#{1,6})\s+(.*)', stripped)
    if heading_match:
        return {
            'type': 'heading',
            'level': len(heading_match.group(1)),  # one level per '#'
            'text': heading_match.group(2),
            'indent': 0,
            'raw': raw,
        }

    list_match = re.match(r'^([-*+]|\d+\.)\s+(.*)', stripped)
    if list_match:
        return {
            'type': 'list',
            # Deeper indentation means a deeper list level.
            'level': LIST_OFFSET + indent,
            'text': list_match.group(2),
            'indent': indent,
            'raw': raw,
        }

    return {
        'type': 'text',
        'level': None,
        'text': stripped,
        'indent': indent,
        'raw': raw,
    }
|
||||
|
||||
def build_structure(lines):
    """Arrange headings and list items into a parent/child tree.

    Each line is classified with classify_line(). Headings and list items
    become nodes carrying a 'children' list and a 'parent' reference; a node
    is attached to the nearest preceding node with a strictly lower level.
    Lines classified as plain text are not attached to the tree.

    Args:
        lines: Iterable of input lines.

    Returns:
        The list of top-level nodes.
    """
    roots = []
    open_nodes = []  # currently open ancestors, outermost first

    for text_line in lines:
        node = classify_line(text_line)
        node['children'] = []
        node['parent'] = None

        if node['type'] not in ('list', 'heading'):
            continue

        # Close every open node that is not strictly shallower than this one.
        while open_nodes and open_nodes[-1]['level'] >= node['level']:
            open_nodes.pop()

        if not open_nodes:
            roots.append(node)
        else:
            owner = open_nodes[-1]
            node['parent'] = owner
            owner['children'].append(node)

        open_nodes.append(node)

    return roots
|
||||
|
||||
def collect_ancestors(entry):
    """Return the chain of parents above *entry*, outermost ancestor first.

    Follows the 'parent' links until a falsy parent is reached. Ancestors of
    any node type are included.
    """
    nearest_first = []
    node = entry['parent']
    while node:
        nearest_first.append(node)
        node = node['parent']
    return nearest_first[::-1]
|
||||
|
||||
def collect_descendants(entry):
    """Return every node below *entry*, parents before their own children.

    The walk is depth-first and follows each 'children' list in order, so the
    result lists a child, then that child's whole subtree, then the next
    sibling.
    """
    found = []
    # Stack of nodes still to visit; reversed so the first child pops first.
    pending = entry['children'][::-1]
    while pending:
        node = pending.pop()
        found.append(node)
        pending.extend(node['children'][::-1])
    return found
|
||||
|
||||
def deduplicate_entries(entries):
    """Drop repeated occurrences of the same object, keeping first-seen order.

    Objects are told apart by identity (id()), not by equality, so two
    distinct entries with equal content are both kept.
    """
    first_seen = {}
    for item in entries:
        first_seen.setdefault(id(item), item)
    return list(first_seen.values())
|
||||
|
||||
def highlight_text(text, keywords):
    """Wrap each case-insensitive keyword occurrence in bold red ANSI codes.

    All keywords are matched in one pass over the original text. Substituting
    them one after another would let a later keyword (for example 'm', '1' or
    '[') match inside the escape sequences inserted for an earlier keyword
    and corrupt the output.

    Args:
        text: The line to decorate.
        keywords: Iterable of literal search terms (not regular expressions).

    Returns:
        The text with every match surrounded by ANSI_RED + ANSI_BOLD and
        ANSI_RESET. The text is returned unchanged when there is nothing to
        search for.
    """
    # An empty keyword would match at every position, so it is skipped.
    # Longer terms come first so that 'foobar' wins over 'foo' at the same
    # position; the secondary sort key only makes the pattern deterministic.
    terms = sorted({k for k in keywords if k}, key=lambda k: (-len(k), k))
    if not terms:
        return text

    pattern = '|'.join(re.escape(term) for term in terms)
    return re.sub(
        pattern,
        lambda match: f"{ANSI_RED}{ANSI_BOLD}{match.group(0)}{ANSI_RESET}",
        text,
        flags=re.IGNORECASE,
    )
|
||||
|
||||
def extract_markdown(structure, keywords, raw_output=False, match_all=False):
    """Return the lines of matching entries with their ancestors and descendants.

    Args:
        structure: Top-level entries of the tree built by build_structure().
        keywords: Search terms, compared case-insensitively with each
            entry's 'text'.
        raw_output: When True, matched lines are returned without ANSI
            highlighting.
        match_all: When True an entry must contain every keyword (AND
            search); otherwise one keyword is enough (OR search).

    Returns:
        A list of line strings sorted by each entry's 'line_num' (entries
        without that key sort as 0 and keep their discovery order). Each
        entry appears once. The entries themselves are not modified.
    """
    needles = [keyword.lower() for keyword in keywords]
    combine = all if match_all else any

    # Entries are tracked by identity. They reference each other through
    # 'parent' and 'children', so comparing two of them with == (which is
    # what a list membership test does) can recurse without bound.
    selected = {}     # id(entry) -> entry, in discovery order
    highlighted = {}  # id(entry) -> highlighted copy of the entry's raw line

    def search(entries):
        for entry in entries:
            haystack = entry['text'].lower()
            if combine(needle in haystack for needle in needles):
                if not raw_output:
                    # Keep the decorated line separately so that the tree
                    # stays clean and repeated calls do not stack codes.
                    highlighted[id(entry)] = highlight_text(entry['raw'], keywords)
                related = [entry]
                related.extend(collect_ancestors(entry))
                related.extend(collect_descendants(entry))
                for item in related:
                    selected.setdefault(id(item), item)
            search(entry['children'])

    search(structure)

    # sorted() is stable, so entries sharing a 'line_num' keep discovery order.
    ordered = sorted(selected.values(), key=lambda e: e.get('line_num', 0))
    return [highlighted.get(id(e), e['raw']) for e in ordered]
|
||||
|
||||
def main():
    """Command-line entry point: filter markdown read from stdin by keyword.

    Parses the keywords and the --raw / --all switches, builds the tree from
    standard input, and prints the matching lines, or a notice when nothing
    matched.
    """
    cli = argparse.ArgumentParser(
        description="Search through markdown files, highlighting matched terms."
    )
    argument_specs = (
        ('keywords', {
            'nargs': '+',
            'help': "The search keywords (at least one required).",
        }),
        ('--raw', {
            'action': 'store_true',
            'help': "Output without highlighting.",
        }),
        ('--all', {
            'action': 'store_true',
            'help': "Match all keywords (AND search). By default, it's OR search.",
        }),
    )
    for flag, options in argument_specs:
        cli.add_argument(flag, **options)
    parsed = cli.parse_args()

    tree = build_structure(sys.stdin.readlines())

    # Number the nodes in document order; extract_markdown() sorts on this.
    for position, node in enumerate(flatten_structure(tree)):
        node['line_num'] = position

    output_lines = extract_markdown(tree, parsed.keywords, parsed.raw, parsed.all)

    if not output_lines:
        print(f"No matches found for keywords: {', '.join(parsed.keywords)}.")
        return
    print('\n'.join(output_lines))
|
||||
|
||||
def flatten_structure(entries):
    """Return every node of the tree as a flat list in document order.

    Each node is followed by its whole subtree before the next sibling
    appears.
    """
    flat = []
    # Stack of nodes still to emit; reversed so the first entry pops first.
    pending = list(entries)[::-1]
    while pending:
        node = pending.pop()
        flat.append(node)
        pending.extend(node['children'][::-1])
    return flat
|
||||
|
||||
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||
|
||||
26
setup.py
Normal file
26
setup.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from pathlib import Path

from setuptools import setup, find_packages

# Read the README next to this file rather than relative to the current
# working directory, with an explicit encoding; read_text() also closes the
# file handle, which a bare open(...).read() does not guarantee.
LONG_DESCRIPTION = (Path(__file__).parent / 'README.md').read_text(encoding='utf-8')

setup(
    name='mdq',  # The name of your package
    version='0.1',  # Package version
    packages=find_packages(),  # Find all packages
    install_requires=[],  # Add dependencies here if needed
    entry_points={
        'console_scripts': [
            'mdq = mdq.mdq:main',  # Command 'mdq' runs main() in mdq/mdq.py
        ],
    },
    # Other metadata (optional)
    description="Markdown Query: Search and highlight content in markdown files.",
    long_description=LONG_DESCRIPTION,
    long_description_content_type='text/markdown',
    author='Charlie Crossley',
    author_email='charlie.crossley@iceelectronics.net',
    url='https://example.com',  # Replace with your repo URL
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
|
||||
|
||||
Reference in New Issue
Block a user