-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcode2llm.py
More file actions
136 lines (111 loc) · 4.77 KB
/
Copy pathcode2llm.py
File metadata and controls
136 lines (111 loc) · 4.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
import argparse
from pathlib import Path
import pathspec
from typing import Dict, Any, List
class TreeNode:
    """One entry (file or directory) in the rendered directory tree.

    Attributes are plain public fields:

    * ``name`` - entry name as shown in the tree.
    * ``is_dir`` - ``True`` for directories, ``False`` for everything else.
    * ``children`` - child nodes; starts empty and is filled by the caller.
    * ``ignored`` - display flag, initially ``False``.
    * ``size_exceeded`` - display flag, initially ``False``.
    """

    def __init__(self, name: str, is_dir: bool = False):
        # Identity of the entry.
        self.name, self.is_dir = name, is_dir
        # Each node owns a fresh list, so children are never shared between nodes.
        self.children: List["TreeNode"] = list()
        # Both status flags start cleared; whoever builds the tree sets them.
        self.ignored = False
        self.size_exceeded = False
def build_directory_tree(root: Path, ignore_spec: pathspec.PathSpec, max_size: int, target_dir: Path) -> TreeNode:
    """Recursively mirror the directory ``root`` as a tree of ``TreeNode`` objects.

    Every entry gets a node, including ignored and oversized ones; those are
    only flagged (``ignored`` / ``size_exceeded``) so the rendered tree can
    annotate them.

    Args:
        root: Directory to scan. Must be ``target_dir`` or located inside it.
        ignore_spec: Compiled ignore patterns, matched against paths relative
            to ``target_dir``.
        max_size: Size limit in bytes; files larger than this are flagged.
        target_dir: Base directory the ignore patterns are relative to.

    Returns:
        A directory node for ``root`` whose ``children`` describe its entries.
    """
    node = TreeNode(root.name, is_dir=True)

    try:
        # Materialise (and order) the listing up front so a failure happens
        # here, not halfway through building the children.
        entries = sorted(root.iterdir())
    except PermissionError:
        # An unreadable directory is shown as an empty folder instead of
        # aborting the whole run.
        return node

    for entry in entries:
        rel_path = entry.relative_to(target_dir)
        is_dir = entry.is_dir()

        ignored = bool(ignore_spec.match_file(rel_path))
        if is_dir and not ignored:
            # Directory-only patterns (e.g. ``build/``) are matched against
            # the path text alone, so the directory itself has to be offered
            # with a trailing slash to be recognised.
            ignored = bool(ignore_spec.match_file(f"{rel_path.as_posix()}/"))

        if is_dir and not entry.is_symlink():
            child = build_directory_tree(entry, ignore_spec, max_size, target_dir)
        else:
            # Symlinked directories are listed but never entered: following
            # them can loop forever on a link cycle.
            child = TreeNode(entry.name, is_dir=is_dir)
            child.size_exceeded = entry.is_file() and entry.stat().st_size > max_size

        child.ignored = ignored
        node.children.append(child)

    return node
def format_tree(node: "TreeNode", prefix: str = "", is_last: bool = True) -> str:
    """Render ``node`` and all of its descendants as an indented text tree.

    Each entry occupies one line: the inherited ``prefix``, a branch
    connector, an icon (folder or file) and the entry name. Ignored entries
    and oversized files get a parenthesised note appended. Within a
    directory, sub-directories are listed before files and each group is
    ordered by name, ascending.

    Args:
        node: Node to render.
        prefix: Text placed before the connector; carries the vertical guide
            lines of the enclosing levels.
        is_last: Whether ``node`` is the last child of its parent, which
            selects the connector and whether a guide line continues below.

    Returns:
        The rendered lines, each terminated by a newline.
    """
    connector = "└── " if is_last else "├── "
    icon = "📁" if node.is_dir else "📄"
    line = f"{prefix}{connector}{icon} {node.name}"

    reasons = []
    if node.ignored:
        reasons.append("ignored")
    if not node.is_dir and node.size_exceeded:
        reasons.append("size exceeded")
    if reasons:
        line += f" ({', '.join(reasons)})"

    parts = [line + "\n"]

    if node.is_dir:
        # ``not is_dir`` sorts directories (False) ahead of files (True) while
        # keeping names in ascending order within each group.
        children = sorted(node.children, key=lambda c: (not c.is_dir, c.name))
        # The continuation is exactly as wide as a connector (4 columns) so
        # nested levels line up under their parent.
        child_prefix = prefix + ("    " if is_last else "│   ")
        last_index = len(children) - 1
        parts.extend(
            format_tree(child, child_prefix, index == last_index)
            for index, child in enumerate(children)
        )

    return "".join(parts)
def _code_fence(content: str) -> str:
    """Return a backtick fence that is longer than any backtick run in ``content``."""
    import re

    longest_run = max((len(run) for run in re.findall(r"`+", content)), default=0)
    # Markdown needs at least three backticks; a fence longer than every run
    # inside the content cannot be closed early by the content itself.
    return "`" * max(3, longest_run + 1)


def main():
    """Command-line entry point: write a Markdown context file for a codebase.

    The output contains an optional context description, a rendered directory
    tree of the target directory, and the contents of every regular file that
    is not a symlink, not matched by the ignore rules, not larger than
    ``--max-size`` bytes and decodable as UTF-8.
    """
    parser = argparse.ArgumentParser(
        description='Generate a LLM-friendly context file from codebase',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('--target-dir', type=str, default='.',
                        help='Directory to process')
    parser.add_argument('--ignore-file', type=str, default='.context.ignore',
                        help='Path to ignore rules file')
    parser.add_argument('--output', type=str, default='context.md',
                        help='Output file path')
    parser.add_argument('--max-size', type=int, default=102400,
                        help='Maximum file size in bytes (100KB default)')
    parser.add_argument('--context-file', type=str,
                        help='Path to optional context description file')
    args = parser.parse_args()

    target_dir = Path(args.target_dir).resolve()
    ignore_file = Path(args.ignore_file)
    output_path = Path(args.output).resolve()

    # Load ignore patterns
    if ignore_file.exists():
        with open(ignore_file, 'r', encoding='utf-8') as f:
            ignore_spec = pathspec.PathSpec.from_lines('gitwildmatch', f)
    else:
        ignore_spec = pathspec.PathSpec([])

    # Read the optional description before the output is opened, so a bad
    # --context-file path fails without truncating an existing output file.
    context_text = None
    if args.context_file:
        with open(args.context_file, 'r', encoding='utf-8') as cf:
            context_text = cf.read()

    # Collect valid files and build tree
    included_files = []
    tree_root = build_directory_tree(target_dir, ignore_spec, args.max_size, target_dir)
    for file_path in target_dir.rglob('*'):
        if file_path.is_dir() or file_path.is_symlink():
            continue
        if file_path == output_path:
            # Never feed the file being generated back into itself.
            continue
        rel_path = file_path.relative_to(target_dir)
        if ignore_spec.match_file(rel_path):
            continue
        if file_path.stat().st_size > args.max_size:
            continue
        included_files.append(file_path)

    # Generate output
    with open(output_path, 'w', encoding='utf-8') as out_file:
        # Add context description
        if context_text is not None:
            out_file.write(f"# Context Overview\n\n{context_text}\n\n")

        # Directory tree section
        out_file.write("# Codebase Structure\n\n")
        out_file.write("```\n")
        out_file.write(format_tree(tree_root))
        out_file.write("```\n\n")

        # File contents section
        out_file.write("# File Contents\n\n")
        for path in sorted(included_files):
            rel_path = path.relative_to(target_dir)
            try:
                content = path.read_text(encoding='utf-8')
            except UnicodeDecodeError:
                # Files that are not valid UTF-8 (typically binaries) are left out.
                continue
            fence = _code_fence(content)
            out_file.write(f"## File: {rel_path}\n")
            out_file.write(f"{fence}{path.suffix.lstrip('.')}\n")
            out_file.write(f"{content}\n")
            out_file.write(f"{fence}\n\n")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()