-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcall_graph_processor.py
More file actions
556 lines (446 loc) · 20.7 KB
/
Copy pathcall_graph_processor.py
File metadata and controls
556 lines (446 loc) · 20.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
import fnmatch
import os
import re
from typing import Optional, List, Dict, Set, Tuple, Any

import pydot
def process_dot_file(dot_file_path: str) -> Optional[str]:
    """
    Render the first graph of a .dot file to PNG and emit its relationships file.

    The graph is restyled (white background, left-to-right rank direction,
    filled nodes, coloured edges), written next to the .dot file with a
    ``.png`` extension, and then passed to
    ``generate_function_relationships_text``.

    Args:
        dot_file_path: Path to the .dot file

    Returns:
        Path to the generated PNG image, or None when the file yields no
        graphs or any step raises (the error is printed, not propagated)
    """
    try:
        parsed_graphs = pydot.graph_from_dot_file(dot_file_path)
        if not parsed_graphs:
            print(f"Error: No graphs found in {dot_file_path}")
            return None

        # Only the first graph in the file is rendered.
        call_graph = parsed_graphs[0]

        # The PNG sits beside the .dot file, sharing its base name.
        png_path = os.path.splitext(dot_file_path)[0] + ".png"

        # Graph-wide appearance.
        call_graph.set_bgcolor('white')
        call_graph.set_rankdir('LR')

        # Per-node appearance.
        for vertex in call_graph.get_nodes():
            vertex.set_style('filled')
            vertex.set_fillcolor('#E8F8F5')
            vertex.set_color('black')
            vertex.set_fontname('Arial')

        # Per-edge appearance.
        for link in call_graph.get_edges():
            link.set_color('#2471A3')
            link.set_fontname('Arial')

        call_graph.write_png(png_path)
        print(f"Call graph generated: {png_path}")

        # The relationships text file is a side product; its own errors are
        # handled inside the callee.
        generate_function_relationships_text(call_graph, dot_file_path)
        return png_path
    except Exception as e:
        print(f"Error processing dot file: {e}")
        return None
def extract_function_name(node_obj: pydot.Node) -> str:
    r"""
    Extract a readable function name from a graph node.

    The node label is preferred. It is unquoted and cut at the first Dot
    line break (``\l`` or ``\n``); if what remains looks like an HTML-like
    label, the text of its first element is returned, otherwise the
    truncated label itself. When the node has no label, or the label is
    empty after truncation, the first identifier-like token of the node
    name is used, and finally the raw node name.

    Args:
        node_obj: A pydot Node object

    Returns:
        The extracted function name. If anything raises, the error is
        printed and the unquoted node name is returned (an empty string
        when even the name could not be read).
    """
    # Bound before the try block so the except handler can always return it,
    # even when get_name() itself is the call that raised.
    node_name = ""
    try:
        # Node name (typically the node ID), without surrounding quotes.
        node_name = node_obj.get_name().strip('"')

        raw_label = node_obj.get_label()
        node_label = raw_label.strip('"') if raw_label else ""

        if node_label:
            # Keep only the text before the first Dot line break, e.g.
            # "external_signal_handler\l_adv_mode" -> "external_signal_handler".
            # split() returns the whole string when the separator is absent.
            for line_break in ('\\l', '\\n'):
                node_label = node_label.split(line_break)[0]

            # HTML-like label: take the text of the first <tag>text</tag> pair.
            if '<' in node_label and '>' in node_label:
                html_match = re.search(r'<[^>]*>([^<]+)</[^>]*>', node_label)
                if html_match:
                    return html_match.group(1)

            # A label that started with a line break is empty by now; treat
            # it as "no usable label" and fall through to the node name
            # instead of returning an empty function name.
            if node_label:
                return node_label

        # Node names often look like "Node123" or carry a prefix/suffix;
        # pick the first identifier-like token.
        name_match = re.search(r'[a-zA-Z_][a-zA-Z0-9_]*', node_name)
        if name_match:
            return name_match.group(0)

        # Nothing identifier-like: fall back to the raw node name.
        return node_name
    except Exception as e:
        print(f"Error extracting function name: {e}")
        return node_name
def get_output_directory(dot_file_path: str) -> str:
    """
    Pick the directory in which a dot file's relationship file belongs.

    Args:
        dot_file_path: Path to the .dot file

    Returns:
        The parent of the file's directory when that directory is named
        ``dot_files``; otherwise the file's own directory
    """
    containing_dir = os.path.dirname(dot_file_path)
    # split() yields (parent, last component) of the containing directory.
    parent_dir, leaf_name = os.path.split(containing_dir)
    return parent_dir if leaf_name == "dot_files" else containing_dir
def create_node_to_function_mapping(graph: pydot.Graph) -> Dict[str, str]:
    """
    Build a lookup from unquoted node IDs to extracted function names.

    Args:
        graph: The pydot Graph object

    Returns:
        Dictionary keyed by node ID (surrounding double quotes removed) whose
        values come from ``extract_function_name``; a later node with the
        same ID overwrites an earlier one
    """
    return {
        vertex.get_name().strip('"'): extract_function_name(vertex)
        for vertex in graph.get_nodes()
    }
def extract_relationships_from_edges(
    graph: pydot.Graph,
    node_to_function: Dict[str, str],
    is_icgraph: bool
) -> List[str]:
    """
    Turn every edge of the graph into a ``"left -> right"`` string.

    Args:
        graph: The pydot Graph object
        node_to_function: Dictionary mapping node IDs to function names;
            IDs missing from it are used verbatim
        is_icgraph: Whether this is an inverse caller graph. When true the
            two endpoints are swapped, so the edge destination is written
            on the left

    Returns:
        List of relationship strings, one per edge, in edge order
    """
    relationships: List[str] = []
    for link in graph.get_edges():
        tail_id = link.get_source().strip('"')
        head_id = link.get_destination().strip('"')

        # Translate IDs to names, falling back to the ID itself.
        left = node_to_function.get(tail_id, tail_id)
        right = node_to_function.get(head_id, head_id)

        # icgraph files list the relationship in the opposite direction.
        if is_icgraph:
            left, right = right, left

        relationships.append(f"{left} -> {right}")
    return relationships
def write_relationships_to_file(
    output_path: str,
    relationships: List[str],
    dot_file_path: str,
    is_icgraph: bool
) -> None:
    """
    Write a commented header followed by the sorted relationships.

    The header names the source dot file (base name only), the number of
    relationships, and the arrow direction used.

    Args:
        output_path: Path to the output file (overwritten if it exists)
        relationships: List of relationship strings
        dot_file_path: Path to the original dot file
        is_icgraph: Whether this is an inverse caller graph; selects the
            "callee -> caller" format line instead of "caller -> callee"
    """
    with open(output_path, 'w') as f:
        f.write(f"# Function call relationships from {os.path.basename(dot_file_path)}\n")
        f.write(f"# Total relationships: {len(relationships)}\n")

        # The format description depends on the kind of graph.
        format_line = (
            "# Format: callee -> caller\n\n"
            if is_icgraph
            else "# Format: caller -> callee\n\n"
        )
        f.write(format_line)

        f.writelines(f"{entry}\n" for entry in sorted(relationships))
def generate_function_relationships_text(graph: pydot.Graph, dot_file_path: str) -> Optional[str]:
    """
    Write ``<dot base name>_relationships.txt`` for one graph.

    The file goes to the directory chosen by ``get_output_directory``. The
    graph is treated as an inverse caller graph when the substring
    ``_icgraph`` appears anywhere in ``dot_file_path``.

    Args:
        graph: The pydot Graph object
        dot_file_path: Path to the original .dot file (used to generate output filename)

    Returns:
        Path to the generated text file, or None if any step raises (the
        error is printed, not propagated)
    """
    try:
        # <dir>/<stem>_relationships.txt
        stem = os.path.basename(os.path.splitext(dot_file_path)[0])
        output_path = os.path.join(
            get_output_directory(dot_file_path),
            stem + "_relationships.txt",
        )

        # Node IDs -> function names, then edges -> relationship strings.
        node_to_function = create_node_to_function_mapping(graph)
        is_icgraph = "_icgraph" in dot_file_path
        relationships = extract_relationships_from_edges(graph, node_to_function, is_icgraph)

        write_relationships_to_file(output_path, relationships, dot_file_path, is_icgraph)
        print(f"Function relationships text file generated: {output_path}")
        return output_path
    except Exception as e:
        print(f"Error generating function relationships text file: {e}")
        return None
def find_dot_files(directory: str) -> List[str]:
    """
    Locate .dot files for a directory.

    If ``directory`` contains a ``dot_files`` subdirectory, only the entries
    directly inside that subdirectory are considered. Otherwise the whole
    tree under ``directory`` is walked.

    Args:
        directory: Directory to search for .dot files

    Returns:
        List of paths to entries whose name ends in ".dot"
    """
    dot_files_subdir = os.path.join(directory, "dot_files")

    # Dedicated subdirectory present: look only there, non-recursively.
    if os.path.isdir(dot_files_subdir):
        return [
            os.path.join(dot_files_subdir, entry)
            for entry in os.listdir(dot_files_subdir)
            if entry.endswith(".dot")
        ]

    # No dedicated subdirectory: search the provided directory recursively.
    found: List[str] = []
    for root, _, files in os.walk(directory):
        found.extend(
            os.path.join(root, entry) for entry in files if entry.endswith(".dot")
        )
    return found
def find_relationship_files(dot_files: List[str]) -> List[str]:
    """
    Return the relationship files that exist for the given dot files.

    For each dot file the expected location is
    ``<get_output_directory(dot_file)>/<dot base name>_relationships.txt``.

    Args:
        dot_files: List of dot file paths

    Returns:
        List of expected relationship file paths that exist on disk, in the
        order of ``dot_files``
    """
    expected_paths = (
        os.path.join(
            get_output_directory(dot_path),
            os.path.basename(os.path.splitext(dot_path)[0]) + "_relationships.txt",
        )
        for dot_path in dot_files
    )
    # Keep only the files that were actually generated.
    return [candidate for candidate in expected_paths if os.path.exists(candidate)]
def collect_unique_relationships(rel_files: List[str]) -> Set[str]:
    """
    Collect unique relationships from all relationship files.

    Each line is stripped of surrounding whitespace. Blank lines and
    comment lines (first non-blank character is ``#``) are ignored, so an
    indented comment is not mistaken for a relationship.

    Args:
        rel_files: List of relationship file paths

    Returns:
        Set of unique relationship strings across all files

    Raises:
        OSError: If one of the files cannot be opened
    """
    all_relationships: Set[str] = set()
    for rel_file in rel_files:
        with open(rel_file, 'r') as f:
            for line in f:
                relationship = line.strip()
                # Strip first, then test: catches "   # comment" as well.
                if not relationship or relationship.startswith('#'):
                    continue
                all_relationships.add(relationship)
    return all_relationships
def get_output_dir_for_combined_file(directory: str) -> str:
    """
    Determine the appropriate directory for the combined relationships file.

    Trailing path separators are ignored when inspecting the last path
    component, so ``proj/dot_files/`` is recognised just like
    ``proj/dot_files``.

    Args:
        directory: Original directory

    Returns:
        The parent of ``directory`` when its last component is
        ``dot_files``; otherwise ``directory`` exactly as passed in
    """
    # os.path.basename("proj/dot_files/") is "", so strip separators first.
    separators = os.sep + (os.altsep or "")
    trimmed = directory.rstrip(separators)
    if os.path.basename(trimmed) == "dot_files":
        return os.path.dirname(trimmed)
    return directory
def generate_combined_relationships_file(directory: str, dot_files: List[str]) -> Optional[str]:
    """
    Merge all per-file relationship files into ``all_function_relationships.txt``.

    The relationship files are located from ``dot_files`` via
    ``find_relationship_files``, de-duplicated, sorted, and written below a
    commented header. The header's format line reads "callee -> caller"
    when the last component of ``directory`` contains ``callee_caller``
    (case-insensitive), and "caller -> callee" otherwise.

    Args:
        directory: Directory to save the combined file (its parent is used
            when ``get_output_dir_for_combined_file`` says so)
        dot_files: List of .dot files that were processed

    Returns:
        Path to the combined relationships file, or None if any step raises
        (the error is printed, not propagated)
    """
    try:
        # Direction of the arrows is inferred from the directory name.
        is_callee_caller = "callee_caller" in os.path.basename(directory).lower()

        output_dir = get_output_dir_for_combined_file(directory)
        output_path = os.path.join(output_dir, "all_function_relationships.txt")

        # Gather the unique relationships of every individual file.
        all_relationships = collect_unique_relationships(find_relationship_files(dot_files))

        format_line = (
            "# Format: callee -> caller\n\n"
            if is_callee_caller
            else "# Format: caller -> callee\n\n"
        )

        with open(output_path, 'w') as f:
            f.write(f"# Combined function call relationships from all dot files in {os.path.basename(output_dir)}\n")
            f.write(f"# Total unique relationships: {len(all_relationships)}\n")
            f.write(format_line)
            f.writelines(f"{entry}\n" for entry in sorted(all_relationships))

        print(f"Combined function relationships file generated: {output_path}")
        return output_path
    except Exception as e:
        print(f"Error generating combined relationships file: {e}")
        return None
def read_relationships_from_file(file_path: str) -> Dict[str, bool]:
    """
    Reads relationships from a file into a dictionary.

    Each line is stripped of surrounding whitespace. Blank lines and
    comment lines (first non-blank character is ``#``) are ignored, so an
    indented comment is not mistaken for a relationship.

    Args:
        file_path: Path to the relationships file

    Returns:
        Dictionary whose keys are the unique relationship strings (every
        value is True); empty when ``file_path`` is not an existing file
    """
    relationships: Dict[str, bool] = {}
    # A missing file is treated as "no relationships", not as an error.
    if not os.path.isfile(file_path):
        return relationships

    with open(file_path, 'r') as f:
        for line in f:
            relationship = line.strip()
            # Strip first, then test: catches "   # comment" as well.
            if relationship and not relationship.startswith('#'):
                relationships[relationship] = True
    return relationships
def collect_relationships_from_directory(directory: str, file_pattern: str) -> Dict[str, bool]:
    """
    Collects relationships from all relationship files in a directory tree.

    Every file under ``directory`` whose name matches ``file_pattern`` is
    read with ``read_relationships_from_file`` and merged into one
    dictionary. The combined file ``all_function_relationships.txt`` is
    always skipped, even if it matches the pattern.

    Args:
        directory: Directory to search (recursively)
        file_pattern: Shell-style, case-sensitive pattern for file names
            (e.g., "*_relationships.txt"). A falsy value falls back to
            "*_relationships.txt"

    Returns:
        Dictionary containing unique relationships
    """
    # Fall back to the suffix this function has always matched.
    pattern = file_pattern or "*_relationships.txt"

    relationships: Dict[str, bool] = {}
    for root, _, files in os.walk(directory):
        for file in files:
            # Never fold the combined file back into itself.
            if file == "all_function_relationships.txt":
                continue
            # fnmatchcase keeps matching case-sensitive on every platform,
            # like the str.endswith test it replaces.
            if fnmatch.fnmatchcase(file, pattern):
                rel_file = os.path.join(root, file)
                relationships.update(read_relationships_from_file(rel_file))
    return relationships
def write_master_relationships_file(
    output_path: str,
    base_directory: str,
    callee_caller_relationships: Dict[str, bool],
    caller_callee_relationships: Dict[str, bool]
) -> None:
    """
    Write the master file: a title, two sorted sections, and a statistics block.

    Args:
        output_path: Path to the output file (overwritten if it exists)
        base_directory: The base directory; only its last path component is
            written, on the "Generated on:" header line
        callee_caller_relationships: Dictionary of callee-caller relationships
        caller_callee_relationships: Dictionary of caller-callee relationships
    """
    rule = "# ==========================================================\n"
    callee_caller_total = len(callee_caller_relationships)
    caller_callee_total = len(caller_callee_relationships)

    # (text written before the opening rule, section title, entries)
    sections = (
        ("", "# CALLEE -> CALLER RELATIONSHIPS\n", callee_caller_relationships),
        ("\n\n", "# CALLER -> CALLEE RELATIONSHIPS\n", caller_callee_relationships),
    )

    with open(output_path, 'w') as f:
        f.write("# MASTER FUNCTION RELATIONSHIP FILE\n")
        f.write(f"# Generated on: {os.path.basename(base_directory)}\n\n")

        # Both relationship sections share one layout.
        for lead, title, entries in sections:
            f.write(lead + rule)
            f.write(title)
            f.write(f"# Total: {len(entries)}\n")
            f.write(rule + "\n")
            f.writelines(f"{relationship}\n" for relationship in sorted(entries.keys()))

        # Closing statistics block.
        f.write("\n\n" + rule)
        f.write("# STATISTICS\n")
        f.write(rule)
        f.write(f"# Total callee->caller relationships: {callee_caller_total}\n")
        f.write(f"# Total caller->callee relationships: {caller_callee_total}\n")
        f.write(f"# Total relationships: {callee_caller_total + caller_callee_total}\n")
def generate_master_relationships_file(base_directory: str) -> Optional[str]:
    """
    Generates a master file with all function relationships from the
    ``callee_caller_graph`` and ``caller_callee_graph`` subdirectories,
    clearly separated by relationship type.

    For each subdirectory that exists, its ``all_function_relationships.txt``
    is used when present; otherwise the individual ``*_relationships.txt``
    files under it are collected. The result is written to
    ``master_function_relationships.txt`` in the parent of ``base_directory``.
    Trailing path separators on ``base_directory`` are ignored when working
    out that parent.

    Args:
        base_directory: The base directory where the graph subdirectories are located

    Returns:
        Path to the generated master relationships file, or None if neither
        subdirectory exists or any step raises (the error is printed, not
        propagated)
    """
    try:
        # os.path.dirname("out/") is "out", not the parent of "out", and
        # os.path.basename("out/") is "". Strip trailing separators first,
        # keeping a bare root such as "/" intact.
        separators = os.sep + (os.altsep or "")
        normalized_base = base_directory.rstrip(separators) or base_directory

        parent_directory = os.path.dirname(normalized_base)
        output_path = os.path.join(parent_directory, "master_function_relationships.txt")

        callee_caller_dir = os.path.join(base_directory, "callee_caller_graph")
        caller_callee_dir = os.path.join(base_directory, "caller_callee_graph")

        callee_caller_exists = os.path.isdir(callee_caller_dir)
        caller_callee_exists = os.path.isdir(caller_callee_dir)

        if not callee_caller_exists and not caller_callee_exists:
            print("Error: Neither callee_caller_graph nor caller_callee_graph directories exist")
            return None

        def load_relationships(graph_dir: str) -> Dict[str, bool]:
            # Prefer the combined file; fall back to the individual files.
            combined = os.path.join(graph_dir, "all_function_relationships.txt")
            if os.path.isfile(combined):
                return read_relationships_from_file(combined)
            return collect_relationships_from_directory(graph_dir, "*_relationships.txt")

        # A missing subdirectory contributes an empty section.
        callee_caller_relationships: Dict[str, bool] = (
            load_relationships(callee_caller_dir) if callee_caller_exists else {}
        )
        caller_callee_relationships: Dict[str, bool] = (
            load_relationships(caller_callee_dir) if caller_callee_exists else {}
        )

        write_master_relationships_file(
            output_path,
            normalized_base,
            callee_caller_relationships,
            caller_callee_relationships,
        )
        print(f"Master function relationships file generated: {output_path}")
        return output_path
    except Exception as e:
        print(f"Error generating master relationships file: {e}")
        return None
def main() -> None:
    """
    Command-line entry point.

    The directory to process comes from the ``DOT_FILES_DIRECTORY``
    environment variable, then from an interactive prompt, then from the
    current working directory. Every .dot file found is processed, after
    which the combined and master relationship files are generated.
    """
    # First non-empty source wins; input() is only reached when the
    # environment variable is unset or empty.
    directory = (
        os.environ.get("DOT_FILES_DIRECTORY")
        or input("Enter directory to search for .dot files (press Enter for current directory): ")
        or os.getcwd()
    )

    if not os.path.isdir(directory):
        print(f"Error: {directory} is not a valid directory")
        return

    print(f"Processing dot files in: {directory}")
    dot_files = find_dot_files(directory)
    if not dot_files:
        print(f"No .dot files found in {directory}")
        return
    print(f"Found {len(dot_files)} .dot files")

    # One PNG and one relationships file per .dot file.
    for dot_path in dot_files:
        process_dot_file(dot_path)

    # Aggregate outputs: per-directory combined file, then the master file.
    generate_combined_relationships_file(directory, dot_files)
    generate_master_relationships_file(directory)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()