Diffstat (limited to 'scripts/analyze-project-deps.py')
-rw-r--r--  scripts/analyze-project-deps.py  206
1 file changed, 206 insertions, 0 deletions
diff --git a/scripts/analyze-project-deps.py b/scripts/analyze-project-deps.py
new file mode 100644
index 000000000000..e06d7035d43d
--- /dev/null
+++ b/scripts/analyze-project-deps.py
@@ -0,0 +1,206 @@
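+# Walk the LLDB include/ and source/ trees, collect the #include directives in
+# each subproject, and report inter-project dependencies. Optionally discovers
+# dependency cycles and groups them into disjoint "islands".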
+import argparse
+import os
+import re
+import sys
+from collections import defaultdict
+
+from use_lldb_suite import lldb_root
+
+parser = argparse.ArgumentParser(
+ description='Analyze LLDB project #include dependencies.')
+parser.add_argument('--show-counts', default=False, action='store_true',
+ help='When true, show the number of dependencies from each subproject')
+parser.add_argument('--discover-cycles', default=False, action='store_true',
+ help='When true, find and display all project dependency cycles. Note, '
+ 'this option is very slow')
+
+args = parser.parse_args()
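+# Typical invocation (assumes use_lldb_suite is importable, as it is when this
+# script lives alongside it in the LLDB scripts/ directory):
+#   python analyze-project-deps.py --show-counts --discover-cycles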
+
+src_dir = os.path.join(lldb_root, "source")
+inc_dir = os.path.join(lldb_root, "include")
+
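+# Maps each normalized source directory (e.g. "lldb/Core") to a dict of the
+# directories it includes headers from, with a count of how many #include
+# directives reference each one.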
+src_map = {}
+
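+# Matches '#include "..."' directives whose path starts with lldb/, Plugins/
+# or clang/; the first capture group is the directory portion of the path
+# (its trailing slash is stripped later).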
+include_regex = re.compile(r'#include "((lldb|Plugins|clang)(.*/)+).*"')
+
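+# True if 'small' appears in 'big' as an ordered (not necessarily contiguous)
+# subsequence; used below to recognize rotations of already-known cycles.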
+def is_sublist(small, big):
+ it = iter(big)
+ return all(c in it for c in small)
+
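+# Fold equivalent directory spellings into one canonical key: everything under
+# lldb/Host becomes a single node, plugin paths gain an "lldb/" prefix, and
+# includes that reach back into source/ via "lldb/../../source" are rewritten
+# to plain "lldb/...".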
+def normalize_host(str):
+ if str.startswith("lldb/Host"):
+ return "lldb/Host"
+ if str.startswith("Plugins"):
+ return "lldb/" + str
+ if str.startswith("lldb/../../source"):
+ return str.replace("lldb/../../source", "lldb")
+ return str
+
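+# Scan one file for #include directives and merge the referenced directories
+# into src_map under the normalized key 'this_dir', skipping self-references.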
+def scan_deps(this_dir, file):
+ global src_map
+ deps = {}
+ this_dir = normalize_host(this_dir)
+ if this_dir in src_map:
+ deps = src_map[this_dir]
+
+ with open(file) as f:
+ for line in list(f):
+ m = include_regex.match(line)
+ if m is None:
+ continue
+ relative = m.groups()[0].rstrip("/")
+ if relative == this_dir:
+ continue
+ relative = normalize_host(relative)
+ if relative in deps:
+ deps[relative] += 1
+ elif relative != this_dir:
+ deps[relative] = 1
+ if this_dir not in src_map and len(deps) > 0:
+ src_map[this_dir] = deps
+
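+# First pass: scan every header under include/. The path relative to include/
+# already starts with "lldb/", so it is used directly as the map key.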
+for (base, dirs, files) in os.walk(inc_dir):
+ dir = os.path.basename(base)
+ relative = os.path.relpath(base, inc_dir)
+ inc_files = filter(lambda x : os.path.splitext(x)[1] in [".h"], files)
+ relative = relative.replace("\\", "/")
+ for inc in inc_files:
+ inc_path = os.path.join(base, inc)
+ scan_deps(relative, inc_path)
+
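+# Second pass: scan .cpp/.h/.mm files under source/, mapping e.g. source/Core
+# onto the same "lldb/Core" key used by the include/ pass.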
+for (base, dirs, files) in os.walk(src_dir):
+ dir = os.path.basename(base)
+ relative = os.path.relpath(base, src_dir)
+ src_files = filter(lambda x : os.path.splitext(x)[1] in [".cpp", ".h", ".mm"], files)
+ norm_base_path = os.path.normpath(os.path.join("lldb", relative))
+ norm_base_path = norm_base_path.replace("\\", "/")
+ for src in src_files:
+ src_path = os.path.join(base, src)
+ scan_deps(norm_base_path, src_path)
+ pass
+
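+# Returns True if 'path', interpreted as a cycle, is already covered by one of
+# the known 'cycles' under some rotation.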
+def is_existing_cycle(path, cycles):
+ # If we have a cycle like A -> B -> C (with an implicit -> A at the end)
+ # then we don't just want to check for an occurrence of A -> B -> C in the
+ # list of known cycles, but every possible rotation of A -> B -> C. For
+ # example, if we previously encountered B -> C -> A (with an implicit -> B
+ # at the end), then A -> B -> C is also a cycle. This is an important
+ # optimization which reduces the search space by multiple orders of
+ # magnitude.
+ for i in xrange(0,len(path)):
+ if any(is_sublist(x, path) for x in cycles):
+ return True
+ path = [path[-1]] + path[0:-1]
+ return False
+
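+# Breadth-first expansion of every simple path through src_map; each newly
+# discovered minimal cycle is appended to 'cycles'. Dependencies on clang are
+# skipped.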
+def expand(path_queue, path_lengths, cycles, src_map):
+ # We do a breadth-first search to make sure we visit all paths in order
+ # of ascending length. This is an important optimization: short cycles
+ # are discovered first, letting us discard the longer cycles that would
+ # otherwise grow the search space exponentially.
+ while len(path_queue) > 0:
+ cur_path = path_queue.pop(0)
+ if is_existing_cycle(cur_path, cycles):
+ continue
+
+ next_len = path_lengths.pop(0) + 1
+ last_component = cur_path[-1]
+
+ for item in src_map[last_component]:
+ if item.startswith("clang"):
+ continue
+
+ if item in cur_path:
+ # This is a cycle. Minimize it, check whether the result is already in
+ # the list of known cycles, record it if it is not, and move on to the
+ # next item.
+ new_index = cur_path.index(item)
+ cycle = cur_path[new_index:]
+ if not is_existing_cycle(cycle, cycles):
+ cycles.append(cycle)
+ continue
+
+ path_lengths.append(next_len)
+ path_queue.append(cur_path + [item])
+ pass
+
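+# Accumulator for discovered cycles, plus the initial breadth-first work queue:
+# one single-element path (with a corresponding length of 1) per project.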
+cycles = []
+
+path_queue = [[x] for x in src_map.iterkeys()]
+path_lens = [1] * len(path_queue)
+
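+# Print the dependency report: one block per project; within each block the
+# dependencies are ordered by include count when --show-counts is given,
+# otherwise alphabetically.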
+items = list(src_map.iteritems())
+items.sort(lambda A, B : cmp(A[0], B[0]))
+
+for (path, deps) in items:
+ print path + ":"
+ sorted_deps = list(deps.iteritems())
+ if args.show_counts:
+ sorted_deps.sort(lambda A, B: cmp(A[1], B[1]))
+ for dep in sorted_deps:
+ print "\t{} [{}]".format(dep[0], dep[1])
+ else:
+ sorted_deps.sort(lambda A, B: cmp(A[0], B[0]))
+ for dep in sorted_deps:
+ print "\t{}".format(dep[0])
+
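+# For each cycle, yield (total, smallest, edges): 'edges' is a list of
+# (source, include-count, target) tuples, 'total' is the sum of the counts,
+# and 'smallest' is the cheapest single edge whose removal breaks the cycle.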
+def iter_cycles(cycles):
+ global src_map
+ for cycle in cycles:
+ cycle.append(cycle[0])
+ zipper = list(zip(cycle[0:-1], cycle[1:]))
+ result = [(x, src_map[x][y], y) for (x,y) in zipper]
+ total = 0
+ smallest = result[0][1]
+ for (first, value, last) in result:
+ total += value
+ smallest = min(smallest, value)
+ yield (total, smallest, result)
+
+if args.discover_cycles:
+ print "Analyzing cycles..."
+
+ expand(path_queue, path_lens, cycles, src_map)
+
+ average = sum([len(x)+1 for x in cycles]) / max(1, len(cycles))
+
+ print "Found {} cycles. Average cycle length = {}.".format(len(cycles), average)
+ counted = list(iter_cycles(cycles))
+ if args.show_counts:
+ counted.sort(lambda A, B: cmp(A[0], B[0]))
+ for (total, smallest, cycle) in counted:
+ sys.stdout.write("{} deps to break: ".format(total))
+ sys.stdout.write(cycle[0][0])
+ for (first, count, last) in cycle:
+ sys.stdout.write(" [{}->] {}".format(count, last))
+ sys.stdout.write("\n")
+ else:
+ for cycle in cycles:
+ cycle.append(cycle[0])
+ print " -> ".join(cycle)
+
+ print "Analyzing islands..."
+ islands = []
+ outgoing_counts = defaultdict(int)
+ incoming_counts = defaultdict(int)
+ for (total, smallest, cycle) in counted:
+ for (first, count, last) in cycle:
+ outgoing_counts[first] += count
+ incoming_counts[last] += count
+ for cycle in cycles:
+ this_cycle = set(cycle)
+ disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
+ overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
+ islands = disjoints + [set.union(this_cycle, *overlaps)]
+ print "Found {} disjoint cycle islands...".format(len(islands))
+ for island in islands:
+ print "Island ({} elements)".format(len(island))
+ sorted = []
+ for node in island:
+ sorted.append((node, incoming_counts[node], outgoing_counts[node]))
+ sorted.sort(lambda x, y: cmp(x[1]+x[2], y[1]+y[2]))
+ for (node, inc, outg) in sorted:
+ print " {} [{} in, {} out]".format(node, inc, outg)
+ sys.stdout.flush()
+pass
\ No newline at end of file