Skip to content

Fix the DFS implementation of Topological Sort #12791

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 111 additions & 38 deletions sorts/topological_sort.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,114 @@
"""Topological Sort."""

# a
# / \
# b c
# / \
# d e
edges: dict[str, list[str]] = {
"a": ["c", "b"],
"b": ["d", "e"],
"c": [],
"d": [],
"e": [],
}
vertices: list[str] = ["a", "b", "c", "d", "e"]


def topological_sort(start: str, visited: list[str], sort: list[str]) -> list[str]:
"""Perform topological sort on a directed acyclic graph."""
current = start
# add current to visited
visited.append(current)
neighbors = edges[current]
for neighbor in neighbors:
# if neighbor not in visited, visit
if neighbor not in visited:
sort = topological_sort(neighbor, visited, sort)
# if all neighbors visited add current to sort
sort.append(current)
# if all vertices haven't been visited select a new one to visit
if len(visited) != len(vertices):
for vertice in vertices:
if vertice not in visited:
sort = topological_sort(vertice, visited, sort)
# return sort
return sort
"""
Topological sorting for Directed Acyclic Graph (DAG) is an ordering of vertices
such that for every directed edge u -> v, vertex u comes before v in the ordering.
Source: https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search
"""


def topological_sort(graph: dict[str, list[str]]) -> list[str]:
"""
Performs topological sorting on a directed acyclic graph (DAG).

Args:
graph: A dictionary representing the adjacency lists of the graph
where keys are nodes and values are lists of adjacent nodes.

Returns:
A list of nodes in topologically sorted order.

Raises:
ValueError: If the graph contains a cycle (topological sort not possible).

Examples:
>>> # Simple linear graph
>>> # 1 -> 2 -> 3
>>> topological_sort({'1': ['2'], '2': ['3'], '3': []})
['1', '2', '3']

>>> # Graph with multiple possible orderings
>>> # A
>>> # ↙ ↘
>>> # B C
>>> # ↙ ↘
>>> # D E
>>> graph = {
... 'A': ['B', 'C'],
... 'B': ['D', 'E'],
... 'C': [],
... 'D': [],
... 'E': []
... }
>>> import random
>>> adjacency_lists = list(graph.items())
>>> random.shuffle(adjacency_lists)
>>> graph = dict(adjacency_lists)
>>> result = topological_sort(graph)
>>> result in (
... ['A', 'B', 'C', 'D', 'E'],
... ['A', 'B', 'C', 'E', 'D'],
... ['A', 'B', 'D', 'C', 'E'],
... ['A', 'B', 'D', 'E', 'C'],
... ['A', 'B', 'E', 'C', 'D'],
... ['A', 'B', 'E', 'D', 'C'],
... ['A', 'C', 'B', 'D', 'E'],
... ['A', 'C', 'B', 'E', 'D']
... )
True

>>> # Empty graph
>>> topological_sort({})
[]

>>> # Graph with cycle (should raise error)
>>> topological_sort({'A': ['B'], 'B': ['C'], 'C': ['A']})
Traceback (most recent call last):
...
ValueError: Graph contains a cycle, topological sort not possible
"""

is_being_visited = set() # To track nodes in current path being visited through DFS
visited = set() # To track and efficiently lookup if a node has been fully visited
result = []

def visit(node: str) -> None:
is_being_visited.add(node)

for neighbor in graph.get(node, []):
if neighbor in visited:
continue

if neighbor in is_being_visited:
# If the 'neighbor' is already in 'is_being_visited',
# it means we have found a cycle.
raise ValueError(
"Graph contains a cycle, topological sort not possible"
)

visit(neighbor)

is_being_visited.remove(node)
visited.add(node)
# Fully visited nodes are supposed to be prepended to the result list
# But since prepending to python lists is O(n), i.e., costly,
# we will append them and reverse the result at the end.
result.append(node)

for node in graph:
if node not in visited:
visit(node)

return result[::-1] # Reverse the result to get the correct topological order


if __name__ == "__main__":
sort = topological_sort("a", [], [])
print(sort)
graph: dict[str, list[str]] = {
"A": ["B", "C"],
"B": ["D", "E"],
"C": [],
"D": [],
"E": [],
}

sorted_values = topological_sort(graph)

print(f"Sorted values: {sorted_values}")