doc: Expand recursive query documentation

author Peter Eisentraut

Tue, 13 Oct 2020 04:29:06 +0000 (06:29 +0200)

committer Peter Eisentraut

Tue, 13 Oct 2020 04:54:20 +0000 (06:54 +0200)
author Peter Eisentraut
Tue, 13 Oct 2020 04:29:06 +0000 (06:29 +0200)
committer Peter Eisentraut
Tue, 13 Oct 2020 04:54:20 +0000 (06:54 +0200)
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml

index bad97a75b2759cc57bb4ecc1f61c3d5da89f3478..f06afe2c3fb1df39cf1f971c4f1d5f4bf505b14d 100644 (file)
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -2011,6 +2011,10 @@ GROUP BY region, product;
     but we'd have needed two levels of nested sub-SELECTs.  It's a bit
     easier to follow this way.
    
+ 
+
+ 
+  Recursive Queries
  
    
     
@@ -2114,6 +2118,120 @@ GROUP BY sub_part
  
    
  
+  
+   Search Order
+
+   
+    When computing a tree traversal using a recursive query, you might want to
+    order the results in either depth-first or breadth-first order.  This can
+    be done by computing an ordering column alongside the other data columns
+    and using that to sort the results at the end.  Note that this does not
+    actually control in which order the query evaluation visits the rows; that
+    is, as always in SQL, implementation-dependent.  This approach merely
+    provides a convenient way to order the results afterwards.
+   
+
+   
+    To create a depth-first order, we compute for each result row an array of
+    rows that we have visited so far.  For example, consider the following
+    query that searches a table tree using a
+    link field:
+
+
+WITH RECURSIVE search_tree(id, link, data) AS (
+    SELECT t.id, t.link, t.data
+    FROM tree t
+  UNION ALL
+    SELECT t.id, t.link, t.data
+    FROM tree t, search_tree st
+    WHERE t.id = st.link
+)
+SELECT * FROM search_tree;
+
+
+    To add depth-first ordering information, you can write this:
+
+
+WITH RECURSIVE search_tree(id, link, data, path) AS (
+    SELECT t.id, t.link, t.data, ARRAY[t.id]
+    FROM tree t
+  UNION ALL
+    SELECT t.id, t.link, t.data, path || t.id
+    FROM tree t, search_tree st
+    WHERE t.id = st.link
+)
+SELECT * FROM search_tree ORDER BY path;
+
+   
+
+   
+    In the general case where more than one field needs to be used to identify
+    a row, use an array of rows.  For example, if we needed to track fields
+    f1 and f2:
+
+
+WITH RECURSIVE search_tree(id, link, data, path) AS (
+    SELECT t.id, t.link, t.data, ARRAY[ROW(t.f1, t.f2)]
+    FROM tree t
+  UNION ALL
+    SELECT t.id, t.link, t.data, path || ROW(t.f1, t.f2)
+    FROM tree t, search_tree st
+    WHERE t.id = st.link
+)
+SELECT * FROM search_tree ORDER BY path;
+
+   
+
+   
+    
+     The queries shown in this and the following section involving
+     ROW constructors in the target list only support
+     UNION ALL (not plain UNION) in the
+     current implementation.
+    
+   
+
+   
+    
+     Omit the ROW() syntax in the common case where only one
+     field needs to be tracked.  This allows a simple array rather than a
+     composite-type array to be used, gaining efficiency.
+    
+   
+
+   
+    To create a breadth-first order, you can add a column that tracks the depth
+    of the search, for example:
+
+
+WITH RECURSIVE search_tree(id, link, data, depth) AS (
+    SELECT t.id, t.link, t.data, 0
+    FROM tree t
+  UNION ALL
+    SELECT t.id, t.link, t.data, depth + 1
+    FROM tree t, search_tree st
+    WHERE t.id = st.link
+)
+SELECT * FROM search_tree ORDER BY depth;
+
+
+    To get a stable sort, add data columns as secondary sorting columns.
+   
+
+   
+    
+     The recursive query evaluation algorithm produces its output in
+     breadth-first search order.  However, this is an implementation detail and
+     it is perhaps unsound to rely on it.  The order of the rows within each
+     level is certainly undefined, so some explicit ordering might be desired
+     in any case.
+    
+   
+  
+
+  
+   Cycle Detection
+
    
     When working with recursive queries it is important to be sure that
     the recursive part of the query will eventually return no tuples,
@@ -2123,13 +2241,13 @@ GROUP BY sub_part
     cycle does not involve output rows that are completely duplicate: it may be
     necessary to check just one or a few fields to see if the same point has
     been reached before.  The standard method for handling such situations is
-   to compute an array of the already-visited values.  For example, consider
+   to compute an array of the already-visited values.  For example, consider again
     the following query that searches a table graph using a
     link field:
  
  
  WITH RECURSIVE search_graph(id, link, data, depth) AS (
-    SELECT g.id, g.link, g.data, 1
+    SELECT g.id, g.link, g.data, 0
      FROM graph g
    UNION ALL
      SELECT g.id, g.link, g.data, sg.depth + 1
@@ -2147,17 +2265,17 @@ SELECT * FROM search_graph;
     is_cycle and path to the loop-prone query:
  
  
-WITH RECURSIVE search_graph(id, link, data, depth, is_cycle, path) AS (
-    SELECT g.id, g.link, g.data, 1,
-      false,
-      ARRAY[g.id]
+WITH RECURSIVE search_graph(id, link, data, depth, is_cycle, path) AS (
+    SELECT g.id, g.link, g.data, 0,
+      false,
+      ARRAY[g.id]
      FROM graph g
    UNION ALL
      SELECT g.id, g.link, g.data, sg.depth + 1,
-      g.id = ANY(path),
-      path || g.id
+      g.id = ANY(path),
+      path || g.id
      FROM graph g, search_graph sg
-    WHERE g.id = sg.link AND NOT is_cycle
+    WHERE g.id = sg.link AND NOT is_cycle
  )
  SELECT * FROM search_graph;
  
@@ -2172,17 +2290,17 @@ SELECT * FROM search_graph;
     compare fields f1 and f2:
  
  
-WITH RECURSIVE search_graph(id, link, data, depth, is_cycle, path) AS (
-    SELECT g.id, g.link, g.data, 1,
-      false,
-      ARRAY[ROW(g.f1, g.f2)]
+WITH RECURSIVE search_graph(id, link, data, depth, is_cycle, path) AS (
+    SELECT g.id, g.link, g.data, 0,
+      false,
+      ARRAY[ROW(g.f1, g.f2)]
      FROM graph g
    UNION ALL
      SELECT g.id, g.link, g.data, sg.depth + 1,
-      ROW(g.f1, g.f2) = ANY(path),
-      path || ROW(g.f1, g.f2)
+      ROW(g.f1, g.f2) = ANY(path),
+      path || ROW(g.f1, g.f2)
      FROM graph g, search_graph sg
-    WHERE g.id = sg.link AND NOT is_cycle
+    WHERE g.id = sg.link AND NOT is_cycle
  )
  SELECT * FROM search_graph;
  
@@ -2198,10 +2316,8 @@ SELECT * FROM search_graph;
  
    
     
-    The recursive query evaluation algorithm produces its output in
-    breadth-first search order.  You can display the results in depth-first
-    search order by making the outer query ORDER BY a
-    path column constructed in this way.
+    The cycle path column is computed in the same way as the depth-first
+    ordering column shown in the previous section.
     
    
  
@@ -2217,7 +2333,7 @@ WITH RECURSIVE t(n) AS (
    UNION ALL
      SELECT n+1 FROM t
  )
-SELECT n FROM t LIMIT 100;
+SELECT n FROM t LIMIT 100;
  
  
     This works because PostgreSQL's implementation
@@ -2229,6 +2345,11 @@ SELECT n FROM t LIMIT 100;
     outer query will usually try to fetch all of the WITH query's
     output anyway.
    
+  
+ 
+
+ 
+  Common Table Expression Materialization
  
    
     A useful property of WITH queries is that they are
author	Peter Eisentraut
	Tue, 13 Oct 2020 04:29:06 +0000 (06:29 +0200)
committer	Peter Eisentraut
	Tue, 13 Oct 2020 04:54:20 +0000 (06:54 +0200)