doc: improve build for non-Latin1 characters

author Bruce Momjian

Fri, 1 Nov 2024 16:46:51 +0000 (12:46 -0400)

committer Bruce Momjian

Fri, 1 Nov 2024 16:46:51 +0000 (12:46 -0400)
author Bruce Momjian
Fri, 1 Nov 2024 16:46:51 +0000 (12:46 -0400)
committer Bruce Momjian
Fri, 1 Nov 2024 16:46:51 +0000 (12:46 -0400)
diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile

index 65ed32cd0aba7cd7ee563c23953a1f2201db2ade..12f506c9602580fbba035c79451ab7536cab3889 100644 (file)
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -59,7 +59,7 @@ GENERATED_SGML = version.sgml \
     features-supported.sgml features-unsupported.sgml errcodes-table.sgml \
     keywords-table.sgml targets-meson.sgml wait_event_types.sgml
  
-ALLSGML := $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml) $(GENERATED_SGML)
+ALL_SGML := $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml) $(GENERATED_SGML)
  
  ALL_IMAGES := $(wildcard $(srcdir)/images/*.svg)
  
@@ -68,7 +68,7 @@ ALL_IMAGES := $(wildcard $(srcdir)/images/*.svg)
  # we're at it, also resolve all entities (that is, copy all included
  # files into one big file).  This helps tools that don't understand
  # vpath builds (such as dbtoepub).
-postgres-full.xml: postgres.sgml $(ALLSGML)
+postgres-full.xml: postgres.sgml $(ALL_SGML)
     $(XMLLINT) $(XMLINCLUDE) --output $@ --noent --valid $<
  
  
@@ -143,11 +143,12 @@ postgres.txt: postgres.html
  ## Print
  ##
  
-postgres.pdf:
+postgres.pdf pdf:
     $(error Invalid target;  use postgres-A4.pdf or postgres-US.pdf as targets)
  
  XSLTPROC_FO_FLAGS += --stringparam img.src.path '$(srcdir)/'
  
+# XSL Formatting Objects (FO), https://en.wikipedia.org/wiki/XSL_Formatting_Objects
  %-A4.fo: stylesheet-fo.xsl %-full.xml
     $(XSLTPROC) $(XMLINCLUDE) $(XSLTPROCFLAGS) $(XSLTPROC_FO_FLAGS) --stringparam paper.type A4 -o $@ $^
  
@@ -194,7 +195,7 @@ MAKEINFO = makeinfo
  ##
  
  # Quick syntax check without style processing
-check: postgres.sgml $(ALLSGML) check-tabs check-nbsp
+check: postgres.sgml $(ALL_SGML) check-tabs check-nbsp
     $(XMLLINT) $(XMLINCLUDE) --noout --valid $<
  
  
@@ -264,7 +265,7 @@ check-tabs:
  # Use perl command because non-GNU grep or sed could not have hex escape sequence.
  check-nbsp:
     @ ( $(PERL) -ne '/\xC2\xA0/ and print("$$ARGV:$$_"),$$n++; END {exit($$n>0)}' \
-     $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/*.xsl) ) || \
+     $(wildcard $(srcdir)/*.sgml $(srcdir)/ref/*.sgml $(srcdir)/images/*.svg $(srcdir)/*.xsl $(srcdir)/images/*.xsl) ) || \
     (echo "Non-breaking spaces appear in SGML/XML files" 1>&2;  exit 1)
  
  ##
diff --git a/doc/src/sgml/README.non-ASCII b/doc/src/sgml/README.non-ASCII

new file mode 100644 (file)

index 0000000..9c21e02
--- /dev/null
+++ b/doc/src/sgml/README.non-ASCII
@@ -0,0 +1,37 @@
+
+
+Representation of non-ASCII characters
+--------------------------------------
+
+Find non-ASCII characters using:
+
+        grep --recursive --color='auto' -P '[\x80-\xFF]' .
+
+Convert to HTML4 named entity (&) escapes
+-----------------------------------------
+
+We support several output formats:
+
+*  html (supports all Unicode characters)
+*  man (supports all Unicode characters)
+*  pdf (supports only Latin-1 characters)
+*  info
+
+While some output formatting tools support all Unicode characters,
+others only support Latin-1 characters.  Specifically, the PDF rendering
+engine can only display Latin-1 characters;  non-Latin-1 Unicode
+characters are displayed as "###".
+
+Therefore, in the SGML files, we only use Latin-1 characters.  We
+typically encode these characters as HTML entities, e.g., &Aacute;lvaro.
+It is also possible to safely represent Latin-1 characters in UTF8
+encoding for all output formats.
+
+Do not use UTF numeric character escapes (&#nnn;).
+
+HTML entities
+        official:      http://www.w3.org/TR/html4/sgml/entities.html
+        one page:      http://www.zipcon.net/~swhite/docs/computers/browsers/entities_page.html
+        other lists:   http://www.zipcon.net/~swhite/docs/computers/browsers/entities.html
+                       http://www.zipcon.net/~swhite/docs/computers/browsers/entities_page.html
+                       https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml

index 1ef5322b912ee6f5e775cbd3859f34a99685d3e9..f5e115e8d6e6a2795eb8857f5cd8a9ab41e276a5 100644 (file)
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -1225,7 +1225,7 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr
  
  -- ignore differences in accents and case
  CREATE COLLATION ignore_accent_case (provider = icu, deterministic = false, locale = 'und-u-ks-level1');
-SELECT 'Å' = 'A' COLLATE ignore_accent_case; -- true
+SELECT '&Aring;' = 'A' COLLATE ignore_accent_case; -- true
  SELECT 'z' = 'Z' COLLATE ignore_accent_case; -- true
  
  -- upper case letters sort before lower case.
@@ -1282,7 +1282,7 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
           'ab' = U&'a\2063b'
           'x-y' = 'x_y'
           'g' = 'G'
-         'n' = 'ñ'
+         'n' = '&ntilde;'
           'y' = 'z'
          
         
@@ -1346,7 +1346,7 @@ SELECT 'w;x*y-z' = 'wxyz' COLLATE num_ignore_punct; -- true
  
      
       At every level, even with full normalization off, basic normalization is
-     performed. For example, 'á' may be composed of the
+     performed. For example, '&aacute;' may be composed of the
       code points U&'\0061\0301' or the single code
       point U&'\00E1', and those sequences will be
       considered equal even at the identic level. To treat
@@ -1430,8 +1430,8 @@ SELECT 'x-y' = 'x_y' COLLATE level4; -- false
           false
           
            Backwards comparison for the level 2 differences. For example,
-          locale und-u-kb sorts 'àe'
-          before 'aé'.
+          locale und-u-kb sorts '&agrave;e'
+          before 'a&eacute;'.
           
          
  
diff --git a/doc/src/sgml/images/genetic-algorithm.svg b/doc/src/sgml/images/genetic-algorithm.svg

index fb9fdd1ba78081d968b1a1ddbfe8b13a4abaecf7..2ce5f1b27125e8bb98e44d1521e7e0a12d3432b8 100644 (file)
--- a/doc/src/sgml/images/genetic-algorithm.svg
+++ b/doc/src/sgml/images/genetic-algorithm.svg
@@ -72,7 +72,7 @@
  a4->end
  
  
-true  
+true
  
  
  
@@ -85,7 +85,7 @@
  a4->a5
  
  
-false   
+false
  
  
  
diff --git a/doc/src/sgml/release.sgml b/doc/src/sgml/release.sgml

index 8433690dead42e199d210e61fe08b15b227fd9be..cee577ff8d353960d95af8fa1f17286fb42e588a 100644 (file)
--- a/doc/src/sgml/release.sgml
+++ b/doc/src/sgml/release.sgml
@@ -16,24 +16,6 @@ pg_[A-Za-z0-9_]+                , 
  \<[a-z]+_[a-z_]+\>              , 
                                  
  
-non-ASCII characters            find using grep -P '[\x80-\xFF]' or
-                                  (remove 'X') grep -X-color='auto' -P -n "[\x80-\xFF]"
-                                convert to HTML4 named entity (&) escapes
-
-        official:      http://www.w3.org/TR/html4/sgml/entities.html
-        one page:      http://www.zipcon.net/~swhite/docs/computers/browsers/entities_page.html
-        other lists:   http://www.zipcon.net/~swhite/docs/computers/browsers/entities.html
-                       http://www.zipcon.net/~swhite/docs/computers/browsers/entities_page.html
-                       https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
-
-        We cannot use UTF8 because rendering engines have to
-        support the referenced characters.
-
-        Do not use numeric _UTF_ numeric character escapes (&#nnn;),
-        we can only use Latin1.
-
-        Example: Alvaro Herrera is &Aacute;lvaro Herrera
-
  wrap long lines
  
  For new features, add links to the documentation sections.
diff --git a/doc/src/sgml/stylesheet-man.xsl b/doc/src/sgml/stylesheet-man.xsl

index fcb485c29319d6a5205ccafbf1a27b11c53615e8..2e2564da683b48346b0c1d25fc4d594cc8c54a4f 100644 (file)
--- a/doc/src/sgml/stylesheet-man.xsl
+++ b/doc/src/sgml/stylesheet-man.xsl
@@ -213,12 +213,12 @@
      
      
-      %t, in the documentation"/>
-      “%t”, in the documentation"/>
-      “%t”, in the documentation"/>
-      “%t”, in the documentation"/>
-      “%t”, in the documentation"/>
-      “%t”, in the documentation"/>
+      "%t", in the documentation"/>
+      "%t", in the documentation"/>
+      "%t", in the documentation"/>
+      "%t", in the documentation"/>
+      "%t", in the documentation"/>
+      "%t", in the documentation"/>
author	Bruce Momjian
	Fri, 1 Nov 2024 16:46:51 +0000 (12:46 -0400)
committer	Bruce Momjian
	Fri, 1 Nov 2024 16:46:51 +0000 (12:46 -0400)
doc/src/sgml/Makefile		patch \| blob \| blame \| history
doc/src/sgml/README.non-ASCII	[new file with mode: 0644]	patch \| blob
doc/src/sgml/charset.sgml		patch \| blob \| blame \| history
doc/src/sgml/images/genetic-algorithm.svg		patch \| blob \| blame \| history
doc/src/sgml/release.sgml		patch \| blob \| blame \| history
doc/src/sgml/stylesheet-man.xsl		patch \| blob \| blame \| history