+ if (b2c3[i][1] == cns) {
+ return(b2c3[i][0]);
+ }
+ }
+ break;
+ case LC_CNS11643_4:
+ for (i=0;i
+ if (b1c4[i][1] == cns) {
+ return(b1c4[i][0]);
+ }
+ }
+ default:
+ break;
+ }
+ return big5;
+}
* conversion between client encoding and server internal encoding
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
- * $Id: conv.c,v 1.4 1998/12/14 04:59:58 momjian Exp $
+ * $Id: conv.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
*/
#include
#include
*p = '\0';
}
+/*
+ * Big5 ---> MIC
+ *
+ * Reads the Big5 string at big5 until a NUL byte is seen or len is used up,
+ * and writes the mule internal code (MIC) form to p, followed by a
+ * terminating NUL.
+ *
+ * - Bytes <= 0x7f are copied through unchanged.
+ * - Any other byte is taken as the first half of a two-byte Big5 code, which
+ *   is handed to BIG5toCNS().  On success the output is the plane byte lc
+ *   (preceded by 0x9d, LCPRV2, for LC_CNS11643_3 and LC_CNS11643_4) followed
+ *   by the two CNS bytes.
+ * - When BIG5toCNS() leaves lc == 0 the code cannot be converted, and the two
+ *   source bytes are written as "(xxxx)" in lowercase hex instead.
+ *
+ * One input character can produce up to six output bytes; nothing here
+ * bounds-checks p, so sizing the output buffer is left to the caller.
+ */
+static void
+big52mic(unsigned char *big5, unsigned char *p, int len)
+{
+    static const char hexdigits[] = "0123456789abcdef";
+    unsigned short c1;
+    unsigned short big5buf, cnsBuf;
+    unsigned char lc;
+    int i;
+
+    while (len > 0 && (c1 = *big5++))
+    {
+        if (c1 <= 0x007fU) { /* ASCII */
+            len--;
+            *p++ = c1;
+        } else {
+            /*
+             * NOTE(review): the second byte is consumed without checking it
+             * for NUL or checking the remaining len, so a dangling lead byte
+             * at the end of the input is paired with whatever follows it.
+             * Left as is; confirm what callers guarantee.
+             */
+            len -= 2;
+            big5buf = c1 << 8;
+            c1 = *big5++;
+            big5buf |= c1;
+            cnsBuf = BIG5toCNS(big5buf, &lc);
+            if (lc != 0) {
+                if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4) {
+                    *p++ = 0x9d; /* LCPRV2 */
+                }
+                *p++ = lc; /* Plane No. */
+                *p++ = (cnsBuf >> 8) & 0x00ff;
+                *p++ = cnsBuf & 0x00ff;
+            } else { /* cannot convert */
+                /*
+                 * Emit the two source bytes as hex digits taken from a
+                 * lookup table.  Formatting them with sprintf("%02x") needs
+                 * room for two digits plus the terminating NUL, which a
+                 * two-byte scratch buffer cannot hold.
+                 */
+                big5 -= 2;
+                *p++ = '(';
+                for (i = 0; i < 2; i++) {
+                    unsigned char b = *big5++;
+
+                    *p++ = hexdigits[(b >> 4) & 0x0f];
+                    *p++ = hexdigits[b & 0x0f];
+                }
+                *p++ = ')';
+            }
+        }
+    }
+    *p = '\0';
+}
+
+/*
+ * MIC ---> Big5
+ *
+ * Reads the mule internal code (MIC) string at mic until a NUL byte is seen
+ * or len is used up, and writes the Big5 form to p, followed by a
+ * terminating NUL.
+ *
+ * - A leading byte of LC_CNS11643_1 or LC_CNS11643_2, or 0x9d followed by a
+ *   plane byte, introduces a two-byte CNS code that is passed to CNStoBIG5().
+ *   A non-zero result is written out as two Big5 bytes.
+ * - A leading byte <= 0x7f is copied through unchanged.
+ * - Everything else, including CNS codes for which CNStoBIG5() returns 0,
+ *   is handed to printBogusChar().
+ *
+ * Nothing here bounds-checks p; sizing the output buffer is left to the
+ * caller.
+ */
+static void
+mic2big5(unsigned char *mic, unsigned char *p, int len)
+{
+ int l;
+ unsigned short c1;
+ unsigned short big5buf, cnsBuf;
+
+ while (len > 0 && (c1 = *mic))
+ {
+ l = pg_mic_mblen(mic++); /* length of this MIC character; mic now points past its leading byte */
+ len -= l;
+
+ /* 0x9d means LCPRV2; the plane byte then follows it */
+ if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == 0x9d)
+ {
+ if (c1 == 0x9d) {
+ c1 = *mic++; /* get plane no. */
+ }
+ cnsBuf = (*mic++)<<8; /* high byte of the CNS code */
+ cnsBuf |= (*mic++) & 0x00ff; /* low byte of the CNS code */
+ big5buf = CNStoBIG5(cnsBuf, c1);
+ if (big5buf == 0) { /* cannot convert to Big5! */
+ mic -= l; /* step back by the reported length; presumably the start of this character -- confirm against pg_mic_mblen */
+ printBogusChar(&mic, &p); /* defined elsewhere; presumably writes a placeholder and advances mic -- confirm */
+ } else {
+ *p++ = (big5buf >> 8) & 0x00ff;
+ *p++ = big5buf & 0x00ff;
+ }
+ }
+ else if (c1 <= 0x7f) /* ASCII */
+ {
+ *p++ = c1;
+ } else { /* cannot convert to Big5! */
+ mic--; /* undo the mic++ above so mic is back on the leading byte */
+ printBogusChar(&mic, &p);
+ }
+ }
+ *p = '\0';
+}
+
/*
* LATINn ---> MIC
*/
{LATIN4, "LATIN4", 0, latin42mic, mic2latin4}, /* ISO 8859 Latin 4 */
{LATIN5, "LATIN5", 0, latin52mic, mic2latin5}, /* ISO 8859 Latin 5 */
{SJIS, "SJIS", 1, sjis2mic, mic2sjis}, /* SJIS */
+ {BIG5, "BIG5", 1, big52mic, mic2big5}, /* Big5 */
{-1, "", 0, 0, 0} /* end mark */
};
* client encoding and server internal encoding.
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
- * $Id: mbutils.c,v 1.4 1998/09/25 01:46:23 momjian Exp $ */
+ * $Id: mbutils.c,v 1.5 1999/02/02 18:51:23 momjian Exp $ */
#include
#include
#include "mb/pg_wchar.h"
-static client_encoding = -1;
+static int client_encoding = -1;
static void (*client_to_mic) ();/* something to MIC */
static void (*client_from_mic) (); /* MIC to something */
static void (*server_to_mic) ();/* something to MIC */
/*
* conversion functions between pg_wchar and multi-byte streams.
* Tatsuo Ishii
- * $Id: wchar.c,v 1.4 1998/09/01 04:33:26 momjian Exp $
+ * $Id: wchar.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
*/
#include "mb/pg_wchar.h"
return (len);
}
+/*
+ * Big5
+ *
+ * Returns the byte length of the character that starts at s: 2 when the
+ * first byte is above 0x7f, otherwise 1.
+ */
+static int
+pg_big5_mblen(const unsigned char *s)
+{
+    /* a first byte above 0x7f starts a two-byte character; anything else is a single byte */
+    return (*s > 0x7f) ? 2 : 1;
+}
+
pg_wchar_tbl pg_wchar_table[] = {
{pg_ascii2wchar_with_len, pg_ascii_mblen},
{pg_eucjp2wchar_with_len, pg_eucjp_mblen},
{0, 0},
{0, 0},
{0, 0},
- {0, pg_sjis_mblen}
+ {0, pg_sjis_mblen},
+ {0, pg_big5_mblen}
};
/* returns the byte length of a word for mule internal code */
*
* Copyright (c) 1994, Regents of the University of California
*
- * $Id: psqlHelp.h,v 1.57 1999/02/02 18:41:17 momjian Exp $
+ * $Id: psqlHelp.h,v 1.58 1999/02/02 18:51:24 momjian Exp $
*
*-------------------------------------------------------------------------
*/
set GEQO TO 'ON[=#]'|'OFF'\n\
set R_PLANS TO 'ON'|'OFF'\n\
set QUERY_LIMIT TO #\n\
-set CLIENT_ENCODING TO 'EUC_JP'|'SJIS'|'EUC_CN'|'EUC_KR'|'EUC_TW'|'MULE_INTERNAL'|'LATIN1'|'LATIN2'|'LATIN3'|'LATIN4'|'LATIN5'"},
+set CLIENT_ENCODING TO 'EUC_JP'|'SJIS'|'EUC_CN'|'EUC_KR'|'EUC_TW'|'BIG5'|'MULE_INTERNAL'|'LATIN1'|'LATIN2'|'LATIN3'|'LATIN4'|'LATIN5'"},
#else
"\
\tSET DateStyle TO 'ISO'|'SQL'|'Postgres'|'European'|'US'|'NonEuropean'\n\
-/* $Id: pg_wchar.h,v 1.5 1998/09/25 01:46:25 momjian Exp $ */
+/* $Id: pg_wchar.h,v 1.6 1999/02/02 18:51:25 momjian Exp $ */
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
#define EUC_JP 1 /* EUC for Japanese */
#define EUC_CN 2 /* EUC for Chinese */
#define EUC_KR 3 /* EUC for Korean */
-#define EUC_TW 3 /* EUC for Taiwan */
+#define EUC_TW 4 /* EUC for Taiwan */
#define UNICODE 5 /* Unicode UTF-8 */
#define MULE_INTERNAL 6 /* Mule internal code */
#define LATIN1 7 /* ISO-8859 Latin 1 */
#define LATIN9 15 /* ISO-8859 Latin 9 */
/* followings are for client encoding only */
#define SJIS 32 /* Shift JIS */
+#define BIG5 33 /* Big5 */
#ifdef MULTIBYTE
typedef unsigned int pg_wchar;
extern void SetDatabaseEncoding(int);
extern void SetTemplateEncoding(int);
extern int GetTemplateEncoding(void);
+extern unsigned short BIG5toCNS(unsigned short, unsigned char *);
+extern unsigned short CNStoBIG5(unsigned short, unsigned char);
#endif /* MULTIBYTE */
# Copyright (c) 1994, Regents of the University of California
#
# IDENTIFICATION
-# $Header: /cvsroot/pgsql/src/interfaces/libpq/Attic/Makefile.in,v 1.42 1999/01/17 06:19:34 momjian Exp $
+# $Header: /cvsroot/pgsql/src/interfaces/libpq/Attic/Makefile.in,v 1.43 1999/02/02 18:51:29 momjian Exp $
#
#-------------------------------------------------------------------------
dllist.o pqsignal.o
ifdef MULTIBYTE
-OBJS+= common.o wchar.o conv.o
+OBJS+= common.o wchar.o conv.o big5.o
endif
# If crypt is a separate library, rather than part of libc,
conv.c: $(SRCDIR)/backend/utils/mb/conv.c
-$(LN_S) $(SRCDIR)/backend/utils/mb/conv.c .
+
+big5.c: $(SRCDIR)/backend/utils/mb/big5.c
+ -$(LN_S) $(SRCDIR)/backend/utils/mb/big5.c .
endif
# The following rules cause dependencies in the backend directory to
.PHONY: clean
clean:
rm -f libpq.a $(shlib) $(OBJS)
- rm -f dllist.c common.c wchar.c conv.c
+ rm -f dllist.c common.c wchar.c conv.c big5.c
ifeq ($(PORTNAME), win)
rm -f pq.def
endif
--- /dev/null
+QUERY: drop table ¼t°Ó¸ê®Æ;
+QUERY: create table ¼t°Ó¸ê®Æ (¦æ·~§O text, ¤½¥q©ïÀY varchar, ¦a§} varchar(16));
+QUERY: create index ¼t°Ó¸ê®Æindex1 on ¼t°Ó¸ê®Æ using btree (¦æ·~§O);
+QUERY: create index ¼t°Ó¸ê®Æindex2 on ¼t°Ó¸ê®Æ using hash (¤½¥q©ïÀY);
+QUERY: insert into ¼t°Ó¸ê®Æ values ('¹q¸£·~', '¹F¹F¬ì§Þ', '¥_A01¤¯');
+QUERY: insert into ¼t°Ó¸ê®Æ values ('»s³y·~', '°]·½¦³¤½¥q', '¤¤B10¤¤');
+QUERY: insert into ¼t°Ó¸ê®Æ values ('À\¶¼·~', '¬ü¨ýªÑ¥÷¦³¤½¥q', '°ªZ01¤E');
+QUERY: vacuum ¼t°Ó¸ê®Æ;
+QUERY: select * from ¼t°Ó¸ê®Æ;
+¦æ·~§O|¤½¥q©ïÀY |¦a§}
+------+----------------+-------
+¹q¸£·~|¹F¹F¬ì§Þ |¥_A01¤¯
+»s³y·~|°]·½¦³¤½¥q |¤¤B10¤¤
+À\¶¼·~|¬ü¨ýªÑ¥÷¦³¤½¥q|°ªZ01¤E
+(3 rows)
+
+QUERY: select * from ¼t°Ó¸ê®Æ where ¦a§} = '°ªZ01¤E';
+¦æ·~§O|¤½¥q©ïÀY |¦a§}
+------+----------------+-------
+À\¶¼·~|¬ü¨ýªÑ¥÷¦³¤½¥q|°ªZ01¤E
+(1 row)
+
+QUERY: select * from ¼t°Ó¸ê®Æ where ¦a§} ~* '°ªz01¤E';
+¦æ·~§O|¤½¥q©ïÀY |¦a§}
+------+----------------+-------
+À\¶¼·~|¬ü¨ýªÑ¥÷¦³¤½¥q|°ªZ01¤E
+(1 row)
+
+QUERY: select * from ¼t°Ó¸ê®Æ where ¦a§} like '_Z01_';
+¦æ·~§O|¤½¥q©ïÀY |¦a§}
+------+----------------+-------
+À\¶¼·~|¬ü¨ýªÑ¥÷¦³¤½¥q|°ªZ01¤E
+(1 row)
+
+QUERY: select * from ¼t°Ó¸ê®Æ where ¦a§} like '_Z%';
+¦æ·~§O|¤½¥q©ïÀY |¦a§}
+------+----------------+-------
+À\¶¼·~|¬ü¨ýªÑ¥÷¦³¤½¥q|°ªZ01¤E
+(1 row)
+
+QUERY: select * from ¼t°Ó¸ê®Æ where ¤½¥q©ïÀY ~ '¹F¹F¬ì[±H°O§Þ]';
+¦æ·~§O|¤½¥q©ïÀY|¦a§}
+------+--------+-------
+¹q¸£·~|¹F¹F¬ì§Þ|¥_A01¤¯
+(1 row)
+
+QUERY: select * from ¼t°Ó¸ê®Æ where ¤½¥q©ïÀY ~* '¹F¹F¬ì[±H°O§Þ]';
+¦æ·~§O|¤½¥q©ïÀY|¦a§}
+------+--------+-------
+¹q¸£·~|¹F¹F¬ì§Þ|¥_A01¤¯
+(1 row)
+
+QUERY: select *, character_length(¦æ·~§O) from ¼t°Ó¸ê®Æ;
+¦æ·~§O|¤½¥q©ïÀY |¦a§} |length
+------+----------------+-------+------
+¹q¸£·~|¹F¹F¬ì§Þ |¥_A01¤¯| 3
+»s³y·~|°]·½¦³¤½¥q |¤¤B10¤¤| 3
+À\¶¼·~|¬ü¨ýªÑ¥÷¦³¤½¥q|°ªZ01¤E| 3
+(3 rows)
+
+QUERY: select *, octet_length(¦æ·~§O) from ¼t°Ó¸ê®Æ;
+¦æ·~§O|¤½¥q©ïÀY |¦a§} |octet_length
+------+----------------+-------+------------
+¹q¸£·~|¹F¹F¬ì§Þ |¥_A01¤¯| 6
+»s³y·~|°]·½¦³¤½¥q |¤¤B10¤¤| 6
+À\¶¼·~|¬ü¨ýªÑ¥÷¦³¤½¥q|°ªZ01¤E| 6
+(3 rows)
+
+QUERY: select *, position('¦³' in ¤½¥q©ïÀY) from ¼t°Ó¸ê®Æ;
+¦æ·~§O|¤½¥q©ïÀY |¦a§} |strpos
+------+----------------+-------+------
+¹q¸£·~|¹F¹F¬ì§Þ |¥_A01¤¯| 0
+»s³y·~|°]·½¦³¤½¥q |¤¤B10¤¤| 3
+À\¶¼·~|¬ü¨ýªÑ¥÷¦³¤½¥q|°ªZ01¤E| 5
+(3 rows)
+
+QUERY: select *, substring(¤½¥q©ïÀY from 3 for 6 ) from ¼t°Ó¸ê®Æ;
+¦æ·~§O|¤½¥q©ïÀY |¦a§} |substr
+------+----------------+-------+------------
+¹q¸£·~|¹F¹F¬ì§Þ |¥_A01¤¯|¬ì§Þ
+»s³y·~|°]·½¦³¤½¥q |¤¤B10¤¤|¦³¤½¥q
+À\¶¼·~|¬ü¨ýªÑ¥÷¦³¤½¥q|°ªZ01¤E|ªÑ¥÷¦³¤½¥q
+(3 rows)
+
#! /bin/sh
-# $Header: /cvsroot/pgsql/src/test/mb/mbregress.sh,v 1.2 1998/07/26 04:31:38 scrappy Exp $
+# $Header: /cvsroot/pgsql/src/test/mb/mbregress.sh,v 1.3 1999/02/02 18:51:32 momjian Exp $
if echo '\c' | grep -s c >/dev/null 2>&1
then
fi
PSQL="psql -n -e -q"
-tests="euc_jp sjis euc_kr euc_cn unicode mule_internal"
+tests="euc_jp sjis euc_kr euc_cn euc_tw big5 unicode mule_internal"
unset PGCLIENTENCODING
for i in $tests
do
export PGCLIENTENCODING
$PSQL euc_jp < sql/sjis.sql > results/sjis.out 2>&1
unset PGCLIENTENCODING
+ elif [ $i = big5 ];then
+ PGCLIENTENCODING=BIG5
+ export PGCLIENTENCODING
+ $PSQL euc_tw < sql/big5.sql > results/big5.out 2>&1
+ unset PGCLIENTENCODING
else
destroydb $i >/dev/null 2>&1
createdb -E `echo $i|tr "[a-z]" "[A-Z]"` $i
--- /dev/null
+drop table ¼t°Ó¸ê®Æ;
+create table ¼t°Ó¸ê®Æ (¦æ·~§O text, ¤½¥q©ïÀY varchar, ¦a§} varchar(16));
+create index ¼t°Ó¸ê®Æindex1 on ¼t°Ó¸ê®Æ using btree (¦æ·~§O);
+create index ¼t°Ó¸ê®Æindex2 on ¼t°Ó¸ê®Æ using hash (¤½¥q©ïÀY);
+insert into ¼t°Ó¸ê®Æ values ('¹q¸£·~', '¹F¹F¬ì§Þ', '¥_A01¤¯');
+insert into ¼t°Ó¸ê®Æ values ('»s³y·~', '°]·½¦³¤½¥q', '¤¤B10¤¤');
+insert into ¼t°Ó¸ê®Æ values ('À\¶¼·~', '¬ü¨ýªÑ¥÷¦³¤½¥q', '°ªZ01¤E');
+vacuum ¼t°Ó¸ê®Æ;
+select * from ¼t°Ó¸ê®Æ;
+select * from ¼t°Ó¸ê®Æ where ¦a§} = '°ªZ01¤E';
+select * from ¼t°Ó¸ê®Æ where ¦a§} ~* '°ªz01¤E';
+select * from ¼t°Ó¸ê®Æ where ¦a§} like '_Z01_';
+select * from ¼t°Ó¸ê®Æ where ¦a§} like '_Z%';
+select * from ¼t°Ó¸ê®Æ where ¤½¥q©ïÀY ~ '¹F¹F¬ì[±H°O§Þ]';
+select * from ¼t°Ó¸ê®Æ where ¤½¥q©ïÀY ~* '¹F¹F¬ì[±H°O§Þ]';
+
+select *, character_length(¦æ·~§O) from ¼t°Ó¸ê®Æ;
+select *, octet_length(¦æ·~§O) from ¼t°Ó¸ê®Æ;
+select *, position('¦³' in ¤½¥q©ïÀY) from ¼t°Ó¸ê®Æ;
+select *, substring(¤½¥q©ïÀY from 3 for 6 ) from ¼t°Ó¸ê®Æ;