When re-reading an update involving both an old tuple and a new tuple from
disk, reorderbuffer.c was careless about whether the new tuple is suitably
aligned for direct access --- in general, it isn't. We'd missed seeing
this in the buildfarm because the contrib/test_decoding tests exercise this
code path only a few times, and by chance all of those cases have old
tuples with length a multiple of 4, which is usually enough to make the
access to the new tuple's t_len safe. For some still-not-entirely-clear
reason, however, Debian's sparc build gets a bus error, as reported by
Christoph Berg; perhaps it's assuming 8-byte alignment of the pointer?
The lack of previous field reports is probably because you need all of
these conditions to trigger a crash: an alignment-picky platform (not
Intel), a transaction large enough to spill to disk, an update within
that xact that changes a primary-key field and has an odd-length old tuple,
and of course logical decoding tracing the transaction.
Avoid the alignment assumption by using memcpy instead of fetching t_len
directly, and add a test case that exposes the crash on picky platforms.
Back-patch to 9.4 where the bug was introduced.
Discussion: <
20160413094117[email protected]>
INSERT INTO tr_etoomuch(data) SELECT g.i FROM generate_series(1, 10234) g(i);
DELETE FROM tr_etoomuch WHERE id < 5000;
UPDATE tr_etoomuch SET data = - data WHERE id > 5000;
+CREATE TABLE tr_oddlength (id text primary key, data text);
+INSERT INTO tr_oddlength VALUES('ab', 'foo');
COMMIT;
/* display results, but hide most of the output */
SELECT count(*), min(data), max(data)
FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1')
GROUP BY substring(data, 1, 24)
ORDER BY 1,2;
- count | min | max
--------+-------------------------------------------------+------------------------------------------------------------------------
- 1 | BEGIN | BEGIN
- 1 | COMMIT | COMMIT
- 20467 | table public.tr_etoomuch: DELETE: id[integer]:1 | table public.tr_etoomuch: UPDATE: id[integer]:9999 data[integer]:-9999
-(3 rows)
+ count | min | max
+-------+-------------------------------------------------------------------+------------------------------------------------------------------------
+ 1 | BEGIN | BEGIN
+ 1 | COMMIT | COMMIT
+ 1 | table public.tr_oddlength: INSERT: id[text]:'ab' data[text]:'foo' | table public.tr_oddlength: INSERT: id[text]:'ab' data[text]:'foo'
+ 20467 | table public.tr_etoomuch: DELETE: id[integer]:1 | table public.tr_etoomuch: UPDATE: id[integer]:9999 data[integer]:-9999
+(4 rows)
-- check updates of primary keys work correctly
BEGIN;
CREATE TABLE spoolme AS SELECT g.i FROM generate_series(1, 5000) g(i);
UPDATE tr_etoomuch SET id = -id WHERE id = 5000;
+UPDATE tr_oddlength SET id = 'x', data = 'quux';
+UPDATE tr_oddlength SET id = 'yy', data = 'a';
DELETE FROM spoolme;
DROP TABLE spoolme;
COMMIT;
data
-------------------------------------------------------------------------------------------------------------
table public.tr_etoomuch: UPDATE: old-key: id[integer]:5000 new-tuple: id[integer]:-5000 data[integer]:5000
-(1 row)
+ table public.tr_oddlength: UPDATE: old-key: id[text]:'ab' new-tuple: id[text]:'x' data[text]:'quux'
+ table public.tr_oddlength: UPDATE: old-key: id[text]:'x' new-tuple: id[text]:'yy' data[text]:'a'
+(3 rows)
/*
* check whether we decode subtransactions correctly in relation with each
INSERT INTO tr_etoomuch(data) SELECT g.i FROM generate_series(1, 10234) g(i);
DELETE FROM tr_etoomuch WHERE id < 5000;
UPDATE tr_etoomuch SET data = - data WHERE id > 5000;
+CREATE TABLE tr_oddlength (id text primary key, data text);
+INSERT INTO tr_oddlength VALUES('ab', 'foo');
COMMIT;
/* display results, but hide most of the output */
BEGIN;
CREATE TABLE spoolme AS SELECT g.i FROM generate_series(1, 5000) g(i);
UPDATE tr_etoomuch SET id = -id WHERE id = 5000;
+UPDATE tr_oddlength SET id = 'x', data = 'quux';
+UPDATE tr_oddlength SET id = 'yy', data = 'a';
DELETE FROM spoolme;
DROP TABLE spoolme;
COMMIT;
/*
* Convert change from its on-disk format to in-memory format and queue it onto
* the TXN's ->changes list.
+ *
+ * Note: although "data" is declared char*, at entry it points to a
+ * maxalign'd buffer, making it safe in most of this function to assume
+ * that the pointed-to data is suitably aligned for direct access.
*/
static void
ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
case REORDER_BUFFER_CHANGE_DELETE:
if (change->data.tp.oldtuple)
{
- Size tuplelen = ((HeapTuple) data)->t_len;
+ uint32 tuplelen = ((HeapTuple) data)->t_len;
change->data.tp.oldtuple =
ReorderBufferGetTupleBuf(rb, tuplelen - offsetof(HeapTupleHeaderData, t_bits));
if (change->data.tp.newtuple)
{
- Size tuplelen = ((HeapTuple) data)->t_len;
+ /* here, data might not be suitably aligned! */
+ uint32 tuplelen;
+
+ memcpy(&tuplelen, data + offsetof(HeapTupleData, t_len),
+ sizeof(uint32));
change->data.tp.newtuple =
ReorderBufferGetTupleBuf(rb, tuplelen - offsetof(HeapTupleHeaderData, t_bits));