diff --git a/configure b/configure
index a10a2c85c6a..a6eab396299 100755
--- a/configure
+++ b/configure
@@ -5616,15 +5616,15 @@ fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -Wimplicit-fallthrough=3, for CFLAGS" >&5
-$as_echo_n "checking whether ${CC} supports -Wimplicit-fallthrough=3, for CFLAGS... " >&6; }
-if ${pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_3+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CC} supports -Wimplicit-fallthrough=5, for CFLAGS" >&5
+$as_echo_n "checking whether ${CC} supports -Wimplicit-fallthrough=5, for CFLAGS... " >&6; }
+if ${pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_5+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CFLAGS=$CFLAGS
pgac_save_CC=$CC
CC=${CC}
-CFLAGS="${CFLAGS} -Wimplicit-fallthrough=3"
+CFLAGS="${CFLAGS} -Wimplicit-fallthrough=5"
ac_save_c_werror_flag=$ac_c_werror_flag
ac_c_werror_flag=yes
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -5639,31 +5639,31 @@ main ()
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
- pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_3=yes
+ pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_5=yes
else
- pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_3=no
+ pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_5=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_c_werror_flag=$ac_save_c_werror_flag
CFLAGS="$pgac_save_CFLAGS"
CC="$pgac_save_CC"
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_3" >&5
-$as_echo "$pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_3" >&6; }
-if test x"$pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_3" = x"yes"; then
- CFLAGS="${CFLAGS} -Wimplicit-fallthrough=3"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_5" >&5
+$as_echo "$pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_5" >&6; }
+if test x"$pgac_cv_prog_CC_cflags__Wimplicit_fallthrough_5" = x"yes"; then
+ CFLAGS="${CFLAGS} -Wimplicit-fallthrough=5"
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CXX} supports -Wimplicit-fallthrough=3, for CXXFLAGS" >&5
-$as_echo_n "checking whether ${CXX} supports -Wimplicit-fallthrough=3, for CXXFLAGS... " >&6; }
-if ${pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_3+:} false; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${CXX} supports -Wimplicit-fallthrough=5, for CXXFLAGS" >&5
+$as_echo_n "checking whether ${CXX} supports -Wimplicit-fallthrough=5, for CXXFLAGS... " >&6; }
+if ${pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_5+:} false; then :
$as_echo_n "(cached) " >&6
else
pgac_save_CXXFLAGS=$CXXFLAGS
pgac_save_CXX=$CXX
CXX=${CXX}
-CXXFLAGS="${CXXFLAGS} -Wimplicit-fallthrough=3"
+CXXFLAGS="${CXXFLAGS} -Wimplicit-fallthrough=5"
ac_save_cxx_werror_flag=$ac_cxx_werror_flag
ac_cxx_werror_flag=yes
ac_ext=cpp
@@ -5684,9 +5684,9 @@ main ()
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
- pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_3=yes
+ pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_5=yes
else
- pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_3=no
+ pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_5=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
ac_ext=c
@@ -5699,10 +5699,10 @@ ac_cxx_werror_flag=$ac_save_cxx_werror_flag
CXXFLAGS="$pgac_save_CXXFLAGS"
CXX="$pgac_save_CXX"
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_3" >&5
-$as_echo "$pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_3" >&6; }
-if test x"$pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_3" = x"yes"; then
- CXXFLAGS="${CXXFLAGS} -Wimplicit-fallthrough=3"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_5" >&5
+$as_echo "$pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_5" >&6; }
+if test x"$pgac_cv_prog_CXX_cxxflags__Wimplicit_fallthrough_5" = x"yes"; then
+ CXXFLAGS="${CXXFLAGS} -Wimplicit-fallthrough=5"
fi
diff --git a/configure.ac b/configure.ac
index 814e64a967e..455ba31f1d1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -556,8 +556,8 @@ if test "$GCC" = yes -a "$ICC" = no; then
PGAC_PROG_CXX_CFLAGS_OPT([-Wendif-labels])
PGAC_PROG_CC_CFLAGS_OPT([-Wmissing-format-attribute])
PGAC_PROG_CXX_CFLAGS_OPT([-Wmissing-format-attribute])
- PGAC_PROG_CC_CFLAGS_OPT([-Wimplicit-fallthrough=3])
- PGAC_PROG_CXX_CFLAGS_OPT([-Wimplicit-fallthrough=3])
+ PGAC_PROG_CC_CFLAGS_OPT([-Wimplicit-fallthrough=5])
+ PGAC_PROG_CXX_CFLAGS_OPT([-Wimplicit-fallthrough=5])
PGAC_PROG_CC_CFLAGS_OPT([-Wcast-function-type])
PGAC_PROG_CXX_CFLAGS_OPT([-Wcast-function-type])
PGAC_PROG_CC_CFLAGS_OPT([-Wshadow=compatible-local])
diff --git a/contrib/Makefile b/contrib/Makefile
index 2f0a88d3f77..dd04c20acd2 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -34,6 +34,7 @@ SUBDIRS = \
pg_freespacemap \
pg_logicalinspect \
pg_overexplain \
+ pg_plan_advice \
pg_prewarm \
pg_stat_statements \
pg_surgery \
diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 30c2f583173..31e19fbc697 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -73,7 +73,7 @@ typedef enum SkipPages
*/
typedef struct ToastedAttribute
{
- struct varatt_external toast_pointer;
+ varatt_external toast_pointer;
BlockNumber blkno; /* block in main table */
OffsetNumber offnum; /* offset in main table */
AttrNumber attnum; /* attribute in main table */
@@ -1660,11 +1660,11 @@ static bool
check_tuple_attribute(HeapCheckContext *ctx)
{
Datum attdatum;
- struct varlena *attr;
+ varlena *attr;
char *tp; /* pointer to the tuple data */
uint16 infomask;
CompactAttribute *thisatt;
- struct varatt_external toast_pointer;
+ varatt_external toast_pointer;
infomask = ctx->tuphdr->t_infomask;
thisatt = TupleDescCompactAttr(RelationGetDescr(ctx->rel), ctx->attnum);
@@ -1754,7 +1754,7 @@ check_tuple_attribute(HeapCheckContext *ctx)
* We go further, because we need to check if the toast datum is corrupt.
*/
- attr = (struct varlena *) DatumGetPointer(attdatum);
+ attr = (varlena *) DatumGetPointer(attdatum);
/*
* Now we follow the logic of detoast_external_attr(), with the same
diff --git a/contrib/btree_gin/btree_gin.c b/contrib/btree_gin/btree_gin.c
index afb8b3820af..8dfbaa4781d 100644
--- a/contrib/btree_gin/btree_gin.c
+++ b/contrib/btree_gin/btree_gin.c
@@ -120,7 +120,7 @@ gin_btree_extract_query(FunctionCallInfo fcinfo,
case BTGreaterEqualStrategyNumber:
case BTGreaterStrategyNumber:
*ptr_partialmatch = true;
- /* FALLTHROUGH */
+ pg_fallthrough;
case BTEqualStrategyNumber:
/* If we have a conversion function, apply it */
if (cvt_fns && cvt_fns[rhs_code])
diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c
index 6847e4e54d5..e1945cf808f 100644
--- a/contrib/btree_gist/btree_utils_var.c
+++ b/contrib/btree_gist/btree_utils_var.c
@@ -70,7 +70,7 @@ gbt_var_key_readable(const GBT_VARKEY *k)
* Create a leaf-entry to store in the index, from a single Datum.
*/
static GBT_VARKEY *
-gbt_var_key_from_datum(const struct varlena *u)
+gbt_var_key_from_datum(const varlena *u)
{
int32 lowersize = VARSIZE(u);
GBT_VARKEY *r;
@@ -115,36 +115,47 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo, FmgrInfo *flinfo)
/*
* returns the common prefix length of a node key
+ *
+ * If the underlying type is character data, the prefix length may fall in
+ * the middle of a multibyte character.
*/
static int32
gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
{
GBT_VARKEY_R r = gbt_var_key_readable(node);
int32 i = 0;
- int32 l = 0;
+ int32 l_left_to_match = 0;
+ int32 l_total = 0;
int32 t1len = VARSIZE(r.lower) - VARHDRSZ;
int32 t2len = VARSIZE(r.upper) - VARHDRSZ;
int32 ml = Min(t1len, t2len);
char *p1 = VARDATA(r.lower);
char *p2 = VARDATA(r.upper);
+ const char *end1 = p1 + t1len;
+ const char *end2 = p2 + t2len;
if (ml == 0)
return 0;
while (i < ml)
{
- if (tinfo->eml > 1 && l == 0)
+ if (tinfo->eml > 1 && l_left_to_match == 0)
{
- if ((l = pg_mblen(p1)) != pg_mblen(p2))
+ l_total = pg_mblen_range(p1, end1);
+ if (l_total != pg_mblen_range(p2, end2))
{
return i;
}
+ l_left_to_match = l_total;
}
if (*p1 != *p2)
{
if (tinfo->eml > 1)
{
- return (i - l + 1);
+ int32 l_matched_subset = l_total - l_left_to_match;
+
+ /* end common prefix at final byte of last matching char */
+ return i - l_matched_subset;
}
else
{
@@ -154,7 +165,7 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo)
p1++;
p2++;
- l--;
+ l_left_to_match--;
i++;
}
return ml; /* lower == upper */
@@ -283,7 +294,7 @@ gbt_var_compress(GISTENTRY *entry, const gbtree_vinfo *tinfo)
if (entry->leafkey)
{
- struct varlena *leaf = PG_DETOAST_DATUM(entry->key);
+ varlena *leaf = PG_DETOAST_DATUM(entry->key);
GBT_VARKEY *r;
r = gbt_var_key_from_datum(leaf);
diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c
index 8cb3166495c..2498d80c8e7 100644
--- a/contrib/dblink/dblink.c
+++ b/contrib/dblink/dblink.c
@@ -2069,6 +2069,7 @@ get_text_array_contents(ArrayType *array, int *numitems)
int16 typlen;
bool typbyval;
char typalign;
+ uint8 typalignby;
char **values;
char *ptr;
bits8 *bitmap;
@@ -2081,6 +2082,7 @@ get_text_array_contents(ArrayType *array, int *numitems)
get_typlenbyvalalign(ARR_ELEMTYPE(array),
&typlen, &typbyval, &typalign);
+ typalignby = typalign_to_alignby(typalign);
values = palloc_array(char *, nitems);
@@ -2098,7 +2100,7 @@ get_text_array_contents(ArrayType *array, int *numitems)
{
values[i] = TextDatumGetCString(PointerGetDatum(ptr));
ptr = att_addlength_pointer(ptr, typlen, ptr);
- ptr = (char *) att_align_nominal(ptr, typalign);
+ ptr = (char *) att_nominal_alignby(ptr, typalignby);
}
/* advance bitmap pointer if any */
diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c
index 5c4917ce1fc..9e3784e0f47 100644
--- a/contrib/dict_xsyn/dict_xsyn.c
+++ b/contrib/dict_xsyn/dict_xsyn.c
@@ -54,14 +54,14 @@ find_word(char *in, char **end)
*end = NULL;
while (*in && isspace((unsigned char) *in))
- in += pg_mblen(in);
+ in += pg_mblen_cstr(in);
if (!*in || *in == '#')
return NULL;
start = in;
while (*in && !isspace((unsigned char) *in))
- in += pg_mblen(in);
+ in += pg_mblen_cstr(in);
*end = in;
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index 34e3918811c..9b72efb8674 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -67,7 +67,7 @@ prssyntaxerror(HSParser *state)
errsave(state->escontext,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in hstore, near \"%.*s\" at position %d",
- pg_mblen(state->ptr), state->ptr,
+ pg_mblen_cstr(state->ptr), state->ptr,
(int) (state->ptr - state->begin))));
/* In soft error situation, return false as convenience for caller */
return false;
@@ -385,7 +385,8 @@ hstoreUniquePairs(Pairs *a, int32 l, int32 *buflen)
if (ptr->needfree)
{
pfree(ptr->key);
- pfree(ptr->val);
+ if (ptr->val != NULL)
+ pfree(ptr->val);
}
}
else
diff --git a/contrib/hstore_plperl/hstore_plperl.c b/contrib/hstore_plperl/hstore_plperl.c
index 31393b4fa50..69001191cc0 100644
--- a/contrib/hstore_plperl/hstore_plperl.c
+++ b/contrib/hstore_plperl/hstore_plperl.c
@@ -21,6 +21,13 @@ static hstoreCheckKeyLen_t hstoreCheckKeyLen_p;
typedef size_t (*hstoreCheckValLen_t) (size_t len);
static hstoreCheckValLen_t hstoreCheckValLen_p;
+/* Static asserts verify that typedefs above match original declarations */
+StaticAssertVariableIsOfType(&hstoreUpgrade, hstoreUpgrade_t);
+StaticAssertVariableIsOfType(&hstoreUniquePairs, hstoreUniquePairs_t);
+StaticAssertVariableIsOfType(&hstorePairs, hstorePairs_t);
+StaticAssertVariableIsOfType(&hstoreCheckKeyLen, hstoreCheckKeyLen_t);
+StaticAssertVariableIsOfType(&hstoreCheckValLen, hstoreCheckValLen_t);
+
/*
* Module initialize function: fetch function pointers for cross-module calls.
@@ -28,24 +35,18 @@ static hstoreCheckValLen_t hstoreCheckValLen_p;
void
_PG_init(void)
{
- /* Asserts verify that typedefs above match original declarations */
- AssertVariableIsOfType(&hstoreUpgrade, hstoreUpgrade_t);
hstoreUpgrade_p = (hstoreUpgrade_t)
load_external_function("$libdir/hstore", "hstoreUpgrade",
true, NULL);
- AssertVariableIsOfType(&hstoreUniquePairs, hstoreUniquePairs_t);
hstoreUniquePairs_p = (hstoreUniquePairs_t)
load_external_function("$libdir/hstore", "hstoreUniquePairs",
true, NULL);
- AssertVariableIsOfType(&hstorePairs, hstorePairs_t);
hstorePairs_p = (hstorePairs_t)
load_external_function("$libdir/hstore", "hstorePairs",
true, NULL);
- AssertVariableIsOfType(&hstoreCheckKeyLen, hstoreCheckKeyLen_t);
hstoreCheckKeyLen_p = (hstoreCheckKeyLen_t)
load_external_function("$libdir/hstore", "hstoreCheckKeyLen",
true, NULL);
- AssertVariableIsOfType(&hstoreCheckValLen, hstoreCheckValLen_t);
hstoreCheckValLen_p = (hstoreCheckValLen_t)
load_external_function("$libdir/hstore", "hstoreCheckValLen",
true, NULL);
diff --git a/contrib/hstore_plpython/hstore_plpython.c b/contrib/hstore_plpython/hstore_plpython.c
index e2bfc6da38e..d2be030e07c 100644
--- a/contrib/hstore_plpython/hstore_plpython.c
+++ b/contrib/hstore_plpython/hstore_plpython.c
@@ -28,6 +28,15 @@ static hstoreCheckKeyLen_t hstoreCheckKeyLen_p;
typedef size_t (*hstoreCheckValLen_t) (size_t len);
static hstoreCheckValLen_t hstoreCheckValLen_p;
+/* Static asserts verify that typedefs above match original declarations */
+StaticAssertVariableIsOfType(&PLyObject_AsString, PLyObject_AsString_t);
+StaticAssertVariableIsOfType(&PLyUnicode_FromStringAndSize, PLyUnicode_FromStringAndSize_t);
+StaticAssertVariableIsOfType(&hstoreUpgrade, hstoreUpgrade_t);
+StaticAssertVariableIsOfType(&hstoreUniquePairs, hstoreUniquePairs_t);
+StaticAssertVariableIsOfType(&hstorePairs, hstorePairs_t);
+StaticAssertVariableIsOfType(&hstoreCheckKeyLen, hstoreCheckKeyLen_t);
+StaticAssertVariableIsOfType(&hstoreCheckValLen, hstoreCheckValLen_t);
+
/*
* Module initialize function: fetch function pointers for cross-module calls.
@@ -35,32 +44,24 @@ static hstoreCheckValLen_t hstoreCheckValLen_p;
void
_PG_init(void)
{
- /* Asserts verify that typedefs above match original declarations */
- AssertVariableIsOfType(&PLyObject_AsString, PLyObject_AsString_t);
PLyObject_AsString_p = (PLyObject_AsString_t)
load_external_function("$libdir/" PLPYTHON_LIBNAME, "PLyObject_AsString",
true, NULL);
- AssertVariableIsOfType(&PLyUnicode_FromStringAndSize, PLyUnicode_FromStringAndSize_t);
PLyUnicode_FromStringAndSize_p = (PLyUnicode_FromStringAndSize_t)
load_external_function("$libdir/" PLPYTHON_LIBNAME, "PLyUnicode_FromStringAndSize",
true, NULL);
- AssertVariableIsOfType(&hstoreUpgrade, hstoreUpgrade_t);
hstoreUpgrade_p = (hstoreUpgrade_t)
load_external_function("$libdir/hstore", "hstoreUpgrade",
true, NULL);
- AssertVariableIsOfType(&hstoreUniquePairs, hstoreUniquePairs_t);
hstoreUniquePairs_p = (hstoreUniquePairs_t)
load_external_function("$libdir/hstore", "hstoreUniquePairs",
true, NULL);
- AssertVariableIsOfType(&hstorePairs, hstorePairs_t);
hstorePairs_p = (hstorePairs_t)
load_external_function("$libdir/hstore", "hstorePairs",
true, NULL);
- AssertVariableIsOfType(&hstoreCheckKeyLen, hstoreCheckKeyLen_t);
hstoreCheckKeyLen_p = (hstoreCheckKeyLen_t)
load_external_function("$libdir/hstore", "hstoreCheckKeyLen",
true, NULL);
- AssertVariableIsOfType(&hstoreCheckValLen, hstoreCheckValLen_t);
hstoreCheckValLen_p = (hstoreCheckValLen_t)
load_external_function("$libdir/hstore", "hstoreCheckValLen",
true, NULL);
diff --git a/contrib/intarray/_int_selfuncs.c b/contrib/intarray/_int_selfuncs.c
index 4a7053028c6..7fce743632f 100644
--- a/contrib/intarray/_int_selfuncs.c
+++ b/contrib/intarray/_int_selfuncs.c
@@ -19,6 +19,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
+#include "commands/extension.h"
#include "miscadmin.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
@@ -170,7 +171,18 @@ _int_matchsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(0.0);
}
- /* The caller made sure the const is a query, so get it now */
+ /*
+ * Verify that the Const is a query_int, else return a default estimate.
+ * (This could only fail if someone attached this estimator to the wrong
+ * operator.)
+ */
+ if (((Const *) other)->consttype !=
+ get_function_sibling_type(fcinfo->flinfo->fn_oid, "query_int"))
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+ }
+
query = DatumGetQueryTypeP(((Const *) other)->constvalue);
/* Empty query matches nothing */
diff --git a/contrib/jsonb_plpython/jsonb_plpython.c b/contrib/jsonb_plpython/jsonb_plpython.c
index 7e8e1d6674f..c2c4ce37c08 100644
--- a/contrib/jsonb_plpython/jsonb_plpython.c
+++ b/contrib/jsonb_plpython/jsonb_plpython.c
@@ -33,22 +33,24 @@ typedef PyObject *(*PLyUnicode_FromStringAndSize_t)
(const char *s, Py_ssize_t size);
static PLyUnicode_FromStringAndSize_t PLyUnicode_FromStringAndSize_p;
+/* Static asserts verify that typedefs above match original declarations */
+StaticAssertVariableIsOfType(&PLyObject_AsString, PLyObject_AsString_t);
+StaticAssertVariableIsOfType(&PLyUnicode_FromStringAndSize, PLyUnicode_FromStringAndSize_t);
+StaticAssertVariableIsOfType(&PLy_elog_impl, PLy_elog_impl_t);
+
+
/*
* Module initialize function: fetch function pointers for cross-module calls.
*/
void
_PG_init(void)
{
- /* Asserts verify that typedefs above match original declarations */
- AssertVariableIsOfType(&PLyObject_AsString, PLyObject_AsString_t);
PLyObject_AsString_p = (PLyObject_AsString_t)
load_external_function("$libdir/" PLPYTHON_LIBNAME, "PLyObject_AsString",
true, NULL);
- AssertVariableIsOfType(&PLyUnicode_FromStringAndSize, PLyUnicode_FromStringAndSize_t);
PLyUnicode_FromStringAndSize_p = (PLyUnicode_FromStringAndSize_t)
load_external_function("$libdir/" PLPYTHON_LIBNAME, "PLyUnicode_FromStringAndSize",
true, NULL);
- AssertVariableIsOfType(&PLy_elog_impl, PLy_elog_impl_t);
PLy_elog_impl_p = (PLy_elog_impl_t)
load_external_function("$libdir/" PLPYTHON_LIBNAME, "PLy_elog_impl",
true, NULL);
diff --git a/contrib/ltree/crc32.c b/contrib/ltree/crc32.c
index 3918d4a0ec2..d21bed31fdd 100644
--- a/contrib/ltree/crc32.c
+++ b/contrib/ltree/crc32.c
@@ -23,6 +23,7 @@ ltree_crc32_sz(const char *buf, int size)
{
pg_crc32 crc;
const char *p = buf;
+ const char *end = buf + size;
static pg_locale_t locale = NULL;
if (!locale)
@@ -32,7 +33,7 @@ ltree_crc32_sz(const char *buf, int size)
while (size > 0)
{
char foldstr[UNICODE_CASEMAP_BUFSZ];
- int srclen = pg_mblen(p);
+ int srclen = pg_mblen_range(p, end);
size_t foldlen;
/* fold one codepoint at a time */
diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c
index a28ddbf40de..0adcdd8ff2a 100644
--- a/contrib/ltree/lquery_op.c
+++ b/contrib/ltree/lquery_op.c
@@ -27,14 +27,14 @@ getlexeme(char *start, char *end, int *len)
char *ptr;
while (start < end && t_iseq(start, '_'))
- start += pg_mblen(start);
+ start += pg_mblen_range(start, end);
ptr = start;
if (ptr >= end)
return NULL;
while (ptr < end && !t_iseq(ptr, '_'))
- ptr += pg_mblen(ptr);
+ ptr += pg_mblen_range(ptr, end);
*len = ptr - start;
return start;
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 78478dec173..b0ded40eba9 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -127,7 +127,7 @@ typedef struct
#define LQUERY_HASNOT 0x01
/* valid label chars are alphanumerics, underscores and hyphens */
-#define ISLABEL(x) ( t_isalnum(x) || t_iseq(x, '_') || t_iseq(x, '-') )
+#define ISLABEL(x) ( t_isalnum_cstr(x) || t_iseq(x, '_') || t_iseq(x, '-') )
/* full text query */
diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index 59c4462df80..54c4ca3c5c3 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -54,7 +54,7 @@ parse_ltree(const char *buf, struct Node *escontext)
ptr = buf;
while (*ptr)
{
- charlen = pg_mblen(ptr);
+ charlen = pg_mblen_cstr(ptr);
if (t_iseq(ptr, '.'))
num++;
ptr += charlen;
@@ -69,7 +69,7 @@ parse_ltree(const char *buf, struct Node *escontext)
ptr = buf;
while (*ptr)
{
- charlen = pg_mblen(ptr);
+ charlen = pg_mblen_cstr(ptr);
switch (state)
{
@@ -291,7 +291,7 @@ parse_lquery(const char *buf, struct Node *escontext)
ptr = buf;
while (*ptr)
{
- charlen = pg_mblen(ptr);
+ charlen = pg_mblen_cstr(ptr);
if (t_iseq(ptr, '.'))
num++;
@@ -311,7 +311,7 @@ parse_lquery(const char *buf, struct Node *escontext)
ptr = buf;
while (*ptr)
{
- charlen = pg_mblen(ptr);
+ charlen = pg_mblen_cstr(ptr);
switch (state)
{
diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c
index 91a2222eaa9..f4296880c03 100644
--- a/contrib/ltree/ltxtquery_io.c
+++ b/contrib/ltree/ltxtquery_io.c
@@ -64,7 +64,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
for (;;)
{
- charlen = pg_mblen(state->buf);
+ charlen = pg_mblen_cstr(state->buf);
switch (state->state)
{
@@ -277,7 +277,7 @@ makepol(QPRS_STATE *state)
case ERR:
if (SOFT_ERROR_OCCURRED(state->escontext))
return ERR;
- /* fall through */
+ pg_fallthrough;
default:
ereturn(state->escontext, ERR,
(errcode(ERRCODE_SYNTAX_ERROR),
diff --git a/contrib/ltree_plpython/ltree_plpython.c b/contrib/ltree_plpython/ltree_plpython.c
index 0493aeb2423..d4e7b613fa1 100644
--- a/contrib/ltree_plpython/ltree_plpython.c
+++ b/contrib/ltree_plpython/ltree_plpython.c
@@ -13,6 +13,9 @@ PG_MODULE_MAGIC_EXT(
typedef PyObject *(*PLyUnicode_FromStringAndSize_t) (const char *s, Py_ssize_t size);
static PLyUnicode_FromStringAndSize_t PLyUnicode_FromStringAndSize_p;
+/* Static asserts verify that typedefs above match original declarations */
+StaticAssertVariableIsOfType(&PLyUnicode_FromStringAndSize, PLyUnicode_FromStringAndSize_t);
+
/*
* Module initialize function: fetch function pointers for cross-module calls.
@@ -20,8 +23,6 @@ static PLyUnicode_FromStringAndSize_t PLyUnicode_FromStringAndSize_p;
void
_PG_init(void)
{
- /* Asserts verify that typedefs above match original declarations */
- AssertVariableIsOfType(&PLyUnicode_FromStringAndSize, PLyUnicode_FromStringAndSize_t);
PLyUnicode_FromStringAndSize_p = (PLyUnicode_FromStringAndSize_t)
load_external_function("$libdir/" PLPYTHON_LIBNAME, "PLyUnicode_FromStringAndSize",
true, NULL);
diff --git a/contrib/meson.build b/contrib/meson.build
index def13257cbe..5a752eac347 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -48,6 +48,7 @@ subdir('pgcrypto')
subdir('pg_freespacemap')
subdir('pg_logicalinspect')
subdir('pg_overexplain')
+subdir('pg_plan_advice')
subdir('pg_prewarm')
subdir('pgrowlocks')
subdir('pg_stat_statements')
diff --git a/contrib/oid2name/oid2name.c b/contrib/oid2name/oid2name.c
index 51802907138..63e6ce2dae8 100644
--- a/contrib/oid2name/oid2name.c
+++ b/contrib/oid2name/oid2name.c
@@ -469,7 +469,7 @@ void
sql_exec_dumpalltables(PGconn *conn, struct options *opts)
{
char todo[1024];
- char *addfields = ",c.oid AS \"Oid\", nspname AS \"Schema\", spcname as \"Tablespace\" ";
+ char *addfields = ",c.oid AS \"Oid\", nspname AS \"Schema\", spcname as \"Tablespace\", pg_relation_filepath(c.oid) as \"Path\" ";
snprintf(todo, sizeof(todo),
"SELECT pg_catalog.pg_relation_filenode(c.oid) as \"Filenode\", relname as \"Table Name\" %s "
@@ -507,7 +507,7 @@ sql_exec_searchtables(PGconn *conn, struct options *opts)
*comma_filenumbers,
*comma_tables;
bool written = false;
- char *addfields = ",c.oid AS \"Oid\", nspname AS \"Schema\", spcname as \"Tablespace\" ";
+ char *addfields = ",c.oid AS \"Oid\", nspname AS \"Schema\", spcname as \"Tablespace\", pg_relation_filepath(c.oid) as \"Path\" ";
/* get tables qualifiers, whether names, filenumbers, or OIDs */
comma_oids = get_comma_elts(opts->oids);
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index 8277fa256c3..8e31632ce0e 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -101,7 +101,7 @@ text_to_bits(char *str, int len)
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid character \"%.*s\" in t_bits string",
- pg_mblen(str + off), str + off)));
+ pg_mblen_cstr(str + off), str + off)));
if (off % 8 == 7)
bits[off / 8] = byte;
@@ -396,7 +396,7 @@ tuple_data_split_internal(Oid relid, char *tupdata,
errmsg("unexpected end of tuple data")));
if (attr->attlen == -1 && do_detoast)
- attr_data = pg_detoast_datum_copy((struct varlena *) (tupdata + off));
+ attr_data = pg_detoast_datum_copy((varlena *) (tupdata + off));
else
{
attr_data = (bytea *) palloc(len + VARHDRSZ);
diff --git a/contrib/pg_overexplain/expected/pg_overexplain.out b/contrib/pg_overexplain/expected/pg_overexplain.out
index 55d34666d87..f376d2e7996 100644
--- a/contrib/pg_overexplain/expected/pg_overexplain.out
+++ b/contrib/pg_overexplain/expected/pg_overexplain.out
@@ -104,6 +104,7 @@ $$);
Parallel Safe: true
Plan Node ID: 2
Append RTIs: 1
+ Child Append RTIs: none
-> Seq Scan on brassica vegetables_1
Disabled Nodes: 0
Parallel Safe: true
@@ -142,7 +143,7 @@ $$);
Relation Kind: relation
Relation Lock Mode: AccessShareLock
Unprunable RTIs: 1 3 4
-(53 rows)
+(54 rows)
-- Test a different output format.
SELECT explain_filter($$
@@ -197,6 +198,7 @@ $$);
none +
none +
1 +
+ none +
0 +
+
+
@@ -452,6 +454,8 @@ SELECT * FROM vegetables WHERE genus = 'daucus';
Seq Scan on daucus vegetables
Filter: (genus = 'daucus'::text)
Scan RTI: 2
+ Elided Node Type: Append
+ Elided Node RTIs: 1
RTI 1 (relation, inherited, in-from-clause):
Eref: vegetables (id, name, genus)
Relation: vegetables
@@ -465,7 +469,7 @@ SELECT * FROM vegetables WHERE genus = 'daucus';
Relation Kind: relation
Relation Lock Mode: AccessShareLock
Unprunable RTIs: 1 2
-(16 rows)
+(18 rows)
-- Also test a case that involves a write.
EXPLAIN (RANGE_TABLE, COSTS OFF)
@@ -489,3 +493,122 @@ INSERT INTO vegetables (name, genus) VALUES ('broccoflower', 'brassica');
Result RTIs: 1
(15 rows)
+-- should show "Subplan: sub"
+EXPLAIN (RANGE_TABLE, COSTS OFF)
+SELECT * FROM vegetables v,
+ (SELECT * FROM vegetables WHERE genus = 'daucus' OFFSET 0) sub;
+ QUERY PLAN
+----------------------------------------------
+ Nested Loop
+ -> Seq Scan on daucus vegetables
+ Filter: (genus = 'daucus'::text)
+ Scan RTI: 6
+ Elided Node Type: Append
+ Elided Node RTIs: 5
+ Elided Node Type: SubqueryScan
+ Elided Node RTIs: 2
+ -> Append
+ Append RTIs: 1
+ Child Append RTIs: none
+ -> Seq Scan on brassica v_1
+ Scan RTI: 3
+ -> Seq Scan on daucus v_2
+ Scan RTI: 4
+ RTI 1 (relation, inherited, in-from-clause):
+ Alias: v ()
+ Eref: v (id, name, genus)
+ Relation: vegetables
+ Relation Kind: partitioned_table
+ Relation Lock Mode: AccessShareLock
+ Permission Info Index: 1
+ RTI 2 (subquery, in-from-clause):
+ Alias: sub ()
+ Eref: sub (id, name, genus)
+ RTI 3 (relation, in-from-clause):
+ Alias: v (id, name, genus)
+ Eref: v (id, name, genus)
+ Relation: brassica
+ Relation Kind: relation
+ Relation Lock Mode: AccessShareLock
+ RTI 4 (relation, in-from-clause):
+ Alias: v (id, name, genus)
+ Eref: v (id, name, genus)
+ Relation: daucus
+ Relation Kind: relation
+ Relation Lock Mode: AccessShareLock
+ RTI 5 (relation, inherited, in-from-clause):
+ Subplan: sub
+ Eref: vegetables (id, name, genus)
+ Relation: vegetables
+ Relation Kind: partitioned_table
+ Relation Lock Mode: AccessShareLock
+ Permission Info Index: 2
+ RTI 6 (relation, in-from-clause):
+ Subplan: sub
+ Alias: vegetables (id, name, genus)
+ Eref: vegetables (id, name, genus)
+ Relation: daucus
+ Relation Kind: relation
+ Relation Lock Mode: AccessShareLock
+ Unprunable RTIs: 1 3 4 5 6
+(52 rows)
+
+-- should show "Subplan: unnamed_subquery"
+EXPLAIN (RANGE_TABLE, COSTS OFF)
+SELECT * FROM vegetables v,
+ (SELECT * FROM vegetables WHERE genus = 'daucus' OFFSET 0);
+ QUERY PLAN
+----------------------------------------------
+ Nested Loop
+ -> Seq Scan on daucus vegetables
+ Filter: (genus = 'daucus'::text)
+ Scan RTI: 6
+ Elided Node Type: Append
+ Elided Node RTIs: 5
+ Elided Node Type: SubqueryScan
+ Elided Node RTIs: 2
+ -> Append
+ Append RTIs: 1
+ Child Append RTIs: none
+ -> Seq Scan on brassica v_1
+ Scan RTI: 3
+ -> Seq Scan on daucus v_2
+ Scan RTI: 4
+ RTI 1 (relation, inherited, in-from-clause):
+ Alias: v ()
+ Eref: v (id, name, genus)
+ Relation: vegetables
+ Relation Kind: partitioned_table
+ Relation Lock Mode: AccessShareLock
+ Permission Info Index: 1
+ RTI 2 (subquery, in-from-clause):
+ Eref: unnamed_subquery (id, name, genus)
+ RTI 3 (relation, in-from-clause):
+ Alias: v (id, name, genus)
+ Eref: v (id, name, genus)
+ Relation: brassica
+ Relation Kind: relation
+ Relation Lock Mode: AccessShareLock
+ RTI 4 (relation, in-from-clause):
+ Alias: v (id, name, genus)
+ Eref: v (id, name, genus)
+ Relation: daucus
+ Relation Kind: relation
+ Relation Lock Mode: AccessShareLock
+ RTI 5 (relation, inherited, in-from-clause):
+ Subplan: unnamed_subquery
+ Eref: vegetables (id, name, genus)
+ Relation: vegetables
+ Relation Kind: partitioned_table
+ Relation Lock Mode: AccessShareLock
+ Permission Info Index: 2
+ RTI 6 (relation, in-from-clause):
+ Subplan: unnamed_subquery
+ Alias: vegetables (id, name, genus)
+ Eref: vegetables (id, name, genus)
+ Relation: daucus
+ Relation Kind: relation
+ Relation Lock Mode: AccessShareLock
+ Unprunable RTIs: 1 3 4 5 6
+(51 rows)
+
diff --git a/contrib/pg_overexplain/pg_overexplain.c b/contrib/pg_overexplain/pg_overexplain.c
index 316ffd1c87f..36e6aac0e2c 100644
--- a/contrib/pg_overexplain/pg_overexplain.c
+++ b/contrib/pg_overexplain/pg_overexplain.c
@@ -54,6 +54,8 @@ static void overexplain_alias(const char *qlabel, Alias *alias,
ExplainState *es);
static void overexplain_bitmapset(const char *qlabel, Bitmapset *bms,
ExplainState *es);
+static void overexplain_bitmapset_list(const char *qlabel, List *bms_list,
+ ExplainState *es);
static void overexplain_intlist(const char *qlabel, List *list,
ExplainState *es);
@@ -191,6 +193,8 @@ overexplain_per_node_hook(PlanState *planstate, List *ancestors,
*/
if (options->range_table)
{
+ bool opened_elided_nodes = false;
+
switch (nodeTag(plan))
{
case T_SeqScan:
@@ -230,11 +234,17 @@ overexplain_per_node_hook(PlanState *planstate, List *ancestors,
overexplain_bitmapset("Append RTIs",
((Append *) plan)->apprelids,
es);
+ overexplain_bitmapset_list("Child Append RTIs",
+ ((Append *) plan)->child_append_relid_sets,
+ es);
break;
case T_MergeAppend:
overexplain_bitmapset("Append RTIs",
((MergeAppend *) plan)->apprelids,
es);
+ overexplain_bitmapset_list("Child Append RTIs",
+ ((MergeAppend *) plan)->child_append_relid_sets,
+ es);
break;
case T_Result:
@@ -251,6 +261,43 @@ overexplain_per_node_hook(PlanState *planstate, List *ancestors,
default:
break;
}
+
+ foreach_node(ElidedNode, n, es->pstmt->elidedNodes)
+ {
+ char *elidednodetag;
+
+ if (n->plan_node_id != plan->plan_node_id)
+ continue;
+
+ if (!opened_elided_nodes)
+ {
+ ExplainOpenGroup("Elided Nodes", "Elided Nodes", false, es);
+ opened_elided_nodes = true;
+ }
+
+ switch (n->elided_type)
+ {
+ case T_Append:
+ elidednodetag = "Append";
+ break;
+ case T_MergeAppend:
+ elidednodetag = "MergeAppend";
+ break;
+ case T_SubqueryScan:
+ elidednodetag = "SubqueryScan";
+ break;
+ default:
+ elidednodetag = psprintf("%d", n->elided_type);
+ break;
+ }
+
+ ExplainOpenGroup("Elided Node", NULL, true, es);
+ ExplainPropertyText("Elided Node Type", elidednodetag, es);
+ overexplain_bitmapset("Elided Node RTIs", n->relids, es);
+ ExplainCloseGroup("Elided Node", NULL, true, es);
+ }
+ if (opened_elided_nodes)
+ ExplainCloseGroup("Elided Nodes", "Elided Nodes", false, es);
}
}
@@ -395,6 +442,8 @@ static void
overexplain_range_table(PlannedStmt *plannedstmt, ExplainState *es)
{
Index rti;
+ ListCell *lc_subrtinfo = list_head(plannedstmt->subrtinfos);
+ SubPlanRTInfo *rtinfo = NULL;
/* Open group, one entry per RangeTblEntry */
ExplainOpenGroup("Range Table", "Range Table", false, es);
@@ -405,6 +454,18 @@ overexplain_range_table(PlannedStmt *plannedstmt, ExplainState *es)
RangeTblEntry *rte = rt_fetch(rti, plannedstmt->rtable);
char *kind = NULL;
char *relkind;
+ SubPlanRTInfo *next_rtinfo;
+
+ /* Advance to next SubRTInfo, if it's time. */
+ if (lc_subrtinfo != NULL)
+ {
+ next_rtinfo = lfirst(lc_subrtinfo);
+ if (rti > next_rtinfo->rtoffset)
+ {
+ rtinfo = next_rtinfo;
+ lc_subrtinfo = lnext(plannedstmt->subrtinfos, lc_subrtinfo);
+ }
+ }
/* NULL entries are possible; skip them */
if (rte == NULL)
@@ -469,6 +530,28 @@ overexplain_range_table(PlannedStmt *plannedstmt, ExplainState *es)
ExplainPropertyBool("In From Clause", rte->inFromCl, es);
}
+ /*
+ * Indicate which subplan is the origin of which RTE. Note dummy
+ * subplans. Here again, we crunch more onto one line in text format.
+ */
+ if (rtinfo != NULL)
+ {
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ if (!rtinfo->dummy)
+ ExplainPropertyText("Subplan", rtinfo->plan_name, es);
+ else
+ ExplainPropertyText("Subplan",
+ psprintf("%s (dummy)",
+ rtinfo->plan_name), es);
+ }
+ else
+ {
+ ExplainPropertyText("Subplan", rtinfo->plan_name, es);
+ ExplainPropertyBool("Subplan Is Dummy", rtinfo->dummy, es);
+ }
+ }
+
/* rte->alias is optional; rte->eref is requested */
if (rte->alias != NULL)
overexplain_alias("Alias", rte->alias, es);
@@ -740,6 +823,54 @@ overexplain_bitmapset(const char *qlabel, Bitmapset *bms, ExplainState *es)
pfree(buf.data);
}
+/*
+ * Emit a text property describing the contents of a list of bitmapsets.
+ * If a bitmapset contains exactly 1 member, we just print an integer;
+ * otherwise, we surround the list of members by parentheses.
+ *
+ * If there are no bitmapsets in the list, we print the word "none".
+ */
+static void
+overexplain_bitmapset_list(const char *qlabel, List *bms_list,
+ ExplainState *es)
+{
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+
+ foreach_node(Bitmapset, bms, bms_list)
+ {
+ if (bms_membership(bms) == BMS_SINGLETON)
+ appendStringInfo(&buf, " %d", bms_singleton_member(bms));
+ else
+ {
+ int x = -1;
+ bool first = true;
+
+ appendStringInfoString(&buf, " (");
+ while ((x = bms_next_member(bms, x)) >= 0)
+ {
+ if (first)
+ first = false;
+ else
+ appendStringInfoChar(&buf, ' ');
+ appendStringInfo(&buf, "%d", x);
+ }
+ appendStringInfoChar(&buf, ')');
+ }
+ }
+
+ if (buf.len == 0)
+ {
+ ExplainPropertyText(qlabel, "none", es);
+ return;
+ }
+
+ Assert(buf.data[0] == ' ');
+ ExplainPropertyText(qlabel, buf.data + 1, es);
+ pfree(buf.data);
+}
+
/*
* Emit a text property describing the contents of a list of integers, OIDs,
* or XIDs -- either a space-separated list of integer members, or the word
diff --git a/contrib/pg_overexplain/sql/pg_overexplain.sql b/contrib/pg_overexplain/sql/pg_overexplain.sql
index 42e275ac2f9..34a957cbed3 100644
--- a/contrib/pg_overexplain/sql/pg_overexplain.sql
+++ b/contrib/pg_overexplain/sql/pg_overexplain.sql
@@ -110,3 +110,13 @@ SELECT * FROM vegetables WHERE genus = 'daucus';
-- Also test a case that involves a write.
EXPLAIN (RANGE_TABLE, COSTS OFF)
INSERT INTO vegetables (name, genus) VALUES ('broccoflower', 'brassica');
+
+-- should show "Subplan: sub"
+EXPLAIN (RANGE_TABLE, COSTS OFF)
+SELECT * FROM vegetables v,
+ (SELECT * FROM vegetables WHERE genus = 'daucus' OFFSET 0) sub;
+
+-- should show "Subplan: unnamed_subquery"
+EXPLAIN (RANGE_TABLE, COSTS OFF)
+SELECT * FROM vegetables v,
+ (SELECT * FROM vegetables WHERE genus = 'daucus' OFFSET 0);
diff --git a/contrib/pg_plan_advice/.gitignore b/contrib/pg_plan_advice/.gitignore
new file mode 100644
index 00000000000..19a14253019
--- /dev/null
+++ b/contrib/pg_plan_advice/.gitignore
@@ -0,0 +1,3 @@
+/pgpa_parser.h
+/pgpa_parser.c
+/pgpa_scanner.c
diff --git a/contrib/pg_plan_advice/Makefile b/contrib/pg_plan_advice/Makefile
new file mode 100644
index 00000000000..1d4c559aed8
--- /dev/null
+++ b/contrib/pg_plan_advice/Makefile
@@ -0,0 +1,50 @@
+# contrib/pg_plan_advice/Makefile
+
+MODULE_big = pg_plan_advice
+OBJS = \
+ $(WIN32RES) \
+ pg_plan_advice.o \
+ pgpa_ast.o \
+ pgpa_collector.o \
+ pgpa_identifier.o \
+ pgpa_join.o \
+ pgpa_output.o \
+ pgpa_parser.o \
+ pgpa_planner.o \
+ pgpa_scan.o \
+ pgpa_scanner.o \
+ pgpa_trove.o \
+ pgpa_walker.o
+
+EXTENSION = pg_plan_advice
+DATA = pg_plan_advice--1.0.sql
+PGFILEDESC = "pg_plan_advice - help the planner get the right plan"
+
+REGRESS = gather join_order join_strategy partitionwise scan
+TAP_TESTS = 1
+
+EXTRA_CLEAN = pgpa_parser.h pgpa_parser.c pgpa_scanner.c
+
+# required for 001_regress.pl
+REGRESS_SHLIB=$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)
+export REGRESS_SHLIB
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_plan_advice
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+# See notes in src/backend/parser/Makefile about the following two rules
+pgpa_parser.h: pgpa_parser.c
+ touch $@
+
+pgpa_parser.c: BISONFLAGS += -d
+
+# Force these dependencies to be known even without dependency info built:
+pgpa_parser.o pgpa_scanner.o: pgpa_parser.h
diff --git a/contrib/pg_plan_advice/README b/contrib/pg_plan_advice/README
new file mode 100644
index 00000000000..0b888fd82f2
--- /dev/null
+++ b/contrib/pg_plan_advice/README
@@ -0,0 +1,260 @@
+contrib/pg_plan_advice/README
+
+Plan Advice
+===========
+
+This module implements a mini-language for "plan advice" that allows for
+control of certain key planner decisions. Goals include (1) enforcing plan
+stability (my previous plan was good and I would like to keep getting a
+similar one) and (2) allowing users to experiment with plans other than
+the one preferred by the optimizer. Non-goals include (1) controlling
+every possible planner decision and (2) forcing consideration of plans
+that the optimizer rejects for reasons other than cost. (There is some
+room for bikeshedding about what exactly this non-goal means: what if
+we skip path generation entirely for a certain case on the theory that
+we know it cannot win on cost? Does that count as a cost-based rejection
+even though no cost was ever computed?)
+
+Generally, plan advice is a series of whitespace-separated advice items,
+each of which applies an advice tag to a list of advice targets. For
+example, "SEQ_SCAN(foo) HASH_JOIN(bar@ss)" contains two items of advice,
+the first of which applies the SEQ_SCAN tag to "foo" and the second of
+which applies the HASH_JOIN tag to "bar@ss". In this simple example, each
+target identifies a single relation; see "Relation Identifiers", below.
+Advice tags can also be applied to groups of relations; for example,
+"HASH_JOIN(baz (bletch quux))" applies the HASH_JOIN tag to the single
+relation identifier "baz" as well as to the 2-item list containing
+"bletch" and "quux".
+
+Critically, this module knows both how to generate plan advice from an
+already-existing plan, and also how to enforce it during future planning
+cycles. Everything it does is intended to be "round-trip safe": if you
+generate advice from a plan and then feed that back into a future planning
+cycle, each piece of advice should be guaranteed to apply to exactly the
+same part of the query from which it was generated, without ambiguity or
+guesswork, and it should successfully enforce the same planning decision that
+led to it being generated in the first place. Note that there is no
+intention that these guarantees hold in the presence of intervening DDL;
+e.g. if you change the properties of a function so that a subquery is no
+longer inlined, or if you drop an index named in the plan advice, the advice
+isn't going to work any more. That's expected.
+
+This module aims to force the planner to follow any provided advice without
+regard to whether it appears to be good advice or bad advice. If the
+user provides bad advice, whether derived from a previously-generated plan
+or manually written, they may get a bad plan. We regard this as user error,
+not a defect in this module. It seems likely that applying advice
+judiciously and only when truly required to avoid problems will be a more
+successful strategy than applying it with a broad brush, but users are free
+to experiment with whatever strategies they think best.
+
+Relation Identifiers
+====================
+
+Uniquely identifying the part of a query to which a certain piece of
+advice applies is harder than it sounds. Our basic approach is to use
+relation aliases as a starting point, and then disambiguate. There are
+three ways that the same relation alias can occur multiple times:
+
+1. It can appear in more than one subquery.
+
+2. It can appear more than once in the same subquery,
+ e.g. (foo JOIN bar) x JOIN foo.
+
+3. The table can be partitioned.
+
+Any combination of these things can occur simultaneously. Therefore, our
+general syntax for a relation identifier is:
+
+alias_name#occurrence_number/partition_schema.partition_name@plan_name
+
+All components except for the alias_name are optional and included only
+when required. When a component is omitted, the associated punctuation
+must also be omitted. Occurrence numbers are counted ignoring children of
+partitioned tables. When the generated occurrence number is 1, we omit
+the occurrence number. The partition schema and partition name are included
+only for children of partitioned tables. In generated advice, the
+partition_schema is always included whenever there is a partition_name,
+but user-written advice may mention the name and omit the schema. The
+plan_name is omitted for the top-level PlannerInfo.
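+
+For illustration (all of these names are hypothetical), "foo" identifies
+the first occurrence of the alias foo in the top-level query; "foo#2" its
+second occurrence; "foo/public.foo_p1" the partition public.foo_p1 of a
+partitioned table with alias foo; and "foo@sub" the occurrence of foo
+within the subplan named "sub".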
+
+Scan Advice
+===========
+
+For many types of scan, no advice is generated or possible; for instance,
+a subquery is always scanned using a subquery scan. While that scan may be
+elided via setrefs processing, this doesn't change the fact that only one
+basic approach exists. Hence, scan advice applies mostly to relations, which
+can be scanned in multiple ways.
+
+We tend to think of a scan as targeting a single relation, and that's
+normally the case, but it doesn't have to be. For instance, if a join is
+proven empty, the whole thing may be replaced with a single Result node
+which, in effect, is a degenerate scan of every relation in the collapsed
+portion of the join tree. Similarly, it's possible to inject a custom scan
+in such a way that it replaces an entire join. If we ever emit advice
+for these cases, it would target sets of relation identifiers surrounded
+by parentheses, e.g. SOME_SORT_OF_SCAN(foo (bar baz)) would mean that the
+the given scan type would be used for foo as a single relation and also the
+combination of bar and baz as a join product. We have no such cases at
+present.
+
+For index and index-only scans, both the relation being scanned and the
+index or indexes being used must be specified. For example, INDEX_SCAN(foo
+foo_a_idx bar bar_b_idx) indicates that an index scan (not an index-only
+scan) should be used on foo_a_idx when scanning foo, and that an index scan
+should be used on bar_b_idx when scanning bar.
+
+Bitmap heap scans currently do not allow for an index specification:
+BITMAP_HEAP_SCAN(foo bar) simply means that each of foo and bar should use
+some sort of bitmap heap scan.
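+
+As a usage sketch (the relation and index names are hypothetical; the LOAD
+command, the GUC, and the EXPLAIN option are the ones exercised by this
+module's regression tests):
+
+    LOAD 'pg_plan_advice';
+    SET pg_plan_advice.advice = 'SEQ_SCAN(foo) INDEX_SCAN(bar bar_b_idx)';
+    EXPLAIN (COSTS OFF, PLAN_ADVICE)
+        SELECT * FROM foo, bar WHERE foo.a = bar.b;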
+
+Join Order Advice
+=================
+
+The JOIN_ORDER tag specifies the order in which several tables that are
+part of the same join problem should be joined. Each subquery (except for
+those that are inlined) is a separate join problem. Within a subquery,
+partitionwise joins can create additional, separate join problems. Hence,
+queries involving partitionwise joins may use JOIN_ORDER() many times.
+
+We take the canonical join structure to be an outer-deep tree, so
+JOIN_ORDER(t1 t2 t3) says that t1 is the driving table and should be joined
+first to t2 and then to t3. If the join problem involves additional tables,
+they can be joined in any order after the join between t1, t2, and t3 has
+been constructed. Generated join advice always mentions all tables
+in the join problem, but manually written join advice need not do so.
+
+For trees which are not outer-deep, parentheses can be used. For example,
+JOIN_ORDER(t1 (t2 t3)) says that the top-level join should have t1 on the
+outer side and a join between t2 and t3 on the inner side. That join should
+be constructed so that t2 is on the outer side and t3 is on the inner side.
+
+In some cases, it's not possible to fully specify the join order in this way.
+For example, if t2 and t3 are being scanned by a single custom scan or foreign
+scan, or if a partitionwise join is being performed between those tables, then
+it's impossible to say that t2 is the outer table and t3 is the inner table,
+or the other way around; it's just undefined. In such cases, we generate
+join advice that uses curly braces, intending to indicate a lack of ordering:
+JOIN_ORDER(t1 {t2 t3}) says that the uppermost join should have t1 on the outer
+side and some kind of join between t2 and t3 on the inner side, but without
+saying how that join must be performed or anything about which relation should
+appear on which side of the join, or even whether this kind of join has sides.
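+
+For example, to force the tree shape described above (table names
+hypothetical, and assuming the module has already been loaded):
+
+    SET pg_plan_advice.advice = 'JOIN_ORDER(t1 (t2 t3))';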
+
+Join Strategy Advice
+====================
+
+Tags such as NESTED_LOOP_PLAIN specify the method that should be used to
+perform a certain join. More specifically, NESTED_LOOP_PLAIN(x (y z)) says
+that the plan should put the relation whose identifier is "x" on the inner
+side of a plain nested loop (one without materialization or memoization)
+and that it should also put a join between the relation whose identifier is
+"y" and the relation whose identifier is "z" on the inner side of a nested
+loop. Hence, for an N-table join problem, there will be N-1 pieces of join
+strategy advice; no join strategy advice is required for the outermost
+table in the join problem.
+
+Considering that we have both join order advice and join strategy advice,
+it might seem natural to say that NESTED_LOOP_PLAIN(x) should be redefined
+to mean that x should appear by itself on one side or the other of a nested
+loop, rather than specifically on the inner side, but this definition appears
+useless in practice. It gives the planner too much freedom to do things that
+bear little resemblance to what the user probably had in mind. This makes
+only a limited amount of practical difference in the case of a merge join or
+unparameterized nested loop, but for a parameterized nested loop or a hash
+join, the two sides are treated very differently and saying that a certain
+relation should be involved in one of those operations without saying which
+role it should take isn't saying much.
+
+This choice of definition implies that join strategy advice also imposes some
+join order constraints. For example, given a join between foo and bar,
+HASH_JOIN(bar) implies that foo is the driving table. Otherwise, it would
+be impossible to put bar beneath the inner side of a Hash Join.
+
+Note that, given this definition, it's reasonable to consider deleting the
+join order advice but applying the join strategy advice. For example,
+consider a star schema with tables fact, dim1, dim2, dim3, dim4, and dim5.
+The automatically generated advice might specify JOIN_ORDER(fact dim1 dim3
+dim4 dim2 dim5) HASH_JOIN(dim2 dim4) NESTED_LOOP_PLAIN(dim1 dim3 dim5).
+Deleting the JOIN_ORDER advice allows the planner to reorder the joins
+however it likes while still forcing the same choice of join method. This
+seems potentially useful, and is one reason why a unified syntax that controls
+both join order and join method in a single locution was not chosen.
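+
+Continuing the star-schema sketch above, deleting the JOIN_ORDER item but
+keeping the join strategy items would look like this:
+
+    SET pg_plan_advice.advice =
+        'HASH_JOIN(dim2 dim4) NESTED_LOOP_PLAIN(dim1 dim3 dim5)';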
+
+Advice Completeness
+===================
+
+An essential guiding principle is that no inference may be made on the basis
+of the absence of advice. The user is entitled to remove any portion of the
+generated advice which they deem unsuitable or counterproductive and the
+result should only be to increase the flexibility afforded to the planner.
+This means that if advice can say that a certain optimization or technique
+should be used, it should also be able to say that the optimization or
+technique should not be used. We should never assume that the absence of an
+instruction to do a certain thing means that it should not be done; all
+instructions must be explicit.
+
+Semijoin Uniqueness
+===================
+
+Faced with a semijoin, the planner considers both a direct implementation
+and a plan where one side is made unique and then an inner join is
+performed. We emit SEMIJOIN_UNIQUE() advice when this transformation occurs
+and SEMIJOIN_NON_UNIQUE() advice when it doesn't. These items work like
+join strategy advice: the inner side of the relevant join is named, and the
+chosen join order must be compatible with the advice having some effect.
+
+Partitionwise
+=============
+
+PARTITIONWISE() advice can be used to specify both those partitionwise joins
+which should be performed and those which should not be performed; the idea
+is that each argument to PARTITIONWISE() specifies a set of relations that
+should be joined to each other partitionwise, and to nothing else.
+Hence, for example, PARTITIONWISE((t1 t2) t3) specifies that the
+query should contain a partitionwise join between t1 and t2 and that t3
+should not be part of any partitionwise join. If there are no other rels
+in the query, specifying just PARTITIONWISE((t1 t2)) would have the same
+effect, since there would be no other rels to which t3 could be joined in
+a partitionwise fashion.
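+
+For example (again a sketch with hypothetical table names):
+
+    SET pg_plan_advice.advice = 'PARTITIONWISE((t1 t2) t3)';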
+
+Parallel Query (Gather, etc.)
+=============================
+
+Each argument to GATHER() or GATHER_MERGE() is a single relation or an
+exact set of relations on top of which a Gather or Gather Merge node,
+respectively, should be placed. Each argument to NO_GATHER() is a single
+relation that should not appear beneath any Gather or Gather Merge node;
+that is, parallelism should not be used.
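+
+For example (mirroring this module's gather regression test), the following
+forces a Gather Merge over the scan of f while keeping d entirely serial:
+
+    SET pg_plan_advice.advice = 'gather_merge(f) no_gather(d)';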
+
+Implicit Join Order Constraints
+===============================
+
+When JOIN_ORDER() advice is not provided for a particular join problem,
+other pieces of advice may still incidentally constrain the join order.
+For example, a user who specifies HASH_JOIN((foo bar)) is explicitly saying
+that there should be a hash join with exactly foo and bar on the inner
+side of it, but that also implies that foo and bar must be joined to
+each other before either of them is joined to anything else. Otherwise,
+the join the user is attempting to constrain won't actually occur in the
+query, which ends up looking like the system has just decided to ignore
+the advice altogether.
+
+Future Work
+===========
+
+We don't handle choice of aggregation: it would be nice to be able to force
+sorted or hashed aggregation. I'm guessing this can be left to future work.
+
+More seriously, we don't know anything about eager aggregation, which could
+have a large impact on the shape of the plan tree. XXX: This needs some study
+to determine how large a problem it is, and might need to be fixed sooner
+rather than later.
+
+We don't offer any control over estimates, only outcomes. It seems like a
+good idea to incorporate that ability at some future point, as pg_hint_plan
+does. However, since the primary goal of the initial development work is to be
+able to induce the planner to recreate a desired plan that worked well in
+the past, this has not been included in the initial development effort.
+
+XXX Need to investigate whether and how well supplying advice works with GEQO
diff --git a/contrib/pg_plan_advice/expected/gather.out b/contrib/pg_plan_advice/expected/gather.out
new file mode 100644
index 00000000000..0cc0dedf859
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/gather.out
@@ -0,0 +1,371 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 1;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+SET min_parallel_table_scan_size = 0;
+SET debug_parallel_query = off;
+CREATE TABLE gt_dim (id serial primary key, dim text)
+ WITH (autovacuum_enabled = false);
+INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE gt_dim;
+CREATE TABLE gt_fact (
+ id int not null,
+ dim_id integer not null references gt_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO gt_fact
+ SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE gt_fact;
+-- By default, we expect Gather Merge with a parallel hash join.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+-------------------------------------------------------
+ Gather Merge
+ Workers Planned: 1
+ -> Sort
+ Sort Key: f.dim_id
+ -> Parallel Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Parallel Seq Scan on gt_fact f
+ -> Parallel Hash
+ -> Parallel Seq Scan on gt_dim d
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ HASH_JOIN(d)
+ SEQ_SCAN(f d)
+ GATHER_MERGE((f d))
+(14 rows)
+
+-- Force Gather or Gather Merge of both relations together.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+-------------------------------------------------------
+ Gather Merge
+ Workers Planned: 1
+ -> Sort
+ Sort Key: f.dim_id
+ -> Parallel Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Parallel Seq Scan on gt_fact f
+ -> Parallel Hash
+ -> Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+ GATHER_MERGE((f d)) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ HASH_JOIN(d)
+ SEQ_SCAN(f d)
+ GATHER_MERGE((f d))
+(16 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+-------------------------------------------------------
+ Sort
+ Sort Key: f.dim_id
+ -> Gather
+ Workers Planned: 1
+ -> Parallel Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Parallel Seq Scan on gt_fact f
+ -> Parallel Hash
+ -> Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+ GATHER((f d)) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ HASH_JOIN(d)
+ SEQ_SCAN(f d)
+ GATHER((f d))
+(16 rows)
+
+COMMIT;
+-- Force a separate Gather or Gather Merge operation for each relation.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+--------------------------------------------------
+ Merge Join
+ Merge Cond: (f.dim_id = d.id)
+ -> Gather Merge
+ Workers Planned: 1
+ -> Sort
+ Sort Key: f.dim_id
+ -> Parallel Seq Scan on gt_fact f
+ -> Gather Merge
+ Workers Planned: 1
+ -> Sort
+ Sort Key: d.id
+ -> Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+ GATHER_MERGE(f) /* matched */
+ GATHER_MERGE(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ MERGE_JOIN_PLAIN(d)
+ SEQ_SCAN(f d)
+ GATHER_MERGE(f d)
+(20 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+--------------------------------------------------
+ Merge Join
+ Merge Cond: (f.dim_id = d.id)
+ -> Sort
+ Sort Key: f.dim_id
+ -> Gather
+ Workers Planned: 1
+ -> Parallel Seq Scan on gt_fact f
+ -> Sort
+ Sort Key: d.id
+ -> Gather
+ Workers Planned: 1
+ -> Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+ GATHER(f) /* matched */
+ GATHER(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ MERGE_JOIN_PLAIN(d)
+ SEQ_SCAN(f d)
+ GATHER(f d)
+(20 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather((d d/d.d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+--------------------------------------------------
+ Merge Join
+ Merge Cond: (f.dim_id = d.id)
+ -> Gather Merge
+ Workers Planned: 1
+ -> Sort
+ Sort Key: f.dim_id
+ -> Parallel Seq Scan on gt_fact f
+ -> Index Scan using gt_dim_pkey on gt_dim d
+ Supplied Plan Advice:
+ GATHER((d d/d.d)) /* partially matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ MERGE_JOIN_PLAIN(d)
+ SEQ_SCAN(f)
+ INDEX_SCAN(d public.gt_dim_pkey)
+ GATHER_MERGE(f)
+ NO_GATHER(d)
+(17 rows)
+
+COMMIT;
+-- Force a Gather or Gather Merge on one relation but no parallelism on other.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+--------------------------------------------------
+ Merge Join
+ Merge Cond: (f.dim_id = d.id)
+ -> Gather Merge
+ Workers Planned: 1
+ -> Sort
+ Sort Key: f.dim_id
+ -> Parallel Seq Scan on gt_fact f
+ -> Index Scan using gt_dim_pkey on gt_dim d
+ Supplied Plan Advice:
+ GATHER_MERGE(f) /* matched */
+ NO_GATHER(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ MERGE_JOIN_PLAIN(d)
+ SEQ_SCAN(f)
+ INDEX_SCAN(d public.gt_dim_pkey)
+ GATHER_MERGE(f)
+ NO_GATHER(d)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+-------------------------------------------------
+ Merge Join
+ Merge Cond: (f.dim_id = d.id)
+ -> Sort
+ Sort Key: f.dim_id
+ -> Seq Scan on gt_fact f
+ -> Gather Merge
+ Workers Planned: 1
+ -> Sort
+ Sort Key: d.id
+ -> Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+ GATHER_MERGE(d) /* matched */
+ NO_GATHER(f) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ MERGE_JOIN_PLAIN(d)
+ SEQ_SCAN(f d)
+ GATHER_MERGE(d)
+ NO_GATHER(f)
+(19 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+--------------------------------------------------
+ Merge Join
+ Merge Cond: (d.id = f.dim_id)
+ -> Index Scan using gt_dim_pkey on gt_dim d
+ -> Sort
+ Sort Key: f.dim_id
+ -> Gather
+ Workers Planned: 1
+ -> Parallel Seq Scan on gt_fact f
+ Supplied Plan Advice:
+ GATHER(f) /* matched */
+ NO_GATHER(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d f)
+ MERGE_JOIN_PLAIN(f)
+ SEQ_SCAN(f)
+ INDEX_SCAN(d public.gt_dim_pkey)
+ GATHER(f)
+ NO_GATHER(d)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+-------------------------------------------------
+ Merge Join
+ Merge Cond: (f.dim_id = d.id)
+ -> Sort
+ Sort Key: f.dim_id
+ -> Seq Scan on gt_fact f
+ -> Sort
+ Sort Key: d.id
+ -> Gather
+ Workers Planned: 1
+ -> Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+ GATHER(d) /* matched */
+ NO_GATHER(f) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ MERGE_JOIN_PLAIN(d)
+ SEQ_SCAN(f d)
+ GATHER(d)
+ NO_GATHER(f)
+(19 rows)
+
+COMMIT;
+-- Force no Gather or Gather Merge use at all.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'no_gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+------------------------------------------------
+ Merge Join
+ Merge Cond: (d.id = f.dim_id)
+ -> Index Scan using gt_dim_pkey on gt_dim d
+ -> Sort
+ Sort Key: f.dim_id
+ -> Seq Scan on gt_fact f
+ Supplied Plan Advice:
+ NO_GATHER(f) /* matched */
+ NO_GATHER(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d f)
+ MERGE_JOIN_PLAIN(f)
+ SEQ_SCAN(f)
+ INDEX_SCAN(d public.gt_dim_pkey)
+ NO_GATHER(f d)
+(15 rows)
+
+COMMIT;
+-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+-------------------------------------------------
+ Gather
+ Disabled: true
+ Workers Planned: 1
+ -> Parallel Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Parallel Seq Scan on gt_fact f
+ -> Parallel Hash
+ -> Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+ GATHER_MERGE((f d)) /* matched, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ HASH_JOIN(d)
+ SEQ_SCAN(f d)
+ GATHER((f d))
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+-------------------------------------------------
+ Gather
+ Workers Planned: 1
+ -> Parallel Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Parallel Seq Scan on gt_fact f
+ -> Parallel Hash
+ -> Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+ GATHER((f d)) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ HASH_JOIN(d)
+ SEQ_SCAN(f d)
+ GATHER((f d))
+(14 rows)
+
+COMMIT;
+-- Test conflicting advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather((f d)) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+ QUERY PLAN
+-------------------------------------------------------
+ Gather Merge
+ Workers Planned: 1
+ -> Sort
+ Sort Key: f.dim_id
+ -> Parallel Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Parallel Seq Scan on gt_fact f
+ -> Parallel Hash
+ -> Parallel Seq Scan on gt_dim d
+ Supplied Plan Advice:
+ GATHER((f d)) /* matched, conflicting, failed */
+ NO_GATHER(f) /* matched, conflicting, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ HASH_JOIN(d)
+ SEQ_SCAN(f d)
+ GATHER_MERGE((f d))
+(17 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/join_order.out b/contrib/pg_plan_advice/expected/join_order.out
new file mode 100644
index 00000000000..db0dcef7012
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/join_order.out
@@ -0,0 +1,509 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int)
+ WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim1 (id, dim1, val1)
+ SELECT g, 'some filler text ' || g, (g % 3) + 1
+ FROM generate_series(1,100) g;
+VACUUM ANALYZE jo_dim1;
+CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int)
+ WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim2 (id, dim2, val2)
+ SELECT g, 'some filler text ' || g, (g % 7) + 1
+ FROM generate_series(1,1000) g;
+VACUUM ANALYZE jo_dim2;
+CREATE TABLE jo_fact (
+ id int primary key,
+ dim1_id integer not null references jo_dim1 (id),
+ dim2_id integer not null references jo_dim2 (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO jo_fact
+ SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE jo_fact;
+-- We expect to join to d2 first and then d1, since the condition on d2
+-- is more selective.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+------------------------------------------
+ Hash Join
+ Hash Cond: (f.dim1_id = d1.id)
+ -> Hash Join
+ Hash Cond: (f.dim2_id = d2.id)
+ -> Seq Scan on jo_fact f
+ -> Hash
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ Generated Plan Advice:
+ JOIN_ORDER(f d2 d1)
+ HASH_JOIN(d2 d1)
+ SEQ_SCAN(f d2 d1)
+ NO_GATHER(f d1 d2)
+(16 rows)
+
+-- Force a few different join orders. Some of these are very inefficient,
+-- but the planner considers them all viable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+------------------------------------------
+ Hash Join
+ Hash Cond: (f.dim2_id = d2.id)
+ -> Hash Join
+ Hash Cond: (f.dim1_id = d1.id)
+ -> Seq Scan on jo_fact f
+ -> Hash
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ -> Hash
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ Supplied Plan Advice:
+ JOIN_ORDER(f d1 d2) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d1 d2)
+ HASH_JOIN(d1 d2)
+ SEQ_SCAN(f d1 d2)
+ NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+------------------------------------------
+ Hash Join
+ Hash Cond: (f.dim1_id = d1.id)
+ -> Hash Join
+ Hash Cond: (f.dim2_id = d2.id)
+ -> Seq Scan on jo_fact f
+ -> Hash
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ Supplied Plan Advice:
+ JOIN_ORDER(f d2 d1) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d2 d1)
+ HASH_JOIN(d2 d1)
+ SEQ_SCAN(f d2 d1)
+ NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+-----------------------------------------
+ Hash Join
+ Hash Cond: (f.dim2_id = d2.id)
+ -> Hash Join
+ Hash Cond: (d1.id = f.dim1_id)
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ -> Hash
+ -> Seq Scan on jo_fact f
+ -> Hash
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ Supplied Plan Advice:
+ JOIN_ORDER(d1 f d2) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d1 f d2)
+ HASH_JOIN(f d2)
+ SEQ_SCAN(d1 f d2)
+ NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+-------------------------------------------------------------
+ Merge Join
+ Merge Cond: ((f.dim2_id = d2.id) AND (f.dim1_id = d1.id))
+ -> Sort
+ Sort Key: f.dim2_id, f.dim1_id
+ -> Seq Scan on jo_fact f
+ -> Sort
+ Sort Key: d2.id, d1.id
+ -> Nested Loop
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ -> Materialize
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ Supplied Plan Advice:
+ JOIN_ORDER(f (d1 d2)) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f (d1 d2))
+ MERGE_JOIN_PLAIN((d1 d2))
+ NESTED_LOOP_MATERIALIZE(d2)
+ SEQ_SCAN(f d1 d2)
+ NO_GATHER(f d1 d2)
+(21 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f {d1 d2})';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+-------------------------------------------------------------
+ Merge Join
+ Merge Cond: ((f.dim2_id = d2.id) AND (f.dim1_id = d1.id))
+ -> Sort
+ Sort Key: f.dim2_id, f.dim1_id
+ -> Seq Scan on jo_fact f
+ -> Sort
+ Sort Key: d2.id, d1.id
+ -> Nested Loop
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ -> Materialize
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ Supplied Plan Advice:
+ JOIN_ORDER(f {d1 d2}) /* matched, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(f (d2 d1))
+ MERGE_JOIN_PLAIN((d1 d2))
+ NESTED_LOOP_MATERIALIZE(d1)
+ SEQ_SCAN(f d2 d1)
+ NO_GATHER(f d1 d2)
+(21 rows)
+
+COMMIT;
+-- Force a join order by mentioning just a prefix of the join list.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+------------------------------------------------
+ Hash Join
+ Hash Cond: (d2.id = f.dim2_id)
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ -> Hash
+ -> Hash Join
+ Hash Cond: (f.dim1_id = d1.id)
+ -> Seq Scan on jo_fact f
+ -> Hash
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ Supplied Plan Advice:
+ JOIN_ORDER(d2) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d2 (f d1))
+ HASH_JOIN(d1 (f d1))
+ SEQ_SCAN(d2 f d1)
+ NO_GATHER(f d1 d2)
+(18 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+-------------------------------------------------------------
+ Merge Join
+ Merge Cond: ((d2.id = f.dim2_id) AND (d1.id = f.dim1_id))
+ -> Sort
+ Sort Key: d2.id, d1.id
+ -> Nested Loop
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ -> Materialize
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ -> Sort
+ Sort Key: f.dim2_id, f.dim1_id
+ -> Seq Scan on jo_fact f
+ Supplied Plan Advice:
+ JOIN_ORDER(d2 d1) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d2 d1 f)
+ MERGE_JOIN_PLAIN(f)
+ NESTED_LOOP_MATERIALIZE(d1)
+ SEQ_SCAN(d2 d1 f)
+ NO_GATHER(f d1 d2)
+(21 rows)
+
+COMMIT;
+-- jo_fact is not partitioned, but let's pretend that it is and verify
+-- that the advice does not apply.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+-------------------------------------------------------------
+ Nested Loop
+ Disabled: true
+ -> Nested Loop
+ Disabled: true
+ -> Seq Scan on jo_fact f
+ -> Index Scan using jo_dim1_pkey on jo_dim1 d1
+ Index Cond: (id = f.dim1_id)
+ Filter: (val1 = 1)
+ -> Index Scan using jo_dim2_pkey on jo_dim2 d2
+ Index Cond: (id = f.dim2_id)
+ Filter: (val2 = 1)
+ Supplied Plan Advice:
+ JOIN_ORDER(f/d1 d1 d2) /* partially matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d1 d2)
+ NESTED_LOOP_PLAIN(d1 d2)
+ SEQ_SCAN(f)
+ INDEX_SCAN(d1 public.jo_dim1_pkey d2 public.jo_dim2_pkey)
+ NO_GATHER(f d1 d2)
+(19 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 (d1 d2))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+--------------------------------------------------------------
+ Nested Loop
+ Disabled: true
+ Join Filter: ((d1.id = f.dim1_id) AND (d2.id = f.dim2_id))
+ -> Nested Loop
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ -> Materialize
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ -> Seq Scan on jo_fact f
+ Supplied Plan Advice:
+ JOIN_ORDER(f/d1 (d1 d2)) /* partially matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d1 d2 f)
+ NESTED_LOOP_PLAIN(f)
+ NESTED_LOOP_MATERIALIZE(d2)
+ SEQ_SCAN(d1 d2 f)
+ NO_GATHER(f d1 d2)
+(18 rows)
+
+COMMIT;
+-- The unusual formulation of this query is intended to prevent the query
+-- planner from reducing the FULL JOIN to some other join type, so that we
+-- can test what happens with a join type that cannot be reordered.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+ QUERY PLAN
+-------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+ -> Merge Full Join
+ Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+ -> Sort
+ Sort Key: ((d2.id + 0))
+ -> Seq Scan on jo_dim2 d2
+ -> Sort
+ Sort Key: ((f.dim2_id + 0))
+ -> Seq Scan on jo_fact f
+ -> Materialize
+ -> Seq Scan on jo_dim1 d1
+ Generated Plan Advice:
+ JOIN_ORDER(d2 f d1)
+ MERGE_JOIN_PLAIN(f)
+ NESTED_LOOP_MATERIALIZE(d1)
+ SEQ_SCAN(d2 f d1)
+ NO_GATHER(d1 f d2)
+(18 rows)
+
+-- We should not be able to force the planner to join f to d1 first, because
+-- that is not a valid join order, but we should be able to force the planner
+-- to make either d2 or f the driving table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+ QUERY PLAN
+-------------------------------------------------------------
+ Nested Loop
+ Disabled: true
+ Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+ -> Merge Full Join
+ Disabled: true
+ Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+ -> Sort
+ Sort Key: ((d2.id + 0))
+ -> Seq Scan on jo_dim2 d2
+ -> Sort
+ Sort Key: ((f.dim2_id + 0))
+ -> Seq Scan on jo_fact f
+ -> Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+ JOIN_ORDER(f d1 d2) /* partially matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d2 f d1)
+ MERGE_JOIN_PLAIN(f)
+ NESTED_LOOP_PLAIN(d1)
+ SEQ_SCAN(d2 f d1)
+ NO_GATHER(d1 f d2)
+(21 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+ QUERY PLAN
+-------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+ -> Merge Full Join
+ Merge Cond: (((f.dim2_id + 0)) = ((d2.id + 0)))
+ -> Sort
+ Sort Key: ((f.dim2_id + 0))
+ -> Seq Scan on jo_fact f
+ -> Sort
+ Sort Key: ((d2.id + 0))
+ -> Seq Scan on jo_dim2 d2
+ -> Materialize
+ -> Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+ JOIN_ORDER(f d2 d1) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d2 d1)
+ MERGE_JOIN_PLAIN(d2)
+ NESTED_LOOP_MATERIALIZE(d1)
+ SEQ_SCAN(f d2 d1)
+ NO_GATHER(d1 f d2)
+(20 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+ QUERY PLAN
+-------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+ -> Merge Full Join
+ Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+ -> Sort
+ Sort Key: ((d2.id + 0))
+ -> Seq Scan on jo_dim2 d2
+ -> Sort
+ Sort Key: ((f.dim2_id + 0))
+ -> Seq Scan on jo_fact f
+ -> Materialize
+ -> Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+ JOIN_ORDER(d2 f d1) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d2 f d1)
+ MERGE_JOIN_PLAIN(f)
+ NESTED_LOOP_MATERIALIZE(d1)
+ SEQ_SCAN(d2 f d1)
+ NO_GATHER(d1 f d2)
+(20 rows)
+
+COMMIT;
+-- Two incompatible join orders should conflict. In the second case,
+-- the conflict is implicit: if d1 is on the inner side of a join of any
+-- type, it cannot also be the driving table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f) join_order(d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+ QUERY PLAN
+-------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+ -> Merge Full Join
+ Merge Cond: (((f.dim2_id + 0)) = ((d2.id + 0)))
+ -> Sort
+ Sort Key: ((f.dim2_id + 0))
+ -> Seq Scan on jo_fact f
+ -> Sort
+ Sort Key: ((d2.id + 0))
+ -> Seq Scan on jo_dim2 d2
+ -> Materialize
+ -> Seq Scan on jo_dim1 d1
+ Supplied Plan Advice:
+ JOIN_ORDER(f) /* matched, conflicting */
+ JOIN_ORDER(d1) /* matched, conflicting, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(f d2 d1)
+ MERGE_JOIN_PLAIN(d2)
+ NESTED_LOOP_MATERIALIZE(d1)
+ SEQ_SCAN(f d2 d1)
+ NO_GATHER(d1 f d2)
+(21 rows)
+
+SET LOCAL pg_plan_advice.advice = 'join_order(d1) hash_join(d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+ QUERY PLAN
+---------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((d1.id = f.dim1_id) OR (f.dim1_id IS NULL))
+ -> Seq Scan on jo_dim1 d1
+ -> Materialize
+ -> Merge Full Join
+ Merge Cond: (((d2.id + 0)) = ((f.dim2_id + 0)))
+ -> Sort
+ Sort Key: ((d2.id + 0))
+ -> Seq Scan on jo_dim2 d2
+ -> Sort
+ Sort Key: ((f.dim2_id + 0))
+ -> Seq Scan on jo_fact f
+ Supplied Plan Advice:
+ JOIN_ORDER(d1) /* matched, conflicting */
+ HASH_JOIN(d1) /* matched, conflicting, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(d1 (d2 f))
+ MERGE_JOIN_PLAIN(f)
+ NESTED_LOOP_MATERIALIZE((f d2))
+ SEQ_SCAN(d1 d2 f)
+ NO_GATHER(d1 f d2)
+(21 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/join_strategy.out b/contrib/pg_plan_advice/expected/join_strategy.out
new file mode 100644
index 00000000000..0f9db692190
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/join_strategy.out
@@ -0,0 +1,339 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+CREATE TABLE join_dim (id serial primary key, dim text)
+ WITH (autovacuum_enabled = false);
+INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE join_dim;
+CREATE TABLE join_fact (
+ id int primary key,
+ dim_id integer not null references join_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO join_fact
+ SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+CREATE INDEX join_fact_dim_id ON join_fact (dim_id);
+VACUUM ANALYZE join_fact;
+-- We expect a hash join by default.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+------------------------------------
+ Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Seq Scan on join_fact f
+ -> Hash
+ -> Seq Scan on join_dim d
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ HASH_JOIN(d)
+ SEQ_SCAN(f d)
+ NO_GATHER(f d)
+(10 rows)
+
+-- Try forcing each join method in turn with join_dim as the inner table.
+-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will
+-- fail, because the planner knows that join_dim (id) is unique, and will
+-- refuse to add mark/restore overhead.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+------------------------------------
+ Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Seq Scan on join_fact f
+ -> Hash
+ -> Seq Scan on join_dim d
+ Supplied Plan Advice:
+ HASH_JOIN(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ HASH_JOIN(d)
+ SEQ_SCAN(f d)
+ NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+----------------------------------------------------------------
+ Merge Join
+ Disabled: true
+ Merge Cond: (f.dim_id = d.id)
+ -> Index Scan using join_fact_dim_id on join_fact f
+ -> Index Scan using join_dim_pkey on join_dim d
+ Supplied Plan Advice:
+ MERGE_JOIN_MATERIALIZE(d) /* matched, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ MERGE_JOIN_PLAIN(d)
+ INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey)
+ NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+----------------------------------------------------------------
+ Merge Join
+ Merge Cond: (f.dim_id = d.id)
+ -> Index Scan using join_fact_dim_id on join_fact f
+ -> Index Scan using join_dim_pkey on join_dim d
+ Supplied Plan Advice:
+ MERGE_JOIN_PLAIN(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ MERGE_JOIN_PLAIN(d)
+ INDEX_SCAN(f public.join_fact_dim_id d public.join_dim_pkey)
+ NO_GATHER(f d)
+(11 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+--------------------------------------------
+ Nested Loop
+ Join Filter: (f.dim_id = d.id)
+ -> Seq Scan on join_fact f
+ -> Materialize
+ -> Seq Scan on join_dim d
+ Supplied Plan Advice:
+ NESTED_LOOP_MATERIALIZE(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ NESTED_LOOP_MATERIALIZE(d)
+ SEQ_SCAN(f d)
+ NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+----------------------------------------------------------
+ Nested Loop
+ -> Seq Scan on join_fact f
+ -> Memoize
+ Cache Key: f.dim_id
+ Cache Mode: logical
+ -> Index Scan using join_dim_pkey on join_dim d
+ Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+ NESTED_LOOP_MEMOIZE(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ NESTED_LOOP_MEMOIZE(d)
+ SEQ_SCAN(f)
+ INDEX_SCAN(d public.join_dim_pkey)
+ NO_GATHER(f d)
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+----------------------------------------------------
+ Nested Loop
+ -> Seq Scan on join_fact f
+ -> Index Scan using join_dim_pkey on join_dim d
+ Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+ NESTED_LOOP_PLAIN(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ NESTED_LOOP_PLAIN(d)
+ SEQ_SCAN(f)
+ INDEX_SCAN(d public.join_dim_pkey)
+ NO_GATHER(f d)
+(12 rows)
+
+COMMIT;
+-- Now try forcing each join method in turn with join_fact as the inner
+-- table. All of these should work.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+-------------------------------------
+ Hash Join
+ Hash Cond: (d.id = f.dim_id)
+ -> Seq Scan on join_dim d
+ -> Hash
+ -> Seq Scan on join_fact f
+ Supplied Plan Advice:
+ HASH_JOIN(f) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d f)
+ HASH_JOIN(f)
+ SEQ_SCAN(d f)
+ NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+----------------------------------------------------------------
+ Merge Join
+ Merge Cond: (d.id = f.dim_id)
+ -> Index Scan using join_dim_pkey on join_dim d
+ -> Materialize
+ -> Index Scan using join_fact_dim_id on join_fact f
+ Supplied Plan Advice:
+ MERGE_JOIN_MATERIALIZE(f) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d f)
+ MERGE_JOIN_MATERIALIZE(f)
+ INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id)
+ NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+----------------------------------------------------------------
+ Merge Join
+ Merge Cond: (d.id = f.dim_id)
+ -> Index Scan using join_dim_pkey on join_dim d
+ -> Index Scan using join_fact_dim_id on join_fact f
+ Supplied Plan Advice:
+ MERGE_JOIN_PLAIN(f) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d f)
+ MERGE_JOIN_PLAIN(f)
+ INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id)
+ NO_GATHER(f d)
+(11 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+--------------------------------------------
+ Nested Loop
+ Join Filter: (f.dim_id = d.id)
+ -> Seq Scan on join_dim d
+ -> Materialize
+ -> Seq Scan on join_fact f
+ Supplied Plan Advice:
+ NESTED_LOOP_MATERIALIZE(f) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d f)
+ NESTED_LOOP_MATERIALIZE(f)
+ SEQ_SCAN(d f)
+ NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+--------------------------------------------------------------
+ Nested Loop
+ -> Seq Scan on join_dim d
+ -> Memoize
+ Cache Key: d.id
+ Cache Mode: logical
+ -> Index Scan using join_fact_dim_id on join_fact f
+ Index Cond: (dim_id = d.id)
+ Supplied Plan Advice:
+ NESTED_LOOP_MEMOIZE(f) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d f)
+ NESTED_LOOP_MEMOIZE(f)
+ SEQ_SCAN(d)
+ INDEX_SCAN(f public.join_fact_dim_id)
+ NO_GATHER(f d)
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+--------------------------------------------------------
+ Nested Loop
+ -> Seq Scan on join_dim d
+ -> Index Scan using join_fact_dim_id on join_fact f
+ Index Cond: (dim_id = d.id)
+ Supplied Plan Advice:
+ NESTED_LOOP_PLAIN(f) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(d f)
+ NESTED_LOOP_PLAIN(f)
+ SEQ_SCAN(d)
+ INDEX_SCAN(f public.join_fact_dim_id)
+ NO_GATHER(f d)
+(12 rows)
+
+COMMIT;
+-- Non-working cases. We can't force a foreign join between these tables,
+-- because they aren't foreign tables. We also can't use two different
+-- strategies on the same table, nor can we put both tables on the inner
+-- side of the same join.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+----------------------------------------------------
+ Nested Loop
+ Disabled: true
+ -> Seq Scan on join_fact f
+ -> Index Scan using join_dim_pkey on join_dim d
+ Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+ FOREIGN_JOIN((f d)) /* matched, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ NESTED_LOOP_PLAIN(d)
+ SEQ_SCAN(f)
+ INDEX_SCAN(d public.join_dim_pkey)
+ NO_GATHER(f d)
+(13 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f) NESTED_LOOP_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+-----------------------------------------------------------------
+ Merge Join
+ Merge Cond: (d.id = f.dim_id)
+ -> Index Scan using join_dim_pkey on join_dim d
+ -> Index Scan using join_fact_dim_id on join_fact f
+ Supplied Plan Advice:
+ NESTED_LOOP_PLAIN(f) /* matched, conflicting, failed */
+ NESTED_LOOP_MATERIALIZE(f) /* matched, conflicting, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(d f)
+ MERGE_JOIN_PLAIN(f)
+ INDEX_SCAN(d public.join_dim_pkey f public.join_fact_dim_id)
+ NO_GATHER(f d)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+----------------------------------------------------
+ Nested Loop
+ Disabled: true
+ -> Seq Scan on join_fact f
+ -> Index Scan using join_dim_pkey on join_dim d
+ Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+ NESTED_LOOP_PLAIN(f) /* matched, failed */
+ NESTED_LOOP_PLAIN(d) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ NESTED_LOOP_PLAIN(d)
+ SEQ_SCAN(f)
+ INDEX_SCAN(d public.join_dim_pkey)
+ NO_GATHER(f d)
+(14 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/local_collector.out b/contrib/pg_plan_advice/expected/local_collector.out
new file mode 100644
index 00000000000..30c07682cea
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/local_collector.out
@@ -0,0 +1,69 @@
+CREATE EXTENSION pg_plan_advice;
+SET debug_parallel_query = off;
+-- Try clearing advice before we've collected any.
+SELECT pg_clear_collected_local_advice();
+ pg_clear_collected_local_advice
+---------------------------------
+
+(1 row)
+
+-- Set a small advice collection limit so that we'll exceed it.
+SET pg_plan_advice.local_collection_limit = 2;
+-- Enable the collector.
+SET pg_plan_advice.local_collector = on;
+-- Set up a dummy table.
+CREATE TABLE dummy_table (a int primary key, b text)
+ WITH (autovacuum_enabled = false, parallel_workers = 0);
+-- Test queries.
+SELECT * FROM dummy_table a, dummy_table b;
+ a | b | a | b
+---+---+---+---
+(0 rows)
+
+SELECT * FROM dummy_table;
+ a | b
+---+---
+(0 rows)
+
+-- Should return the advice from the second test query.
+SET pg_plan_advice.local_collector = off;
+SELECT advice FROM pg_get_collected_local_advice() ORDER BY id DESC LIMIT 1;
+ advice
+------------------------
+ SEQ_SCAN(dummy_table) +
+ NO_GATHER(dummy_table)
+(1 row)
+
+-- Now try clearing advice again.
+SELECT pg_clear_collected_local_advice();
+ pg_clear_collected_local_advice
+---------------------------------
+
+(1 row)
+
+-- Raise the collection limit so that the collector uses multiple chunks.
+SET pg_plan_advice.local_collection_limit = 2000;
+SET pg_plan_advice.local_collector = on;
+-- Push a bunch of queries through the collector.
+DO $$
+BEGIN
+ FOR x IN 1..2000 LOOP
+ EXECUTE 'SELECT * FROM dummy_table';
+ END LOOP;
+END
+$$;
+-- Check that the collector worked.
+SELECT COUNT(*) FROM pg_get_collected_local_advice();
+ count
+-------
+ 2000
+(1 row)
+
+-- And clear one more time, to verify that this doesn't cause a problem
+-- even with a larger number of entries.
+SELECT pg_clear_collected_local_advice();
+ pg_clear_collected_local_advice
+---------------------------------
+
+(1 row)
+
diff --git a/contrib/pg_plan_advice/expected/partitionwise.out b/contrib/pg_plan_advice/expected/partitionwise.out
new file mode 100644
index 00000000000..2b3d0a82443
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/partitionwise.out
@@ -0,0 +1,426 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET enable_partitionwise_join = true;
+CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int)
+ PARTITION BY RANGE (id);
+CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001)
+ WITH (autovacuum_enabled = false);
+INSERT INTO pt1 (id, dim1, val1)
+ SELECT g, 'some filler text ' || g, (g % 3) + 1
+ FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt1;
+CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int)
+ PARTITION BY RANGE (id);
+CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001)
+ WITH (autovacuum_enabled = false);
+INSERT INTO pt2 (id, dim2, val2)
+ SELECT g, 'some other text ' || g, (g % 5) + 1
+ FROM generate_series(1,3000,2) g;
+VACUUM ANALYZE pt2;
+CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int)
+ PARTITION BY RANGE (id);
+CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001)
+ WITH (autovacuum_enabled = false);
+INSERT INTO pt3 (id, dim3, val3)
+ SELECT g, 'a third random text ' || g, (g % 7) + 1
+ FROM generate_series(1,3000,3) g;
+VACUUM ANALYZE pt3;
+CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int)
+ PARTITION BY RANGE (id);
+CREATE TABLE ptmismatcha PARTITION OF ptmismatch
+ FOR VALUES FROM (1) to (1501)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE ptmismatchb PARTITION OF ptmismatch
+ FOR VALUES FROM (1501) to (3001)
+ WITH (autovacuum_enabled = false);
+INSERT INTO ptmismatch (id, dimm, valm)
+ SELECT g, 'yet another text ' || g, (g % 2) + 1
+ FROM generate_series(1,3000) g;
+VACUUM ANALYZE ptmismatch;
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Append
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_1.id = pt3_1.id)
+ -> Seq Scan on pt2a pt2_1
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3a pt3_1
+ Filter: (val3 = 1)
+ -> Index Scan using pt1a_pkey on pt1a pt1_1
+ Index Cond: (id = pt2_1.id)
+ Filter: (val1 = 1)
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_2.id = pt3_2.id)
+ -> Seq Scan on pt2b pt2_2
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3b pt3_2
+ Filter: (val3 = 1)
+ -> Index Scan using pt1b_pkey on pt1b pt1_2
+ Index Cond: (id = pt2_2.id)
+ Filter: (val1 = 1)
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_3.id = pt3_3.id)
+ -> Seq Scan on pt2c pt2_3
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3c pt3_3
+ Filter: (val3 = 1)
+ -> Index Scan using pt1c_pkey on pt1c pt1_3
+ Index Cond: (id = pt2_3.id)
+ Filter: (val1 = 1)
+ Generated Plan Advice:
+ JOIN_ORDER(pt2/public.pt2a pt3/public.pt3a pt1/public.pt1a)
+ JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b)
+ JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c)
+ NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c)
+ HASH_JOIN(pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+ SEQ_SCAN(pt2/public.pt2a pt3/public.pt3a pt2/public.pt2b pt3/public.pt3b
+ pt2/public.pt2c pt3/public.pt3c)
+ INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey
+ pt1/public.pt1c public.pt1c_pkey)
+ PARTITIONWISE((pt1 pt2 pt3))
+ NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+ pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(47 rows)
+
+-- Suppress partitionwise join, or do it just partially.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2.id = pt3.id)
+ -> Append
+ -> Seq Scan on pt2a pt2_1
+ Filter: (val2 = 1)
+ -> Seq Scan on pt2b pt2_2
+ Filter: (val2 = 1)
+ -> Seq Scan on pt2c pt2_3
+ Filter: (val2 = 1)
+ -> Hash
+ -> Append
+ -> Seq Scan on pt3a pt3_1
+ Filter: (val3 = 1)
+ -> Seq Scan on pt3b pt3_2
+ Filter: (val3 = 1)
+ -> Seq Scan on pt3c pt3_3
+ Filter: (val3 = 1)
+ -> Append
+ -> Index Scan using pt1a_pkey on pt1a pt1_1
+ Index Cond: (id = pt2.id)
+ Filter: (val1 = 1)
+ -> Index Scan using pt1b_pkey on pt1b pt1_2
+ Index Cond: (id = pt2.id)
+ Filter: (val1 = 1)
+ -> Index Scan using pt1c_pkey on pt1c pt1_3
+ Index Cond: (id = pt2.id)
+ Filter: (val1 = 1)
+ Supplied Plan Advice:
+ PARTITIONWISE(pt1) /* matched */
+ PARTITIONWISE(pt2) /* matched */
+ PARTITIONWISE(pt3) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(pt2 pt3 pt1)
+ NESTED_LOOP_PLAIN(pt1)
+ HASH_JOIN(pt3)
+ SEQ_SCAN(pt2/public.pt2a pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a
+ pt3/public.pt3b pt3/public.pt3c)
+ INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey
+ pt1/public.pt1c public.pt1c_pkey)
+ PARTITIONWISE(pt2 pt3 pt1)
+ NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+ pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(43 rows)
+
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: (pt1.id = pt3.id)
+ -> Append
+ -> Hash Join
+ Hash Cond: (pt1_1.id = pt2_1.id)
+ -> Seq Scan on pt1a pt1_1
+ Filter: (val1 = 1)
+ -> Hash
+ -> Seq Scan on pt2a pt2_1
+ Filter: (val2 = 1)
+ -> Hash Join
+ Hash Cond: (pt1_2.id = pt2_2.id)
+ -> Seq Scan on pt1b pt1_2
+ Filter: (val1 = 1)
+ -> Hash
+ -> Seq Scan on pt2b pt2_2
+ Filter: (val2 = 1)
+ -> Hash Join
+ Hash Cond: (pt1_3.id = pt2_3.id)
+ -> Seq Scan on pt1c pt1_3
+ Filter: (val1 = 1)
+ -> Hash
+ -> Seq Scan on pt2c pt2_3
+ Filter: (val2 = 1)
+ -> Hash
+ -> Append
+ -> Seq Scan on pt3a pt3_1
+ Filter: (val3 = 1)
+ -> Seq Scan on pt3b pt3_2
+ Filter: (val3 = 1)
+ -> Seq Scan on pt3c pt3_3
+ Filter: (val3 = 1)
+ Supplied Plan Advice:
+ PARTITIONWISE((pt1 pt2)) /* matched */
+ PARTITIONWISE(pt3) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(pt1/public.pt1a pt2/public.pt2a)
+ JOIN_ORDER(pt1/public.pt1b pt2/public.pt2b)
+ JOIN_ORDER(pt1/public.pt1c pt2/public.pt2c)
+ JOIN_ORDER({pt1 pt2} pt3)
+ HASH_JOIN(pt2/public.pt2a pt2/public.pt2b pt2/public.pt2c pt3)
+ SEQ_SCAN(pt1/public.pt1a pt2/public.pt2a pt1/public.pt1b pt2/public.pt2b
+ pt1/public.pt1c pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b
+ pt3/public.pt3c)
+ PARTITIONWISE((pt1 pt2) pt3)
+ NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+ pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(47 rows)
+
+COMMIT;
+-- Test conflicting advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) (pt1 pt3))';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Append
+ Disabled: true
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_1.id = pt3_1.id)
+ -> Seq Scan on pt2a pt2_1
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3a pt3_1
+ Filter: (val3 = 1)
+ -> Index Scan using pt1a_pkey on pt1a pt1_1
+ Index Cond: (id = pt2_1.id)
+ Filter: (val1 = 1)
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_2.id = pt3_2.id)
+ -> Seq Scan on pt2b pt2_2
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3b pt3_2
+ Filter: (val3 = 1)
+ -> Index Scan using pt1b_pkey on pt1b pt1_2
+ Index Cond: (id = pt2_2.id)
+ Filter: (val1 = 1)
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_3.id = pt3_3.id)
+ -> Seq Scan on pt2c pt2_3
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3c pt3_3
+ Filter: (val3 = 1)
+ -> Index Scan using pt1c_pkey on pt1c pt1_3
+ Index Cond: (id = pt2_3.id)
+ Filter: (val1 = 1)
+ Supplied Plan Advice:
+ PARTITIONWISE((pt1 pt2)) /* matched, conflicting, failed */
+ PARTITIONWISE((pt1 pt3)) /* matched, conflicting, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(pt2/public.pt2a pt3/public.pt3a pt1/public.pt1a)
+ JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b)
+ JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c)
+ NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c)
+ HASH_JOIN(pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+ SEQ_SCAN(pt2/public.pt2a pt3/public.pt3a pt2/public.pt2b pt3/public.pt3b
+ pt2/public.pt2c pt3/public.pt3c)
+ INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey
+ pt1/public.pt1c public.pt1c_pkey)
+ PARTITIONWISE((pt1 pt2 pt3))
+ NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+ pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(51 rows)
+
+COMMIT;
+-- Can't force a partitionwise join when the partition bounds don't match.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Nested Loop
+ Disabled: true
+ -> Append
+ -> Seq Scan on pt1a pt1_1
+ -> Seq Scan on pt1b pt1_2
+ -> Seq Scan on pt1c pt1_3
+ -> Append
+ -> Index Scan using ptmismatcha_pkey on ptmismatcha ptmismatch_1
+ Index Cond: (id = pt1.id)
+ -> Index Scan using ptmismatchb_pkey on ptmismatchb ptmismatch_2
+ Index Cond: (id = pt1.id)
+ Supplied Plan Advice:
+ PARTITIONWISE((pt1 ptmismatch)) /* matched, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(pt1 ptmismatch)
+ NESTED_LOOP_PLAIN(ptmismatch)
+ SEQ_SCAN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c)
+ INDEX_SCAN(ptmismatch/public.ptmismatcha public.ptmismatcha_pkey
+ ptmismatch/public.ptmismatchb public.ptmismatchb_pkey)
+ PARTITIONWISE(pt1 ptmismatch)
+ NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c
+ ptmismatch/public.ptmismatcha ptmismatch/public.ptmismatchb)
+(22 rows)
+
+COMMIT;
+-- Force join order for a particular branch of the partitionwise join with
+-- and without mentioning the schema name.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Append
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt3_1.id = pt2_1.id)
+ -> Seq Scan on pt3a pt3_1
+ Filter: (val3 = 1)
+ -> Hash
+ -> Seq Scan on pt2a pt2_1
+ Filter: (val2 = 1)
+ -> Index Scan using pt1a_pkey on pt1a pt1_1
+ Index Cond: (id = pt2_1.id)
+ Filter: (val1 = 1)
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_2.id = pt3_2.id)
+ -> Seq Scan on pt2b pt2_2
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3b pt3_2
+ Filter: (val3 = 1)
+ -> Index Scan using pt1b_pkey on pt1b pt1_2
+ Index Cond: (id = pt2_2.id)
+ Filter: (val1 = 1)
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_3.id = pt3_3.id)
+ -> Seq Scan on pt2c pt2_3
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3c pt3_3
+ Filter: (val3 = 1)
+ -> Index Scan using pt1c_pkey on pt1c pt1_3
+ Index Cond: (id = pt2_3.id)
+ Filter: (val1 = 1)
+ Supplied Plan Advice:
+ JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a)
+ JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b)
+ JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c)
+ NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c)
+ HASH_JOIN(pt2/public.pt2a pt3/public.pt3b pt3/public.pt3c)
+ SEQ_SCAN(pt3/public.pt3a pt2/public.pt2a pt2/public.pt2b pt3/public.pt3b
+ pt2/public.pt2c pt3/public.pt3c)
+ INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey
+ pt1/public.pt1c public.pt1c_pkey)
+ PARTITIONWISE((pt1 pt2 pt3))
+ NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+ pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(49 rows)
+
+SET LOCAL pg_plan_advice.advice = 'JOIN_ORDER(pt3/pt3a pt2/pt2a pt1/pt1a)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Append
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt3_1.id = pt2_1.id)
+ -> Seq Scan on pt3a pt3_1
+ Filter: (val3 = 1)
+ -> Hash
+ -> Seq Scan on pt2a pt2_1
+ Filter: (val2 = 1)
+ -> Index Scan using pt1a_pkey on pt1a pt1_1
+ Index Cond: (id = pt2_1.id)
+ Filter: (val1 = 1)
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_2.id = pt3_2.id)
+ -> Seq Scan on pt2b pt2_2
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3b pt3_2
+ Filter: (val3 = 1)
+ -> Index Scan using pt1b_pkey on pt1b pt1_2
+ Index Cond: (id = pt2_2.id)
+ Filter: (val1 = 1)
+ -> Nested Loop
+ -> Hash Join
+ Hash Cond: (pt2_3.id = pt3_3.id)
+ -> Seq Scan on pt2c pt2_3
+ Filter: (val2 = 1)
+ -> Hash
+ -> Seq Scan on pt3c pt3_3
+ Filter: (val3 = 1)
+ -> Index Scan using pt1c_pkey on pt1c pt1_3
+ Index Cond: (id = pt2_3.id)
+ Filter: (val1 = 1)
+ Supplied Plan Advice:
+ JOIN_ORDER(pt3/pt3a pt2/pt2a pt1/pt1a) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a)
+ JOIN_ORDER(pt2/public.pt2b pt3/public.pt3b pt1/public.pt1b)
+ JOIN_ORDER(pt2/public.pt2c pt3/public.pt3c pt1/public.pt1c)
+ NESTED_LOOP_PLAIN(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c)
+ HASH_JOIN(pt2/public.pt2a pt3/public.pt3b pt3/public.pt3c)
+ SEQ_SCAN(pt3/public.pt3a pt2/public.pt2a pt2/public.pt2b pt3/public.pt3b
+ pt2/public.pt2c pt3/public.pt3c)
+ INDEX_SCAN(pt1/public.pt1a public.pt1a_pkey pt1/public.pt1b public.pt1b_pkey
+ pt1/public.pt1c public.pt1c_pkey)
+ PARTITIONWISE((pt1 pt2 pt3))
+ NO_GATHER(pt1/public.pt1a pt1/public.pt1b pt1/public.pt1c pt2/public.pt2a
+ pt2/public.pt2b pt2/public.pt2c pt3/public.pt3a pt3/public.pt3b pt3/public.pt3c)
+(49 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/prepared.out b/contrib/pg_plan_advice/expected/prepared.out
new file mode 100644
index 00000000000..07a7c623659
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/prepared.out
@@ -0,0 +1,67 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+CREATE TABLE ptab (id integer, val text) WITH (autovacuum_enabled = false);
+SET pg_plan_advice.always_store_advice_details = false;
+-- Not prepared, so advice should be generated.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM ptab;
+ QUERY PLAN
+------------------------
+ Seq Scan on ptab
+ Generated Plan Advice:
+ SEQ_SCAN(ptab)
+ NO_GATHER(ptab)
+(4 rows)
+
+-- Prepared, so advice should not be generated.
+PREPARE pt1 AS SELECT * FROM ptab;
+EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt1;
+ QUERY PLAN
+------------------
+ Seq Scan on ptab
+(1 row)
+
+SET pg_plan_advice.always_store_advice_details = true;
+-- Prepared, but always_store_advice_details = true, so should show advice.
+PREPARE pt2 AS SELECT * FROM ptab;
+EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt2;
+ QUERY PLAN
+------------------------
+ Seq Scan on ptab
+ Generated Plan Advice:
+ SEQ_SCAN(ptab)
+ NO_GATHER(ptab)
+(4 rows)
+
+-- Not prepared, so feedback should be generated.
+SET pg_plan_advice.always_store_advice_details = false;
+SET pg_plan_advice.advice = 'SEQ_SCAN(ptab)';
+EXPLAIN (COSTS OFF)
+SELECT * FROM ptab;
+ QUERY PLAN
+--------------------------------
+ Seq Scan on ptab
+ Supplied Plan Advice:
+ SEQ_SCAN(ptab) /* matched */
+(3 rows)
+
+-- Prepared, so advice should not be generated.
+PREPARE pt3 AS SELECT * FROM ptab;
+EXPLAIN (COSTS OFF) EXECUTE pt1;
+ QUERY PLAN
+------------------
+ Seq Scan on ptab
+(1 row)
+
+SET pg_plan_advice.always_store_advice_details = true;
+-- Prepared, but always_store_advice_details = true, so should show feedback.
+PREPARE pt4 AS SELECT * FROM ptab;
+EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt2;
+ QUERY PLAN
+------------------------
+ Seq Scan on ptab
+ Generated Plan Advice:
+ SEQ_SCAN(ptab)
+ NO_GATHER(ptab)
+(4 rows)
+
diff --git a/contrib/pg_plan_advice/expected/scan.out b/contrib/pg_plan_advice/expected/scan.out
new file mode 100644
index 00000000000..3f9e13b6d41
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/scan.out
@@ -0,0 +1,757 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET seq_page_cost = 0.1;
+SET random_page_cost = 0.1;
+SET cpu_tuple_cost = 0;
+SET cpu_index_tuple_cost = 0;
+CREATE TABLE scan_table (a int primary key, b text)
+ WITH (autovacuum_enabled = false);
+INSERT INTO scan_table
+ SELECT g, 'some text ' || g FROM generate_series(1, 100000) g;
+CREATE INDEX scan_table_b ON scan_table USING brin (b);
+VACUUM ANALYZE scan_table;
+-- Sequential scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+ QUERY PLAN
+-------------------------
+ Seq Scan on scan_table
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(4 rows)
+
+-- Index scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+-------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(5 rows)
+
+-- Index-only scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+ QUERY PLAN
+------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Generated Plan Advice:
+ INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(5 rows)
+
+-- Bitmap heap scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE b > 'some text 8';
+ QUERY PLAN
+-----------------------------------------------
+ Bitmap Heap Scan on scan_table
+ Recheck Cond: (b > 'some text 8'::text)
+ -> Bitmap Index Scan on scan_table_b
+ Index Cond: (b > 'some text 8'::text)
+ Generated Plan Advice:
+ BITMAP_HEAP_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+-- TID scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+ QUERY PLAN
+-----------------------------------
+ Tid Scan on scan_table
+ TID Cond: (ctid = '(0,1)'::tid)
+ Generated Plan Advice:
+ TID_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(5 rows)
+
+-- TID range scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+ QUERY PLAN
+---------------------------------------------------------------
+ Tid Range Scan on scan_table
+ TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid))
+ Generated Plan Advice:
+ TID_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(5 rows)
+
+-- Try forcing each of our test queries to use the scan type it wanted
+-- to use anyway. All of these should succeed.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+ QUERY PLAN
+--------------------------------------
+ Seq Scan on scan_table
+ Supplied Plan Advice:
+ SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(6 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+ INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE b > 'some text 8';
+ QUERY PLAN
+-----------------------------------------------
+ Bitmap Heap Scan on scan_table
+ Recheck Cond: (b > 'some text 8'::text)
+ -> Bitmap Index Scan on scan_table_b
+ Index Cond: (b > 'some text 8'::text)
+ Supplied Plan Advice:
+ BITMAP_HEAP_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+ BITMAP_HEAP_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(9 rows)
+
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+ QUERY PLAN
+--------------------------------------
+ Tid Scan on scan_table
+ TID Cond: (ctid = '(0,1)'::tid)
+ Supplied Plan Advice:
+ TID_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+ TID_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+ QUERY PLAN
+---------------------------------------------------------------
+ Tid Range Scan on scan_table
+ TID Cond: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid))
+ Supplied Plan Advice:
+ TID_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+ TID_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Try to force a full scan of the table to use some other scan type. All
+-- of these will fail. An index scan or bitmap heap scan could potentially
+-- generate the correct answer, but the planner does not even consider these
+-- possibilities due to the lack of a WHERE clause.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on scan_table
+ Disabled: true
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+ QUERY PLAN
+---------------------------------------------------------------------
+ Seq Scan on scan_table
+ Disabled: true
+ Supplied Plan Advice:
+ INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+ QUERY PLAN
+------------------------------------------------------
+ Seq Scan on scan_table
+ Disabled: true
+ Supplied Plan Advice:
+ BITMAP_HEAP_SCAN(scan_table) /* matched, failed */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+ QUERY PLAN
+----------------------------------------------
+ Seq Scan on scan_table
+ Disabled: true
+ Supplied Plan Advice:
+ TID_SCAN(scan_table) /* matched, failed */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Try again to force index use. This should now succeed for the INDEX_SCAN
+-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the
+-- query fetches columns not included in the index.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+ QUERY PLAN
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Index Cond: (a > 0)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+ QUERY PLAN
+---------------------------------------------------------------------
+ Seq Scan on scan_table
+ Disabled: true
+ Filter: (a > 0)
+ Supplied Plan Advice:
+ INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(8 rows)
+
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+ QUERY PLAN
+----------------------------------------------
+ Bitmap Heap Scan on scan_table
+ Recheck Cond: (a > 0)
+ -> Bitmap Index Scan on scan_table_pkey
+ Index Cond: (a > 0)
+ Supplied Plan Advice:
+ BITMAP_HEAP_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+ BITMAP_HEAP_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(9 rows)
+
+COMMIT;
+-- We can force a primary key lookup to use a sequential scan, but we
+-- can't force it to use an index-only scan (due to the column list)
+-- or a TID scan (due to the absence of a TID qual).
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+--------------------------------------
+ Seq Scan on scan_table
+ Filter: (a = 1)
+ Supplied Plan Advice:
+ SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+---------------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Disabled: true
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_ONLY_SCAN(scan_table scan_table_pkey) /* matched, failed */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(8 rows)
+
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+-------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Disabled: true
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ TID_SCAN(scan_table) /* matched, failed */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- We can forcibly downgrade an index-only scan to an index scan, but we can't
+-- force the use of an index that the planner thinks is inapplicable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+---------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Disabled: true
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table scan_table_b) /* matched, failed */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- We can force the use of a sequential scan in place of a bitmap heap scan,
+-- but a plain index scan on a BRIN index is not possible.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE b > 'some text 8';
+ QUERY PLAN
+--------------------------------------
+ Seq Scan on scan_table
+ Filter: (b > 'some text 8'::text)
+ Supplied Plan Advice:
+ SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+-------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Disabled: true
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table scan_table_b) /* matched, failed */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- We can force the use of a sequential scan rather than a TID scan or
+-- TID range scan.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+ QUERY PLAN
+--------------------------------------
+ Seq Scan on scan_table
+ Filter: (ctid = '(0,1)'::tid)
+ Supplied Plan Advice:
+ SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+ QUERY PLAN
+-------------------------------------------------------------
+ Seq Scan on scan_table
+ Filter: ((ctid > '(1,1)'::tid) AND (ctid < '(2,1)'::tid))
+ Supplied Plan Advice:
+ SEQ_SCAN(scan_table) /* matched */
+ Generated Plan Advice:
+ SEQ_SCAN(scan_table)
+ NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Test more complex scenarios with index scans.
+BEGIN;
+-- Should still work if we mention the schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+---------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey) /* matched */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+-- But not if we mention the wrong schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table cilbup.scan_table_pkey) /* matched, inapplicable, failed */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+-- It's OK to repeat the same advice.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+--------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ INDEX_SCAN(scan_table scan_table_pkey) /* matched */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(8 rows)
+
+-- But it doesn't work if the index target is even notionally different.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+ QUERY PLAN
+----------------------------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table scan_table_pkey) /* matched, conflicting */
+ INDEX_SCAN(scan_table public.scan_table_pkey) /* matched, conflicting */
+ Generated Plan Advice:
+ INDEX_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(8 rows)
+
+COMMIT;
+-- Test assorted incorrect advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+ QUERY PLAN
+------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ SEQ_SCAN(nothing) /* not matched */
+ Generated Plan Advice:
+ INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+ QUERY PLAN
+------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(nothing whatsoever) /* not matched */
+ Generated Plan Advice:
+ INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+ QUERY PLAN
+--------------------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_SCAN(scan_table bogus) /* matched, inapplicable, failed */
+ Generated Plan Advice:
+ INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+ QUERY PLAN
+---------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_ONLY_SCAN(nothing whatsoever) /* not matched */
+ Generated Plan Advice:
+ INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------
+ Index Only Scan using scan_table_pkey on scan_table
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ INDEX_ONLY_SCAN(scan_table bogus) /* matched, inapplicable, failed */
+ Generated Plan Advice:
+ INDEX_ONLY_SCAN(scan_table public.scan_table_pkey)
+ NO_GATHER(scan_table)
+(7 rows)
+
+COMMIT;
+-- Test our ability to refer to multiple instances of the same alias.
+BEGIN;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+ LEFT JOIN scan_table s ON g = s.a;
+ QUERY PLAN
+-------------------------------------------------------------------
+ Nested Loop Left Join
+ -> Nested Loop Left Join
+ -> Function Scan on generate_series g
+ -> Index Scan using scan_table_pkey on scan_table s
+ Index Cond: (a = g.g)
+ -> Index Scan using scan_table_pkey on scan_table s_1
+ Index Cond: (a = g.g)
+ Generated Plan Advice:
+ JOIN_ORDER(g s s#2)
+ NESTED_LOOP_PLAIN(s s#2)
+ INDEX_SCAN(s public.scan_table_pkey s#2 public.scan_table_pkey)
+ NO_GATHER(g s s#2)
+(12 rows)
+
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+ LEFT JOIN scan_table s ON g = s.a;
+ QUERY PLAN
+----------------------------------------------------------
+ Nested Loop Left Join
+ -> Hash Left Join
+ Hash Cond: (g.g = s.a)
+ -> Function Scan on generate_series g
+ -> Hash
+ -> Seq Scan on scan_table s
+ -> Index Scan using scan_table_pkey on scan_table s_1
+ Index Cond: (a = g.g)
+ Supplied Plan Advice:
+ SEQ_SCAN(s) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(g s s#2)
+ NESTED_LOOP_PLAIN(s#2)
+ HASH_JOIN(s)
+ SEQ_SCAN(s)
+ INDEX_SCAN(s#2 public.scan_table_pkey)
+ NO_GATHER(g s s#2)
+(17 rows)
+
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+ LEFT JOIN scan_table s ON g = s.a;
+ QUERY PLAN
+--------------------------------------------------------------
+ Hash Left Join
+ Hash Cond: (g.g = s_1.a)
+ -> Nested Loop Left Join
+ -> Function Scan on generate_series g
+ -> Index Scan using scan_table_pkey on scan_table s
+ Index Cond: (a = g.g)
+ -> Hash
+ -> Seq Scan on scan_table s_1
+ Supplied Plan Advice:
+ SEQ_SCAN(s#2) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(g s s#2)
+ NESTED_LOOP_PLAIN(s)
+ HASH_JOIN(s#2)
+ SEQ_SCAN(s#2)
+ INDEX_SCAN(s public.scan_table_pkey)
+ NO_GATHER(g s s#2)
+(17 rows)
+
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+ LEFT JOIN scan_table s ON g = s.a;
+ QUERY PLAN
+------------------------------------------------
+ Hash Left Join
+ Hash Cond: (g.g = s_1.a)
+ -> Hash Left Join
+ Hash Cond: (g.g = s.a)
+ -> Function Scan on generate_series g
+ -> Hash
+ -> Seq Scan on scan_table s
+ -> Hash
+ -> Seq Scan on scan_table s_1
+ Supplied Plan Advice:
+ SEQ_SCAN(s) /* matched */
+ SEQ_SCAN(s#2) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(g s s#2)
+ HASH_JOIN(s s#2)
+ SEQ_SCAN(s s#2)
+ NO_GATHER(g s s#2)
+(17 rows)
+
+COMMIT;
+-- Test our ability to refer to scans within a subquery.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+ QUERY PLAN
+--------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+ Index Cond: (a = 1)
+ Generated Plan Advice:
+ INDEX_SCAN(s@x public.scan_table_pkey)
+ NO_GATHER(x s@x)
+(5 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+ QUERY PLAN
+---------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+ Index Cond: (a = 1)
+ Generated Plan Advice:
+ INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey)
+ NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(5 rows)
+
+BEGIN;
+-- Should not match.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+ QUERY PLAN
+--------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ SEQ_SCAN(s) /* not matched */
+ Generated Plan Advice:
+ INDEX_SCAN(s@x public.scan_table_pkey)
+ NO_GATHER(x s@x)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+ QUERY PLAN
+---------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ SEQ_SCAN(s) /* not matched */
+ Generated Plan Advice:
+ INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey)
+ NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(7 rows)
+
+-- Should match first query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+ QUERY PLAN
+-------------------------------
+ Seq Scan on scan_table s
+ Filter: (a = 1)
+ Supplied Plan Advice:
+ SEQ_SCAN(s@x) /* matched */
+ Generated Plan Advice:
+ SEQ_SCAN(s@x)
+ NO_GATHER(x s@x)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+ QUERY PLAN
+---------------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ SEQ_SCAN(s@x) /* not matched */
+ Generated Plan Advice:
+ INDEX_SCAN(s@unnamed_subquery public.scan_table_pkey)
+ NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(7 rows)
+
+-- Should match second query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+ QUERY PLAN
+--------------------------------------------------
+ Index Scan using scan_table_pkey on scan_table s
+ Index Cond: (a = 1)
+ Supplied Plan Advice:
+ SEQ_SCAN(s@unnamed_subquery) /* not matched */
+ Generated Plan Advice:
+ INDEX_SCAN(s@x public.scan_table_pkey)
+ NO_GATHER(x s@x)
+(7 rows)
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+ QUERY PLAN
+--------------------------------------------------
+ Seq Scan on scan_table s
+ Filter: (a = 1)
+ Supplied Plan Advice:
+ SEQ_SCAN(s@unnamed_subquery) /* matched */
+ Generated Plan Advice:
+ SEQ_SCAN(s@unnamed_subquery)
+ NO_GATHER(unnamed_subquery s@unnamed_subquery)
+(7 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/semijoin.out b/contrib/pg_plan_advice/expected/semijoin.out
new file mode 100644
index 00000000000..5551c028a1f
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/semijoin.out
@@ -0,0 +1,377 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+CREATE TABLE sj_wide (
+ id integer primary key,
+ val1 integer,
+ padding text storage plain
+) WITH (autovacuum_enabled = false);
+INSERT INTO sj_wide
+ SELECT g, g%10+1, repeat(' ', 300) FROM generate_series(1, 1000) g;
+CREATE INDEX ON sj_wide (val1);
+VACUUM ANALYZE sj_wide;
+CREATE TABLE sj_narrow (
+ id integer primary key,
+ val1 integer
+) WITH (autovacuum_enabled = false);
+INSERT INTO sj_narrow
+ SELECT g, g%10+1 FROM generate_series(1, 1000) g;
+CREATE INDEX ON sj_narrow (val1);
+VACUUM ANALYZE sj_narrow;
+-- We expect this to make the VALUES list unique and use index lookups to
+-- find the rows in sj_wide, so as to avoid a full scan of sj_wide.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_wide
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+ QUERY PLAN
+-----------------------------------------------------------
+ Nested Loop
+ -> HashAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+ -> Index Scan using sj_wide_pkey on sj_wide
+ Index Cond: (id = "*VALUES*".column1)
+ Filter: (val1 = "*VALUES*".column2)
+ Generated Plan Advice:
+ JOIN_ORDER("*VALUES*" sj_wide)
+ NESTED_LOOP_PLAIN(sj_wide)
+ INDEX_SCAN(sj_wide public.sj_wide_pkey)
+ SEMIJOIN_UNIQUE("*VALUES*")
+ NO_GATHER(sj_wide "*VALUES*")
+(13 rows)
+
+-- If we ask for a unique semijoin, we should get the same plan as with
+-- no advice. If we ask for a non-unique semijoin, we should see a Semi
+-- Join operation in the plan tree.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_wide
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+ QUERY PLAN
+-----------------------------------------------------------
+ Nested Loop
+ -> HashAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+ -> Index Scan using sj_wide_pkey on sj_wide
+ Index Cond: (id = "*VALUES*".column1)
+ Filter: (val1 = "*VALUES*".column2)
+ Supplied Plan Advice:
+ SEMIJOIN_UNIQUE("*VALUES*") /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER("*VALUES*" sj_wide)
+ NESTED_LOOP_PLAIN(sj_wide)
+ INDEX_SCAN(sj_wide public.sj_wide_pkey)
+ SEMIJOIN_UNIQUE("*VALUES*")
+ NO_GATHER(sj_wide "*VALUES*")
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_wide
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+ QUERY PLAN
+------------------------------------------------------------------------------------------
+ Hash Semi Join
+ Hash Cond: ((sj_wide.id = "*VALUES*".column1) AND (sj_wide.val1 = "*VALUES*".column2))
+ -> Seq Scan on sj_wide
+ -> Hash
+ -> Values Scan on "*VALUES*"
+ Supplied Plan Advice:
+ SEMIJOIN_NON_UNIQUE("*VALUES*") /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(sj_wide "*VALUES*")
+ HASH_JOIN("*VALUES*")
+ SEQ_SCAN(sj_wide)
+ SEMIJOIN_NON_UNIQUE("*VALUES*")
+ NO_GATHER(sj_wide "*VALUES*")
+(13 rows)
+
+COMMIT;
+-- Because this table is narrower than the previous one, a sequential scan
+-- is less expensive, and we choose a straightforward Semi Join plan by
+-- default. (Note that this is also very sensitive to the length of the IN
+-- list, which affects how many index lookups the alternative plan will need.)
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_narrow
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+ QUERY PLAN
+----------------------------------------------------------------------------------------------
+ Hash Semi Join
+ Hash Cond: ((sj_narrow.id = "*VALUES*".column1) AND (sj_narrow.val1 = "*VALUES*".column2))
+ -> Seq Scan on sj_narrow
+ -> Hash
+ -> Values Scan on "*VALUES*"
+ Generated Plan Advice:
+ JOIN_ORDER(sj_narrow "*VALUES*")
+ HASH_JOIN("*VALUES*")
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_NON_UNIQUE("*VALUES*")
+ NO_GATHER(sj_narrow "*VALUES*")
+(11 rows)
+
+-- Here, we expect that advising a unique semijoin will switch to the same
+-- plan that we got with sj_wide, and that advising a non-unique semijoin
+-- will not change the plan.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_narrow
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+ QUERY PLAN
+----------------------------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((sj_narrow.id = "*VALUES*".column1) AND (sj_narrow.val1 = "*VALUES*".column2))
+ -> Seq Scan on sj_narrow
+ -> Hash
+ -> HashAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+ Supplied Plan Advice:
+ SEMIJOIN_UNIQUE("*VALUES*") /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(sj_narrow "*VALUES*")
+ HASH_JOIN("*VALUES*")
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_UNIQUE("*VALUES*")
+ NO_GATHER(sj_narrow "*VALUES*")
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_narrow
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+ QUERY PLAN
+----------------------------------------------------------------------------------------------
+ Hash Semi Join
+ Hash Cond: ((sj_narrow.id = "*VALUES*".column1) AND (sj_narrow.val1 = "*VALUES*".column2))
+ -> Seq Scan on sj_narrow
+ -> Hash
+ -> Values Scan on "*VALUES*"
+ Supplied Plan Advice:
+ SEMIJOIN_NON_UNIQUE("*VALUES*") /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(sj_narrow "*VALUES*")
+ HASH_JOIN("*VALUES*")
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_NON_UNIQUE("*VALUES*")
+ NO_GATHER(sj_narrow "*VALUES*")
+(13 rows)
+
+COMMIT;
+-- In the above example, we made the outer side of the join unique, but here,
+-- we should make the inner side unique.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+ QUERY PLAN
+------------------------------------------
+ Hash Join
+ Hash Cond: (g.g = sj_narrow.val1)
+ -> Function Scan on generate_series g
+ -> Hash
+ -> HashAggregate
+ Group Key: sj_narrow.val1
+ -> Seq Scan on sj_narrow
+ Generated Plan Advice:
+ JOIN_ORDER(g sj_narrow)
+ HASH_JOIN(sj_narrow)
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_UNIQUE(sj_narrow)
+ NO_GATHER(g sj_narrow)
+(13 rows)
+
+-- We should be able to force a plan with or without the make-unique strategy,
+-- with either side as the driving table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+ QUERY PLAN
+--------------------------------------------
+ Hash Join
+ Hash Cond: (g.g = sj_narrow.val1)
+ -> Function Scan on generate_series g
+ -> Hash
+ -> HashAggregate
+ Group Key: sj_narrow.val1
+ -> Seq Scan on sj_narrow
+ Supplied Plan Advice:
+ SEMIJOIN_UNIQUE(sj_narrow) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(g sj_narrow)
+ HASH_JOIN(sj_narrow)
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_UNIQUE(sj_narrow)
+ NO_GATHER(g sj_narrow)
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+ QUERY PLAN
+------------------------------------------------
+ Hash Semi Join
+ Hash Cond: (g.g = sj_narrow.val1)
+ -> Function Scan on generate_series g
+ -> Hash
+ -> Seq Scan on sj_narrow
+ Supplied Plan Advice:
+ SEMIJOIN_NON_UNIQUE(sj_narrow) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(g sj_narrow)
+ HASH_JOIN(sj_narrow)
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_NON_UNIQUE(sj_narrow)
+ NO_GATHER(g sj_narrow)
+(13 rows)
+
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) join_order(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+ QUERY PLAN
+------------------------------------------------
+ Hash Join
+ Hash Cond: (sj_narrow.val1 = g.g)
+ -> HashAggregate
+ Group Key: sj_narrow.val1
+ -> Seq Scan on sj_narrow
+ -> Hash
+ -> Function Scan on generate_series g
+ Supplied Plan Advice:
+ SEMIJOIN_UNIQUE(sj_narrow) /* matched */
+ JOIN_ORDER(sj_narrow) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(sj_narrow g)
+ HASH_JOIN(g)
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_UNIQUE(sj_narrow)
+ NO_GATHER(g sj_narrow)
+(16 rows)
+
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow) join_order(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+ QUERY PLAN
+------------------------------------------------
+ Hash Right Semi Join
+ Hash Cond: (sj_narrow.val1 = g.g)
+ -> Seq Scan on sj_narrow
+ -> Hash
+ -> Function Scan on generate_series g
+ Supplied Plan Advice:
+ SEMIJOIN_NON_UNIQUE(sj_narrow) /* matched */
+ JOIN_ORDER(sj_narrow) /* matched */
+ Generated Plan Advice:
+ JOIN_ORDER(sj_narrow g)
+ HASH_JOIN(g)
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_NON_UNIQUE(sj_narrow)
+ NO_GATHER(g sj_narrow)
+(14 rows)
+
+COMMIT;
+-- However, mentioning the wrong side of the join should result in an advice
+-- failure.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+ QUERY PLAN
+--------------------------------------------
+ Nested Loop
+ Disabled: true
+ Join Filter: (g.g = sj_narrow.val1)
+ -> HashAggregate
+ Group Key: sj_narrow.val1
+ -> Seq Scan on sj_narrow
+ -> Function Scan on generate_series g
+ Supplied Plan Advice:
+ SEMIJOIN_UNIQUE(g) /* matched, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(sj_narrow g)
+ NESTED_LOOP_PLAIN(g)
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_UNIQUE(sj_narrow)
+ NO_GATHER(g sj_narrow)
+(15 rows)
+
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(g)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+ QUERY PLAN
+------------------------------------------------
+ Nested Loop
+ Disabled: true
+ Join Filter: (g.g = sj_narrow.val1)
+ -> HashAggregate
+ Group Key: sj_narrow.val1
+ -> Seq Scan on sj_narrow
+ -> Function Scan on generate_series g
+ Supplied Plan Advice:
+ SEMIJOIN_NON_UNIQUE(g) /* matched, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(sj_narrow g)
+ NESTED_LOOP_PLAIN(g)
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_UNIQUE(sj_narrow)
+ NO_GATHER(g sj_narrow)
+(15 rows)
+
+COMMIT;
+-- Test conflicting advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) semijoin_non_unique(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+ QUERY PLAN
+---------------------------------------------------------------------
+ Hash Join
+ Hash Cond: (g.g = sj_narrow.val1)
+ -> Function Scan on generate_series g
+ -> Hash
+ -> HashAggregate
+ Group Key: sj_narrow.val1
+ -> Seq Scan on sj_narrow
+ Supplied Plan Advice:
+ SEMIJOIN_UNIQUE(sj_narrow) /* matched, conflicting */
+ SEMIJOIN_NON_UNIQUE(sj_narrow) /* matched, conflicting, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(g sj_narrow)
+ HASH_JOIN(sj_narrow)
+ SEQ_SCAN(sj_narrow)
+ SEMIJOIN_UNIQUE(sj_narrow)
+ NO_GATHER(g sj_narrow)
+(16 rows)
+
+COMMIT;
+-- Try applying SEMIJOIN_UNIQUE() to a non-semijoin.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g, sj_narrow s WHERE g = s.val1;
+ QUERY PLAN
+----------------------------------------------------------
+ Merge Join
+ Merge Cond: (s.val1 = g.g)
+ -> Index Scan using sj_narrow_val1_idx on sj_narrow s
+ -> Sort
+ Sort Key: g.g
+ -> Function Scan on generate_series g
+ Supplied Plan Advice:
+ SEMIJOIN_UNIQUE(g) /* matched, inapplicable, failed */
+ Generated Plan Advice:
+ JOIN_ORDER(s g)
+ MERGE_JOIN_PLAIN(g)
+ INDEX_SCAN(s public.sj_narrow_val1_idx)
+ NO_GATHER(g s)
+(13 rows)
+
+COMMIT;
diff --git a/contrib/pg_plan_advice/expected/syntax.out b/contrib/pg_plan_advice/expected/syntax.out
new file mode 100644
index 00000000000..be61402b569
--- /dev/null
+++ b/contrib/pg_plan_advice/expected/syntax.out
@@ -0,0 +1,192 @@
+LOAD 'pg_plan_advice';
+-- An empty string is allowed. Empty target lists are allowed for most advice
+-- tags, but not for JOIN_ORDER. "Supplied Plan Advice" should be omitted in
+-- text format when there is no actual advice, but not in non-text format.
+SET pg_plan_advice.advice = '';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+------------
+ Result
+(1 row)
+
+SET pg_plan_advice.advice = 'SEQ_SCAN()';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+------------
+ Result
+(1 row)
+
+SET pg_plan_advice.advice = 'NESTED_LOOP_PLAIN()';
+EXPLAIN (COSTS OFF, FORMAT JSON) SELECT 1;
+ QUERY PLAN
+--------------------------------
+ [ +
+ { +
+ "Plan": { +
+ "Node Type": "Result", +
+ "Parallel Aware": false,+
+ "Async Capable": false, +
+ "Disabled": false +
+ }, +
+ "Supplied Plan Advice": ""+
+ } +
+ ]
+(1 row)
+
+SET pg_plan_advice.advice = 'JOIN_ORDER()';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "JOIN_ORDER()"
+DETAIL: Could not parse advice: JOIN_ORDER must have at least one target at or near ")"
+-- Test assorted variations in capitalization, whitespace, and which parts of
+-- the relation identifier are included. These should all work.
+SET pg_plan_advice.advice = 'SEQ_SCAN(x)';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+---------------------------------
+ Result
+ Supplied Plan Advice:
+ SEQ_SCAN(x) /* not matched */
+(3 rows)
+
+SET pg_plan_advice.advice = 'seq_scan(x@y)';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+-----------------------------------
+ Result
+ Supplied Plan Advice:
+ SEQ_SCAN(x@y) /* not matched */
+(3 rows)
+
+SET pg_plan_advice.advice = 'SEQ_scan(x#2)';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+-----------------------------------
+ Result
+ Supplied Plan Advice:
+ SEQ_SCAN(x#2) /* not matched */
+(3 rows)
+
+SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+-----------------------------------
+ Result
+ Supplied Plan Advice:
+ SEQ_SCAN(x/y) /* not matched */
+(3 rows)
+
+SET pg_plan_advice.advice = ' SEQ_SCAN ( x / y . z ) ';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+-------------------------------------
+ Result
+ Supplied Plan Advice:
+ SEQ_SCAN(x/y.z) /* not matched */
+(3 rows)
+
+SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+-----------------------------------------
+ Result
+ Supplied Plan Advice:
+ SEQ_SCAN(x#2/y.z@t) /* not matched */
+(3 rows)
+
+-- Syntax errors.
+SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQUENTIAL_SCAN(x)"
+DETAIL: Could not parse advice: syntax error at or near "SEQUENTIAL_SCAN"
+SET pg_plan_advice.advice = 'SEQ_SCAN';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN"
+DETAIL: Could not parse advice: syntax error at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN(';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("
+DETAIL: Could not parse advice: syntax error at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN("';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(""
+DETAIL: Could not parse advice: unterminated quoted identifier at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN("")';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("")"
+DETAIL: Could not parse advice: zero-length delimited identifier at or near """
+SET pg_plan_advice.advice = 'SEQ_SCAN("a"';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN("a""
+DETAIL: Could not parse advice: syntax error at end of input
+SET pg_plan_advice.advice = 'SEQ_SCAN(#';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN(#"
+DETAIL: Could not parse advice: syntax error at or near "#"
+SET pg_plan_advice.advice = '()';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "()"
+DETAIL: Could not parse advice: syntax error at or near "("
+SET pg_plan_advice.advice = '123';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "123"
+DETAIL: Could not parse advice: syntax error at or near "123"
+-- Tags like SEQ_SCAN and NO_GATHER don't allow sublists at all; other tags,
+-- except for JOIN_ORDER, allow at most one level of sublist. Hence, these
+-- examples should error out.
+SET pg_plan_advice.advice = 'SEQ_SCAN((x))';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "SEQ_SCAN((x))"
+DETAIL: Could not parse advice: syntax error at or near "("
+SET pg_plan_advice.advice = 'GATHER(((x)))';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "GATHER(((x)))"
+DETAIL: Could not parse advice: syntax error at or near "("
+-- Legal comments.
+SET pg_plan_advice.advice = '/**/';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+------------
+ Result
+(1 row)
+
+SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+----------------------------------
+ Result
+ Supplied Plan Advice:
+ HASH_JOIN(_) /* not matched */
+(3 rows)
+
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+----------------------------------
+ Result
+ Supplied Plan Advice:
+ HASH_JOIN(y) /* not matched */
+(3 rows)
+
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+------------------------------------
+ Result
+ Supplied Plan Advice:
+ HASH_JOIN(y/z) /* not matched */
+(3 rows)
+
+-- Unterminated comments.
+SET pg_plan_advice.advice = '/*';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "/*"
+DETAIL: Could not parse advice: unterminated comment at end of input
+SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "JOIN_ORDER("fOO") /* oops"
+DETAIL: Could not parse advice: unterminated comment at end of input
+-- Nested comments are not supported, so the first of these is legal and
+-- the second is not.
+SET pg_plan_advice.advice = '/*/*/';
+EXPLAIN (COSTS OFF) SELECT 1;
+ QUERY PLAN
+------------
+ Result
+(1 row)
+
+SET pg_plan_advice.advice = '/*/* stuff */*/';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "/*/* stuff */*/"
+DETAIL: Could not parse advice: syntax error at or near "*"
+-- Foreign join requires multiple relation identifiers.
+SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN(a)"
+DETAIL: Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")"
+SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))';
+ERROR: invalid value for parameter "pg_plan_advice.advice": "FOREIGN_JOIN((a))"
+DETAIL: Could not parse advice: FOREIGN_JOIN targets must contain more than one relation identifier at or near ")"
diff --git a/contrib/pg_plan_advice/meson.build b/contrib/pg_plan_advice/meson.build
new file mode 100644
index 00000000000..f7229dddcef
--- /dev/null
+++ b/contrib/pg_plan_advice/meson.build
@@ -0,0 +1,79 @@
+# Copyright (c) 2022-2024, PostgreSQL Global Development Group
+
+pg_plan_advice_sources = files(
+ 'pg_plan_advice.c',
+ 'pgpa_ast.c',
+ 'pgpa_collector.c',
+ 'pgpa_identifier.c',
+ 'pgpa_join.c',
+ 'pgpa_output.c',
+ 'pgpa_planner.c',
+ 'pgpa_scan.c',
+ 'pgpa_trove.c',
+ 'pgpa_walker.c',
+)
+
+pgpa_scanner = custom_target('pgpa_scanner',
+ input: 'pgpa_scanner.l',
+ output: 'pgpa_scanner.c',
+ command: flex_cmd,
+)
+generated_sources += pgpa_scanner
+pg_plan_advice_sources += pgpa_scanner
+
+pgpa_parser = custom_target('pgpa_parser',
+ input: 'pgpa_parser.y',
+ kwargs: bison_kw,
+)
+generated_sources += pgpa_parser.to_list()
+pg_plan_advice_sources += pgpa_parser
+
+if host_system == 'windows'
+ pg_plan_advice_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_plan_advice',
+ '--FILEDESC', 'pg_plan_advice - help the planner get the right plan',])
+endif
+
+pg_plan_advice_inc = include_directories('.')
+
+pg_plan_advice = shared_module('pg_plan_advice',
+ pg_plan_advice_sources,
+ include_directories: pg_plan_advice_inc,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += pg_plan_advice
+
+install_data(
+ 'pg_plan_advice--1.0.sql',
+ 'pg_plan_advice.control',
+ kwargs: contrib_data_args,
+)
+
+install_headers(
+ 'pg_plan_advice.h',
+ install_dir: dir_include_extension / 'pg_plan_advice',
+)
+
+tests += {
+ 'name': 'pg_plan_advice',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'gather',
+ 'join_order',
+ 'join_strategy',
+ 'local_collector',
+ 'partitionwise',
+ 'prepared',
+ 'scan',
+ 'semijoin',
+ 'syntax',
+ ],
+ },
+ 'tap': {
+ 'tests': [
+ 't/001_regress.pl',
+ ],
+ },
+}
diff --git a/contrib/pg_plan_advice/pg_plan_advice--1.0.sql b/contrib/pg_plan_advice/pg_plan_advice--1.0.sql
new file mode 100644
index 00000000000..450c42040fd
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice--1.0.sql
@@ -0,0 +1,43 @@
+/* contrib/pg_plan_advice/pg_plan_advice--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_plan_advice" to load this file. \quit
+
+CREATE FUNCTION pg_clear_collected_local_advice()
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_clear_collected_local_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_clear_collected_shared_advice()
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_clear_collected_shared_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_get_collected_local_advice(
+ OUT id bigint,
+ OUT userid oid,
+ OUT dbid oid,
+ OUT queryid bigint,
+ OUT collection_time timestamptz,
+ OUT query text,
+ OUT advice text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_collected_local_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_get_collected_shared_advice(
+ OUT id bigint,
+ OUT userid oid,
+ OUT dbid oid,
+ OUT queryid bigint,
+ OUT collection_time timestamptz,
+ OUT query text,
+ OUT advice text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_collected_shared_advice'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION pg_clear_collected_shared_advice() FROM PUBLIC;
+REVOKE ALL ON FUNCTION pg_get_collected_shared_advice() FROM PUBLIC;
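+
+-- A usage sketch (comments only; the session below is hypothetical): with
+-- the local collector enabled, collected advice can be inspected and reset:
+--
+--   SET pg_plan_advice.local_collector = on;
+--   SELECT query, advice FROM pg_get_collected_local_advice();
+--   SELECT pg_clear_collected_local_advice();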
diff --git a/contrib/pg_plan_advice/pg_plan_advice.c b/contrib/pg_plan_advice/pg_plan_advice.c
new file mode 100644
index 00000000000..99b97843991
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice.c
@@ -0,0 +1,563 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_plan_advice.c
+ * main entrypoints for generating and applying planner advice
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pg_plan_advice.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_ast.h"
+#include "pgpa_collector.h"
+#include "pgpa_identifier.h"
+#include "pgpa_output.h"
+#include "pgpa_planner.h"
+#include "pgpa_trove.h"
+#include "pgpa_walker.h"
+
+#include "commands/defrem.h"
+#include "commands/explain.h"
+#include "commands/explain_format.h"
+#include "commands/explain_state.h"
+#include "funcapi.h"
+#include "optimizer/planner.h"
+#include "storage/dsm_registry.h"
+#include "utils/guc.h"
+
+PG_MODULE_MAGIC;
+
+static pgpa_shared_state *pgpa_state = NULL;
+static dsa_area *pgpa_dsa_area = NULL;
+static List *advisor_hook_list = NIL;
+
+/* GUC variables */
+char *pg_plan_advice_advice = NULL;
+bool pg_plan_advice_always_store_advice_details = false;
+static bool pg_plan_advice_always_explain_supplied_advice = true;
+bool pg_plan_advice_feedback_warnings = false;
+bool pg_plan_advice_local_collector = false;
+int pg_plan_advice_local_collection_limit = 0;
+bool pg_plan_advice_shared_collector = false;
+int pg_plan_advice_shared_collection_limit = 0;
+bool pg_plan_advice_trace_mask = false;
+
+/* Saved hook value */
+static explain_per_plan_hook_type prev_explain_per_plan = NULL;
+
+/* Other file-level globals */
+static int es_extension_id;
+static MemoryContext pgpa_memory_context = NULL;
+
+static void pgpa_init_shared_state(void *ptr, void *arg);
+static void pg_plan_advice_explain_option_handler(ExplainState *es,
+ DefElem *opt,
+ ParseState *pstate);
+static void pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt,
+ IntoClause *into,
+ ExplainState *es,
+ const char *queryString,
+ ParamListInfo params,
+ QueryEnvironment *queryEnv);
+static bool pg_plan_advice_advice_check_hook(char **newval, void **extra,
+ GucSource source);
+static DefElem *find_defelem_by_defname(List *deflist, char *defname);
+
+/*
+ * Initialize this module.
+ */
+void
+_PG_init(void)
+{
+ DefineCustomStringVariable("pg_plan_advice.advice",
+							   "Advice to apply during query planning.",
+ NULL,
+ &pg_plan_advice_advice,
+ NULL,
+ PGC_USERSET,
+ 0,
+ pg_plan_advice_advice_check_hook,
+ NULL,
+ NULL);
+
+ DefineCustomBoolVariable("pg_plan_advice.always_explain_supplied_advice",
+							 "EXPLAIN output includes supplied advice even without EXPLAIN (PLAN_ADVICE).",
+ NULL,
+ &pg_plan_advice_always_explain_supplied_advice,
+ true,
+ PGC_USERSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomBoolVariable("pg_plan_advice.always_store_advice_details",
+							   "Generate advice strings even when seemingly not required.",
+ "Use this option to see generated advice for prepared queries.",
+ &pg_plan_advice_always_store_advice_details,
+ false,
+ PGC_USERSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomBoolVariable("pg_plan_advice.feedback_warnings",
+							   "Warn when supplied advice does not apply cleanly.",
+ NULL,
+ &pg_plan_advice_feedback_warnings,
+ false,
+ PGC_USERSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomBoolVariable("pg_plan_advice.local_collector",
+ "Enable the local advice collector.",
+ NULL,
+ &pg_plan_advice_local_collector,
+ false,
+ PGC_USERSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomIntVariable("pg_plan_advice.local_collection_limit",
+							"Number of advice entries to retain in per-backend memory.",
+ NULL,
+ &pg_plan_advice_local_collection_limit,
+ 0,
+ 0, INT_MAX,
+ PGC_USERSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomBoolVariable("pg_plan_advice.shared_collector",
+ "Enable the shared advice collector.",
+ NULL,
+ &pg_plan_advice_shared_collector,
+ false,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomIntVariable("pg_plan_advice.shared_collection_limit",
+							"Number of advice entries to retain in shared memory.",
+ NULL,
+ &pg_plan_advice_shared_collection_limit,
+ 0,
+ 0, INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomBoolVariable("pg_plan_advice.trace_mask",
+							   "Emit debugging messages showing the computed strategy mask for each relation.",
+ NULL,
+ &pg_plan_advice_trace_mask,
+ false,
+ PGC_USERSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ MarkGUCPrefixReserved("pg_plan_advice");
+
+ /* Get an ID that we can use to cache data in an ExplainState. */
+ es_extension_id = GetExplainExtensionId("pg_plan_advice");
+
+ /* Register the new EXPLAIN options implemented by this module. */
+ RegisterExtensionExplainOption("plan_advice",
+ pg_plan_advice_explain_option_handler);
+
+ /* Install hooks */
+ pgpa_planner_install_hooks();
+ prev_explain_per_plan = explain_per_plan_hook;
+ explain_per_plan_hook = pg_plan_advice_explain_per_plan_hook;
+}
+
+/*
+ * Initialize shared state when first created.
+ */
+static void
+pgpa_init_shared_state(void *ptr, void *arg)
+{
+ pgpa_shared_state *state = (pgpa_shared_state *) ptr;
+
+ LWLockInitialize(&state->lock, LWLockNewTrancheId("pg_plan_advice_lock"));
+ state->dsa_tranche = LWLockNewTrancheId("pg_plan_advice_dsa");
+ state->area = DSA_HANDLE_INVALID;
+ state->shared_collector = InvalidDsaPointer;
+}
+
+/*
+ * Return a pointer to a memory context where long-lived data managed by this
+ * module can be stored.
+ */
+MemoryContext
+pg_plan_advice_get_mcxt(void)
+{
+ if (pgpa_memory_context == NULL)
+ pgpa_memory_context = AllocSetContextCreate(TopMemoryContext,
+ "pg_plan_advice",
+ ALLOCSET_DEFAULT_SIZES);
+
+ return pgpa_memory_context;
+}
+
+/*
+ * Get a pointer to our shared state.
+ *
+ * If no shared state exists, create and initialize it. If it does exist but
+ * this backend has not yet accessed it, attach to it. Otherwise, just return
+ * our cached pointer.
+ *
+ * Along the way, make sure the relevant LWLock tranches are registered.
+ */
+pgpa_shared_state *
+pg_plan_advice_attach(void)
+{
+ if (pgpa_state == NULL)
+ {
+ bool found;
+
+ pgpa_state =
+ GetNamedDSMSegment("pg_plan_advice", sizeof(pgpa_shared_state),
+ pgpa_init_shared_state, &found, NULL);
+ }
+
+ return pgpa_state;
+}
+
+/*
+ * Return a pointer to pg_plan_advice's DSA area, creating it if needed.
+ */
+dsa_area *
+pg_plan_advice_dsa_area(void)
+{
+ if (pgpa_dsa_area == NULL)
+ {
+ pgpa_shared_state *state = pg_plan_advice_attach();
+ dsa_handle area_handle;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+
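+		/*
+		 * While holding the lock, either create the DSA area and publish its
+		 * handle (if we are the first to need it), or else grab the existing
+		 * handle so that we can attach after releasing the lock.
+		 */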
+ LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+ area_handle = state->area;
+ if (area_handle == DSA_HANDLE_INVALID)
+ {
+ pgpa_dsa_area = dsa_create(state->dsa_tranche);
+ dsa_pin(pgpa_dsa_area);
+ state->area = dsa_get_handle(pgpa_dsa_area);
+ LWLockRelease(&state->lock);
+ }
+ else
+ {
+ LWLockRelease(&state->lock);
+ pgpa_dsa_area = dsa_attach(area_handle);
+ }
+
+ dsa_pin_mapping(pgpa_dsa_area);
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ return pgpa_dsa_area;
+}
+
+/*
+ * Was the PLAN_ADVICE option specified and not set to false?
+ */
+bool
+pg_plan_advice_should_explain(ExplainState *es)
+{
+ bool *plan_advice = NULL;
+
+ if (es != NULL)
+ plan_advice = GetExplainExtensionState(es, es_extension_id);
+ return plan_advice != NULL && *plan_advice;
+}
+
+/*
+ * Get the advice that should be used while planning a particular query.
+ */
+char *
+pg_plan_advice_get_supplied_query_advice(PlannerGlobal *glob,
+ Query *parse,
+ const char *query_string,
+ int cursorOptions,
+ ExplainState *es)
+{
+ ListCell *lc;
+
+ /*
+ * If any advisors are loaded, consult them. The first one that produces a
+ * non-NULL string wins.
+ */
+ foreach(lc, advisor_hook_list)
+ {
+ pg_plan_advice_advisor_hook hook = lfirst(lc);
+ char *advice_string;
+
+ advice_string = (*hook) (glob, parse, query_string, cursorOptions, es);
+ if (advice_string != NULL)
+ return advice_string;
+ }
+
+ /* Otherwise, just use the value of the GUC. */
+ return pg_plan_advice_advice;
+}
+
+/*
+ * Add an advisor, which can supply advice strings to be used during future
+ * query planning operations.
+ *
+ * The advisor should return NULL if it has no advice string to offer for a
+ * given query. If multiple advisors are added, they will be consulted in the
+ * order added until one of them returns a non-NULL value.
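+ *
+ * A minimal advisor might look like this (a sketch; "my_advisor" and the
+ * advice string it returns are hypothetical, not part of this module):
+ *
+ *		static char *
+ *		my_advisor(PlannerGlobal *glob, Query *parse,
+ *				   const char *query_string, int cursorOptions,
+ *				   ExplainState *es)
+ *		{
+ *			return pstrdup("SEQ_SCAN(t)");
+ *		}
+ *
+ * Such a hook would typically be registered in the advising module's
+ * _PG_init() by calling pg_plan_advice_add_advisor(my_advisor).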
+ */
+void
+pg_plan_advice_add_advisor(pg_plan_advice_advisor_hook hook)
+{
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+ advisor_hook_list = lappend(advisor_hook_list, hook);
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Remove an advisor.
+ */
+void
+pg_plan_advice_remove_advisor(pg_plan_advice_advisor_hook hook)
+{
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+ advisor_hook_list = list_delete_ptr(advisor_hook_list, hook);
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Handler for EXPLAIN (PLAN_ADVICE).
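+ *
+ * For example, EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT 1 routes the
+ * PLAN_ADVICE option here; we stash its boolean value in the ExplainState
+ * so that later hooks can retrieve it.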
+ */
+static void
+pg_plan_advice_explain_option_handler(ExplainState *es, DefElem *opt,
+ ParseState *pstate)
+{
+ bool *plan_advice;
+
+ plan_advice = GetExplainExtensionState(es, es_extension_id);
+
+ if (plan_advice == NULL)
+ {
+ plan_advice = palloc0_object(bool);
+ SetExplainExtensionState(es, es_extension_id, plan_advice);
+ }
+
+ *plan_advice = defGetBoolean(opt);
+}
+
+/*
+ * Display a string that is likely to consist of multiple lines in EXPLAIN
+ * output.
+ */
+static void
+pg_plan_advice_explain_text_multiline(ExplainState *es, char *qlabel,
+ char *value)
+{
+ char *s;
+
+ /* For non-text formats, it's best not to add any special handling. */
+ if (es->format != EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainPropertyText(qlabel, value, es);
+ return;
+ }
+
+ /* In text format, if there is no data, display nothing. */
+ if (*value == '\0')
+ return;
+
+ /*
+ * It looks nicest to indent each line of the advice separately, beginning
+ * on the line below the label.
+ */
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "%s:\n", qlabel);
+ es->indent++;
+ while ((s = strchr(value, '\n')) != NULL)
+ {
+ ExplainIndentText(es);
+ appendBinaryStringInfo(es->str, value, (s - value) + 1);
+ value = s + 1;
+ }
+
+ /* Don't interpret a terminal newline as a request for an empty line. */
+ if (*value != '\0')
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "%s\n", value);
+ }
+
+ es->indent--;
+}
+
+/*
+ * Add advice feedback to the EXPLAIN output.
+ */
+static void
+pg_plan_advice_explain_feedback(ExplainState *es, List *feedback)
+{
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+ foreach_node(DefElem, item, feedback)
+ {
+ int flags = defGetInt32(item);
+
+ appendStringInfo(&buf, "%s /* ", item->defname);
+ pgpa_trove_append_flags(&buf, flags);
+ appendStringInfo(&buf, " */\n");
+ }
+
+ pg_plan_advice_explain_text_multiline(es, "Supplied Plan Advice",
+ buf.data);
+}
+
+/*
+ * Add relevant details, if any, to the EXPLAIN output for a single plan.
+ */
+static void
+pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt,
+ IntoClause *into,
+ ExplainState *es,
+ const char *queryString,
+ ParamListInfo params,
+ QueryEnvironment *queryEnv)
+{
+ bool should_explain;
+ DefElem *pgpa_item;
+ List *pgpa_list;
+
+ if (prev_explain_per_plan)
+ prev_explain_per_plan(plannedstmt, into, es, queryString, params,
+ queryEnv);
+
+ /* Should an advice string be part of the EXPLAIN output? */
+ should_explain = pg_plan_advice_should_explain(es);
+
+ /* Find any data pgpa_planner_shutdown stashed in the PlannedStmt. */
+ pgpa_item = find_defelem_by_defname(plannedstmt->extension_state,
+ "pg_plan_advice");
+ pgpa_list = pgpa_item == NULL ? NULL : (List *) pgpa_item->arg;
+
+ /*
+ * By default, if there is a record of attempting to apply advice during
+ * query planning, we always output that information, but the user can set
+ * pg_plan_advice.always_explain_supplied_advice = false to suppress that
+ * behavior. If they do, we'll only display it when the PLAN_ADVICE option
+ * was specified and not set to false.
+ *
+ * NB: If we're explaining a query planned beforehand -- i.e. a prepared
+ * statement -- the application of query advice may not have been
+ * recorded, and therefore this won't be able to show anything. Use
+ * pg_plan_advice.always_store_advice_details = true to work around this.
+ */
+ if (pgpa_list != NULL && (pg_plan_advice_always_explain_supplied_advice ||
+ should_explain))
+ {
+ DefElem *feedback;
+
+ feedback = find_defelem_by_defname(pgpa_list, "feedback");
+ if (feedback != NULL)
+ pg_plan_advice_explain_feedback(es, (List *) feedback->arg);
+ }
+
+ /*
+	 * If the PLAN_ADVICE option was specified -- and not set to false --
+ * show generated advice.
+ */
+ if (should_explain)
+ {
+ DefElem *advice_string_item;
+ char *advice_string = NULL;
+
+ advice_string_item =
+ find_defelem_by_defname(pgpa_list, "advice_string");
+ if (advice_string_item != NULL)
+ {
+ advice_string = strVal(advice_string_item->arg);
+ pg_plan_advice_explain_text_multiline(es, "Generated Plan Advice",
+ advice_string);
+ }
+ }
+}
+
+/*
+ * Check hook for pg_plan_advice.advice
+ */
+static bool
+pg_plan_advice_advice_check_hook(char **newval, void **extra, GucSource source)
+{
+ MemoryContext oldcontext;
+ MemoryContext tmpcontext;
+ char *error;
+
+ if (*newval == NULL)
+ return true;
+
+ tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
+ "pg_plan_advice.advice",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(tmpcontext);
+
+ /*
+ * It would be nice to save the parse tree that we construct here for
+ * eventual use when planning with this advice, but *extra can only point
+ * to a single guc_malloc'd chunk, and our parse tree involves an
+ * arbitrary number of memory allocations.
+ */
+ (void) pgpa_parse(*newval, &error);
+
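+	/*
+	 * On failure, we simply return; tmpcontext, which is still the current
+	 * memory context at this point, is a child of the caller's context and
+	 * will be cleaned up along with it.
+	 */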
+ if (error != NULL)
+ {
+ GUC_check_errdetail("Could not parse advice: %s", error);
+ return false;
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(tmpcontext);
+
+ return true;
+}
+
+/*
+ * Search a list of DefElem objects for a given defname.
+ */
+static DefElem *
+find_defelem_by_defname(List *deflist, char *defname)
+{
+ foreach_node(DefElem, item, deflist)
+ {
+ if (strcmp(item->defname, defname) == 0)
+ return item;
+ }
+
+ return NULL;
+}
diff --git a/contrib/pg_plan_advice/pg_plan_advice.control b/contrib/pg_plan_advice/pg_plan_advice.control
new file mode 100644
index 00000000000..aa6fdc9e7b2
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice.control
@@ -0,0 +1,5 @@
+# pg_plan_advice extension
+comment = 'help the planner get the right plan'
+default_version = '1.0'
+module_pathname = '$libdir/pg_plan_advice'
+relocatable = true
diff --git a/contrib/pg_plan_advice/pg_plan_advice.h b/contrib/pg_plan_advice/pg_plan_advice.h
new file mode 100644
index 00000000000..21f66092fa2
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice.h
@@ -0,0 +1,61 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_plan_advice.h
+ * main header file for pg_plan_advice contrib module
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pg_plan_advice.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PLAN_ADVICE_H
+#define PG_PLAN_ADVICE_H
+
+#include "commands/explain_state.h"
+#include "nodes/pathnodes.h"
+#include "nodes/plannodes.h"
+#include "storage/lwlock.h"
+#include "utils/dsa.h"
+
+typedef struct pgpa_shared_state
+{
+ LWLock lock;
+ int dsa_tranche;
+ dsa_handle area;
+ dsa_pointer shared_collector;
+} pgpa_shared_state;
+
+/* Hook for other plugins to supply advice strings */
+typedef char *(*pg_plan_advice_advisor_hook) (PlannerGlobal *glob,
+ Query *parse,
+ const char *query_string,
+ int cursorOptions,
+ ExplainState *es);
+
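+/*
+ * A minimal sketch of how another extension might register an advisor from
+ * its _PG_init(); "my_advisor" is hypothetical, and returning NULL is
+ * assumed to mean "no advice to offer":
+ *
+ *     static char *
+ *     my_advisor(PlannerGlobal *glob, Query *parse,
+ *                const char *query_string, int cursorOptions,
+ *                ExplainState *es)
+ *     {
+ *         return NULL;
+ *     }
+ *
+ *     void
+ *     _PG_init(void)
+ *     {
+ *         pg_plan_advice_add_advisor(my_advisor);
+ *     }
+ */
+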
+/* GUC variables */
+extern char *pg_plan_advice_advice;
+extern bool pg_plan_advice_always_store_advice_details;
+extern bool pg_plan_advice_feedback_warnings;
+extern bool pg_plan_advice_local_collector;
+extern int pg_plan_advice_local_collection_limit;
+extern bool pg_plan_advice_shared_collector;
+extern int pg_plan_advice_shared_collection_limit;
+extern bool pg_plan_advice_trace_mask;
+
+/* Function prototypes (for use by pg_plan_advice itself) */
+extern MemoryContext pg_plan_advice_get_mcxt(void);
+extern pgpa_shared_state *pg_plan_advice_attach(void);
+extern dsa_area *pg_plan_advice_dsa_area(void);
+extern bool pg_plan_advice_should_explain(ExplainState *es);
+extern char *pg_plan_advice_get_supplied_query_advice(PlannerGlobal *glob,
+ Query *parse,
+ const char *query_string,
+ int cursorOptions,
+ ExplainState *es);
+
+/* Function prototypes (for use by other plugins) */
+extern PGDLLEXPORT void pg_plan_advice_add_advisor(pg_plan_advice_advisor_hook hook);
+extern PGDLLEXPORT void pg_plan_advice_remove_advisor(pg_plan_advice_advisor_hook hook);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_ast.c b/contrib/pg_plan_advice/pgpa_ast.c
new file mode 100644
index 00000000000..85bd74859df
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_ast.c
@@ -0,0 +1,351 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_ast.c
+ * additional supporting code related to plan advice parsing
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_ast.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_ast.h"
+
+#include "funcapi.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+
+static bool pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids,
+ pgpa_advice_target *target,
+ bool *rids_used);
+
+/*
+ * Get a C string that corresponds to the specified advice tag.
+ */
+char *
+pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag)
+{
+ switch (advice_tag)
+ {
+ case PGPA_TAG_BITMAP_HEAP_SCAN:
+ return "BITMAP_HEAP_SCAN";
+ case PGPA_TAG_FOREIGN_JOIN:
+ return "FOREIGN_JOIN";
+ case PGPA_TAG_GATHER:
+ return "GATHER";
+ case PGPA_TAG_GATHER_MERGE:
+ return "GATHER_MERGE";
+ case PGPA_TAG_HASH_JOIN:
+ return "HASH_JOIN";
+ case PGPA_TAG_INDEX_ONLY_SCAN:
+ return "INDEX_ONLY_SCAN";
+ case PGPA_TAG_INDEX_SCAN:
+ return "INDEX_SCAN";
+ case PGPA_TAG_JOIN_ORDER:
+ return "JOIN_ORDER";
+ case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+ return "MERGE_JOIN_MATERIALIZE";
+ case PGPA_TAG_MERGE_JOIN_PLAIN:
+ return "MERGE_JOIN_PLAIN";
+ case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+ return "NESTED_LOOP_MATERIALIZE";
+ case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+ return "NESTED_LOOP_MEMOIZE";
+ case PGPA_TAG_NESTED_LOOP_PLAIN:
+ return "NESTED_LOOP_PLAIN";
+ case PGPA_TAG_NO_GATHER:
+ return "NO_GATHER";
+ case PGPA_TAG_PARTITIONWISE:
+ return "PARTITIONWISE";
+ case PGPA_TAG_SEMIJOIN_NON_UNIQUE:
+ return "SEMIJOIN_NON_UNIQUE";
+ case PGPA_TAG_SEMIJOIN_UNIQUE:
+ return "SEMIJOIN_UNIQUE";
+ case PGPA_TAG_SEQ_SCAN:
+ return "SEQ_SCAN";
+ case PGPA_TAG_TID_SCAN:
+ return "TID_SCAN";
+ }
+
+ pg_unreachable();
+ return NULL;
+}
+
+/*
+ * Convert an advice tag, formatted as a string that has already been
+ * downcased as appropriate, to a pgpa_advice_tag_type.
+ *
+ * If we succeed, set *fail = false and return the result; if we fail,
+ * set *fail = true and return an arbitrary value.
+ */
+pgpa_advice_tag_type
+pgpa_parse_advice_tag(const char *tag, bool *fail)
+{
+ *fail = false;
+
+ switch (tag[0])
+ {
+ case 'b':
+ if (strcmp(tag, "bitmap_heap_scan") == 0)
+ return PGPA_TAG_BITMAP_HEAP_SCAN;
+ break;
+ case 'f':
+ if (strcmp(tag, "foreign_join") == 0)
+ return PGPA_TAG_FOREIGN_JOIN;
+ break;
+ case 'g':
+ if (strcmp(tag, "gather") == 0)
+ return PGPA_TAG_GATHER;
+ if (strcmp(tag, "gather_merge") == 0)
+ return PGPA_TAG_GATHER_MERGE;
+ break;
+ case 'h':
+ if (strcmp(tag, "hash_join") == 0)
+ return PGPA_TAG_HASH_JOIN;
+ break;
+ case 'i':
+ if (strcmp(tag, "index_scan") == 0)
+ return PGPA_TAG_INDEX_SCAN;
+ if (strcmp(tag, "index_only_scan") == 0)
+ return PGPA_TAG_INDEX_ONLY_SCAN;
+ break;
+ case 'j':
+ if (strcmp(tag, "join_order") == 0)
+ return PGPA_TAG_JOIN_ORDER;
+ break;
+ case 'm':
+ if (strcmp(tag, "merge_join_materialize") == 0)
+ return PGPA_TAG_MERGE_JOIN_MATERIALIZE;
+ if (strcmp(tag, "merge_join_plain") == 0)
+ return PGPA_TAG_MERGE_JOIN_PLAIN;
+ break;
+ case 'n':
+ if (strcmp(tag, "nested_loop_materialize") == 0)
+ return PGPA_TAG_NESTED_LOOP_MATERIALIZE;
+ if (strcmp(tag, "nested_loop_memoize") == 0)
+ return PGPA_TAG_NESTED_LOOP_MEMOIZE;
+ if (strcmp(tag, "nested_loop_plain") == 0)
+ return PGPA_TAG_NESTED_LOOP_PLAIN;
+ if (strcmp(tag, "no_gather") == 0)
+ return PGPA_TAG_NO_GATHER;
+ break;
+ case 'p':
+ if (strcmp(tag, "partitionwise") == 0)
+ return PGPA_TAG_PARTITIONWISE;
+ break;
+ case 's':
+ if (strcmp(tag, "semijoin_non_unique") == 0)
+ return PGPA_TAG_SEMIJOIN_NON_UNIQUE;
+ if (strcmp(tag, "semijoin_unique") == 0)
+ return PGPA_TAG_SEMIJOIN_UNIQUE;
+ if (strcmp(tag, "seq_scan") == 0)
+ return PGPA_TAG_SEQ_SCAN;
+ break;
+ case 't':
+ if (strcmp(tag, "tid_scan") == 0)
+ return PGPA_TAG_TID_SCAN;
+ break;
+ }
+
+ /* didn't work out */
+ *fail = true;
+
+ /* return an arbitrary value to unwind the call stack */
+ return PGPA_TAG_SEQ_SCAN;
+}
+
+/*
+ * Format a pgpa_advice_target as a string and append the result to a
+ * StringInfo. For example, an unordered list of two identifiers is emitted
+ * as {foo bar}, and an ordered list as (foo bar).
+ */
+void
+pgpa_format_advice_target(StringInfo str, pgpa_advice_target *target)
+{
+ if (target->ttype != PGPA_TARGET_IDENTIFIER)
+ {
+ bool first = true;
+ char *delims;
+
+ if (target->ttype == PGPA_TARGET_UNORDERED_LIST)
+ delims = "{}";
+ else
+ delims = "()";
+
+ appendStringInfoChar(str, delims[0]);
+ foreach_ptr(pgpa_advice_target, child_target, target->children)
+ {
+ if (first)
+ first = false;
+ else
+ appendStringInfoChar(str, ' ');
+ pgpa_format_advice_target(str, child_target);
+ }
+ appendStringInfoChar(str, delims[1]);
+ }
+ else
+ {
+ const char *rt_identifier;
+
+ rt_identifier = pgpa_identifier_string(&target->rid);
+ appendStringInfoString(str, rt_identifier);
+ }
+}
+
+/*
+ * Format a pgpa_index_target as a string and append the result to a StringInfo.
+ */
+void
+pgpa_format_index_target(StringInfo str, pgpa_index_target *itarget)
+{
+ if (itarget->indnamespace != NULL)
+ appendStringInfo(str, "%s.",
+ quote_identifier(itarget->indnamespace));
+ appendStringInfoString(str, quote_identifier(itarget->indname));
+}
+
+/*
+ * Determine whether two pgpa_index_target objects are exactly identical.
+ */
+bool
+pgpa_index_targets_equal(pgpa_index_target *i1, pgpa_index_target *i2)
+{
+ /* indnamespace can be NULL, and two NULL values are equal */
+ if (!strings_equal_or_both_null(i1->indnamespace, i2->indnamespace))
+ return false;
+ if (strcmp(i1->indname, i2->indname) != 0)
+ return false;
+
+ return true;
+}
+
+/*
+ * Check whether an identifier matches any part of an advice target.
+ */
+bool
+pgpa_identifier_matches_target(pgpa_identifier *rid, pgpa_advice_target *target)
+{
+ /* For non-identifiers, check all descendants. */
+ if (target->ttype != PGPA_TARGET_IDENTIFIER)
+ {
+ foreach_ptr(pgpa_advice_target, child_target, target->children)
+ {
+ if (pgpa_identifier_matches_target(rid, child_target))
+ return true;
+ }
+ return false;
+ }
+
+ /* Straightforward comparisons of alias name and occurrence number. */
+ if (strcmp(rid->alias_name, target->rid.alias_name) != 0)
+ return false;
+ if (rid->occurrence != target->rid.occurrence)
+ return false;
+
+ /*
+ * If a relation identifier mentions a partition name, it should also
+ * specify a partition schema. But the target may leave the schema NULL to
+ * match anything.
+ */
+ Assert(rid->partnsp != NULL || rid->partrel == NULL);
+ if (rid->partnsp != NULL && target->rid.partnsp != NULL &&
+ strcmp(rid->partnsp, target->rid.partnsp) != 0)
+ return false;
+
+ /*
+ * These fields can be NULL on either side, but NULL only matches another
+ * NULL.
+ */
+ if (!strings_equal_or_both_null(rid->partrel, target->rid.partrel))
+ return false;
+ if (!strings_equal_or_both_null(rid->plan_name, target->rid.plan_name))
+ return false;
+
+ return true;
+}
+
+/*
+ * Match identifiers to advice targets and return an enum value indicating
+ * the relationship between the set of keys and the set of targets.
+ *
+ * See the comments for pgpa_itm_type.
+ */
+pgpa_itm_type
+pgpa_identifiers_match_target(int nrids, pgpa_identifier *rids,
+ pgpa_advice_target *target)
+{
+ bool all_rids_used = true;
+ bool any_rids_used = false;
+ bool all_targets_used;
+ bool *rids_used = palloc0_array(bool, nrids);
+
+ all_targets_used =
+ pgpa_identifiers_cover_target(nrids, rids, target, rids_used);
+
+ for (int i = 0; i < nrids; ++i)
+ {
+ if (rids_used[i])
+ any_rids_used = true;
+ else
+ all_rids_used = false;
+ }
+
+ if (all_rids_used)
+ {
+ if (all_targets_used)
+ return PGPA_ITM_EQUAL;
+ else
+ return PGPA_ITM_KEYS_ARE_SUBSET;
+ }
+ else
+ {
+ if (all_targets_used)
+ return PGPA_ITM_TARGETS_ARE_SUBSET;
+ else if (any_rids_used)
+ return PGPA_ITM_INTERSECTING;
+ else
+ return PGPA_ITM_DISJOINT;
+ }
+}
+
+/*
+ * Returns true if every target or sub-target is matched by at least one
+ * identifier, and otherwise false.
+ *
+ * Also sets rids_used[i] = true for each identifier that matches at least one
+ * target.
+ */
+static bool
+pgpa_identifiers_cover_target(int nrids, pgpa_identifier *rids,
+ pgpa_advice_target *target, bool *rids_used)
+{
+ bool result = false;
+
+ if (target->ttype != PGPA_TARGET_IDENTIFIER)
+ {
+ result = true;
+
+ foreach_ptr(pgpa_advice_target, child_target, target->children)
+ {
+ if (!pgpa_identifiers_cover_target(nrids, rids, child_target,
+ rids_used))
+ result = false;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < nrids; ++i)
+ {
+ if (pgpa_identifier_matches_target(&rids[i], target))
+ {
+ rids_used[i] = true;
+ result = true;
+ }
+ }
+ }
+
+ return result;
+}
diff --git a/contrib/pg_plan_advice/pgpa_ast.h b/contrib/pg_plan_advice/pgpa_ast.h
new file mode 100644
index 00000000000..5d3f8d58a71
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_ast.h
@@ -0,0 +1,185 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_ast.h
+ * abstract syntax trees for plan advice, plus parser/scanner support
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_ast.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_AST_H
+#define PGPA_AST_H
+
+#include "pgpa_identifier.h"
+
+#include "nodes/pg_list.h"
+
+/*
+ * Advice items generally take the form SOME_TAG(item [...]), where an item
+ * can take various forms. The simplest case is a relation identifier, but
+ * some tags allow sublists, and JOIN_ORDER() allows both ordered and unordered
+ * sublists.
+ */
+typedef enum
+{
+ PGPA_TARGET_IDENTIFIER, /* relation identifier */
+ PGPA_TARGET_ORDERED_LIST, /* (item ...) */
+ PGPA_TARGET_UNORDERED_LIST /* {item ...} */
+} pgpa_target_type;
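+
+/*
+ * Purely illustrative examples of the surface syntax (the exact grammar
+ * lives in pgpa_parser.y, which is not shown here): SEQ_SCAN(foo) targets a
+ * single relation identifier, HASH_JOIN({foo bar}) uses an unordered
+ * sublist, and JOIN_ORDER((foo {bar baz})) mixes an ordered outer list with
+ * an unordered inner one.
+ */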
+
+/*
+ * An index specification.
+ */
+typedef struct pgpa_index_target
+{
+ /* Index schema and name */
+ char *indnamespace;
+ char *indname;
+} pgpa_index_target;
+
+/*
+ * A single item about which advice is being given, which could be either
+ * a relation identifier that we want to break out into its constituent fields,
+ * or a sublist of some kind.
+ */
+typedef struct pgpa_advice_target
+{
+ pgpa_target_type ttype;
+
+ /*
+ * This field is meaningful when ttype is PGPA_TARGET_IDENTIFIER.
+ *
+ * All identifiers must have an alias name and an occurrence number; the
+ * remaining fields can be NULL. Note that it's possible to specify a
+ * partition name without a partition schema, but not the reverse.
+ */
+ pgpa_identifier rid;
+
+ /*
+ * This field is set when ttype is PGPA_TARGET_IDENTIFIER and the advice
+ * tag is PGPA_TAG_INDEX_SCAN or PGPA_TAG_INDEX_ONLY_SCAN.
+ */
+ pgpa_index_target *itarget;
+
+ /*
+ * When the ttype is PGPA_TARGET_ORDERED_LIST or PGPA_TARGET_UNORDERED_LIST,
+ * this field contains a list of additional pgpa_advice_target objects.
+ * Otherwise, it is unused.
+ */
+ List *children;
+} pgpa_advice_target;
+
+/*
+ * These are all the kinds of advice that we know how to parse. If a keyword
+ * is found at the top level, it must be in this list.
+ *
+ * If you change anything here, also update pgpa_parse_advice_tag and
+ * pgpa_cstring_advice_tag.
+ */
+typedef enum pgpa_advice_tag_type
+{
+ PGPA_TAG_BITMAP_HEAP_SCAN,
+ PGPA_TAG_FOREIGN_JOIN,
+ PGPA_TAG_GATHER,
+ PGPA_TAG_GATHER_MERGE,
+ PGPA_TAG_HASH_JOIN,
+ PGPA_TAG_INDEX_ONLY_SCAN,
+ PGPA_TAG_INDEX_SCAN,
+ PGPA_TAG_JOIN_ORDER,
+ PGPA_TAG_MERGE_JOIN_MATERIALIZE,
+ PGPA_TAG_MERGE_JOIN_PLAIN,
+ PGPA_TAG_NESTED_LOOP_MATERIALIZE,
+ PGPA_TAG_NESTED_LOOP_MEMOIZE,
+ PGPA_TAG_NESTED_LOOP_PLAIN,
+ PGPA_TAG_NO_GATHER,
+ PGPA_TAG_PARTITIONWISE,
+ PGPA_TAG_SEMIJOIN_NON_UNIQUE,
+ PGPA_TAG_SEMIJOIN_UNIQUE,
+ PGPA_TAG_SEQ_SCAN,
+ PGPA_TAG_TID_SCAN
+} pgpa_advice_tag_type;
+
+/*
+ * An item of advice, meaning a tag and the list of all targets to which
+ * it is being applied.
+ *
+ * "targets" is a list of pgpa_advice_target objects.
+ *
+ * The List returned from pgpa_yyparse is a list of pgpa_advice_item objects.
+ */
+typedef struct pgpa_advice_item
+{
+ pgpa_advice_tag_type tag;
+ List *targets;
+} pgpa_advice_item;
+
+/*
+ * Result of comparing an array of pgpa_identifier objects to a
+ * pgpa_advice_target.
+ *
+ * PGPA_ITM_EQUAL means all targets are matched by some identifier, and
+ * all identifiers were matched to a target.
+ *
+ * PGPA_ITM_KEYS_ARE_SUBSET means that all identifiers matched to a target,
+ * but there were leftover targets. Generally, this means that the advice is
+ * looking to apply to all of the rels we have plus some additional ones that
+ * we don't have.
+ *
+ * PGPA_ITM_TARGETS_ARE_SUBSET means that all targets are matched by an
+ * identifier, but there were leftover identifiers. Generally, this means
+ * that the advice is looking to apply to some but not all of the rels we have.
+ *
+ * PGPA_ITM_INTERSECTING means that some identifiers and targets were matched,
+ * but neither all identifiers nor all targets could be matched to items in
+ * the other set.
+ *
+ * PGPA_ITM_DISJOINT means that no matches between identifiers and targets were
+ * found.
+ */
+typedef enum
+{
+ PGPA_ITM_EQUAL,
+ PGPA_ITM_KEYS_ARE_SUBSET,
+ PGPA_ITM_TARGETS_ARE_SUBSET,
+ PGPA_ITM_INTERSECTING,
+ PGPA_ITM_DISJOINT
+} pgpa_itm_type;
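+
+/*
+ * A worked example (purely illustrative): with identifiers {a, b} and
+ * target list (a b c), every identifier matches some target but target c
+ * goes unmatched, so the result is PGPA_ITM_KEYS_ARE_SUBSET. With {a, b, c}
+ * against (a b) the result is PGPA_ITM_TARGETS_ARE_SUBSET, with {a, x}
+ * against (a b) it is PGPA_ITM_INTERSECTING, and with {x, y} against (a b)
+ * it is PGPA_ITM_DISJOINT.
+ */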
+
+/* for pgpa_scanner.l and pgpa_parser.y */
+union YYSTYPE;
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void *yyscan_t;
+#endif
+
+/* in pgpa_scanner.l */
+extern int pgpa_yylex(union YYSTYPE *yylval_param, List **result,
+ char **parse_error_msg_p, yyscan_t yyscanner);
+extern void pgpa_yyerror(List **result, char **parse_error_msg_p,
+ yyscan_t yyscanner,
+ const char *message);
+extern void pgpa_scanner_init(const char *str, yyscan_t *yyscannerp);
+extern void pgpa_scanner_finish(yyscan_t yyscanner);
+
+/* in pgpa_parser.y */
+extern int pgpa_yyparse(List **result, char **parse_error_msg_p,
+ yyscan_t yyscanner);
+extern List *pgpa_parse(const char *advice_string, char **error_p);
+
+/* in pgpa_ast.c */
+extern char *pgpa_cstring_advice_tag(pgpa_advice_tag_type advice_tag);
+extern bool pgpa_identifier_matches_target(pgpa_identifier *rid,
+ pgpa_advice_target *target);
+extern pgpa_itm_type pgpa_identifiers_match_target(int nrids,
+ pgpa_identifier *rids,
+ pgpa_advice_target *target);
+extern bool pgpa_index_targets_equal(pgpa_index_target *i1,
+ pgpa_index_target *i2);
+extern pgpa_advice_tag_type pgpa_parse_advice_tag(const char *tag, bool *fail);
+extern void pgpa_format_advice_target(StringInfo str,
+ pgpa_advice_target *target);
+extern void pgpa_format_index_target(StringInfo str,
+ pgpa_index_target *itarget);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_collector.c b/contrib/pg_plan_advice/pgpa_collector.c
new file mode 100644
index 00000000000..a0b0d7e1594
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_collector.c
@@ -0,0 +1,639 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_collector.c
+ * collect advice into backend-local or shared memory
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_collector.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_collector.h"
+
+#include "datatype/timestamp.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/timestamp.h"
+
+PG_FUNCTION_INFO_V1(pg_clear_collected_local_advice);
+PG_FUNCTION_INFO_V1(pg_clear_collected_shared_advice);
+PG_FUNCTION_INFO_V1(pg_get_collected_local_advice);
+PG_FUNCTION_INFO_V1(pg_get_collected_shared_advice);
+
+#define ADVICE_CHUNK_SIZE 1024
+#define ADVICE_CHUNK_ARRAY_SIZE 64
+
+#define PG_GET_ADVICE_COLUMNS 7
+
+/*
+ * Advice extracted from one query plan, together with the query string
+ * and various other identifying details.
+ */
+typedef struct pgpa_collected_advice
+{
+ Oid userid; /* user OID */
+ Oid dbid; /* database OID */
+ uint64 queryid; /* query identifier */
+ TimestampTz timestamp; /* query timestamp */
+ int advice_offset; /* start of advice in textual data */
+ char textual_data[FLEXIBLE_ARRAY_MEMBER];
+} pgpa_collected_advice;
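+
+/*
+ * textual_data stores the query string, including its terminating NUL,
+ * immediately followed by the advice string. For example (hypothetical
+ * values), with query_string = "SELECT 1" and advice_string = "SEQ_SCAN(t)",
+ * advice_offset is 9 and textual_data holds "SELECT 1\0SEQ_SCAN(t)\0".
+ */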
+
+/*
+ * A bunch of pointers to pgpa_collected_advice objects, stored in
+ * backend-local memory.
+ */
+typedef struct pgpa_local_advice_chunk
+{
+ pgpa_collected_advice *entries[ADVICE_CHUNK_SIZE];
+} pgpa_local_advice_chunk;
+
+/*
+ * Information about all of the pgpa_collected_advice objects that we're
+ * storing in local memory.
+ *
+ * We assign consecutive IDs, starting from 0, to each pgpa_collected_advice
+ * object that we store. The actual storage is an array of chunks, which
+ * helps keep memcpy() overhead low when we start discarding older data.
+ */
+typedef struct pgpa_local_advice
+{
+ uint64 next_id;
+ uint64 oldest_id;
+ uint64 base_id;
+ int chunk_array_allocated_size;
+ pgpa_local_advice_chunk **chunks;
+} pgpa_local_advice;
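+
+/*
+ * Chunk addressing, which works the same way for the shared collector
+ * below: the entry with ID n lives in chunks[(n - base_id) / ADVICE_CHUNK_SIZE]
+ * at offset (n - base_id) % ADVICE_CHUNK_SIZE. For example, with base_id = 0,
+ * entry 2500 is offset 452 within chunk 2.
+ */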
+
+/*
+ * Just like pgpa_local_advice_chunk, but stored in a dynamic shared area,
+ * so we must use dsa_pointer instead of native pointers.
+ */
+typedef struct pgpa_shared_advice_chunk
+{
+ dsa_pointer entries[ADVICE_CHUNK_SIZE];
+} pgpa_shared_advice_chunk;
+
+/*
+ * Just like pgpa_local_advice, but stored in a dynamic shared area, so
+ * we must use dsa_pointer instead of native pointers.
+ */
+typedef struct pgpa_shared_advice
+{
+ uint64 next_id;
+ uint64 oldest_id;
+ uint64 base_id;
+ int chunk_array_allocated_size;
+ dsa_pointer chunks;
+} pgpa_shared_advice;
+
+/* Pointers to local and shared collectors */
+static pgpa_local_advice *local_collector = NULL;
+static pgpa_shared_advice *shared_collector = NULL;
+
+/* Static functions */
+static pgpa_collected_advice *pgpa_make_collected_advice(Oid userid,
+ Oid dbid,
+ uint64 queryId,
+ TimestampTz timestamp,
+ const char *query_string,
+ const char *advice_string,
+ dsa_area *area,
+ dsa_pointer *result);
+static void pgpa_store_local_advice(pgpa_collected_advice *ca);
+static void pgpa_trim_local_advice(int limit);
+static void pgpa_store_shared_advice(dsa_pointer ca_pointer);
+static void pgpa_trim_shared_advice(dsa_area *area, int limit);
+
+/* Helper function to extract the query string from pgpa_collected_advice */
+static inline const char *
+query_string(pgpa_collected_advice *ca)
+{
+ return ca->textual_data;
+}
+
+/* Helper function to extract the advice string from pgpa_collected_advice */
+static inline const char *
+advice_string(pgpa_collected_advice *ca)
+{
+ return ca->textual_data + ca->advice_offset;
+}
+
+/*
+ * Store collected query advice into the local or shared advice collector,
+ * as appropriate.
+ */
+void
+pgpa_collect_advice(uint64 queryId, const char *query_string,
+ const char *advice_string)
+{
+ Oid userid = GetUserId();
+ Oid dbid = MyDatabaseId;
+ TimestampTz now = GetCurrentTimestamp();
+
+ if (pg_plan_advice_local_collector &&
+ pg_plan_advice_local_collection_limit > 0)
+ {
+ pgpa_collected_advice *ca;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+ ca = pgpa_make_collected_advice(userid, dbid, queryId, now,
+ query_string, advice_string,
+ NULL, NULL);
+ pgpa_store_local_advice(ca);
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ if (pg_plan_advice_shared_collector &&
+ pg_plan_advice_shared_collection_limit > 0)
+ {
+ dsa_area *area = pg_plan_advice_dsa_area();
+ dsa_pointer ca_pointer = InvalidDsaPointer; /* placate compiler */
+
+ pgpa_make_collected_advice(userid, dbid, queryId, now,
+ query_string, advice_string, area,
+ &ca_pointer);
+ pgpa_store_shared_advice(ca_pointer);
+ }
+}
+
+/*
+ * Allocate and fill a new pgpa_collected_advice object.
+ *
+ * If area != NULL, it is used to allocate the new object, and the resulting
+ * dsa_pointer is returned via *result.
+ *
+ * If area == NULL, the new object is allocated in the current memory context,
+ * and result is not examined or modified.
+ */
+static pgpa_collected_advice *
+pgpa_make_collected_advice(Oid userid, Oid dbid, uint64 queryId,
+ TimestampTz timestamp,
+ const char *query_string,
+ const char *advice_string,
+ dsa_area *area, dsa_pointer *result)
+{
+ size_t query_string_length = strlen(query_string) + 1;
+ size_t advice_string_length = strlen(advice_string) + 1;
+ size_t total_length;
+ pgpa_collected_advice *ca;
+
+ total_length = offsetof(pgpa_collected_advice, textual_data)
+ + query_string_length + advice_string_length;
+
+ if (area == NULL)
+ ca = palloc(total_length);
+ else
+ {
+ *result = dsa_allocate(area, total_length);
+ ca = dsa_get_address(area, *result);
+ }
+
+ ca->userid = userid;
+ ca->dbid = dbid;
+ ca->queryid = queryId;
+ ca->timestamp = timestamp;
+ ca->advice_offset = query_string_length;
+
+ memcpy(ca->textual_data, query_string, query_string_length);
+ memcpy(&ca->textual_data[ca->advice_offset],
+ advice_string, advice_string_length);
+
+ return ca;
+}
+
+/*
+ * Add a pgpa_collected_advice object to our backend-local advice collection.
+ *
+ * Caller is responsible for switching to the appropriate memory context;
+ * the provided object should have been allocated in that same context.
+ */
+static void
+pgpa_store_local_advice(pgpa_collected_advice *ca)
+{
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_local_advice *la = local_collector;
+
+ /* If the local advice collector isn't initialized yet, do that now. */
+ if (la == NULL)
+ {
+ la = palloc0(sizeof(pgpa_local_advice));
+ la->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE;
+ la->chunks = palloc0_array(pgpa_local_advice_chunk *,
+ la->chunk_array_allocated_size);
+ local_collector = la;
+ }
+
+ /* Compute chunk and offset at which to store this advice. */
+ chunk_number = (la->next_id - la->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (la->next_id - la->base_id) % ADVICE_CHUNK_SIZE;
+
+ /* Extend chunk array, if needed. */
+ if (chunk_number >= la->chunk_array_allocated_size)
+ {
+ int new_size;
+
+ new_size = la->chunk_array_allocated_size + ADVICE_CHUNK_ARRAY_SIZE;
+ la->chunks = repalloc0_array(la->chunks,
+ pgpa_local_advice_chunk *,
+ la->chunk_array_allocated_size,
+ new_size);
+ la->chunk_array_allocated_size = new_size;
+ }
+
+ /* Allocate new chunk, if needed. */
+ if (la->chunks[chunk_number] == NULL)
+ la->chunks[chunk_number] = palloc0_object(pgpa_local_advice_chunk);
+
+ /* Save pointer and bump next-id counter. */
+ Assert(la->chunks[chunk_number]->entries[chunk_offset] == NULL);
+ la->chunks[chunk_number]->entries[chunk_offset] = ca;
+ ++la->next_id;
+
+ /* If we've exceeded the storage limit, discard old data. */
+ pgpa_trim_local_advice(pg_plan_advice_local_collection_limit);
+}
+
+/*
+ * Add a pgpa_collected_advice object to the shared advice collection.
+ *
+ * 'ca_pointer' should have been allocated from the pg_plan_advice DSA area
+ * and should point to an object of type pgpa_collected_advice.
+ */
+static void
+pgpa_store_shared_advice(dsa_pointer ca_pointer)
+{
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_shared_state *state = pg_plan_advice_attach();
+ dsa_area *area = pg_plan_advice_dsa_area();
+ pgpa_shared_advice *sa = shared_collector;
+ dsa_pointer *chunk_array;
+ pgpa_shared_advice_chunk *chunk;
+
+ /* Lock the shared state. */
+ LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+ /*
+ * If we're not attached to the shared advice collector yet, fix that now.
+ * If we're the first ones to attach, we may need to create the object.
+ */
+ if (sa == NULL)
+ {
+ if (state->shared_collector == InvalidDsaPointer)
+ state->shared_collector =
+ dsa_allocate0(area, sizeof(pgpa_shared_advice));
+ shared_collector = sa = dsa_get_address(area, state->shared_collector);
+ }
+
+ /*
+ * It's possible that some other backend may have succeeded in creating
+ * the main collector object but failed to allocate an initial chunk
+ * array, so we must be prepared to allocate the chunk array here whether
+ * or not we created the collector object.
+ */
+ if (sa->chunk_array_allocated_size == 0)
+ {
+ sa->chunks =
+ dsa_allocate0(area,
+ sizeof(dsa_pointer) * ADVICE_CHUNK_ARRAY_SIZE);
+ sa->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE;
+ }
+
+ /* Compute chunk and offset at which to store this advice. */
+ chunk_number = (sa->next_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (sa->next_id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+ /* Get the address of the chunk array and, if needed, extend it. */
+ if (chunk_number >= sa->chunk_array_allocated_size)
+ {
+ int new_size;
+ dsa_pointer new_chunks;
+
+ /*
+ * DSA can't enlarge an existing allocation, so we must make a new
+ * allocation and copy data over.
+ */
+ new_size = sa->chunk_array_allocated_size + ADVICE_CHUNK_ARRAY_SIZE;
+ new_chunks = dsa_allocate0(area, sizeof(dsa_pointer) * new_size);
+ chunk_array = dsa_get_address(area, new_chunks);
+ memcpy(chunk_array, dsa_get_address(area, sa->chunks),
+ sizeof(dsa_pointer) * sa->chunk_array_allocated_size);
+ dsa_free(area, sa->chunks);
+ sa->chunks = new_chunks;
+ sa->chunk_array_allocated_size = new_size;
+ }
+ else
+ chunk_array = dsa_get_address(area, sa->chunks);
+
+ /* Get the address of the desired chunk, allocating it if needed. */
+ if (chunk_array[chunk_number] == InvalidDsaPointer)
+ chunk_array[chunk_number] =
+ dsa_allocate0(area, sizeof(pgpa_shared_advice_chunk));
+ chunk = dsa_get_address(area, chunk_array[chunk_number]);
+
+ /* Save pointer and bump next-id counter. */
+ Assert(chunk->entries[chunk_offset] == InvalidDsaPointer);
+ chunk->entries[chunk_offset] = ca_pointer;
+ ++sa->next_id;
+
+ /* If we've exceeded the storage limit, discard old data. */
+ pgpa_trim_shared_advice(area, pg_plan_advice_shared_collection_limit);
+
+ /* Release lock on shared state. */
+ LWLockRelease(&state->lock);
+}
+
+/*
+ * Discard collected advice stored in backend-local memory in excess of the
+ * specified limit.
+ */
+static void
+pgpa_trim_local_advice(int limit)
+{
+ pgpa_local_advice *la = local_collector;
+ uint64 current_count;
+ uint64 trim_count;
+ uint64 total_chunk_count;
+ uint64 trim_chunk_count;
+ uint64 remaining_chunk_count;
+
+ /* If we haven't yet reached the limit, there's nothing to do. */
+ current_count = la->next_id - la->oldest_id;
+ if (current_count <= limit)
+ return;
+
+ /* Free enough entries to get us back down to the limit. */
+ trim_count = current_count - limit;
+ while (trim_count > 0)
+ {
+ uint64 chunk_number;
+ uint64 chunk_offset;
+
+ chunk_number = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (la->oldest_id - la->base_id) % ADVICE_CHUNK_SIZE;
+
+ Assert(la->chunks[chunk_number]->entries[chunk_offset] != NULL);
+ pfree(la->chunks[chunk_number]->entries[chunk_offset]);
+ la->chunks[chunk_number]->entries[chunk_offset] = NULL;
+ ++la->oldest_id;
+ --trim_count;
+ }
+
+ /* Free any chunks that are now entirely unused. */
+ trim_chunk_count = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE;
+ for (uint64 n = 0; n < trim_chunk_count; ++n)
+ pfree(la->chunks[n]);
+
+ /* Slide remaining chunk pointers back toward the base of the array. */
+ total_chunk_count = (la->next_id - la->base_id +
+ ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE;
+ remaining_chunk_count = total_chunk_count - trim_chunk_count;
+ if (remaining_chunk_count > 0)
+ memmove(&la->chunks[0], &la->chunks[trim_chunk_count],
+ sizeof(pgpa_local_advice_chunk *) * remaining_chunk_count);
+
+ /* Don't leave stale pointers around. */
+ memset(&la->chunks[remaining_chunk_count], 0,
+ sizeof(pgpa_local_advice_chunk *)
+ * (total_chunk_count - remaining_chunk_count));
+
+ /* Adjust base ID value accordingly. */
+ la->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE;
+}
+
+/*
+ * Discard collected advice stored in shared memory in excess of the
+ * specified limit.
+ */
+static void
+pgpa_trim_shared_advice(dsa_area *area, int limit)
+{
+ pgpa_shared_advice *sa = shared_collector;
+ uint64 current_count;
+ uint64 trim_count;
+ uint64 total_chunk_count;
+ uint64 trim_chunk_count;
+ uint64 remaining_chunk_count;
+ dsa_pointer *chunk_array;
+
+ /* If we haven't yet reached the limit, there's nothing to do. */
+ current_count = sa->next_id - sa->oldest_id;
+ if (current_count <= limit)
+ return;
+
+ /* Get a pointer to the chunk array. */
+ chunk_array = dsa_get_address(area, sa->chunks);
+
+ /* Free enough entries to get us back down to the limit. */
+ trim_count = current_count - limit;
+ while (trim_count > 0)
+ {
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_shared_advice_chunk *chunk;
+
+ chunk_number = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (sa->oldest_id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+ chunk = dsa_get_address(area, chunk_array[chunk_number]);
+ Assert(chunk->entries[chunk_offset] != InvalidDsaPointer);
+ dsa_free(area, chunk->entries[chunk_offset]);
+ chunk->entries[chunk_offset] = InvalidDsaPointer;
+ ++sa->oldest_id;
+ --trim_count;
+ }
+
+ /* Free any chunks that are now entirely unused. */
+ trim_chunk_count = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+ for (uint64 n = 0; n < trim_chunk_count; ++n)
+ dsa_free(area, chunk_array[n]);
+
+ /* Slide remaining chunk pointers back toward the base of the array. */
+ total_chunk_count = (sa->next_id - sa->base_id +
+ ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE;
+ remaining_chunk_count = total_chunk_count - trim_chunk_count;
+ if (remaining_chunk_count > 0)
+ memmove(&chunk_array[0], &chunk_array[trim_chunk_count],
+ sizeof(dsa_pointer) * remaining_chunk_count);
+
+ /* Don't leave stale pointers around. */
+ memset(&chunk_array[remaining_chunk_count], 0,
+ sizeof(dsa_pointer)
+ * (total_chunk_count - remaining_chunk_count));
+
+ /* Adjust base ID value accordingly. */
+ sa->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE;
+}
+
+/*
+ * SQL-callable function to discard advice collected in backend-local memory
+ */
+Datum
+pg_clear_collected_local_advice(PG_FUNCTION_ARGS)
+{
+ if (local_collector != NULL)
+ pgpa_trim_local_advice(0);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable function to discard advice collected in shared memory
+ */
+Datum
+pg_clear_collected_shared_advice(PG_FUNCTION_ARGS)
+{
+ pgpa_shared_state *state = pg_plan_advice_attach();
+ dsa_area *area = pg_plan_advice_dsa_area();
+
+ LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+ /*
+ * If we're not attached to the shared advice collector yet, fix that now;
+ * but if the collector doesn't even exist, we can return without doing
+ * anything else.
+ */
+ if (shared_collector == NULL)
+ {
+ if (state->shared_collector == InvalidDsaPointer)
+ {
+ LWLockRelease(&state->lock);
+ return (Datum) 0;
+ }
+ shared_collector = dsa_get_address(area, state->shared_collector);
+ }
+
+ /* Do the real work */
+ pgpa_trim_shared_advice(area, 0);
+
+ LWLockRelease(&state->lock);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable SRF to return advice collected in backend-local memory
+ */
+Datum
+pg_get_collected_local_advice(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ pgpa_local_advice *la = local_collector;
+ Oid userid = GetUserId();
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ if (la == NULL)
+ return (Datum) 0;
+
+ /* Loop over all entries. */
+ for (uint64 id = la->oldest_id; id < la->next_id; ++id)
+ {
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_collected_advice *ca;
+ Datum values[PG_GET_ADVICE_COLUMNS];
+ bool nulls[PG_GET_ADVICE_COLUMNS] = {0};
+
+ chunk_number = (id - la->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (id - la->base_id) % ADVICE_CHUNK_SIZE;
+
+ ca = la->chunks[chunk_number]->entries[chunk_offset];
+
+ if (!member_can_set_role(userid, ca->userid))
+ continue;
+
+ values[0] = UInt64GetDatum(id);
+ values[1] = ObjectIdGetDatum(ca->userid);
+ values[2] = ObjectIdGetDatum(ca->dbid);
+ values[3] = UInt64GetDatum(ca->queryid);
+ values[4] = TimestampGetDatum(ca->timestamp);
+ values[5] = CStringGetTextDatum(query_string(ca));
+ values[6] = CStringGetTextDatum(advice_string(ca));
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+
+ return (Datum) 0;
+}
+
+/*
+ * SQL-callable SRF to return advice collected in shared memory
+ */
+Datum
+pg_get_collected_shared_advice(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ pgpa_shared_state *state = pg_plan_advice_attach();
+ dsa_area *area = pg_plan_advice_dsa_area();
+ dsa_pointer *chunk_array;
+ pgpa_shared_advice *sa = shared_collector;
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ /* Lock the shared state. */
+ LWLockAcquire(&state->lock, LW_SHARED);
+
+ /*
+ * If we're not attached to the shared advice collector yet, fix that now;
+ * but if the collector doesn't even exist, we can return without doing
+ * anything else.
+ */
+ if (sa == NULL)
+ {
+ if (state->shared_collector == InvalidDsaPointer)
+ {
+ LWLockRelease(&state->lock);
+ return (Datum) 0;
+ }
+ shared_collector = sa = dsa_get_address(area, state->shared_collector);
+ }
+
+ /* Get a pointer to the chunk array. */
+ chunk_array = dsa_get_address(area, sa->chunks);
+
+ /* Loop over all entries. */
+ for (uint64 id = sa->oldest_id; id < sa->next_id; ++id)
+ {
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_shared_advice_chunk *chunk;
+ pgpa_collected_advice *ca;
+ Datum values[PG_GET_ADVICE_COLUMNS];
+ bool nulls[PG_GET_ADVICE_COLUMNS] = {0};
+
+ chunk_number = (id - sa->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+ chunk = dsa_get_address(area, chunk_array[chunk_number]);
+ ca = dsa_get_address(area, chunk->entries[chunk_offset]);
+
+ values[0] = UInt64GetDatum(id);
+ values[1] = ObjectIdGetDatum(ca->userid);
+ values[2] = ObjectIdGetDatum(ca->dbid);
+ values[3] = UInt64GetDatum(ca->queryid);
+ values[4] = TimestampGetDatum(ca->timestamp);
+ values[5] = CStringGetTextDatum(query_string(ca));
+ values[6] = CStringGetTextDatum(advice_string(ca));
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+
+ /* Release lock on shared state. */
+ LWLockRelease(&state->lock);
+
+ return (Datum) 0;
+}
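+
+/*
+ * Assuming the obvious declarations in the extension's SQL script (not shown
+ * here; the SQL-level signatures are assumptions), the collectors can be
+ * inspected and cleared like this:
+ *
+ *     SELECT * FROM pg_get_collected_local_advice();
+ *     SELECT pg_clear_collected_shared_advice();
+ */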
diff --git a/contrib/pg_plan_advice/pgpa_collector.h b/contrib/pg_plan_advice/pgpa_collector.h
new file mode 100644
index 00000000000..b6e746a06d7
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_collector.h
@@ -0,0 +1,18 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_collector.h
+ * collect advice into backend-local or shared memory
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_collector.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_COLLECTOR_H
+#define PGPA_COLLECTOR_H
+
+extern void pgpa_collect_advice(uint64 queryId, const char *query_string,
+ const char *advice_string);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_identifier.c b/contrib/pg_plan_advice/pgpa_identifier.c
new file mode 100644
index 00000000000..6f8f53e7cf0
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_identifier.c
@@ -0,0 +1,481 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_identifier.c
+ * create appropriate identifiers for range table entries
+ *
+ * The goal of this module is to be able to produce identifiers for range
+ * table entries that are unique, understandable to human beings, and
+ * able to be reconstructed during future planning cycles. As an
+ * exception, we do not care about, or want to produce, identifiers for
+ * RTE_JOIN entries. This is because (1) we would end up with a ton of
+ * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have
+ * RTEs; and (3) we intend to refer to joins by their constituent members
+ * rather than by reference to the join RTE.
+ *
+ * In general, we construct identifiers of the following form:
+ *
+ * alias_name#occurrence_number/child_table_name@subquery_name
+ *
+ * However, occurrence_number is omitted when it is the first occurrence
+ * within the same subquery, child_table_name is omitted for relations that
+ * are not child tables, and subquery_name is omitted for the topmost
+ * query level. Whenever an item is omitted, the preceding punctuation mark
+ * is also omitted. Identifier-style escaping is applied to alias_name and
+ * subquery_name. Whenever we include child_table_name, we always use a
+ * schema-qualified name, but users writing their own plan advice are not
+ * required to do so. Identifier-style escaping is applied to the schema
+ * and relation names separately.
+ *
+ * The upshot of all of these rules is that in simple cases, the relation
+ * identifier is textually identical to the alias name, making life easier
+ * for users. However, even in complex cases, every relation identifier
+ * for a given query will be unique (or at least we hope so: if not, this
+ * code is buggy and the identifier format might need to be rethought).
+ *
+ * A key goal of this system is that we want to be able to reconstruct the
+ * same identifiers during a future planning cycle for the same query, so
+ * that if a certain behavior is specified for a certain identifier, we can
+ * properly identify the RTI for which that behavior is mandated. In order
+ * for this to work, subquery names must be unique and known before the
+ * subquery is planned, and the remainder of the identifier must not depend
+ * on any part of the query outside of the current subquery level. In
+ * particular, occurrence_number must be calculated relative to the range
+ * table for the relevant subquery, not the final flattened range table.
+ *
+ * NB: All of this code must use rt_fetch(), not planner_rt_fetch()!
+ * Join removal and self-join elimination remove rels from the arrays
+ * that planner_rt_fetch() uses; using rt_fetch() is necessary to get
+ * stable results.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_identifier.c
+ *
+ *-------------------------------------------------------------------------
+ */
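+
+/*
+ * Some purely illustrative examples of the format described above (the names
+ * are hypothetical, not taken from any regression test): a table scanned as
+ * "FROM foo" at the top query level is identified simply as "foo"; a second
+ * range table entry with the same alias name at that level becomes "foo#2";
+ * a partition scanned via inheritance expansion of foo might be
+ * "foo/public.foo_part1"; and the same relation inside a separately-planned
+ * subquery named sub_1 would be "foo@sub_1".
+ */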
+
+#include "postgres.h"
+
+#include "pgpa_identifier.h"
+
+#include "parser/parsetree.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+static Index *pgpa_create_top_rti_map(Index rtable_length, List *rtable,
+ List *appinfos);
+static int pgpa_occurrence_number(List *rtable, Index *top_rti_map,
+ SubPlanRTInfo *rtinfo, Index rti);
+
+/*
+ * Create a range table identifier from scratch.
+ *
+ * This function leaves the caller to do all the heavy lifting, so it's
+ * generally better to use one of the functions below instead.
+ *
+ * See the file header comments for more details on the format of an
+ * identifier.
+ */
+const char *
+pgpa_identifier_string(const pgpa_identifier *rid)
+{
+ const char *result;
+
+ Assert(rid->alias_name != NULL);
+ result = quote_identifier(rid->alias_name);
+
+ Assert(rid->occurrence >= 0);
+ if (rid->occurrence > 1)
+ result = psprintf("%s#%d", result, rid->occurrence);
+
+ if (rid->partrel != NULL)
+ {
+ if (rid->partnsp == NULL)
+ result = psprintf("%s/%s", result,
+ quote_identifier(rid->partrel));
+ else
+ result = psprintf("%s/%s.%s", result,
+ quote_identifier(rid->partnsp),
+ quote_identifier(rid->partrel));
+ }
+
+ if (rid->plan_name != NULL)
+ result = psprintf("%s@%s", result, quote_identifier(rid->plan_name));
+
+ return result;
+}
+
+/*
+ * Compute a relation identifier for a particular RTI.
+ *
+ * The caller provides root and rti, and gets the necessary details back via
+ * the remaining parameters.
+ */
+void
+pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti,
+ pgpa_identifier *rid)
+{
+ Index top_rti = rti;
+ int occurrence = 1;
+ RangeTblEntry *rte;
+ RangeTblEntry *top_rte;
+ char *partnsp = NULL;
+ char *partrel = NULL;
+
+ /*
+ * If this is a child RTE, find the topmost parent that is still of type
+ * RTE_RELATION. We do this because we identify children of partitioned
+ * tables by the name of the child table, but subqueries can also have
+ * child rels and we don't care about those here.
+ */
+ for (;;)
+ {
+ AppendRelInfo *appinfo;
+ RangeTblEntry *parent_rte;
+
+ /* append_rel_array can be NULL if there are no children */
+ if (root->append_rel_array == NULL ||
+ (appinfo = root->append_rel_array[top_rti]) == NULL)
+ break;
+
+ parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable);
+ if (parent_rte->rtekind != RTE_RELATION)
+ break;
+
+ top_rti = appinfo->parent_relid;
+ }
+
+ /* Get the range table entries for the RTI and top RTI. */
+ rte = rt_fetch(rti, root->parse->rtable);
+ top_rte = rt_fetch(top_rti, root->parse->rtable);
+ Assert(rte->rtekind != RTE_JOIN);
+ Assert(top_rte->rtekind != RTE_JOIN);
+
+ /* Work out the correct occurrence number. */
+ for (Index prior_rti = 1; prior_rti < top_rti; ++prior_rti)
+ {
+ RangeTblEntry *prior_rte;
+ AppendRelInfo *appinfo;
+
+ /*
+ * If this is a child rel of a parent that is a relation, skip it.
+ *
+ * Such range table entries are disambiguated by mentioning the schema
+ * and name of the table, not by counting them as separate occurrences
+ * of the same table.
+ *
+ * NB: append_rel_array can be NULL if there are no children
+ */
+ if (root->append_rel_array != NULL &&
+ (appinfo = root->append_rel_array[prior_rti]) != NULL)
+ {
+ RangeTblEntry *parent_rte;
+
+ parent_rte = rt_fetch(appinfo->parent_relid, root->parse->rtable);
+ if (parent_rte->rtekind == RTE_RELATION)
+ continue;
+ }
+
+ /* Skip NULL entries and joins. */
+ prior_rte = rt_fetch(prior_rti, root->parse->rtable);
+ if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN)
+ continue;
+
+ /* Skip if the alias name differs. */
+ if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
+ continue;
+
+ /* Looks like a true duplicate. */
+ ++occurrence;
+ }
+
+ /* If this is a child table, get the schema and relation names. */
+ if (rti != top_rti)
+ {
+ partnsp = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
+ partrel = get_rel_name(rte->relid);
+ }
+
+ /* OK, we have all the answers we need. Return them to the caller. */
+ rid->alias_name = top_rte->eref->aliasname;
+ rid->occurrence = occurrence;
+ rid->partnsp = partnsp;
+ rid->partrel = partrel;
+ rid->plan_name = root->plan_name;
+}
+
+/*
+ * Compute a relation identifier for a set of RTIs, except for any RTE_JOIN
+ * RTIs that may be present.
+ *
+ * RTE_JOIN entries are excluded because they cannot be mentioned by plan
+ * advice.
+ *
+ * The caller is responsible for making sure that the rids array is large
+ * enough to store the results.
+ *
+ * The return value is the number of identifiers computed.
+ */
+int
+pgpa_compute_identifiers_by_relids(PlannerInfo *root, Bitmapset *relids,
+ pgpa_identifier *rids)
+{
+ int count = 0;
+ int rti = -1;
+
+ while ((rti = bms_next_member(relids, rti)) >= 0)
+ {
+ RangeTblEntry *rte = rt_fetch(rti, root->parse->rtable);
+
+ if (rte->rtekind == RTE_JOIN)
+ continue;
+ pgpa_compute_identifier_by_rti(root, rti, &rids[count++]);
+ }
+
+ Assert(count > 0);
+ return count;
+}
+
+/*
+ * Create an array of range table identifiers for all the non-NULL,
+ * non-RTE_JOIN entries in the PlannedStmt's range table.
+ */
+pgpa_identifier *
+pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt)
+{
+ Index rtable_length = list_length(pstmt->rtable);
+ pgpa_identifier *result = palloc0_array(pgpa_identifier, rtable_length);
+ Index *top_rti_map;
+ int rtinfoindex = 0;
+ SubPlanRTInfo *rtinfo = NULL;
+ SubPlanRTInfo *nextrtinfo = NULL;
+
+ /*
+ * Account for relations added by inheritance expansion of partitioned
+ * tables.
+ */
+ top_rti_map = pgpa_create_top_rti_map(rtable_length, pstmt->rtable,
+ pstmt->appendRelations);
+
+ /*
+ * When we begin iterating, we're processing the portion of the range
+ * table that originated from the top-level PlannerInfo, so rtinfo is
+ * NULL. Later, rtinfo will be the SubPlanRTInfo for the subquery whose
+ * portion of the range table we are processing. nextrtinfo is always the
+ * SubPlanRTInfo that follows the current one, if any, so when we're
+ * processing the top-level query's portion of the range table, the next
+ * SubPlanRTInfo is the very first one.
+ */
+ if (pstmt->subrtinfos != NULL)
+ nextrtinfo = linitial(pstmt->subrtinfos);
+
+ /* Main loop over the range table. */
+ for (Index rti = 1; rti <= rtable_length; rti++)
+ {
+ const char *plan_name;
+ Index top_rti;
+ RangeTblEntry *rte;
+ RangeTblEntry *top_rte;
+ char *partnsp = NULL;
+ char *partrel = NULL;
+ int occurrence;
+ pgpa_identifier *rid;
+
+ /*
+ * Advance to the next SubPlanRTInfo, if it's time to do that.
+ *
+ * This loop probably shouldn't ever iterate more than once, because
+ * that would imply that a subquery was planned but added nothing to
+ * the range table; but let's be defensive and assume it can happen.
+ */
+ while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset)
+ {
+ rtinfo = nextrtinfo;
+ if (++rtinfoindex >= list_length(pstmt->subrtinfos))
+ nextrtinfo = NULL;
+ else
+ nextrtinfo = list_nth(pstmt->subrtinfos, rtinfoindex);
+ }
+
+ /* Fetch the range table entry, if any. */
+ rte = rt_fetch(rti, pstmt->rtable);
+
+ /*
+ * We can't and don't need to identify null entries, and we don't want
+ * to identify join entries.
+ */
+ if (rte == NULL || rte->rtekind == RTE_JOIN)
+ continue;
+
+ /*
+ * If this is not a relation added by partitioned table expansion,
+ * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise,
+ * we need the information for the top RTI/RTE, and must also fetch
+ * the partition schema and name.
+ */
+ top_rti = top_rti_map[rti - 1];
+ if (rti == top_rti)
+ top_rte = rte;
+ else
+ {
+ top_rte = rt_fetch(top_rti, pstmt->rtable);
+ partnsp =
+ get_namespace_name_or_temp(get_rel_namespace(rte->relid));
+ partrel = get_rel_name(rte->relid);
+ }
+
+ /* Compute the correct occurrence number. */
+ occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map,
+ rtinfo, top_rti);
+
+ /* Get the name of the current plan (NULL for toplevel query). */
+ plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name;
+
+ /* Save all the details we've derived. */
+ rid = &result[rti - 1];
+ rid->alias_name = top_rte->eref->aliasname;
+ rid->occurrence = occurrence;
+ rid->partnsp = partnsp;
+ rid->partrel = partrel;
+ rid->plan_name = plan_name;
+ }
+
+ return result;
+}
+
+/*
+ * Search for a pgpa_identifier in the array of identifiers computed for the
+ * range table. If exactly one match is found, return the matching RTI; else
+ * return 0.
+ */
+Index
+pgpa_compute_rti_from_identifier(int rtable_length,
+ pgpa_identifier *rt_identifiers,
+ pgpa_identifier *rid)
+{
+ Index result = 0;
+
+ for (Index rti = 1; rti <= rtable_length; ++rti)
+ {
+ pgpa_identifier *rti_rid = &rt_identifiers[rti - 1];
+
+ /* If there's no identifier for this RTI, skip it. */
+ if (rti_rid->alias_name == NULL)
+ continue;
+
+ /*
+ * If it matches, return this RTI. As usual, an omitted partition
+ * schema matches anything, but partition and plan names must either
+ * match exactly or be omitted on both sides.
+ */
+ if (strcmp(rid->alias_name, rti_rid->alias_name) == 0 &&
+ rid->occurrence == rti_rid->occurrence &&
+ (rid->partnsp == NULL || rti_rid->partnsp == NULL ||
+ strcmp(rid->partnsp, rti_rid->partnsp) == 0) &&
+ strings_equal_or_both_null(rid->partrel, rti_rid->partrel) &&
+ strings_equal_or_both_null(rid->plan_name, rti_rid->plan_name))
+ {
+ if (result != 0)
+ {
+ /* Multiple matches were found. */
+ return 0;
+ }
+ result = rti;
+ }
+ }
+
+ return result;
+}
+
+/*
+ * Build a mapping from each RTI to the RTI whose alias_name will be used to
+ * construct the range table identifier.
+ *
+ * For child relations, this is the topmost parent that is still of type
+ * RTE_RELATION. For other relations, it's just the original RTI.
+ *
+ * Since we're eventually going to need this information for every RTI in
+ * the range table, it's best to compute all the answers in a single pass over
+ * the AppendRelInfo list. Otherwise, we might end up searching through that
+ * list repeatedly for entries of interest.
+ *
+ * Note that the returned array uses zero-based indexing, while RTIs use
+ * 1-based indexing, so subtract 1 from the RTI before looking it up in the
+ * array.
+ */
+static Index *
+pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos)
+{
+ Index *top_rti_map = palloc0_array(Index, rtable_length);
+
+ /* Initially, make every RTI point to itself. */
+ for (Index rti = 1; rti <= rtable_length; ++rti)
+ top_rti_map[rti - 1] = rti;
+
+ /* Update the map for each AppendRelInfo object. */
+ foreach_node(AppendRelInfo, appinfo, appinfos)
+ {
+ Index parent_rti = appinfo->parent_relid;
+ RangeTblEntry *parent_rte = rt_fetch(parent_rti, rtable);
+
+ /* If the parent is not RTE_RELATION, ignore this entry. */
+ if (parent_rte->rtekind != RTE_RELATION)
+ continue;
+
+ /*
+ * Map the child to wherever we mapped the parent. Parents always
+ * precede their children in the AppendRelInfo list, so this should
+ * work out.
+ */
+ top_rti_map[appinfo->child_relid - 1] = top_rti_map[parent_rti - 1];
+ }
+
+ return top_rti_map;
+}
+
+/*
+ * Find the occurrence number of a certain relation within a certain subquery.
+ *
+ * The same alias name can occur multiple times within a subquery, but we want
+ * to disambiguate by giving different occurrences different integer indexes.
+ * However, child tables are disambiguated by including the table name rather
+ * than by incrementing the occurrence number; and joins are not named and so
+ * shouldn't increment the occurrence number either.
+ */
+static int
+pgpa_occurrence_number(List *rtable, Index *top_rti_map,
+ SubPlanRTInfo *rtinfo, Index rti)
+{
+ Index rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset;
+ int occurrence = 1;
+ RangeTblEntry *rte = rt_fetch(rti, rtable);
+
+ for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti)
+ {
+ RangeTblEntry *prior_rte;
+
+ /*
+ * If this is a child rel of a parent that is a relation, skip it.
+ *
+ * Such range table entries are disambiguated by mentioning the schema
+ * and name of the table, not by counting them as separate occurrences
+ * of the same table.
+ */
+ if (top_rti_map[prior_rti - 1] != prior_rti)
+ continue;
+
+ /* Skip joins. */
+ prior_rte = rt_fetch(prior_rti, rtable);
+ if (prior_rte->rtekind == RTE_JOIN)
+ continue;
+
+ /* Skip if the alias name differs. */
+ if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
+ continue;
+
+ /* Looks like a true duplicate. */
+ ++occurrence;
+ }
+
+ return occurrence;
+}
diff --git a/contrib/pg_plan_advice/pgpa_identifier.h b/contrib/pg_plan_advice/pgpa_identifier.h
new file mode 100644
index 00000000000..b000d2b7081
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_identifier.h
@@ -0,0 +1,52 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_identifier.h
+ * create appropriate identifiers for range table entries
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_identifier.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PGPA_IDENTIFIER_H
+#define PGPA_IDENTIFIER_H
+
+#include "nodes/pathnodes.h"
+#include "nodes/plannodes.h"
+
+typedef struct pgpa_identifier
+{
+ const char *alias_name;
+ int occurrence;
+ const char *partnsp;
+ const char *partrel;
+ const char *plan_name;
+} pgpa_identifier;
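+
+/*
+ * For example (hypothetical values), the identifier rendered as
+ * "foo#2/public.p1@sub_1" corresponds to alias_name = "foo",
+ * occurrence = 2, partnsp = "public", partrel = "p1", and
+ * plan_name = "sub_1".
+ */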
+
+/* Convenience function for comparing possibly-NULL strings. */
+static inline bool
+strings_equal_or_both_null(const char *a, const char *b)
+{
+ if (a == b)
+ return true;
+ else if (a == NULL || b == NULL)
+ return false;
+ else
+ return strcmp(a, b) == 0;
+}
+
+extern const char *pgpa_identifier_string(const pgpa_identifier *rid);
+extern void pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti,
+ pgpa_identifier *rid);
+extern int pgpa_compute_identifiers_by_relids(PlannerInfo *root,
+ Bitmapset *relids,
+ pgpa_identifier *rids);
+extern pgpa_identifier *pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt);
+
+extern Index pgpa_compute_rti_from_identifier(int rtable_length,
+ pgpa_identifier *rt_identifiers,
+ pgpa_identifier *rid);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_join.c b/contrib/pg_plan_advice/pgpa_join.c
new file mode 100644
index 00000000000..b6c588dfe2b
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_join.c
@@ -0,0 +1,637 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_join.c
+ * analysis of joins in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_join.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/pathnodes.h"
+#include "nodes/print.h"
+#include "parser/parsetree.h"
+
+/*
+ * Temporary object used when unrolling a join tree.
+ */
+struct pgpa_join_unroller
+{
+ unsigned nallocated;
+ unsigned nused;
+ Plan *outer_subplan;
+ ElidedNode *outer_elided_node;
+ bool outer_beneath_any_gather;
+ pgpa_join_strategy *strategy;
+ Plan **inner_subplans;
+ ElidedNode **inner_elided_nodes;
+ pgpa_join_unroller **inner_unrollers;
+ bool *inner_beneath_any_gather;
+};
+
+static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *walker,
+ Plan *plan,
+ Plan **realouter,
+ Plan **realinner,
+ ElidedNode **elidedrealouter,
+ ElidedNode **elidedrealinner,
+ bool *found_any_outer_gather,
+ bool *found_any_inner_gather);
+static ElidedNode *pgpa_descend_node(PlannedStmt *pstmt, Plan **plan);
+static ElidedNode *pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan,
+ bool *found_any_gather);
+static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+ ElidedNode **elided_node);
+
+static bool is_result_node_with_child(Plan *plan);
+static bool is_sorting_plan(Plan *plan);
+
+/*
+ * Create an initially-empty object for unrolling joins.
+ *
+ * This function creates a helper object that can later be used to create a
+ * pgpa_unrolled_join, after first calling pgpa_unroll_join one or more times.
+ */
+pgpa_join_unroller *
+pgpa_create_join_unroller(void)
+{
+ pgpa_join_unroller *join_unroller;
+
+ join_unroller = palloc0_object(pgpa_join_unroller);
+ join_unroller->nallocated = 4;
+ join_unroller->strategy =
+ palloc_array(pgpa_join_strategy, join_unroller->nallocated);
+ join_unroller->inner_subplans =
+ palloc_array(Plan *, join_unroller->nallocated);
+ join_unroller->inner_elided_nodes =
+ palloc_array(ElidedNode *, join_unroller->nallocated);
+ join_unroller->inner_unrollers =
+ palloc_array(pgpa_join_unroller *, join_unroller->nallocated);
+ join_unroller->inner_beneath_any_gather =
+ palloc_array(bool, join_unroller->nallocated);
+
+ return join_unroller;
+}
+
+/*
+ * Unroll one level of an unrollable join tree.
+ *
+ * Our basic goal here is to unroll join trees as they occur in the Plan
+ * tree into a simpler and more regular structure that we can more easily
+ * use for further processing. Unrolling is outer-deep, so if the plan tree
+ * has Join1(Join2(A,B),Join3(C,D)), the same join unroller object should be
+ * used for Join1 and Join2, but a different one will be needed for Join3,
+ * since that involves a join within the *inner* side of another join.
+ *
+ * pgpa_plan_walker creates a "top level" join unroller object when it
+ * encounters a join in a portion of the plan tree in which no join unroller
+ * is already active. From there, this function is responsible for determining
+ * to what portion of the plan tree that join unroller applies, and for
+ * creating any subordinate join unroller objects that are needed as a result
+ * of non-outer-deep join trees. We do this by returning the join unroller
+ * objects that should be used for further traversal of the outer and inner
+ * subtrees of the current plan node via *outer_join_unroller and
+ * *inner_join_unroller, respectively.
+ */
+void
+pgpa_unroll_join(pgpa_plan_walker_context *walker, Plan *plan,
+ bool beneath_any_gather,
+ pgpa_join_unroller *join_unroller,
+ pgpa_join_unroller **outer_join_unroller,
+ pgpa_join_unroller **inner_join_unroller)
+{
+ pgpa_join_strategy strategy;
+ Plan *realinner,
+ *realouter;
+ ElidedNode *elidedinner,
+ *elidedouter;
+ int n;
+ bool found_any_outer_gather = false;
+ bool found_any_inner_gather = false;
+
+ Assert(join_unroller != NULL);
+
+ /*
+ * We need to pass the join_unroller object down through certain types of
+ * plan nodes -- anything that's considered part of the join strategy, and
+ * any other nodes that can occur in a join tree despite not being scans
+ * or joins.
+ *
+ * This includes:
+ *
+ * (1) Materialize, Memoize, and Hash nodes, which are part of the join
+ * strategy,
+ *
+ * (2) Gather and Gather Merge nodes, which can occur at any point in the
+ * join tree where the planner decided to initiate parallelism,
+ *
+ * (3) Sort and IncrementalSort nodes, which can occur beneath MergeJoin
+ * or GatherMerge,
+ *
+ * (4) Agg and Unique nodes, which can occur when we decide to make the
+ * nullable side of a semijoin unique and then join the result, and
+ *
+	 * (5) Result nodes with children, which can be added either to project
+	 * or to enforce a one-time filter (but Result nodes without children
+	 * are degenerate scans or joins).
+ */
+ if (IsA(plan, Material) || IsA(plan, Memoize) || IsA(plan, Hash)
+ || IsA(plan, Gather) || IsA(plan, GatherMerge)
+ || is_sorting_plan(plan) || IsA(plan, Agg) || IsA(plan, Unique)
+ || is_result_node_with_child(plan))
+ {
+ *outer_join_unroller = join_unroller;
+ return;
+ }
+
+ /*
+ * Since we've already handled nodes that require pass-through treatment,
+ * this should be an unrollable join.
+ */
+ strategy = pgpa_decompose_join(walker, plan,
+ &realouter, &realinner,
+ &elidedouter, &elidedinner,
+ &found_any_outer_gather,
+ &found_any_inner_gather);
+
+ /* If our workspace is full, expand it. */
+ if (join_unroller->nused >= join_unroller->nallocated)
+ {
+ join_unroller->nallocated *= 2;
+ join_unroller->strategy =
+ repalloc_array(join_unroller->strategy,
+ pgpa_join_strategy,
+ join_unroller->nallocated);
+ join_unroller->inner_subplans =
+ repalloc_array(join_unroller->inner_subplans,
+ Plan *,
+ join_unroller->nallocated);
+ join_unroller->inner_elided_nodes =
+ repalloc_array(join_unroller->inner_elided_nodes,
+ ElidedNode *,
+ join_unroller->nallocated);
+ join_unroller->inner_beneath_any_gather =
+ repalloc_array(join_unroller->inner_beneath_any_gather,
+ bool,
+ join_unroller->nallocated);
+ join_unroller->inner_unrollers =
+ repalloc_array(join_unroller->inner_unrollers,
+ pgpa_join_unroller *,
+ join_unroller->nallocated);
+ }
+
+ /*
+ * Since we're flattening outer-deep join trees, it follows that if the
+ * outer side is still an unrollable join, it should be unrolled into this
+ * same object. Otherwise, we've reached the limit of what we can unroll
+ * into this object and must remember the outer side as the final outer
+ * subplan.
+ */
+ if (elidedouter == NULL && pgpa_is_join(realouter))
+ *outer_join_unroller = join_unroller;
+ else
+ {
+ join_unroller->outer_subplan = realouter;
+ join_unroller->outer_elided_node = elidedouter;
+ join_unroller->outer_beneath_any_gather =
+ beneath_any_gather || found_any_outer_gather;
+ }
+
+ /*
+ * Store the inner subplan. If it's an unrollable join, it needs to be
+ * flattened in turn, but into a new unroller object, not this one.
+ */
+ n = join_unroller->nused++;
+ join_unroller->strategy[n] = strategy;
+ join_unroller->inner_subplans[n] = realinner;
+ join_unroller->inner_elided_nodes[n] = elidedinner;
+ join_unroller->inner_beneath_any_gather[n] =
+ beneath_any_gather || found_any_inner_gather;
+ if (elidedinner == NULL && pgpa_is_join(realinner))
+ *inner_join_unroller = pgpa_create_join_unroller();
+ else
+ *inner_join_unroller = NULL;
+ join_unroller->inner_unrollers[n] = *inner_join_unroller;
+}
+
+/*
+ * Use the data we've accumulated in a pgpa_join_unroller object to construct
+ * a pgpa_unrolled_join.
+ */
+pgpa_unrolled_join *
+pgpa_build_unrolled_join(pgpa_plan_walker_context *walker,
+ pgpa_join_unroller *join_unroller)
+{
+ pgpa_unrolled_join *ujoin;
+ int i;
+
+ /*
+ * We shouldn't have gone even so far as to create a join unroller unless
+ * we found at least one unrollable join.
+ */
+ Assert(join_unroller->nused > 0);
+
+ /* Allocate result structures. */
+ ujoin = palloc0_object(pgpa_unrolled_join);
+ ujoin->ninner = join_unroller->nused;
+ ujoin->strategy = palloc0_array(pgpa_join_strategy, join_unroller->nused);
+ ujoin->inner = palloc0_array(pgpa_join_member, join_unroller->nused);
+
+ /* Handle the outermost join. */
+ ujoin->outer.plan = join_unroller->outer_subplan;
+ ujoin->outer.elided_node = join_unroller->outer_elided_node;
+ ujoin->outer.scan =
+ pgpa_build_scan(walker, ujoin->outer.plan,
+ ujoin->outer.elided_node,
+ join_unroller->outer_beneath_any_gather,
+ true);
+
+ /*
+ * We want the joins from the deepest part of the plan tree to appear
+ * first in the result object, but the join unroller adds them in exactly
+ * the reverse of that order, so we need to flip the order of the arrays
+ * when constructing the final result.
+ */
+ for (i = 0; i < join_unroller->nused; ++i)
+ {
+ int k = join_unroller->nused - i - 1;
+
+ /* Copy strategy, Plan, and ElidedNode. */
+ ujoin->strategy[i] = join_unroller->strategy[k];
+ ujoin->inner[i].plan = join_unroller->inner_subplans[k];
+ ujoin->inner[i].elided_node = join_unroller->inner_elided_nodes[k];
+
+ /*
+ * Fill in remaining details, using either the nested join unroller,
+ * or by deriving them from the plan and elided nodes.
+ */
+ if (join_unroller->inner_unrollers[k] != NULL)
+ ujoin->inner[i].unrolled_join =
+ pgpa_build_unrolled_join(walker,
+ join_unroller->inner_unrollers[k]);
+ else
+ ujoin->inner[i].scan =
+ pgpa_build_scan(walker, ujoin->inner[i].plan,
+ ujoin->inner[i].elided_node,
+ join_unroller->inner_beneath_any_gather[k],
+ true);
+ }
+
+ return ujoin;
+}
+
+/*
+ * Free memory allocated for pgpa_join_unroller.
+ */
+void
+pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller)
+{
+ pfree(join_unroller->strategy);
+ pfree(join_unroller->inner_subplans);
+ pfree(join_unroller->inner_elided_nodes);
+	pfree(join_unroller->inner_unrollers);
+	pfree(join_unroller->inner_beneath_any_gather);
+	pfree(join_unroller);
+}
+
+/*
+ * Identify the join strategy used by a join and the "real" inner and outer
+ * plans.
+ *
+ * For example, a Hash Join always has a Hash node on the inner side, but
+ * for all intents and purposes the real inner input is the Hash node's child,
+ * not the Hash node itself.
+ *
+ * Likewise, a Merge Join may have a Sort node on the inner or outer side; if
+ * it does, the real input to the join is the Sort node's child, not the
+ * Sort node itself.
+ *
+ * In addition, with a Merge Join or a Nested Loop, the join planning code
+ * may add additional nodes such as Materialize or Memoize. We regard these
+ * as an aspect of the join strategy. As in the previous cases, the true input
+ * to the join is the underlying node.
+ *
+ * However, if any involved child node previously had a now-elided node stacked
+ * on top, then we can't "look through" that node -- indeed, what's going to be
+ * relevant for our purposes is the ElidedNode on top of that plan node, rather
+ * than the plan node itself.
+ *
+ * If there are multiple elided nodes, we want the one that would have been
+ * uppermost in the plan tree prior to setrefs processing; we expect to find
+ * that one last in the list of elided nodes.
+ *
+ * On return, *realouter and *realinner will have been set to the real outer
+ * and real inner plans that we identified, and *elidedrealouter and
+ * *elidedrealinner to the last of any corresponding elided nodes.
+ * Additionally, *found_any_outer_gather and *found_any_inner_gather will
+ * be set to true if we looked through a Gather or Gather Merge node on
+ * that side of the join, and false otherwise.
+ */
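+/*
+ * For example (hypothetical plan shape): a MergeJoin whose outer child is
+ * Sort -> SeqScan(a) and whose inner child is Material -> Sort -> SeqScan(b)
+ * decomposes to JSTRAT_MERGE_JOIN_MATERIALIZE, with *realouter = SeqScan(a)
+ * and *realinner = SeqScan(b), assuming no elided nodes intervene.
+ */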
+static pgpa_join_strategy
+pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan,
+ Plan **realouter, Plan **realinner,
+ ElidedNode **elidedrealouter, ElidedNode **elidedrealinner,
+ bool *found_any_outer_gather, bool *found_any_inner_gather)
+{
+ PlannedStmt *pstmt = walker->pstmt;
+ JoinType jointype = ((Join *) plan)->jointype;
+ Plan *outerplan = plan->lefttree;
+ Plan *innerplan = plan->righttree;
+ ElidedNode *elidedouter;
+ ElidedNode *elidedinner;
+ pgpa_join_strategy strategy;
+ bool uniqueouter;
+ bool uniqueinner;
+
+ elidedouter = pgpa_last_elided_node(pstmt, outerplan);
+ elidedinner = pgpa_last_elided_node(pstmt, innerplan);
+ *found_any_outer_gather = false;
+ *found_any_inner_gather = false;
+
+ switch (nodeTag(plan))
+ {
+ case T_MergeJoin:
+
+ /*
+ * The planner may have chosen to place a Material node on the
+ * inner side of the MergeJoin; if this is present, we record it
+ * as part of the join strategy.
+ */
+ if (elidedinner == NULL && IsA(innerplan, Material))
+ {
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ strategy = JSTRAT_MERGE_JOIN_MATERIALIZE;
+ }
+ else
+ strategy = JSTRAT_MERGE_JOIN_PLAIN;
+
+ /*
+ * For a MergeJoin, either the outer or the inner subplan, or
+ * both, may have needed to be sorted; we must disregard any Sort
+ * or IncrementalSort node to find the real inner or outer
+ * subplan.
+ */
+ if (elidedouter == NULL && is_sorting_plan(outerplan))
+ elidedouter = pgpa_descend_node(pstmt, &outerplan);
+ if (elidedinner == NULL && is_sorting_plan(innerplan))
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ break;
+
+ case T_NestLoop:
+
+ /*
+ * The planner may have chosen to place a Material or Memoize node
+ * on the inner side of the NestLoop; if this is present, we
+ * record it as part of the join strategy.
+ */
+ if (elidedinner == NULL && IsA(innerplan, Material))
+ {
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ strategy = JSTRAT_NESTED_LOOP_MATERIALIZE;
+ }
+ else if (elidedinner == NULL && IsA(innerplan, Memoize))
+ {
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ strategy = JSTRAT_NESTED_LOOP_MEMOIZE;
+ }
+ else
+ strategy = JSTRAT_NESTED_LOOP_PLAIN;
+ break;
+
+ case T_HashJoin:
+
+ /*
+ * The inner subplan of a HashJoin is always a Hash node; the real
+ * inner subplan is the Hash node's child.
+ */
+ Assert(IsA(innerplan, Hash));
+ Assert(elidedinner == NULL);
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ strategy = JSTRAT_HASH_JOIN;
+ break;
+
+ default:
+ elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan));
+ }
+
+ /*
+ * The planner may have decided to implement a semijoin by first making
+ * the nullable side of the plan unique, and then performing a normal join
+ * against the result. Therefore, we might need to descend through a
+ * unique node on either side of the plan.
+ */
+ uniqueouter = pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter);
+ uniqueinner = pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner);
+
+ /*
+ * Can we see a Result node here, to project above a Gather? So far I've
+ * found no example that behaves that way; rather, the Gather or Gather
+ * Merge is made to project. Hence, don't test is_result_node_with_child()
+ * at this point.
+ */
+
+ /*
+ * The planner may have decided to parallelize part of the join tree, so
+	 * we could find a Gather or Gather Merge node here. Note that, if
+	 * present, this will appear below any nodes we considered as part of
+	 * the join strategy, but we could find another uniqueness-enforcing
+	 * node below the Gather or Gather Merge.
+ */
+ if (elidedouter == NULL)
+ {
+ elidedouter = pgpa_descend_any_gather(pstmt, &outerplan,
+ found_any_outer_gather);
+ if (found_any_outer_gather &&
+ pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter))
+ uniqueouter = true;
+ }
+ if (elidedinner == NULL)
+ {
+ elidedinner = pgpa_descend_any_gather(pstmt, &innerplan,
+ found_any_inner_gather);
+ if (found_any_inner_gather &&
+ pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner))
+ uniqueinner = true;
+ }
+
+ /*
+	 * It's possible that a Result node has been inserted either to project
+	 * a target list or to implement a one-time filter. If so, we can
+	 * descend through it. Note that a Result node without a child would be
+	 * a degenerate scan or join, and not something we could descend
+	 * through.
+ */
+ if (elidedouter == NULL && is_result_node_with_child(outerplan))
+ elidedouter = pgpa_descend_node(pstmt, &outerplan);
+ if (elidedinner == NULL && is_result_node_with_child(innerplan))
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+
+ /*
+ * If this is a semijoin that was converted to an inner join by making one
+ * side or the other unique, make a note that the inner or outer subplan,
+ * as appropriate, should be treated as a query plan feature when the main
+ * tree traversal reaches it.
+ *
+ * Conversely, if the planner could have made one side of the join unique
+ * and thereby converted it to an inner join, and chose not to do so, that
+ * is also worth noting.
+ *
+	 * NB: This code could appear slightly higher up in this function, but
+ * none of the nodes through which we just descended should have
+ * associated RTIs.
+ *
+ * NB: This seems like a somewhat hacky way of passing information up to
+ * the main tree walk, but I don't currently have a better idea.
+ */
+ if (uniqueouter)
+ pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, outerplan);
+ else if (jointype == JOIN_RIGHT_SEMI)
+ pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, outerplan);
+ if (uniqueinner)
+ pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, innerplan);
+ else if (jointype == JOIN_SEMI)
+ pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, innerplan);
+
+ /* Set output parameters. */
+ *realouter = outerplan;
+ *realinner = innerplan;
+ *elidedrealouter = elidedouter;
+ *elidedrealinner = elidedinner;
+ return strategy;
+}
+
+/*
+ * Descend through a Plan node in a join tree that the caller has determined
+ * to be irrelevant.
+ *
+ * Updates *plan, and returns the last of any elided nodes pertaining to the
+ * new plan node.
+ */
+static ElidedNode *
+pgpa_descend_node(PlannedStmt *pstmt, Plan **plan)
+{
+ *plan = (*plan)->lefttree;
+ return pgpa_last_elided_node(pstmt, *plan);
+}
+
+/*
+ * Descend through a Gather or Gather Merge node, if present, and any Sort
+ * or IncrementalSort node occurring under a Gather Merge.
+ *
+ * Caller should have verified that there is no ElidedNode pertaining to
+ * the initial value of *plan.
+ *
+ * Updates *plan, and returns the last of any elided nodes pertaining to the
+ * new plan node. Sets *found_any_gather = true if either Gather or
+ * Gather Merge was found, and otherwise leaves it unchanged.
+ */
+static ElidedNode *
+pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan,
+ bool *found_any_gather)
+{
+ if (IsA(*plan, Gather))
+ {
+ *found_any_gather = true;
+ return pgpa_descend_node(pstmt, plan);
+ }
+
+ if (IsA(*plan, GatherMerge))
+ {
+ ElidedNode *elided = pgpa_descend_node(pstmt, plan);
+
+ if (elided == NULL && is_sorting_plan(*plan))
+ elided = pgpa_descend_node(pstmt, plan);
+
+ *found_any_gather = true;
+ return elided;
+ }
+
+ return NULL;
+}
+
+/*
+ * If *plan is an Agg or Unique node, we want to descend through it, unless
+ * it has a corresponding elided node. If its immediate child is a Sort or
+ * IncrementalSort, we also want to descend through that, unless it has a
+ * corresponding elided node.
+ *
+ * On entry, *elided_node must be the last of any elided nodes corresponding
+ * to *plan; on exit, this will still be true, but *plan may have been updated.
+ *
+ * The reason we don't want to descend through elided nodes is that a single
+ * join tree can't cross through any sort of elided node: subqueries are
+ * planned separately, and planning inside an Append or MergeAppend is
+ * separate from planning outside of it.
+ *
+ * The return value is true if we descend through a node that we believe is
+ * making one side of a semijoin unique, and otherwise false.
+ */
+static bool
+pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+ ElidedNode **elided_node)
+{
+ bool descend = false;
+ bool sjunique = false;
+
+ if (*elided_node != NULL)
+ return sjunique;
+
+ if (IsA(*plan, Unique))
+ {
+ descend = true;
+ sjunique = true;
+ }
+ else if (IsA(*plan, Agg))
+ {
+ /*
+ * If this is a simple Agg node, then assume it's here to implement
+		 * semijoin uniqueness. Otherwise, assume it's performing part of an
+		 * eager aggregation or partitionwise aggregation operation that is
+		 * completed at a higher level of the plan tree.
+ *
+ * (Note that when we're using an Agg node for uniqueness, there's no
+ * need for any case other than AGGSPLIT_SIMPLE, because there's no
+		 * aggregated column being computed. However, the fact that
+ * AGGSPLIT_SIMPLE is in use doesn't prove that this Agg is here for
+ * the semijoin uniqueness. Maybe we should adjust an Agg node to
+ * carry a "purpose" field so that code like this can be more certain
+ * of its analysis.)
+ */
+ descend = true;
+ sjunique = (((Agg *) *plan)->aggsplit == AGGSPLIT_SIMPLE);
+ }
+
+ if (descend)
+ {
+ *elided_node = pgpa_descend_node(pstmt, plan);
+
+ if (*elided_node == NULL && is_sorting_plan(*plan))
+ *elided_node = pgpa_descend_node(pstmt, plan);
+ }
+
+ return sjunique;
+}
+
+/*
+ * Is this a Result node that has a child?
+ */
+static bool
+is_result_node_with_child(Plan *plan)
+{
+ return IsA(plan, Result) && plan->lefttree != NULL;
+}
+
+/*
+ * Is this a Plan node whose purpose is to put the data in a certain order?
+ */
+static bool
+is_sorting_plan(Plan *plan)
+{
+ return IsA(plan, Sort) || IsA(plan, IncrementalSort);
+}
diff --git a/contrib/pg_plan_advice/pgpa_join.h b/contrib/pg_plan_advice/pgpa_join.h
new file mode 100644
index 00000000000..4dc72986a70
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_join.h
@@ -0,0 +1,105 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_join.h
+ * analysis of joins in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_join.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_JOIN_H
+#define PGPA_JOIN_H
+
+#include "nodes/plannodes.h"
+
+typedef struct pgpa_plan_walker_context pgpa_plan_walker_context;
+typedef struct pgpa_join_unroller pgpa_join_unroller;
+typedef struct pgpa_unrolled_join pgpa_unrolled_join;
+
+/*
+ * Although there are three main join strategies, we try to classify things
+ * more precisely here: merge joins have the option of using materialization
+ * on the inner side, and nested loops can use either materialization or
+ * memoization.
+ */
+typedef enum
+{
+ JSTRAT_MERGE_JOIN_PLAIN = 0,
+ JSTRAT_MERGE_JOIN_MATERIALIZE,
+ JSTRAT_NESTED_LOOP_PLAIN,
+ JSTRAT_NESTED_LOOP_MATERIALIZE,
+ JSTRAT_NESTED_LOOP_MEMOIZE,
+ JSTRAT_HASH_JOIN
+ /* update NUM_PGPA_JOIN_STRATEGY if you add anything here */
+} pgpa_join_strategy;
+
+#define NUM_PGPA_JOIN_STRATEGY ((int) JSTRAT_HASH_JOIN + 1)
+
+/*
+ * In an outer-deep join tree, every member of an unrolled join will be a scan,
+ * but join trees with other shapes can contain unrolled joins.
+ *
+ * The plan node we store here will be the inner or outer child of the join
+ * node, as appropriate, except that we look through subnodes that we regard as
+ * part of the join method itself. For instance, for a Nested Loop that
+ * materializes the inner input, we'll store the child of the Materialize node,
+ * not the Materialize node itself.
+ *
+ * If setrefs processing elided one or more nodes from the plan tree, then
+ * we'll store details about the topmost of those in elided_node; otherwise,
+ * it will be NULL.
+ *
+ * Exactly one of scan and unrolled_join will be non-NULL.
+ */
+typedef struct
+{
+ Plan *plan;
+ ElidedNode *elided_node;
+ struct pgpa_scan *scan;
+ pgpa_unrolled_join *unrolled_join;
+} pgpa_join_member;
+
+/*
+ * We convert outer-deep join trees to a flat structure; that is, ((A JOIN B)
+ * JOIN C) JOIN D gets converted to outer = A, inner = [B, C, D]. When joins
+ * aren't outer-deep, substructure is required, e.g. (A JOIN B) JOIN (C JOIN D)
+ * is represented as outer = A, inner = [B, X], where X is a pgpa_unrolled_join
+ * covering C-D.
+ */
+struct pgpa_unrolled_join
+{
+ /* Outermost member; must not itself be an unrolled join. */
+ pgpa_join_member outer;
+
+ /* Number of inner members. Length of the strategy and inner arrays. */
+ unsigned ninner;
+
+ /* Array of strategies, one per non-outermost member. */
+ pgpa_join_strategy *strategy;
+
+ /* Array of members, excluding the outermost. Deepest first. */
+ pgpa_join_member *inner;
+};
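+
+/*
+ * To continue the example above: for ((A JOIN B) JOIN C) JOIN D, outer
+ * describes A; inner[0], inner[1], and inner[2] describe B, C, and D
+ * respectively; and strategy[0] through strategy[2] record how each of
+ * those joins was performed, with the deepest join (A to B) first.
+ */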
+
+/*
+ * Does this plan node inherit from Join?
+ */
+static inline bool
+pgpa_is_join(Plan *plan)
+{
+ return IsA(plan, NestLoop) || IsA(plan, MergeJoin) || IsA(plan, HashJoin);
+}
+
+extern pgpa_join_unroller *pgpa_create_join_unroller(void);
+extern void pgpa_unroll_join(pgpa_plan_walker_context *walker,
+ Plan *plan, bool beneath_any_gather,
+ pgpa_join_unroller *join_unroller,
+ pgpa_join_unroller **outer_join_unroller,
+ pgpa_join_unroller **inner_join_unroller);
+extern pgpa_unrolled_join *pgpa_build_unrolled_join(pgpa_plan_walker_context *walker,
+ pgpa_join_unroller *join_unroller);
+extern void pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_output.c b/contrib/pg_plan_advice/pgpa_output.c
new file mode 100644
index 00000000000..67647acdf5a
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_output.c
@@ -0,0 +1,571 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_output.c
+ * produce textual output from the results of a plan tree walk
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_output.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_output.h"
+#include "pgpa_scan.h"
+
+#include "nodes/parsenodes.h"
+#include "parser/parsetree.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+/*
+ * Context object for textual advice generation.
+ *
+ * rid_strings is an array of string forms of the caller-provided range
+ * table identifiers, indexed by RTI - 1. See the comments at the top of
+ * pgpa_identifier.c for more details.
+ *
+ * buf is the caller-provided output buffer.
+ *
+ * wrap_column is the column at which lines are wrapped, so that we don't
+ * create output that is too wide. See pgpa_maybe_linebreak() and comments
+ * in pgpa_output_advice().
+ */
+typedef struct pgpa_output_context
+{
+ const char **rid_strings;
+ StringInfo buf;
+ int wrap_column;
+} pgpa_output_context;
+
+static void pgpa_output_unrolled_join(pgpa_output_context *context,
+ pgpa_unrolled_join *join);
+static void pgpa_output_join_member(pgpa_output_context *context,
+ pgpa_join_member *member);
+static void pgpa_output_scan_strategy(pgpa_output_context *context,
+ pgpa_scan_strategy strategy,
+ List *scans);
+static void pgpa_output_relation_name(pgpa_output_context *context, Oid relid);
+static void pgpa_output_query_feature(pgpa_output_context *context,
+ pgpa_qf_type type,
+ List *query_features);
+static void pgpa_output_simple_strategy(pgpa_output_context *context,
+ char *strategy,
+ List *relid_sets);
+static void pgpa_output_no_gather(pgpa_output_context *context,
+ Bitmapset *relids);
+static void pgpa_output_relations(pgpa_output_context *context, StringInfo buf,
+ Bitmapset *relids);
+
+static char *pgpa_cstring_join_strategy(pgpa_join_strategy strategy);
+static char *pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy);
+static char *pgpa_cstring_query_feature_type(pgpa_qf_type type);
+
+static void pgpa_maybe_linebreak(StringInfo buf, int wrap_column);
+
+/*
+ * Append query advice to the provided buffer.
+ *
+ * Before calling this function, 'walker' must be used to iterate over the
+ * main plan tree and all subplans from the PlannedStmt.
+ *
+ * 'rt_identifiers' is a table of unique identifiers, one for each RTI.
+ * See pgpa_create_identifiers_for_planned_stmt().
+ *
+ * Results will be appended to 'buf'.
+ */
+void
+pgpa_output_advice(StringInfo buf, pgpa_plan_walker_context *walker,
+ pgpa_identifier *rt_identifiers)
+{
+ Index rtable_length = list_length(walker->pstmt->rtable);
+ ListCell *lc;
+ pgpa_output_context context;
+
+ /* Basic initialization. */
+ memset(&context, 0, sizeof(pgpa_output_context));
+ context.buf = buf;
+
+ /*
+ * Convert identifiers to string form. Note that the loop variable here is
+ * not an RTI, because RTIs are 1-based. Some RTIs will have no
+	 * identifier, either because the rtekind is RTE_JOIN or because that
+ * portion of the query didn't make it into the final plan.
+ */
+ context.rid_strings = palloc0_array(const char *, rtable_length);
+ for (int i = 0; i < rtable_length; ++i)
+ if (rt_identifiers[i].alias_name != NULL)
+ context.rid_strings[i] = pgpa_identifier_string(&rt_identifiers[i]);
+
+ /*
+ * If the user chooses to use EXPLAIN (PLAN_ADVICE) in an 80-column window
+ * from a psql client with default settings, psql will add one space to
+ * the left of the output and EXPLAIN will add two more to the left of the
+ * advice. Thus, lines of more than 77 characters will wrap. We set the
+ * wrap limit to 76 here so that the output won't reach all the way to the
+ * very last column of the terminal.
+ *
+	 * Of course, this is a fairly arbitrary set of assumptions, and one could
+ * well make an argument for a different wrap limit, or for a configurable
+ * one.
+ */
+ context.wrap_column = 76;
+
+ /*
+	 * Each piece of JOIN_ORDER() advice fully describes the join order for
+	 * a single unrolled join. Merging is not permitted, because that would
+ * change the meaning, e.g. SEQ_SCAN(a b c d) means simply that sequential
+ * scans should be used for all of those relations, and is thus equivalent
+ * to SEQ_SCAN(a b) SEQ_SCAN(c d), but JOIN_ORDER(a b c d) means that "a"
+ * is the driving table which is then joined to "b" then "c" then "d",
+ * which is totally different from JOIN_ORDER(a b) and JOIN_ORDER(c d).
+ */
+ foreach(lc, walker->toplevel_unrolled_joins)
+ {
+ pgpa_unrolled_join *ujoin = lfirst(lc);
+
+ if (buf->len > 0)
+ appendStringInfoChar(buf, '\n');
+ appendStringInfo(context.buf, "JOIN_ORDER(");
+ pgpa_output_unrolled_join(&context, ujoin);
+ appendStringInfoChar(context.buf, ')');
+ pgpa_maybe_linebreak(context.buf, context.wrap_column);
+ }
+
+ /* Emit join strategy advice. */
+ for (int s = 0; s < NUM_PGPA_JOIN_STRATEGY; ++s)
+ {
+ char *strategy = pgpa_cstring_join_strategy(s);
+
+ pgpa_output_simple_strategy(&context,
+ strategy,
+ walker->join_strategies[s]);
+ }
+
+ /*
+ * Emit scan strategy advice (but not for ordinary scans, which are
+ * definitionally uninteresting).
+ */
+ for (int c = 0; c < NUM_PGPA_SCAN_STRATEGY; ++c)
+ if (c != PGPA_SCAN_ORDINARY)
+ pgpa_output_scan_strategy(&context, c, walker->scans[c]);
+
+ /* Emit query feature advice. */
+ for (int t = 0; t < NUM_PGPA_QF_TYPES; ++t)
+ pgpa_output_query_feature(&context, t, walker->query_features[t]);
+
+ /* Emit NO_GATHER advice. */
+ pgpa_output_no_gather(&context, walker->no_gather_scans);
+}
+
+/*
+ * Output the members of an unrolled join, first the outermost member, and
+ * then the inner members one by one, as part of JOIN_ORDER() advice.
+ */
+static void
+pgpa_output_unrolled_join(pgpa_output_context *context,
+ pgpa_unrolled_join *join)
+{
+ pgpa_output_join_member(context, &join->outer);
+
+ for (int k = 0; k < join->ninner; ++k)
+ {
+ pgpa_join_member *member = &join->inner[k];
+
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+ appendStringInfoChar(context->buf, ' ');
+ pgpa_output_join_member(context, member);
+ }
+}
+
+/*
+ * Output a single member of an unrolled join as part of JOIN_ORDER() advice.
+ */
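+/*
+ * For illustration (hypothetical aliases): a member that is itself an
+ * unrolled join renders as "(a b)", a single-relation scan renders simply
+ * as "a", and a multi-relation scan renders as "{a b}".
+ */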
+static void
+pgpa_output_join_member(pgpa_output_context *context,
+ pgpa_join_member *member)
+{
+ if (member->unrolled_join != NULL)
+ {
+ appendStringInfoChar(context->buf, '(');
+ pgpa_output_unrolled_join(context, member->unrolled_join);
+ appendStringInfoChar(context->buf, ')');
+ }
+ else
+ {
+ pgpa_scan *scan = member->scan;
+
+ Assert(scan != NULL);
+ if (bms_membership(scan->relids) == BMS_SINGLETON)
+ pgpa_output_relations(context, context->buf, scan->relids);
+ else
+ {
+ appendStringInfoChar(context->buf, '{');
+ pgpa_output_relations(context, context->buf, scan->relids);
+ appendStringInfoChar(context->buf, '}');
+ }
+ }
+}
+
+/*
+ * Output advice for a List of pgpa_scan objects.
+ *
+ * All the scans must use the strategy specified by the "strategy" argument.
+ */
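+/*
+ * For illustration (hypothetical names): INDEX_SCAN advice for two scans
+ * might render as "INDEX_SCAN(foo public.foo_pkey bar public.bar_idx)".
+ */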
+static void
+pgpa_output_scan_strategy(pgpa_output_context *context,
+ pgpa_scan_strategy strategy,
+ List *scans)
+{
+ bool first = true;
+
+ if (scans == NIL)
+ return;
+
+ if (context->buf->len > 0)
+ appendStringInfoChar(context->buf, '\n');
+ appendStringInfo(context->buf, "%s(",
+ pgpa_cstring_scan_strategy(strategy));
+
+ foreach_ptr(pgpa_scan, scan, scans)
+ {
+ Plan *plan = scan->plan;
+
+ if (first)
+ first = false;
+ else
+ {
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+ appendStringInfoChar(context->buf, ' ');
+ }
+
+ /* Output the relation identifiers. */
+ if (bms_membership(scan->relids) == BMS_SINGLETON)
+ pgpa_output_relations(context, context->buf, scan->relids);
+ else
+ {
+ appendStringInfoChar(context->buf, '(');
+ pgpa_output_relations(context, context->buf, scan->relids);
+ appendStringInfoChar(context->buf, ')');
+ }
+
+ /* For index or index-only scans, output index information. */
+ if (strategy == PGPA_SCAN_INDEX)
+ {
+ Assert(IsA(plan, IndexScan));
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+ appendStringInfoChar(context->buf, ' ');
+ pgpa_output_relation_name(context, ((IndexScan *) plan)->indexid);
+ }
+ else if (strategy == PGPA_SCAN_INDEX_ONLY)
+ {
+ Assert(IsA(plan, IndexOnlyScan));
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+ appendStringInfoChar(context->buf, ' ');
+ pgpa_output_relation_name(context,
+ ((IndexOnlyScan *) plan)->indexid);
+ }
+ }
+
+ appendStringInfoChar(context->buf, ')');
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+}
+
+/*
+ * Output a schema-qualified relation name.
+ */
+static void
+pgpa_output_relation_name(pgpa_output_context *context, Oid relid)
+{
+ Oid nspoid = get_rel_namespace(relid);
+ char *relnamespace = get_namespace_name_or_temp(nspoid);
+ char *relname = get_rel_name(relid);
+
+ appendStringInfoString(context->buf, quote_identifier(relnamespace));
+ appendStringInfoChar(context->buf, '.');
+ appendStringInfoString(context->buf, quote_identifier(relname));
+}
+
+/*
+ * Output advice for a List of pgpa_query_feature objects.
+ *
+ * All features must be of the type specified by the "type" argument.
+ */
+static void
+pgpa_output_query_feature(pgpa_output_context *context, pgpa_qf_type type,
+ List *query_features)
+{
+ bool first = true;
+
+ if (query_features == NIL)
+ return;
+
+ if (context->buf->len > 0)
+ appendStringInfoChar(context->buf, '\n');
+ appendStringInfo(context->buf, "%s(",
+ pgpa_cstring_query_feature_type(type));
+
+ foreach_ptr(pgpa_query_feature, qf, query_features)
+ {
+ if (first)
+ first = false;
+ else
+ {
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+ appendStringInfoChar(context->buf, ' ');
+ }
+
+ if (bms_membership(qf->relids) == BMS_SINGLETON)
+ pgpa_output_relations(context, context->buf, qf->relids);
+ else
+ {
+ appendStringInfoChar(context->buf, '(');
+ pgpa_output_relations(context, context->buf, qf->relids);
+ appendStringInfoChar(context->buf, ')');
+ }
+ }
+
+ appendStringInfoChar(context->buf, ')');
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+}
+
+/*
+ * Output "simple" advice for a List of Bitmapset objects each of which
+ * contains one or more RTIs.
+ *
+ * By simple, we just mean that the advice emitted follows the most
+ * straightforward pattern: the strategy name, followed by a list of items
+ * separated by spaces and surrounded by parentheses. Individual items in
+ * the list are a single relation identifier for a Bitmapset that contains
+ * just one member, or a sub-list again separated by spaces and surrounded
+ * by parentheses for a Bitmapset with multiple members. Bitmapsets with
+ * no members probably shouldn't occur here, but if they do they'll be
+ * rendered as an empty sub-list.
+ */
+static void
+pgpa_output_simple_strategy(pgpa_output_context *context, char *strategy,
+ List *relid_sets)
+{
+ bool first = true;
+
+ if (relid_sets == NIL)
+ return;
+
+ if (context->buf->len > 0)
+ appendStringInfoChar(context->buf, '\n');
+ appendStringInfo(context->buf, "%s(", strategy);
+
+ foreach_node(Bitmapset, relids, relid_sets)
+ {
+ if (first)
+ first = false;
+ else
+ {
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+ appendStringInfoChar(context->buf, ' ');
+ }
+
+ if (bms_membership(relids) == BMS_SINGLETON)
+ pgpa_output_relations(context, context->buf, relids);
+ else
+ {
+ appendStringInfoChar(context->buf, '(');
+ pgpa_output_relations(context, context->buf, relids);
+ appendStringInfoChar(context->buf, ')');
+ }
+ }
+
+ appendStringInfoChar(context->buf, ')');
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+}
+
+/*
+ * Output NO_GATHER advice for all relations not appearing beneath any
+ * Gather or Gather Merge node.
+ */
+static void
+pgpa_output_no_gather(pgpa_output_context *context, Bitmapset *relids)
+{
+ if (relids == NULL)
+ return;
+ if (context->buf->len > 0)
+ appendStringInfoChar(context->buf, '\n');
+ appendStringInfoString(context->buf, "NO_GATHER(");
+ pgpa_output_relations(context, context->buf, relids);
+ appendStringInfoChar(context->buf, ')');
+}
+
+/*
+ * Output the identifiers for each RTI in the provided set.
+ *
+ * Identifiers are separated by spaces, and a line break is possible after
+ * each one.
+ */
+static void
+pgpa_output_relations(pgpa_output_context *context, StringInfo buf,
+ Bitmapset *relids)
+{
+ int rti = -1;
+ bool first = true;
+
+ while ((rti = bms_next_member(relids, rti)) >= 0)
+ {
+ const char *rid_string = context->rid_strings[rti - 1];
+
+ if (rid_string == NULL)
+ elog(ERROR, "no identifier for RTI %d", rti);
+
+ if (first)
+ {
+ first = false;
+ appendStringInfoString(buf, rid_string);
+ }
+ else
+ {
+ pgpa_maybe_linebreak(buf, context->wrap_column);
+ appendStringInfo(buf, " %s", rid_string);
+ }
+ }
+}
+
+/*
+ * Get a C string that corresponds to the specified join strategy.
+ */
+static char *
+pgpa_cstring_join_strategy(pgpa_join_strategy strategy)
+{
+ switch (strategy)
+ {
+ case JSTRAT_MERGE_JOIN_PLAIN:
+ return "MERGE_JOIN_PLAIN";
+ case JSTRAT_MERGE_JOIN_MATERIALIZE:
+ return "MERGE_JOIN_MATERIALIZE";
+ case JSTRAT_NESTED_LOOP_PLAIN:
+ return "NESTED_LOOP_PLAIN";
+ case JSTRAT_NESTED_LOOP_MATERIALIZE:
+ return "NESTED_LOOP_MATERIALIZE";
+ case JSTRAT_NESTED_LOOP_MEMOIZE:
+ return "NESTED_LOOP_MEMOIZE";
+ case JSTRAT_HASH_JOIN:
+ return "HASH_JOIN";
+ }
+
+ pg_unreachable();
+ return NULL;
+}
+
+/*
+ * Get a C string that corresponds to the specified scan strategy.
+ */
+static char *
+pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy)
+{
+ switch (strategy)
+ {
+ case PGPA_SCAN_ORDINARY:
+ return "ORDINARY_SCAN";
+ case PGPA_SCAN_SEQ:
+ return "SEQ_SCAN";
+ case PGPA_SCAN_BITMAP_HEAP:
+ return "BITMAP_HEAP_SCAN";
+ case PGPA_SCAN_FOREIGN:
+ return "FOREIGN_JOIN";
+ case PGPA_SCAN_INDEX:
+ return "INDEX_SCAN";
+ case PGPA_SCAN_INDEX_ONLY:
+ return "INDEX_ONLY_SCAN";
+ case PGPA_SCAN_PARTITIONWISE:
+ return "PARTITIONWISE";
+ case PGPA_SCAN_TID:
+ return "TID_SCAN";
+ }
+
+ pg_unreachable();
+ return NULL;
+}
+
+/*
+ * Get a C string that corresponds to the specified query feature type.
+ */
+static char *
+pgpa_cstring_query_feature_type(pgpa_qf_type type)
+{
+ switch (type)
+ {
+ case PGPAQF_GATHER:
+ return "GATHER";
+ case PGPAQF_GATHER_MERGE:
+ return "GATHER_MERGE";
+ case PGPAQF_SEMIJOIN_NON_UNIQUE:
+ return "SEMIJOIN_NON_UNIQUE";
+ case PGPAQF_SEMIJOIN_UNIQUE:
+ return "SEMIJOIN_UNIQUE";
+ }
+
+ pg_unreachable();
+ return NULL;
+}
+
+/*
+ * Insert a line break into the StringInfoData, if needed.
+ *
+ * If wrap_column is zero or negative, this does nothing. Otherwise, we
+ * consider inserting a newline. We only insert a newline if the length of
+ * the last line in the buffer exceeds wrap_column, and not if we'd be
+ * inserting a newline at or before the beginning of the current line.
+ *
+ * The position at which the newline is inserted is simply wherever the
+ * buffer ended the last time this function was called. In other words,
+ * the caller is expected to call this function every time we reach a good
+ * place for a line break.
+ */
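+/*
+ * Worked example, assuming wrap_column = 10: if the buffer holds
+ * "SEQ_SCAN(a b)" (length 13) and the position remembered by the previous
+ * call is offset 10, just after the "a", then the current line is too
+ * long, so a newline is inserted at offset 10, yielding "SEQ_SCAN(a\n b)".
+ */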
+static void
+pgpa_maybe_linebreak(StringInfo buf, int wrap_column)
+{
+ char *trailing_nl;
+ int line_start;
+ int save_cursor;
+
+ /* If line wrapping is disabled, exit quickly. */
+ if (wrap_column <= 0)
+ return;
+
+ /*
+ * Set line_start to the byte offset within buf->data of the first
+ * character of the current line, where the current line means the last
+ * one in the buffer. Note that line_start could be the offset of the
+ * trailing '\0' if the last character in the buffer is a line break.
+ */
+ trailing_nl = strrchr(buf->data, '\n');
+ if (trailing_nl == NULL)
+ line_start = 0;
+ else
+ line_start = (trailing_nl - buf->data) + 1;
+
+ /*
+ * Remember that the current end of the buffer is a potential location to
+ * insert a line break on a future call to this function.
+ */
+ save_cursor = buf->cursor;
+ buf->cursor = buf->len;
+
+ /* If we haven't passed the wrap column, we don't need a newline. */
+ if (buf->len - line_start <= wrap_column)
+ return;
+
+ /*
+ * It only makes sense to insert a newline at a position later than the
+ * beginning of the current line.
+ */
+	if (save_cursor <= line_start)
+ return;
+
+ /* Insert a newline at the previous cursor location. */
+ enlargeStringInfo(buf, 1);
+ memmove(&buf->data[save_cursor] + 1, &buf->data[save_cursor],
+ buf->len - save_cursor);
+ ++buf->cursor;
+ buf->data[++buf->len] = '\0';
+ buf->data[save_cursor] = '\n';
+}
diff --git a/contrib/pg_plan_advice/pgpa_output.h b/contrib/pg_plan_advice/pgpa_output.h
new file mode 100644
index 00000000000..47496d76f52
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_output.h
@@ -0,0 +1,22 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_output.h
+ * produce textual output from the results of a plan tree walk
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_output.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_OUTPUT_H
+#define PGPA_OUTPUT_H
+
+#include "pgpa_identifier.h"
+#include "pgpa_walker.h"
+
+extern void pgpa_output_advice(StringInfo buf,
+ pgpa_plan_walker_context *walker,
+ pgpa_identifier *rt_identifiers);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_parser.y b/contrib/pg_plan_advice/pgpa_parser.y
new file mode 100644
index 00000000000..4c3a3ed6db9
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_parser.y
@@ -0,0 +1,301 @@
+%{
+/*
+ * Parser for plan advice
+ *
+ * Copyright (c) 2000-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_parser.y
+ */
+
+#include "postgres.h"
+
+#include
+#include
+
+#include "fmgr.h"
+#include "nodes/miscnodes.h"
+#include "utils/builtins.h"
+#include "utils/float.h"
+
+#include "pgpa_ast.h"
+#include "pgpa_parser.h"
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc. This prevents
+ * memory leaks if we error out during parsing.
+ */
+#define YYMALLOC palloc
+#define YYFREE pfree
+%}
+
+/* BISON Declarations */
+%parse-param {List **result}
+%parse-param {char **parse_error_msg_p}
+%parse-param {yyscan_t yyscanner}
+%lex-param {List **result}
+%lex-param {char **parse_error_msg_p}
+%lex-param {yyscan_t yyscanner}
+%pure-parser
+%expect 0
+%name-prefix="pgpa_yy"
+
+%union
+{
+ char *str;
+ int integer;
+ List *list;
+ pgpa_advice_item *item;
+ pgpa_advice_target *target;
+ pgpa_index_target *itarget;
+}
+%token <str> TOK_IDENT TOK_TAG_JOIN_ORDER TOK_TAG_INDEX
+%token <str> TOK_TAG_SIMPLE TOK_TAG_GENERIC
+%token <integer> TOK_INTEGER
+
+%type <integer> opt_ri_occurrence
+%type <item> advice_item
+%type <list> advice_item_list generic_target_list
+%type <list> index_target_list join_order_target_list
+%type <list> opt_partition simple_target_list
+%type <str> identifier opt_plan_name
+%type <target> generic_sublist join_order_sublist
+%type <target> relation_identifier
+%type <itarget> index_name
+
+%start parse_toplevel
+
+/* Grammar follows */
+%%
+
+parse_toplevel: advice_item_list
+ {
+ (void) yynerrs; /* suppress compiler warning */
+ *result = $1;
+ }
+ ;
+
+advice_item_list: advice_item_list advice_item
+ { $$ = lappend($1, $2); }
+ |
+ { $$ = NIL; }
+ ;
+
+advice_item: TOK_TAG_JOIN_ORDER '(' join_order_target_list ')'
+ {
+ $$ = palloc0_object(pgpa_advice_item);
+ $$->tag = PGPA_TAG_JOIN_ORDER;
+ $$->targets = $3;
+ if ($3 == NIL)
+ pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+ "JOIN_ORDER must have at least one target");
+ }
+ | TOK_TAG_INDEX '(' index_target_list ')'
+ {
+ $$ = palloc0_object(pgpa_advice_item);
+ if (strcmp($1, "index_only_scan") == 0)
+ $$->tag = PGPA_TAG_INDEX_ONLY_SCAN;
+ else if (strcmp($1, "index_scan") == 0)
+ $$->tag = PGPA_TAG_INDEX_SCAN;
+ else
+ elog(ERROR, "tag parsing failed: %s", $1);
+ $$->targets = $3;
+ }
+ | TOK_TAG_SIMPLE '(' simple_target_list ')'
+ {
+ $$ = palloc0_object(pgpa_advice_item);
+ if (strcmp($1, "bitmap_heap_scan") == 0)
+ $$->tag = PGPA_TAG_BITMAP_HEAP_SCAN;
+ else if (strcmp($1, "no_gather") == 0)
+ $$->tag = PGPA_TAG_NO_GATHER;
+ else if (strcmp($1, "seq_scan") == 0)
+ $$->tag = PGPA_TAG_SEQ_SCAN;
+ else if (strcmp($1, "tid_scan") == 0)
+ $$->tag = PGPA_TAG_TID_SCAN;
+ else
+ elog(ERROR, "tag parsing failed: %s", $1);
+ $$->targets = $3;
+ }
+ | TOK_TAG_GENERIC '(' generic_target_list ')'
+ {
+ bool fail;
+
+ $$ = palloc0_object(pgpa_advice_item);
+ $$->tag = pgpa_parse_advice_tag($1, &fail);
+ if (fail)
+ {
+ pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+ "unrecognized advice tag");
+ }
+
+ if ($$->tag == PGPA_TAG_FOREIGN_JOIN)
+ {
+ foreach_ptr(pgpa_advice_target, target, $3)
+ {
+ if (target->ttype == PGPA_TARGET_IDENTIFIER ||
+ list_length(target->children) == 1)
+ pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+ "FOREIGN_JOIN targets must contain more than one relation identifier");
+ }
+ }
+
+ $$->targets = $3;
+ }
+ ;
+
+relation_identifier: identifier opt_ri_occurrence opt_partition opt_plan_name
+ {
+ $$ = palloc0_object(pgpa_advice_target);
+ $$->ttype = PGPA_TARGET_IDENTIFIER;
+ $$->rid.alias_name = $1;
+ $$->rid.occurrence = $2;
+ if (list_length($3) == 2)
+ {
+ $$->rid.partnsp = linitial($3);
+ $$->rid.partrel = lsecond($3);
+ }
+ else if ($3 != NIL)
+ $$->rid.partrel = linitial($3);
+ $$->rid.plan_name = $4;
+ }
+ ;
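+
+/*
+ * For illustration (hypothetical names), a relation_identifier can look
+ * like "foo", "foo#2" (second occurrence of the alias), "foo/public.foo_p1"
+ * (a specific partition), or "foo@plan_1" (a plan name); the suffixes can
+ * be combined, in the order just shown.
+ */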
+
+index_name: identifier
+ {
+ $$ = palloc0_object(pgpa_index_target);
+ $$->indname = $1;
+ }
+ | identifier '.' identifier
+ {
+ $$ = palloc0_object(pgpa_index_target);
+ $$->indnamespace = $1;
+ $$->indname = $3;
+ }
+ ;
+
+opt_ri_occurrence:
+ '#' TOK_INTEGER
+ {
+ if ($2 <= 0)
+ pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+ "only positive occurrence numbers are permitted");
+ $$ = $2;
+ }
+ |
+ {
+ /* The default occurrence number is 1. */
+ $$ = 1;
+ }
+ ;
+
+identifier: TOK_IDENT
+ | TOK_TAG_JOIN_ORDER
+ | TOK_TAG_INDEX
+ | TOK_TAG_SIMPLE
+ | TOK_TAG_GENERIC
+ ;
+
+/*
+ * When generating advice, we always schema-qualify the partition name, but
+ * when parsing advice, we accept a specification that lacks one.
+ */
+opt_partition:
+ '/' TOK_IDENT '.' TOK_IDENT
+ { $$ = list_make2($2, $4); }
+ | '/' TOK_IDENT
+ { $$ = list_make1($2); }
+ |
+ { $$ = NIL; }
+ ;
+
+opt_plan_name:
+ '@' TOK_IDENT
+ { $$ = $2; }
+ |
+ { $$ = NULL; }
+ ;
+
+generic_target_list: generic_target_list relation_identifier
+ { $$ = lappend($1, $2); }
+ | generic_target_list generic_sublist
+ { $$ = lappend($1, $2); }
+ |
+ { $$ = NIL; }
+ ;
+
+generic_sublist: '(' simple_target_list ')'
+ {
+ $$ = palloc0_object(pgpa_advice_target);
+ $$->ttype = PGPA_TARGET_ORDERED_LIST;
+ $$->children = $2;
+ }
+ ;
+
+index_target_list:
+ index_target_list relation_identifier index_name
+ {
+ $2->itarget = $3;
+ $$ = lappend($1, $2);
+ }
+ |
+ { $$ = NIL; }
+ ;
+
+join_order_target_list: join_order_target_list relation_identifier
+ { $$ = lappend($1, $2); }
+ | join_order_target_list join_order_sublist
+ { $$ = lappend($1, $2); }
+ |
+ { $$ = NIL; }
+ ;
+
+join_order_sublist:
+ '(' join_order_target_list ')'
+ {
+ $$ = palloc0_object(pgpa_advice_target);
+ $$->ttype = PGPA_TARGET_ORDERED_LIST;
+ $$->children = $2;
+ }
+ | '{' simple_target_list '}'
+ {
+ $$ = palloc0_object(pgpa_advice_target);
+ $$->ttype = PGPA_TARGET_UNORDERED_LIST;
+ $$->children = $2;
+ }
+ ;
+
+simple_target_list: simple_target_list relation_identifier
+ { $$ = lappend($1, $2); }
+ |
+ { $$ = NIL; }
+ ;
+
+%%
+
+/*
+ * Parse an advice_string and return the resulting list of pgpa_advice_item
+ * objects. If a parse error occurs, instead return NULL.
+ *
+ * If the return value is NULL, *error_p will be set to the error message;
+ * otherwise, *error_p will be set to NULL.
+ */
+List *
+pgpa_parse(const char *advice_string, char **error_p)
+{
+ yyscan_t scanner;
+ List *result;
+ char *error = NULL;
+
+ pgpa_scanner_init(advice_string, &scanner);
+ pgpa_yyparse(&result, &error, scanner);
+ pgpa_scanner_finish(scanner);
+
+ if (error != NULL)
+ {
+ *error_p = error;
+ return NULL;
+ }
+
+ *error_p = NULL;
+ return result;
+}
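+
+/*
+ * A minimal usage sketch (the advice text is illustrative only):
+ *
+ *		char   *error;
+ *		List   *items = pgpa_parse("SEQ_SCAN(foo bar)", &error);
+ *
+ *		if (items == NIL && error != NULL)
+ *			elog(WARNING, "could not parse advice: %s", error);
+ */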
diff --git a/contrib/pg_plan_advice/pgpa_planner.c b/contrib/pg_plan_advice/pgpa_planner.c
new file mode 100644
index 00000000000..c77d68dc145
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_planner.c
@@ -0,0 +1,2166 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_planner.c
+ *	  Use planner hooks to observe and modify planner behavior
+ *
+ * All interaction with the core planner happens here. Much of it has to
+ * do with enforcing supplied advice, but we also need these hooks to
+ * generate advice strings (though the heavy lifting in that case is
+ * mostly done by pgpa_walker.c).
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_planner.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_collector.h"
+#include "pgpa_identifier.h"
+#include "pgpa_output.h"
+#include "pgpa_planner.h"
+#include "pgpa_trove.h"
+#include "pgpa_walker.h"
+
+#include "commands/defrem.h"
+#include "common/hashfn_unstable.h"
+#include "nodes/makefuncs.h"
+#include "optimizer/extendplan.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/plancat.h"
+#include "optimizer/planner.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"
+
+#ifdef USE_ASSERT_CHECKING
+
+/*
+ * When assertions are enabled, we try generating relation identifiers during
+ * planning, saving them in a hash table, and then cross-checking them against
+ * the ones generated after planning is complete.
+ */
+typedef struct pgpa_ri_checker_key
+{
+ char *plan_name;
+ Index rti;
+} pgpa_ri_checker_key;
+
+typedef struct pgpa_ri_checker
+{
+ pgpa_ri_checker_key key;
+ uint32 status;
+ const char *rid_string;
+} pgpa_ri_checker;
+
+static uint32 pgpa_ri_checker_hash_key(pgpa_ri_checker_key key);
+
+static inline bool
+pgpa_ri_checker_compare_key(pgpa_ri_checker_key a, pgpa_ri_checker_key b)
+{
+ if (a.rti != b.rti)
+ return false;
+ if (a.plan_name == NULL)
+ return (b.plan_name == NULL);
+ if (b.plan_name == NULL)
+ return false;
+ return strcmp(a.plan_name, b.plan_name) == 0;
+}
+
+#define SH_PREFIX pgpa_ri_check
+#define SH_ELEMENT_TYPE pgpa_ri_checker
+#define SH_KEY_TYPE pgpa_ri_checker_key
+#define SH_KEY key
+#define SH_HASH_KEY(tb, key) pgpa_ri_checker_hash_key(key)
+#define SH_EQUAL(tb, a, b) pgpa_ri_checker_compare_key(a, b)
+#define SH_SCOPE static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+#endif
+
+typedef enum pgpa_jo_outcome
+{
+ PGPA_JO_PERMITTED, /* permit this join order */
+ PGPA_JO_DENIED, /* deny this join order */
+ PGPA_JO_INDIFFERENT /* do neither */
+} pgpa_jo_outcome;
+
+typedef struct pgpa_planner_state
+{
+ ExplainState *explain_state;
+ bool generate_advice_feedback;
+ bool generate_advice_string;
+ pgpa_trove *trove;
+ MemoryContext trove_cxt;
+ List *sj_unique_rels;
+
+#ifdef USE_ASSERT_CHECKING
+ pgpa_ri_check_hash *ri_check_hash;
+#endif
+} pgpa_planner_state;
+
+typedef struct pgpa_join_state
+{
+ /* Most-recently-considered outer rel. */
+ RelOptInfo *outerrel;
+
+ /* Most-recently-considered inner rel. */
+ RelOptInfo *innerrel;
+
+ /*
+ * Array of relation identifiers for all members of this joinrel, with
+	 * outerrel identifiers before innerrel identifiers.
+ */
+ pgpa_identifier *rids;
+
+ /* Number of outer rel identifiers. */
+ int outer_count;
+
+ /* Number of inner rel identifiers. */
+ int inner_count;
+
+ /*
+ * Trove lookup results.
+ *
+ * join_entries and rel_entries are arrays of entries, and join_indexes
+ * and rel_indexes are the integer offsets within those arrays of entries
+ * potentially relevant to us. The "join" fields correspond to a lookup
+ * using PGPA_TROVE_LOOKUP_JOIN and the "rel" fields to a lookup using
+ * PGPA_TROVE_LOOKUP_REL.
+ */
+ pgpa_trove_entry *join_entries;
+ Bitmapset *join_indexes;
+ pgpa_trove_entry *rel_entries;
+ Bitmapset *rel_indexes;
+} pgpa_join_state;
+
+/* Saved hook values */
+static build_simple_rel_hook_type prev_build_simple_rel = NULL;
+static join_path_setup_hook_type prev_join_path_setup = NULL;
+static joinrel_setup_hook_type prev_joinrel_setup = NULL;
+static planner_setup_hook_type prev_planner_setup = NULL;
+static planner_shutdown_hook_type prev_planner_shutdown = NULL;
+
+/* Other global variables */
+static int planner_extension_id = -1;
+
+/* Function prototypes. */
+static void pgpa_planner_setup(PlannerGlobal *glob, Query *parse,
+ const char *query_string,
+ int cursorOptions,
+ double *tuple_fraction,
+ ExplainState *es);
+static void pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse,
+ const char *query_string, PlannedStmt *pstmt);
+static void pgpa_build_simple_rel(PlannerInfo *root,
+ RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void pgpa_joinrel_setup(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
+static void pgpa_join_path_setup(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra);
+static pgpa_join_state *pgpa_get_join_state(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel);
+static void pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p,
+ char *plan_name,
+ pgpa_join_state *pjs);
+static void pgpa_planner_apply_join_path_advice(JoinType jointype,
+ uint64 *pgs_mask_p,
+ char *plan_name,
+ pgpa_join_state *pjs);
+static void pgpa_planner_apply_scan_advice(RelOptInfo *rel,
+ pgpa_trove_entry *scan_entries,
+ Bitmapset *scan_indexes,
+ pgpa_trove_entry *rel_entries,
+ Bitmapset *rel_indexes);
+static uint64 pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag);
+static pgpa_jo_outcome pgpa_join_order_permits_join(int outer_count,
+ int inner_count,
+ pgpa_identifier *rids,
+ pgpa_trove_entry *entry);
+static bool pgpa_join_method_permits_join(int outer_count, int inner_count,
+ pgpa_identifier *rids,
+ pgpa_trove_entry *entry,
+ bool *restrict_method);
+static bool pgpa_opaque_join_permits_join(int outer_count, int inner_count,
+ pgpa_identifier *rids,
+ pgpa_trove_entry *entry,
+ bool *restrict_method);
+static bool pgpa_semijoin_permits_join(int outer_count, int inner_count,
+ pgpa_identifier *rids,
+ pgpa_trove_entry *entry,
+ bool outer_side_nullable,
+ bool *restrict_method);
+
+static List *pgpa_planner_append_feedback(List *list, pgpa_trove *trove,
+ pgpa_trove_lookup_type type,
+ pgpa_identifier *rt_identifiers,
+ pgpa_plan_walker_context *walker);
+static void pgpa_planner_feedback_warning(List *feedback);
+
+static inline void pgpa_ri_checker_save(pgpa_planner_state *pps,
+ PlannerInfo *root,
+ RelOptInfo *rel);
+static void pgpa_ri_checker_validate(pgpa_planner_state *pps,
+ PlannedStmt *pstmt);
+
+static char *pgpa_bms_to_cstring(Bitmapset *bms);
+static const char *pgpa_jointype_to_cstring(JoinType jointype);
+
+/*
+ * Install planner-related hooks.
+ */
+void
+pgpa_planner_install_hooks(void)
+{
+ planner_extension_id = GetPlannerExtensionId("pg_plan_advice");
+ prev_planner_setup = planner_setup_hook;
+ planner_setup_hook = pgpa_planner_setup;
+ prev_planner_shutdown = planner_shutdown_hook;
+ planner_shutdown_hook = pgpa_planner_shutdown;
+ prev_build_simple_rel = build_simple_rel_hook;
+ build_simple_rel_hook = pgpa_build_simple_rel;
+ prev_joinrel_setup = joinrel_setup_hook;
+ joinrel_setup_hook = pgpa_joinrel_setup;
+ prev_join_path_setup = join_path_setup_hook;
+ join_path_setup_hook = pgpa_join_path_setup;
+}
+
+/*
+ * Carry out whatever setup work we need to do before planning.
+ */
+static void
+pgpa_planner_setup(PlannerGlobal *glob, Query *parse, const char *query_string,
+ int cursorOptions, double *tuple_fraction,
+ ExplainState *es)
+{
+ pgpa_trove *trove = NULL;
+ pgpa_planner_state *pps;
+ char *supplied_advice;
+ bool generate_advice_feedback = false;
+ bool generate_advice_string = false;
+ bool needs_pps = false;
+
+ /*
+ * Decide whether we need to generate an advice string. We must do this if
+ * the user has told us to do it categorically, or if at least one
+ * collector is enabled, or if the user has requested it using the EXPLAIN
+ * (PLAN_ADVICE) option.
+ */
+ generate_advice_string = (pg_plan_advice_always_store_advice_details ||
+ pg_plan_advice_local_collector ||
+ pg_plan_advice_shared_collector ||
+ pg_plan_advice_should_explain(es));
+ if (generate_advice_string)
+ needs_pps = true;
+
+ /*
+ * If any advice was provided, build a trove of advice for use during
+ * planning.
+ */
+ supplied_advice = pg_plan_advice_get_supplied_query_advice(glob, parse,
+ query_string,
+ cursorOptions,
+ es);
+ if (supplied_advice != NULL && supplied_advice[0] != '\0')
+ {
+ List *advice_items;
+ char *error;
+
+ /*
+ * If the supplied advice string comes from pg_plan_advice.advice,
+ * parsing shouldn't fail here, because we must have previously parsed
+		 * successfully in pg_plan_advice_advice_check_hook. However, it might
+		 * also come from a hook registered via pg_plan_advice_add_advisor,
+		 * and we can't be sure whether that's valid. (Plus, having an error
+		 * check here seems like a good idea anyway, just for safety.)
+ */
+ advice_items = pgpa_parse(supplied_advice, &error);
+ if (error)
+ ereport(WARNING,
+ errmsg("could not parse supplied advice: %s", error));
+
+ /*
+ * It's possible that the advice string was non-empty but contained no
+ * actual advice, e.g. it was all whitespace.
+ */
+ if (advice_items != NIL)
+ {
+ trove = pgpa_build_trove(advice_items);
+ needs_pps = true;
+
+ /*
+ * If we know that we're running under EXPLAIN, or if the user has
+ * told us to always do the work, generate advice feedback.
+ */
+ if (es != NULL || pg_plan_advice_feedback_warnings ||
+ pg_plan_advice_always_store_advice_details)
+ generate_advice_feedback = true;
+ }
+ }
+
+#ifdef USE_ASSERT_CHECKING
+
+ /*
+ * If asserts are enabled, always build a private state object for
+ * cross-checks.
+ */
+ needs_pps = true;
+#endif
+
+ /*
+ * We only create and initialize a private state object if it's needed for
+ * some purpose. That could be (1) recording that we will need to generate
+ * an advice string, (2) storing a trove of supplied advice, or (3)
+ * facilitating debugging cross-checks when asserts are enabled.
+ */
+ if (needs_pps)
+ {
+ pps = palloc0_object(pgpa_planner_state);
+ pps->explain_state = es;
+ pps->generate_advice_feedback = generate_advice_feedback;
+ pps->generate_advice_string = generate_advice_string;
+ pps->trove = trove;
+#ifdef USE_ASSERT_CHECKING
+ pps->ri_check_hash =
+ pgpa_ri_check_create(CurrentMemoryContext, 1024, NULL);
+#endif
+ SetPlannerGlobalExtensionState(glob, planner_extension_id, pps);
+ }
+}
+
+/*
+ * Carry out whatever work we want to do after planning is complete.
+ */
+static void
+pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse,
+ const char *query_string, PlannedStmt *pstmt)
+{
+ pgpa_planner_state *pps;
+ pgpa_trove *trove = NULL;
+ pgpa_plan_walker_context walker = {0}; /* placate compiler */
+ bool generate_advice_feedback = false;
+ bool generate_advice_string = false;
+ List *pgpa_items = NIL;
+ pgpa_identifier *rt_identifiers = NULL;
+
+ /* Fetch our private state, set up by pgpa_planner_setup(). */
+ pps = GetPlannerGlobalExtensionState(glob, planner_extension_id);
+ if (pps != NULL)
+ {
+ trove = pps->trove;
+ generate_advice_feedback = pps->generate_advice_feedback;
+ generate_advice_string = pps->generate_advice_string;
+ }
+
+ /*
+ * If we're trying to generate an advice string or if we're trying to
+ * provide advice feedback, then we will need to create range table
+ * identifiers.
+ */
+ if (generate_advice_string || generate_advice_feedback)
+ {
+ pgpa_plan_walker(&walker, pstmt, pps->sj_unique_rels);
+ rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt);
+ }
+
+ /* Generate the advice string, if we need to do so. */
+ if (generate_advice_string)
+ {
+ char *advice_string;
+ StringInfoData buf;
+
+ /* Generate a textual advice string. */
+ initStringInfo(&buf);
+ pgpa_output_advice(&buf, &walker, rt_identifiers);
+ advice_string = buf.data;
+
+ /* If the advice string is empty, don't bother collecting it. */
+ if (advice_string[0] != '\0')
+ pgpa_collect_advice(pstmt->queryId, query_string, advice_string);
+
+ /* Save the advice string in the final plan. */
+ pgpa_items = lappend(pgpa_items,
+ makeDefElem("advice_string",
+ (Node *) makeString(advice_string),
+ -1));
+ }
+
+ /*
+ * If we're trying to provide advice feedback, then we will need to
+ * analyze how successful the advice was.
+ */
+ if (generate_advice_feedback)
+ {
+ List *feedback = NIL;
+
+ /*
+ * Inject a Node-tree representation of all the trove-entry flags into
+ * the PlannedStmt.
+ */
+ feedback = pgpa_planner_append_feedback(feedback,
+ trove,
+ PGPA_TROVE_LOOKUP_SCAN,
+ rt_identifiers, &walker);
+ feedback = pgpa_planner_append_feedback(feedback,
+ trove,
+ PGPA_TROVE_LOOKUP_JOIN,
+ rt_identifiers, &walker);
+ feedback = pgpa_planner_append_feedback(feedback,
+ trove,
+ PGPA_TROVE_LOOKUP_REL,
+ rt_identifiers, &walker);
+
+ pgpa_items = lappend(pgpa_items, makeDefElem("feedback",
+ (Node *) feedback, -1));
+
+ /* If we were asked to generate feedback warnings, do so. */
+ if (pg_plan_advice_feedback_warnings)
+ pgpa_planner_feedback_warning(feedback);
+ }
+
+ /* Push whatever data we're saving into the PlannedStmt. */
+ if (pgpa_items != NIL)
+ pstmt->extension_state =
+ lappend(pstmt->extension_state,
+ makeDefElem("pg_plan_advice", (Node *) pgpa_items, -1));
+
+ /*
+ * If assertions are enabled, cross-check the generated range table
+ * identifiers.
+ */
+ if (pps != NULL)
+ pgpa_ri_checker_validate(pps, pstmt);
+}
+
+/*
+ * Hook function for build_simple_rel().
+ *
+ * We can apply scan advice at this point, and we also use this as an
+ * opportunity to do range-table identifier cross-checking in assert-enabled
+ * builds.
+ */
+static void
+pgpa_build_simple_rel(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ pgpa_planner_state *pps;
+
+ /* Fetch our private state, set up by pgpa_planner_setup(). */
+ pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id);
+
+ /* Save details needed for range table identifier cross-checking. */
+ if (pps != NULL)
+ pgpa_ri_checker_save(pps, root, rel);
+
+ /* If query advice was provided, search for relevant entries. */
+ if (pps != NULL && pps->trove != NULL)
+ {
+ pgpa_identifier rid;
+ pgpa_trove_result tresult_scan;
+ pgpa_trove_result tresult_rel;
+
+ /* Search for scan advice and general rel advice. */
+ pgpa_compute_identifier_by_rti(root, rel->relid, &rid);
+ pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_SCAN, 1, &rid,
+ &tresult_scan);
+ pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL, 1, &rid,
+ &tresult_rel);
+
+ /* If relevant entries were found, apply them. */
+ if (tresult_scan.indexes != NULL || tresult_rel.indexes != NULL)
+ {
+ uint64 original_mask = rel->pgs_mask;
+
+ pgpa_planner_apply_scan_advice(rel,
+ tresult_scan.entries,
+ tresult_scan.indexes,
+ tresult_rel.entries,
+ tresult_rel.indexes);
+
+ /* Emit debugging message, if enabled. */
+ if (pg_plan_advice_trace_mask && original_mask != rel->pgs_mask)
+ ereport(WARNING,
+ (errmsg("strategy mask for RTI %u changed from 0x%" PRIx64 " to 0x%" PRIx64,
+ rel->relid, original_mask, rel->pgs_mask)));
+ }
+ }
+
+ /* Pass call to previous hook. */
+ if (prev_build_simple_rel)
+ (*prev_build_simple_rel) (root, rel, rte);
+}
+
+/*
+ * Enforce any provided advice that is relevant to any method of implementing
+ * this join.
+ *
+ * Although we're passed the outerrel and innerrel here, those are just
+ * whatever values happened to prompt the creation of this joinrel; they
+ * shouldn't really influence our choice of what advice to apply.
+ */
+static void
+pgpa_joinrel_setup(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *outerrel, RelOptInfo *innerrel,
+ SpecialJoinInfo *sjinfo, List *restrictlist)
+{
+ pgpa_join_state *pjs;
+
+ Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE);
+
+ /* Get our private state information for this join. */
+ pjs = pgpa_get_join_state(root, joinrel, outerrel, innerrel);
+
+ /* If there is relevant advice, call a helper function to apply it. */
+ if (pjs != NULL)
+ {
+ uint64 original_mask = joinrel->pgs_mask;
+
+ pgpa_planner_apply_joinrel_advice(&joinrel->pgs_mask,
+ root->plan_name,
+ pjs);
+
+ /* Emit debugging message, if enabled. */
+ if (pg_plan_advice_trace_mask && original_mask != joinrel->pgs_mask)
+ ereport(WARNING,
+ (errmsg("strategy mask for join on RTIs %s changed from 0x%" PRIx64 " to 0x%" PRIx64,
+ pgpa_bms_to_cstring(joinrel->relids),
+ original_mask,
+ joinrel->pgs_mask)));
+ }
+
+ /* Pass call to previous hook. */
+ if (prev_joinrel_setup)
+ (*prev_joinrel_setup) (root, joinrel, outerrel, innerrel,
+ sjinfo, restrictlist);
+}
+
+/*
+ * Enforce any provided advice that is relevant to this particular method of
+ * implementing this particular join.
+ */
+static void
+pgpa_join_path_setup(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *outerrel, RelOptInfo *innerrel,
+ JoinType jointype, JoinPathExtraData *extra)
+{
+ pgpa_join_state *pjs;
+
+ Assert(bms_membership(joinrel->relids) == BMS_MULTIPLE);
+
+ /*
+ * If we're considering implementing a semijoin by making one side unique,
+ * make a note of it in the pgpa_planner_state. See comments for
+ * pgpa_sj_unique_rel for why we do this.
+ */
+ if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
+ {
+ pgpa_planner_state *pps;
+ RelOptInfo *uniquerel;
+
+ uniquerel = jointype == JOIN_UNIQUE_OUTER ? outerrel : innerrel;
+ pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id);
+ if (pps != NULL &&
+ (pps->generate_advice_string || pps->generate_advice_feedback))
+ {
+ bool found = false;
+
+ /* Avoid adding duplicates. */
+ foreach_ptr(pgpa_sj_unique_rel, ur, pps->sj_unique_rels)
+ {
+ /*
+ * We should always use the same pointer for the same plan
+ * name, so we need not use strcmp() here.
+ */
+ if (root->plan_name == ur->plan_name &&
+ bms_equal(uniquerel->relids, ur->relids))
+ {
+ found = true;
+ break;
+ }
+ }
+
+ /* If not a duplicate, append to the list. */
+ if (!found)
+ {
+ pgpa_sj_unique_rel *ur = palloc_object(pgpa_sj_unique_rel);
+
+ ur->plan_name = root->plan_name;
+ ur->relids = uniquerel->relids;
+ pps->sj_unique_rels = lappend(pps->sj_unique_rels, ur);
+ }
+ }
+ }
+
+ /* Get our private state information for this join. */
+ pjs = pgpa_get_join_state(root, joinrel, outerrel, innerrel);
+
+ /* If there is relevant advice, call a helper function to apply it. */
+ if (pjs != NULL)
+ {
+ uint64 original_mask = extra->pgs_mask;
+
+ pgpa_planner_apply_join_path_advice(jointype,
+ &extra->pgs_mask,
+ root->plan_name,
+ pjs);
+
+ /* Emit debugging message, if enabled. */
+ if (pg_plan_advice_trace_mask && original_mask != extra->pgs_mask)
+ ereport(WARNING,
+ (errmsg("strategy mask for %s join on %s with outer %s and inner %s changed from 0x%" PRIx64 " to 0x%" PRIx64,
+ pgpa_jointype_to_cstring(jointype),
+ pgpa_bms_to_cstring(joinrel->relids),
+ pgpa_bms_to_cstring(outerrel->relids),
+ pgpa_bms_to_cstring(innerrel->relids),
+ original_mask,
+ extra->pgs_mask)));
+ }
+
+ /* Pass call to previous hook. */
+ if (prev_join_path_setup)
+ (*prev_join_path_setup) (root, joinrel, outerrel, innerrel,
+ jointype, extra);
+}
+
+/*
+ * Search for advice pertaining to a proposed join.
+ */
+static pgpa_join_state *
+pgpa_get_join_state(PlannerInfo *root, RelOptInfo *joinrel,
+ RelOptInfo *outerrel, RelOptInfo *innerrel)
+{
+ pgpa_planner_state *pps;
+ pgpa_join_state *pjs;
+ bool new_pjs = false;
+
+ /* Fetch our private state, set up by pgpa_planner_setup(). */
+ pps = GetPlannerGlobalExtensionState(root->glob, planner_extension_id);
+ if (pps == NULL || pps->trove == NULL)
+ {
+ /* No advice applies to this query, hence none to this joinrel. */
+ return NULL;
+ }
+
+ /*
+ * See whether we've previously associated a pgpa_join_state with this
+ * joinrel. If we have not, we need to try to construct one. If we have,
+ * then there are two cases: (a) if innerrel and outerrel are unchanged,
+ * we can simply use it, and (b) if they have changed, we need to rejigger
+ * the array of identifiers but can still skip the trove lookup.
+ */
+ pjs = GetRelOptInfoExtensionState(joinrel, planner_extension_id);
+ if (pjs != NULL)
+ {
+ if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL)
+ {
+ /*
+ * If there's no potentially relevant advice, then the presence of
+ * this pgpa_join_state acts like a negative cache entry: it tells
+ * us not to bother searching the trove for advice, because we
+ * will not find any.
+ */
+ return NULL;
+ }
+
+ if (pjs->outerrel == outerrel && pjs->innerrel == innerrel)
+ {
+ /* No updates required, so just return. */
+ /* XXX. Does this need to do something different under GEQO? */
+ return pjs;
+ }
+ }
+
+ /*
+ * If there's no pgpa_join_state yet, we need to allocate one. Trove keys
+ * will not get built for RTE_JOIN RTEs, so the array may end up being
+ * larger than needed. It's not worth trying to compute a perfectly
+ * accurate count here.
+ */
+ if (pjs == NULL)
+ {
+ int pessimistic_count = bms_num_members(joinrel->relids);
+
+ pjs = palloc0_object(pgpa_join_state);
+ pjs->rids = palloc_array(pgpa_identifier, pessimistic_count);
+ new_pjs = true;
+ }
+
+ /*
+ * Either we just allocated a new pgpa_join_state, or the existing one
+ * needs reconfiguring for a new innerrel and outerrel. The required array
+ * size can't change, so we can overwrite the existing one.
+ */
+ pjs->outerrel = outerrel;
+ pjs->innerrel = innerrel;
+ pjs->outer_count =
+ pgpa_compute_identifiers_by_relids(root, outerrel->relids, pjs->rids);
+ pjs->inner_count =
+ pgpa_compute_identifiers_by_relids(root, innerrel->relids,
+ pjs->rids + pjs->outer_count);
+
+ /*
+ * If we allocated a new pgpa_join_state, search our trove of advice for
+ * relevant entries. The trove lookup will return the same results for
+ * every outerrel/innerrel combination, so we don't need to repeat that
+ * work every time.
+ */
+ if (new_pjs)
+ {
+ pgpa_trove_result tresult;
+
+ /* Find join entries. */
+ pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_JOIN,
+ pjs->outer_count + pjs->inner_count,
+ pjs->rids, &tresult);
+ pjs->join_entries = tresult.entries;
+ pjs->join_indexes = tresult.indexes;
+
+ /* Find rel entries. */
+ pgpa_trove_lookup(pps->trove, PGPA_TROVE_LOOKUP_REL,
+ pjs->outer_count + pjs->inner_count,
+ pjs->rids, &tresult);
+ pjs->rel_entries = tresult.entries;
+ pjs->rel_indexes = tresult.indexes;
+
+ /* Now that the new pgpa_join_state is fully valid, save a pointer. */
+ SetRelOptInfoExtensionState(joinrel, planner_extension_id, pjs);
+
+ /*
+ * If there was no relevant advice found, just return NULL. This
+ * pgpa_join_state will stick around as a sort of negative cache
+ * entry, so that future calls for this same joinrel quickly return
+ * NULL.
+ */
+ if (pjs->join_indexes == NULL && pjs->rel_indexes == NULL)
+ return NULL;
+ }
+
+ return pjs;
+}
+
+/*
+ * Enforce overall restrictions on a join relation that apply uniformly
+ * regardless of the choice of inner and outer rel.
+ */
+static void
+pgpa_planner_apply_joinrel_advice(uint64 *pgs_mask_p, char *plan_name,
+ pgpa_join_state *pjs)
+{
+ int i = -1;
+ int flags;
+ bool gather_conflict = false;
+ uint64 gather_mask = 0;
+ Bitmapset *gather_partial_match = NULL;
+ Bitmapset *gather_full_match = NULL;
+ bool partitionwise_conflict = false;
+ int partitionwise_outcome = 0;
+ Bitmapset *partitionwise_partial_match = NULL;
+ Bitmapset *partitionwise_full_match = NULL;
+
+ /* Iterate over all possibly-relevant advice. */
+ while ((i = bms_next_member(pjs->rel_indexes, i)) >= 0)
+ {
+ pgpa_trove_entry *entry = &pjs->rel_entries[i];
+ pgpa_itm_type itm;
+ bool full_match = false;
+ uint64 my_gather_mask = 0;
+ int my_partitionwise_outcome = 0; /* >0 yes, <0 no */
+
+ /*
+ * For GATHER and GATHER_MERGE, if the specified relations exactly
+ * match this joinrel, do whatever the advice says; otherwise, don't
+ * allow Gather or Gather Merge at this level. For NO_GATHER, there
+ * must be a single target relation which must be included in this
+ * joinrel, so just don't allow Gather or Gather Merge here, full
+ * stop.
+ */
+ if (entry->tag == PGPA_TAG_NO_GATHER)
+ {
+ my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+ full_match = true;
+ }
+ else
+ {
+ int total_count;
+
+ total_count = pjs->outer_count + pjs->inner_count;
+ itm = pgpa_identifiers_match_target(total_count, pjs->rids,
+ entry->target);
+ Assert(itm != PGPA_ITM_DISJOINT);
+
+ if (itm == PGPA_ITM_EQUAL)
+ {
+ full_match = true;
+ if (entry->tag == PGPA_TAG_PARTITIONWISE)
+ my_partitionwise_outcome = 1;
+ else if (entry->tag == PGPA_TAG_GATHER)
+ my_gather_mask = PGS_GATHER;
+ else if (entry->tag == PGPA_TAG_GATHER_MERGE)
+ my_gather_mask = PGS_GATHER_MERGE;
+ else
+ elog(ERROR, "unexpected advice tag: %d",
+ (int) entry->tag);
+ }
+ else
+ {
+ /*
+ * If specified relations don't exactly match this joinrel,
+ * then we should do the opposite of whatever the advice says.
+ * For instance, if we have PARTITIONWISE((a b c)) or
+ * GATHER((a b c)) and this joinrel covers {a, b} or {a, b, c,
+			 * d} or {a, d}, we shouldn't plan it partitionwise or put a
+ * Gather or Gather Merge on it here.
+ *
+ * Also, we can't put a Gather or Gather Merge at this level
+ * if there is PARTITIONWISE advice that overlaps with it,
+ * unless the PARTITIONWISE advice covers a subset of the
+ * relations in the joinrel. To continue the previous example,
+ * PARTITIONWISE((a b c)) is logically incompatible with
+ * GATHER((a b)) or GATHER((a d)), but not with GATHER((a b c
+ * d)).
+ *
+ * Conversely, we can't proceed partitionwise at this level if
+ * there is overlapping GATHER or GATHER_MERGE advice, unless
+ * that advice covers a superset of the relations in this
+ * joinrel. This is just the flip side of the preceding point.
+ */
+ if (entry->tag == PGPA_TAG_PARTITIONWISE)
+ {
+ my_partitionwise_outcome = -1;
+ if (itm != PGPA_ITM_TARGETS_ARE_SUBSET)
+ my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+ }
+ else if (entry->tag == PGPA_TAG_GATHER ||
+ entry->tag == PGPA_TAG_GATHER_MERGE)
+ {
+ my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+ if (itm != PGPA_ITM_KEYS_ARE_SUBSET)
+ my_partitionwise_outcome = -1;
+ }
+ else
+ elog(ERROR, "unexpected advice tag: %d",
+ (int) entry->tag);
+ }
+ }
+
+ /*
+ * If we set my_gather_mask up above, then we (1) make a note if the
+ * advice conflicted, (2) remember the mask value, and (3) remember
+ * whether this was a full or partial match.
+ */
+ if (my_gather_mask != 0)
+ {
+ if (gather_mask != 0 && gather_mask != my_gather_mask)
+ gather_conflict = true;
+ gather_mask = my_gather_mask;
+ if (full_match)
+ gather_full_match = bms_add_member(gather_full_match, i);
+ else
+ gather_partial_match = bms_add_member(gather_partial_match, i);
+ }
+
+ /*
+ * Likewise, if we set my_partitionwise_outcome up above, then we (1)
+ * make a note if the advice conflicted, (2) remember what the desired
+ * outcome was, and (3) remember whether this was a full or partial
+ * match.
+ */
+ if (my_partitionwise_outcome != 0)
+ {
+ if (partitionwise_outcome != 0 &&
+ partitionwise_outcome != my_partitionwise_outcome)
+ partitionwise_conflict = true;
+ partitionwise_outcome = my_partitionwise_outcome;
+ if (full_match)
+ partitionwise_full_match =
+ bms_add_member(partitionwise_full_match, i);
+ else
+ partitionwise_partial_match =
+ bms_add_member(partitionwise_partial_match, i);
+ }
+ }
+
+ /*
+ * Mark every Gather-related piece of advice as partially matched, and if
+ * the set of targets exactly matched this relation, fully matched. If
+ * there was a conflict, mark them all as conflicting.
+ */
+ flags = PGPA_TE_MATCH_PARTIAL;
+ if (gather_conflict)
+ flags |= PGPA_TE_CONFLICTING;
+ pgpa_trove_set_flags(pjs->rel_entries, gather_partial_match, flags);
+ flags |= PGPA_TE_MATCH_FULL;
+ pgpa_trove_set_flags(pjs->rel_entries, gather_full_match, flags);
+
+ /* Likewise for partitionwise advice. */
+ flags = PGPA_TE_MATCH_PARTIAL;
+ if (partitionwise_conflict)
+ flags |= PGPA_TE_CONFLICTING;
+ pgpa_trove_set_flags(pjs->rel_entries, partitionwise_partial_match, flags);
+ flags |= PGPA_TE_MATCH_FULL;
+ pgpa_trove_set_flags(pjs->rel_entries, partitionwise_full_match, flags);
+
+ /*
+ * Enforce restrictions on the Gather/Gather Merge. Only clear bits here,
+	 * so that we still respect the enable_* GUCs. Do nothing if the advice
+ * conflicts.
+ */
+ if (gather_mask != 0 && !gather_conflict)
+ {
+ uint64 all_gather_mask;
+
+ all_gather_mask =
+ PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL;
+ *pgs_mask_p &= ~(all_gather_mask & ~gather_mask);
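+
+		/*
+		 * Worked example (illustrative): if GATHER((a b)) matched this
+		 * joinrel exactly, gather_mask is PGS_GATHER, so this clears
+		 * PGS_GATHER_MERGE and PGS_CONSIDER_NONPARTIAL while leaving
+		 * PGS_GATHER alone, making a plain Gather the only remaining way
+		 * to terminate parallelism at this level.
+		 */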
+ }
+
+ /*
+ * As above, but for partitionwise advice.
+ *
+ * To induce a partitionwise join, we disable all the ordinary means of
+ * performing a join, so that an Append or MergeAppend path will hopefully
+ * be chosen.
+ *
+ * To prevent one, we just disable Append and MergeAppend. Note that we
+ * must not unset PGS_CONSIDER_PARTITIONWISE even when we don't want a
+ * partitionwise join here, because we might want one at a higher level
+	 * that is constructed using paths from this level.
+ */
+ if (partitionwise_outcome != 0 && !partitionwise_conflict)
+ {
+ if (partitionwise_outcome > 0)
+ *pgs_mask_p = (*pgs_mask_p & ~PGS_JOIN_ANY);
+ else
+ *pgs_mask_p &= ~(PGS_APPEND | PGS_MERGE_APPEND);
+ }
+}
+
+/*
+ * Enforce restrictions on the join order or join method.
+ */
+static void
+pgpa_planner_apply_join_path_advice(JoinType jointype, uint64 *pgs_mask_p,
+ char *plan_name,
+ pgpa_join_state *pjs)
+{
+ int i = -1;
+ Bitmapset *jo_permit_indexes = NULL;
+ Bitmapset *jo_deny_indexes = NULL;
+ Bitmapset *jo_deny_rel_indexes = NULL;
+ Bitmapset *jm_indexes = NULL;
+ bool jm_conflict = false;
+	uint64		join_mask = 0;
+ Bitmapset *sj_permit_indexes = NULL;
+ Bitmapset *sj_deny_indexes = NULL;
+
+ /*
+ * Reconsider PARTITIONWISE(...) advice.
+ *
+ * We already thought about this for the joinrel as a whole, but in some
+ * cases, partitionwise advice can also constrain the join order. For
+ * instance, if the advice says PARTITIONWISE((t1 t2)), we shouldn't build
+	 * join paths for any joinrel that includes t1 or t2 unless it also
+	 * includes the other. In general, the partitionwise operation must have
+ * already been completed within one side of the current join or the
+ * other, else the join order is impermissible.
+ *
+	 * NB: It might seem tempting to try to deal with PARTITIONWISE advice
+ * entirely in this function, but that doesn't work. Here, we can only
+ * affect the pgs_mask within a particular JoinPathExtraData, that is, for
+ * a particular choice of innerrel and outerrel. Partitionwise paths are
+ * not built that way, so we must set pgs_mask for the RelOptInfo, which
+ * is best done in pgpa_planner_apply_joinrel_advice.
+ */
+ while ((i = bms_next_member(pjs->rel_indexes, i)) >= 0)
+ {
+ pgpa_trove_entry *entry = &pjs->rel_entries[i];
+ pgpa_itm_type inner_itm;
+ pgpa_itm_type outer_itm;
+
+ if (entry->tag != PGPA_TAG_PARTITIONWISE)
+ continue;
+
+ outer_itm = pgpa_identifiers_match_target(pjs->outer_count,
+ pjs->rids, entry->target);
+ if (outer_itm == PGPA_ITM_EQUAL ||
+ outer_itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+ continue;
+
+ inner_itm = pgpa_identifiers_match_target(pjs->inner_count,
+ pjs->rids + pjs->outer_count,
+ entry->target);
+ if (inner_itm == PGPA_ITM_EQUAL ||
+ inner_itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+ continue;
+
+ jo_deny_rel_indexes = bms_add_member(jo_deny_rel_indexes, i);
+ }
+
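+	/*
+	 * Illustrative trace: given PARTITIONWISE((t1 t2)), a proposed join of
+	 * {t1} to {t3} lands in jo_deny_rel_indexes above, because neither side
+	 * equals or fully contains the {t1 t2} target; joining t1 to t3 before
+	 * the t1-t2 partitionwise join is complete would foreclose the advised
+	 * plan shape.
+	 */
+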
+ /* Iterate over advice that pertains to the join order and method. */
+ i = -1;
+ while ((i = bms_next_member(pjs->join_indexes, i)) >= 0)
+ {
+ pgpa_trove_entry *entry = &pjs->join_entries[i];
+		uint64		my_join_mask;
+
+ /* Handle join order advice. */
+ if (entry->tag == PGPA_TAG_JOIN_ORDER)
+ {
+ pgpa_jo_outcome jo_outcome;
+
+ jo_outcome = pgpa_join_order_permits_join(pjs->outer_count,
+ pjs->inner_count,
+ pjs->rids,
+ entry);
+ if (jo_outcome == PGPA_JO_PERMITTED)
+ jo_permit_indexes = bms_add_member(jo_permit_indexes, i);
+ else if (jo_outcome == PGPA_JO_DENIED)
+ jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+ continue;
+ }
+
+ /* Handle join method advice. */
+ my_join_mask = pgpa_join_strategy_mask_from_advice_tag(entry->tag);
+ if (my_join_mask != 0)
+ {
+ bool permit;
+ bool restrict_method;
+
+ if (entry->tag == PGPA_TAG_FOREIGN_JOIN)
+ permit = pgpa_opaque_join_permits_join(pjs->outer_count,
+ pjs->inner_count,
+ pjs->rids,
+ entry,
+ &restrict_method);
+ else
+ permit = pgpa_join_method_permits_join(pjs->outer_count,
+ pjs->inner_count,
+ pjs->rids,
+ entry,
+ &restrict_method);
+ if (!permit)
+ jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+ else if (restrict_method)
+ {
+ jm_indexes = bms_add_member(jm_indexes, i);
+ if (join_mask != 0 && join_mask != my_join_mask)
+ jm_conflict = true;
+ join_mask = my_join_mask;
+ }
+ continue;
+ }
+
+ /* Handle semijoin uniqueness advice. */
+ if (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE ||
+ entry->tag == PGPA_TAG_SEMIJOIN_NON_UNIQUE)
+ {
+ bool outer_side_nullable;
+ bool restrict_method;
+
+ /* Planner has nullable side of the semijoin on the outer side? */
+ outer_side_nullable = (jointype == JOIN_UNIQUE_OUTER ||
+ jointype == JOIN_RIGHT_SEMI);
+
+ if (!pgpa_semijoin_permits_join(pjs->outer_count,
+ pjs->inner_count,
+ pjs->rids,
+ entry,
+ outer_side_nullable,
+ &restrict_method))
+ jo_deny_indexes = bms_add_member(jo_deny_indexes, i);
+ else if (restrict_method)
+ {
+ bool advice_unique;
+ bool jt_unique;
+ bool jt_non_unique;
+
+ /* Advice wants to unique-ify and use a regular join? */
+ advice_unique = (entry->tag == PGPA_TAG_SEMIJOIN_UNIQUE);
+
+ /* Planner is trying to unique-ify and use a regular join? */
+ jt_unique = (jointype == JOIN_UNIQUE_INNER ||
+ jointype == JOIN_UNIQUE_OUTER);
+
+ /* Planner is trying a semi-join, without unique-ifying? */
+ jt_non_unique = (jointype == JOIN_SEMI ||
+ jointype == JOIN_RIGHT_SEMI);
+
+ if (!jt_unique && !jt_non_unique)
+ {
+ /*
+					 * This doesn't seem to be a semijoin to which
+					 * SEMIJOIN_UNIQUE or SEMIJOIN_NON_UNIQUE can be
+					 * applied.
+ entry->flags |= PGPA_TE_INAPPLICABLE;
+ }
+ else if (advice_unique != jt_unique)
+ sj_deny_indexes = bms_add_member(sj_deny_indexes, i);
+ else
+ sj_permit_indexes = bms_add_member(sj_permit_indexes, i);
+ }
+ continue;
+ }
+ }
+
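+	/*
+	 * Illustrative: given SEMIJOIN_UNIQUE((t2)) and a proposed JOIN_SEMI
+	 * with exactly {t2} on the nullable side, the helper sets
+	 * restrict_method, advice_unique is true, and jt_unique is false, so
+	 * the entry lands in sj_deny_indexes and the join is disabled below.
+	 */
+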
+ /*
+ * If the advice indicates both that this join order is permissible and
+ * also that it isn't, then mark advice related to the join order as
+ * conflicting.
+ */
+ if (jo_permit_indexes != NULL &&
+ (jo_deny_indexes != NULL || jo_deny_rel_indexes != NULL))
+ {
+ pgpa_trove_set_flags(pjs->join_entries, jo_permit_indexes,
+ PGPA_TE_CONFLICTING);
+ pgpa_trove_set_flags(pjs->join_entries, jo_deny_indexes,
+ PGPA_TE_CONFLICTING);
+ pgpa_trove_set_flags(pjs->rel_entries, jo_deny_rel_indexes,
+ PGPA_TE_CONFLICTING);
+ }
+
+ /*
+ * If more than one join method specification is relevant here and they
+ * differ, mark them all as conflicting.
+ */
+ if (jm_conflict)
+ pgpa_trove_set_flags(pjs->join_entries, jm_indexes,
+ PGPA_TE_CONFLICTING);
+
+ /* If semijoin advice says both yes and no, mark it all as conflicting. */
+ if (sj_permit_indexes != NULL && sj_deny_indexes != NULL)
+ {
+ pgpa_trove_set_flags(pjs->join_entries, sj_permit_indexes,
+ PGPA_TE_CONFLICTING);
+ pgpa_trove_set_flags(pjs->join_entries, sj_deny_indexes,
+ PGPA_TE_CONFLICTING);
+ }
+
+ /*
+ * Enforce restrictions on the join order and join method, and any
+ * semijoin-related restrictions. Only clear bits here, so that we still
+ * respect the enable_* GUCs. Do nothing in cases where the advice on a
+ * single topic conflicts.
+ */
+ if ((jo_deny_indexes != NULL || jo_deny_rel_indexes != NULL) &&
+ jo_permit_indexes == NULL)
+ *pgs_mask_p &= ~PGS_JOIN_ANY;
+ if (join_mask != 0 && !jm_conflict)
+ *pgs_mask_p &= ~(PGS_JOIN_ANY & ~join_mask);
+ if (sj_deny_indexes != NULL && sj_permit_indexes == NULL)
+ *pgs_mask_p &= ~PGS_JOIN_ANY;
+}
+
+/*
+ * Translate an advice tag into a path generation strategy mask.
+ *
+ * This function can be called with tag types that don't represent join
+ * strategies. In such cases, we just return 0, which can't be confused with
+ * a valid mask.
+ */
+static uint64
+pgpa_join_strategy_mask_from_advice_tag(pgpa_advice_tag_type tag)
+{
+ switch (tag)
+ {
+ case PGPA_TAG_FOREIGN_JOIN:
+ return PGS_FOREIGNJOIN;
+ case PGPA_TAG_MERGE_JOIN_PLAIN:
+ return PGS_MERGEJOIN_PLAIN;
+ case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+ return PGS_MERGEJOIN_MATERIALIZE;
+ case PGPA_TAG_NESTED_LOOP_PLAIN:
+ return PGS_NESTLOOP_PLAIN;
+ case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+ return PGS_NESTLOOP_MATERIALIZE;
+ case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+ return PGS_NESTLOOP_MEMOIZE;
+ case PGPA_TAG_HASH_JOIN:
+ return PGS_HASHJOIN;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Does a certain item of join order advice permit a certain join?
+ *
+ * Returns PGPA_JO_DENIED if the advice is incompatible with the proposed
+ * join order.
+ *
+ * Returns PGPA_JO_PERMITTED if the advice specifies exactly the proposed
+ * join order. This implies that a partitionwise join should not be
+ * performed at this level; rather, one of the traditional join methods
+ * should be used.
+ *
+ * Returns PGPA_JO_INDIFFERENT if the advice does not care what happens.
+ * We use this for unordered JOIN_ORDER sublists, which are compatible with
+ * partitionwise join but do not mandate it.
+ */
+static pgpa_jo_outcome
+pgpa_join_order_permits_join(int outer_count, int inner_count,
+ pgpa_identifier *rids,
+ pgpa_trove_entry *entry)
+{
+ bool loop = true;
+ bool sublist = false;
+ int length;
+ int outer_length;
+ pgpa_advice_target *target = entry->target;
+ pgpa_advice_target *prefix_target;
+
+ /* We definitely have at least a partial match for this trove entry. */
+ entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+ /*
+ * Find the innermost sublist that contains all keys; if no sublist does,
+ * then continue processing with the toplevel list.
+ *
+ * For example, if the advice says JOIN_ORDER(t1 t2 (t3 t4 t5)), then we
+ * should evaluate joins that only involve t3, t4, and/or t5 against the
+ * (t3 t4 t5) sublist, and others against the full list.
+ *
+	 * Note that (1) the toplevel list is always ordered and (2) whenever we
+ * zoom into an unordered sublist, we instantly return
+ * PGPA_JO_INDIFFERENT.
+ */
+ while (loop)
+ {
+ Assert(target->ttype == PGPA_TARGET_ORDERED_LIST);
+
+ loop = false;
+ foreach_ptr(pgpa_advice_target, child_target, target->children)
+ {
+ pgpa_itm_type itm;
+
+ if (child_target->ttype == PGPA_TARGET_IDENTIFIER)
+ continue;
+
+ itm = pgpa_identifiers_match_target(outer_count + inner_count,
+ rids, child_target);
+ if (itm == PGPA_ITM_EQUAL || itm == PGPA_ITM_KEYS_ARE_SUBSET)
+ {
+ if (child_target->ttype == PGPA_TARGET_ORDERED_LIST)
+ {
+ target = child_target;
+ sublist = true;
+ loop = true;
+ break;
+ }
+ else
+ {
+ Assert(child_target->ttype == PGPA_TARGET_UNORDERED_LIST);
+ return PGPA_JO_INDIFFERENT;
+ }
+ }
+ }
+ }
+
+ /*
+ * Try to find a prefix of the selected join order list that is exactly
+ * equal to the outer side of the proposed join.
+ */
+ length = list_length(target->children);
+ prefix_target = palloc0_object(pgpa_advice_target);
+ prefix_target->ttype = PGPA_TARGET_ORDERED_LIST;
+ for (outer_length = 1; outer_length <= length; ++outer_length)
+ {
+ pgpa_itm_type itm;
+
+ /* Avoid leaking memory in every loop iteration. */
+ if (prefix_target->children != NULL)
+ list_free(prefix_target->children);
+ prefix_target->children = list_copy_head(target->children,
+ outer_length);
+
+ /* Search, hoping to find an exact match. */
+ itm = pgpa_identifiers_match_target(outer_count, rids, prefix_target);
+ if (itm == PGPA_ITM_EQUAL)
+ break;
+
+ /*
+ * If the prefix of the join order list that we're considering
+ * includes some but not all of the outer rels, we can make the prefix
+		 * longer to find an exact match. But if the advice has mentioned
+		 * things that are not part of our outer rel while omitting some
+		 * that are, then this join doesn't match the join order
+ * list.
+ */
+ if (itm != PGPA_ITM_TARGETS_ARE_SUBSET)
+ return PGPA_JO_DENIED;
+ }
+
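+	/*
+	 * Illustrative trace: with JOIN_ORDER(t1 t2 t3) and {t1, t2} on the
+	 * outer side, the loop above first tries the prefix (t1), a strict
+	 * subset of the outer rels, and then (t1 t2), an exact match, so it
+	 * stops with outer_length == 2; we then check below that the inner
+	 * side is exactly t3.
+	 */
+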
+ /*
+	 * If the previous loop stopped before the prefix_target included the
+ * entire join order list, then the next member of the join order list
+ * must exactly match the inner side of the join.
+ *
+ * Example: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), if the outer side of the
+ * current join includes only t1, then the inner side must be exactly t2;
+ * if the outer side includes both t1 and t2, then the inner side must
+ * include exactly t3, t4, and t5.
+ */
+ if (outer_length < length)
+ {
+ pgpa_advice_target *inner_target;
+ pgpa_itm_type itm;
+
+ inner_target = list_nth(target->children, outer_length);
+
+ itm = pgpa_identifiers_match_target(inner_count, rids + outer_count,
+ inner_target);
+
+ /*
+ * Before returning, consider whether we need to mark this entry as
+ * fully matched. If we're considering the full list rather than a
+ * sublist, and if we found every item but one on the outer side of
+ * the join and the last item on the inner side of the join, then the
+ * answer is yes.
+ */
+ if (!sublist && outer_length + 1 == length && itm == PGPA_ITM_EQUAL)
+ entry->flags |= PGPA_TE_MATCH_FULL;
+
+ return (itm == PGPA_ITM_EQUAL) ? PGPA_JO_PERMITTED : PGPA_JO_DENIED;
+ }
+
+ /*
+ * If we get here, then the outer side of the join includes the entirety
+ * of the join order list. In this case, we behave differently depending
+ * on whether we're looking at the top-level join order list or sublist.
+ * At the top-level, we treat the specified list as mandating that the
+ * actual join order has the given list as a prefix, but a sublist
+ * requires an exact match.
+ *
+	 * Example: Given JOIN_ORDER(t1 t2 (t3 t4 t5)), we must start by joining
+ * all five of those relations and in that sequence, but once that is
+ * done, it's OK to join any other rels that are part of the join problem.
+ * This allows a user to specify the driving table and perhaps the first
+ * few things to which it should be joined while leaving the rest of the
+	 * join order up to the optimizer. But it seems like it would be surprising,
+ * given that specification, if the user could add t6 to the (t3 t4 t5)
+ * sub-join, so we don't allow that. If we did want to allow it, the logic
+ * earlier in this function would require substantial adjustment: we could
+ * allow the t3-t4-t5-t6 join to be built here, but the next step of
+ * joining t1-t2 to the result would still be rejected.
+ */
+ if (!sublist)
+ entry->flags |= PGPA_TE_MATCH_FULL;
+ return sublist ? PGPA_JO_DENIED : PGPA_JO_PERMITTED;
+}
+
+/*
+ * Does a certain item of join method advice permit a certain join?
+ *
+ * Advice such as HASH_JOIN((x y)) means that there should be a hash join with
+ * exactly x and y on the inner side. Obviously, this means that if we are
+ * considering a join with exactly x and y on the inner side, we should enforce
+ * the use of a hash join. However, it also means that we must reject some
+ * incompatible join orders entirely. For example, a join with exactly x
+ * and y on the outer side shouldn't be allowed, because such paths might win
+ * over the advice-driven path on cost.
+ *
+ * To accommodate these requirements, this function returns true if the join
+ * should be allowed and false if it should not. Furthermore, *restrict_method
+ * is set to true if the join method should be enforced and false if not.
+ */
+static bool
+pgpa_join_method_permits_join(int outer_count, int inner_count,
+ pgpa_identifier *rids,
+ pgpa_trove_entry *entry,
+ bool *restrict_method)
+{
+ pgpa_advice_target *target = entry->target;
+ pgpa_itm_type inner_itm;
+ pgpa_itm_type outer_itm;
+ pgpa_itm_type join_itm;
+
+ /* We definitely have at least a partial match for this trove entry. */
+ entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+ *restrict_method = false;
+
+ /*
+ * If our inner rel mentions exactly the same relations as the advice
+ * target, allow the join and enforce the join method restriction.
+ *
+ * If our inner rel mentions a superset of the target relations, allow the
+ * join. The join we care about has already taken place, and this advice
+ * imposes no further restrictions.
+ */
+ inner_itm = pgpa_identifiers_match_target(inner_count,
+ rids + outer_count,
+ target);
+ if (inner_itm == PGPA_ITM_EQUAL)
+ {
+ entry->flags |= PGPA_TE_MATCH_FULL;
+ *restrict_method = true;
+ return true;
+ }
+ else if (inner_itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+ return true;
+
+ /*
+	 * If our outer rel mentions a superset of the relations in the advice
+	 * target, no restrictions apply. The join we care about has already taken
+ * place, and this advice imposes no further restrictions.
+ *
+ * On the other hand, if our outer rel mentions exactly the relations
+ * mentioned in the advice target, the planner is trying to reverse the
+ * sides of the join as compared with our desired outcome. Reject that.
+ */
+ outer_itm = pgpa_identifiers_match_target(outer_count,
+ rids, target);
+ if (outer_itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+ return true;
+ else if (outer_itm == PGPA_ITM_EQUAL)
+ return false;
+
+ /*
+ * If the advice target mentions only a single relation, the test below
+ * cannot ever pass, so save some work by exiting now.
+ */
+ if (target->ttype == PGPA_TARGET_IDENTIFIER)
+ return false;
+
+ /*
+ * If everything in the joinrel appears in the advice target, we're below
+ * the level of the join we want to control.
+ *
+ * For example, HASH_JOIN((x y)) doesn't restrict how x and y can be
+ * joined.
+ *
+ * This lookup shouldn't return PGPA_ITM_DISJOINT, because any such advice
+ * should not have been returned from the trove in the first place.
+ */
+ join_itm = pgpa_identifiers_match_target(outer_count + inner_count,
+ rids, target);
+ Assert(join_itm != PGPA_ITM_DISJOINT);
+ if (join_itm == PGPA_ITM_KEYS_ARE_SUBSET ||
+ join_itm == PGPA_ITM_EQUAL)
+ return true;
+
+ /*
+ * We've already permitted all allowable cases, so reject this.
+ *
+ * If we reach this point, then the advice overlaps with this join but
+ * isn't entirely contained within either side, and there's also at least
+ * one relation present in the join that isn't mentioned by the advice.
+ *
+ * For instance, in the HASH_JOIN((x y)) example, we would reach here if x
+ * were on one side of the join, y on the other, and at least one of the
+ * two sides also included some other relation, say t. In that case,
+ * accepting this join would allow the (x y t) joinrel to contain
+ * non-disabled paths that do not put (x y) on the inner side of a hash
+ * join; we could instead end up with something like (x JOIN t) JOIN y.
+ */
+ return false;
+}
+
+/*
+ * Does advice concerning an opaque join permit a certain join?
+ *
+ * By an opaque join, we mean one where the exact mechanism by which the
+ * join is performed is not visible to PostgreSQL. Currently this is the
+ * case only for foreign joins: FOREIGN_JOIN((x y z)) means that x, y, and
+ * z are joined on the remote side, but we know nothing about the join order
+ * or join methods used over there.
+ *
+ * The logic here needs to differ from pgpa_join_method_permits_join because,
+ * for other join types, the advice target is the set of inner rels; here, it
+ * includes both inner and outer rels.
+ */
+static bool
+pgpa_opaque_join_permits_join(int outer_count, int inner_count,
+ pgpa_identifier *rids,
+ pgpa_trove_entry *entry,
+ bool *restrict_method)
+{
+ pgpa_advice_target *target = entry->target;
+ pgpa_itm_type join_itm;
+
+ /* We definitely have at least a partial match for this trove entry. */
+ entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+ *restrict_method = false;
+
+ join_itm = pgpa_identifiers_match_target(outer_count + inner_count,
+ rids, target);
+ if (join_itm == PGPA_ITM_EQUAL)
+ {
+ /*
+ * We have an exact match, and should therefore allow the join and
+ * enforce the use of the relevant opaque join method.
+ */
+ entry->flags |= PGPA_TE_MATCH_FULL;
+ *restrict_method = true;
+ return true;
+ }
+
+ if (join_itm == PGPA_ITM_KEYS_ARE_SUBSET ||
+ join_itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+ {
+ /*
+ * If join_itm == PGPA_ITM_TARGETS_ARE_SUBSET, then the join we care
+ * about has already taken place and no further restrictions apply.
+ *
+ * If join_itm == PGPA_ITM_KEYS_ARE_SUBSET, we're still building up to
+ * the join we care about and have not introduced any extraneous
+ * relations not named in the advice. Note that ForeignScan paths for
+ * joins are built up from ForeignScan paths from underlying joins and
+ * scans, so we must not disable this join when considering a subset
+ * of the relations we ultimately want.
+ */
+ return true;
+ }
+
+ /*
+ * The advice overlaps the join, but at least one relation is present in
+ * the join that isn't mentioned by the advice. We want to disable such
+ * paths so that we actually push down the join as intended.
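+	 *
+	 * For example, given FOREIGN_JOIN((x y z)), a proposed join of {x, y}
+	 * to {t} is rejected here: it involves t, which the advice does not
+	 * mention, and allowing it could let a locally-executed join path beat
+	 * the intended pushed-down join on cost.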
+ */
+ return false;
+}
+
+/*
+ * Does advice concerning a semijoin permit a certain join?
+ *
+ * Unlike join method advice, which lists the rels on the inner side of the
+ * join, semijoin uniqueness advice lists the rels on the nullable side of the
+ * join. Those can be the same, if the join type is JOIN_UNIQUE_INNER or
+ * JOIN_SEMI, or they can be different, in case of JOIN_UNIQUE_OUTER or
+ * JOIN_RIGHT_SEMI.
+ *
+ * We don't know here whether the caller specified SEMIJOIN_UNIQUE or
+ * SEMIJOIN_NON_UNIQUE. The caller should check the join type against the
+ * advice type if and only if we set *restrict_method to true.
+ */
+static bool
+pgpa_semijoin_permits_join(int outer_count, int inner_count,
+ pgpa_identifier *rids,
+ pgpa_trove_entry *entry,
+ bool outer_is_nullable,
+ bool *restrict_method)
+{
+ pgpa_advice_target *target = entry->target;
+ pgpa_itm_type join_itm;
+ pgpa_itm_type inner_itm;
+ pgpa_itm_type outer_itm;
+
+ *restrict_method = false;
+
+ /* We definitely have at least a partial match for this trove entry. */
+ entry->flags |= PGPA_TE_MATCH_PARTIAL;
+
+ /*
+	 * If the outer rel is the nullable side and contains exactly the same
+ * relations as the advice target, then the join order is allowable, but
+ * the caller must check whether the advice tag (either SEMIJOIN_UNIQUE or
+ * SEMIJOIN_NON_UNIQUE) matches the join type.
+ *
+ * If the outer rel is a superset of the target relations, the join we
+	 * care about has already taken place, so we should impose no further
+	 * restrictions.
+ */
+ outer_itm = pgpa_identifiers_match_target(outer_count,
+ rids, target);
+ if (outer_itm == PGPA_ITM_EQUAL)
+ {
+ entry->flags |= PGPA_TE_MATCH_FULL;
+ if (outer_is_nullable)
+ {
+ *restrict_method = true;
+ return true;
+ }
+ }
+ else if (outer_itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+ return true;
+
+ /* As above, but for the inner rel. */
+ inner_itm = pgpa_identifiers_match_target(inner_count,
+ rids + outer_count,
+ target);
+ if (inner_itm == PGPA_ITM_EQUAL)
+ {
+ entry->flags |= PGPA_TE_MATCH_FULL;
+ if (!outer_is_nullable)
+ {
+ *restrict_method = true;
+ return true;
+ }
+ }
+ else if (inner_itm == PGPA_ITM_TARGETS_ARE_SUBSET)
+ return true;
+
+ /*
+ * If everything in the joinrel appears in the advice target, we're below
+ * the level of the join we want to control.
+ */
+ join_itm = pgpa_identifiers_match_target(outer_count + inner_count,
+ rids, target);
+ Assert(join_itm != PGPA_ITM_DISJOINT);
+ if (join_itm == PGPA_ITM_KEYS_ARE_SUBSET ||
+ join_itm == PGPA_ITM_EQUAL)
+ return true;
+
+ /*
+ * We've tested for all allowable possibilities, and so must reject this
+ * join order. This can happen in two ways.
+ *
+	 * First, we might be considering a semijoin that overlaps incompletely
+ * with one or both sides of the join. For example, if the user has
+ * specified SEMIJOIN_UNIQUE((t1 t2)) or SEMIJOIN_NON_UNIQUE((t1 t2)), we
+ * should reject a proposed t2-t3 join, since that could not result in a
+ * final plan compatible with the advice.
+ *
+ * Second, we might be considering a semijoin where the advice target
+ * perfectly matches one side of the join, but it's the wrong one. For
+ * example, in the example above, we might see a 3-way join between t1,
+ * t2, and t3, with (t1 t2) on the non-nullable side. That, too, would be
+ * incompatible with the advice.
+ */
+ return false;
+}
+
+/*
+ * Apply scan advice to a RelOptInfo.
+ */
+static void
+pgpa_planner_apply_scan_advice(RelOptInfo *rel,
+ pgpa_trove_entry *scan_entries,
+ Bitmapset *scan_indexes,
+ pgpa_trove_entry *rel_entries,
+ Bitmapset *rel_indexes)
+{
+ bool gather_conflict = false;
+ Bitmapset *gather_partial_match = NULL;
+ Bitmapset *gather_full_match = NULL;
+ int i = -1;
+ pgpa_trove_entry *scan_entry = NULL;
+ int flags;
+ bool scan_type_conflict = false;
+ Bitmapset *scan_type_indexes = NULL;
+ Bitmapset *scan_type_rel_indexes = NULL;
+ uint64 gather_mask = 0;
+ uint64 scan_type = 0;
+
+ /* Scrutinize available scan advice. */
+ while ((i = bms_next_member(scan_indexes, i)) >= 0)
+ {
+ pgpa_trove_entry *my_entry = &scan_entries[i];
+ uint64 my_scan_type = 0;
+
+		/* Translate this advice tag to a scan strategy mask. */
+ if (my_entry->tag == PGPA_TAG_BITMAP_HEAP_SCAN)
+ {
+ /*
+ * Currently, PGS_CONSIDER_INDEXONLY can suppress Bitmap Heap
+ * Scans, so don't clear it when such a scan is requested. This
+			 * happens because build_index_paths() thinks that the possibility
+ * of an index-only scan is a sufficient reason to consider using
+ * an otherwise-useless index, and get_index_paths() thinks that
+ * the same paths that are useful for index or index-only scans
+ * should also be considered for bitmap scans. Perhaps that logic
+ * should be tightened up, but until then we need to include
+ * PGS_CONSIDER_INDEXONLY in my_scan_type here.
+ */
+ my_scan_type = PGS_BITMAPSCAN | PGS_CONSIDER_INDEXONLY;
+ }
+ else if (my_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN)
+ my_scan_type = PGS_INDEXONLYSCAN | PGS_CONSIDER_INDEXONLY;
+ else if (my_entry->tag == PGPA_TAG_INDEX_SCAN)
+ my_scan_type = PGS_INDEXSCAN;
+ else if (my_entry->tag == PGPA_TAG_SEQ_SCAN)
+ my_scan_type = PGS_SEQSCAN;
+ else if (my_entry->tag == PGPA_TAG_TID_SCAN)
+ my_scan_type = PGS_TIDSCAN;
+
+ /*
+ * If this is understandable scan advice, hang on to the entry, the
+		 * inferred scan type, and the index at which we found it.
+ *
+ * Also make a note if we see conflicting scan type advice. Note that
+ * we regard two index specifications as conflicting unless they match
+ * exactly. In theory, perhaps we could regard INDEX_SCAN(a c) and
+ * INDEX_SCAN(a b.c) as non-conflicting if it happens that the only
+ * index named c is in schema b, but it doesn't seem worth the code.
+ */
+ if (my_scan_type != 0)
+ {
+ if (scan_type != 0 && scan_type != my_scan_type)
+ scan_type_conflict = true;
+ if (!scan_type_conflict && scan_entry != NULL &&
+ my_entry->target->itarget != NULL &&
+ scan_entry->target->itarget != NULL &&
+ !pgpa_index_targets_equal(scan_entry->target->itarget,
+ my_entry->target->itarget))
+ scan_type_conflict = true;
+ scan_entry = my_entry;
+ scan_type = my_scan_type;
+ scan_type_indexes = bms_add_member(scan_type_indexes, i);
+ }
+ }
+
+ /* Scrutinize available gather-related and partitionwise advice. */
+ i = -1;
+ while ((i = bms_next_member(rel_indexes, i)) >= 0)
+ {
+ pgpa_trove_entry *my_entry = &rel_entries[i];
+ uint64 my_gather_mask = 0;
+ bool just_one_rel;
+
+ just_one_rel = my_entry->target->ttype == PGPA_TARGET_IDENTIFIER
+ || list_length(my_entry->target->children) == 1;
+
+ /*
+ * PARTITIONWISE behaves like a scan type, except that if there's more
+ * than one relation targeted, it has no effect at this level.
+ */
+ if (my_entry->tag == PGPA_TAG_PARTITIONWISE)
+ {
+ if (just_one_rel)
+ {
+ const uint64 my_scan_type = PGS_APPEND | PGS_MERGE_APPEND;
+
+ if (scan_type != 0 && scan_type != my_scan_type)
+ scan_type_conflict = true;
+ scan_entry = my_entry;
+ scan_type = my_scan_type;
+ scan_type_rel_indexes =
+ bms_add_member(scan_type_rel_indexes, i);
+ }
+ continue;
+ }
+
+ /*
+ * GATHER and GATHER_MERGE applied to a single rel mean that we should
+		 * use the corresponding strategy here, while applying either to more
+ * than one rel means we should not use those strategies here, but
+ * rather at the level of the joinrel that corresponds to what was
+ * specified. NO_GATHER can only be applied to single rels.
+ *
+ * Note that setting PGS_CONSIDER_NONPARTIAL in my_gather_mask is
+ * equivalent to allowing the non-use of either form of Gather here.
+ */
+ if (my_entry->tag == PGPA_TAG_GATHER ||
+ my_entry->tag == PGPA_TAG_GATHER_MERGE)
+ {
+ if (!just_one_rel)
+ my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+ else if (my_entry->tag == PGPA_TAG_GATHER)
+ my_gather_mask = PGS_GATHER;
+ else
+ my_gather_mask = PGS_GATHER_MERGE;
+ }
+ else if (my_entry->tag == PGPA_TAG_NO_GATHER)
+ {
+ Assert(just_one_rel);
+ my_gather_mask = PGS_CONSIDER_NONPARTIAL;
+ }
+
+ /*
+ * If we set my_gather_mask up above, then we (1) make a note if the
+ * advice conflicted, (2) remember the mask value, and (3) remember
+ * whether this was a full or partial match.
+ */
+ if (my_gather_mask != 0)
+ {
+ if (gather_mask != 0 && gather_mask != my_gather_mask)
+ gather_conflict = true;
+ gather_mask = my_gather_mask;
+ if (just_one_rel)
+ gather_full_match = bms_add_member(gather_full_match, i);
+ else
+ gather_partial_match = bms_add_member(gather_partial_match, i);
+ }
+ }
+
+ /* Enforce choice of index. */
+ if (scan_entry != NULL && !scan_type_conflict &&
+ (scan_entry->tag == PGPA_TAG_INDEX_SCAN ||
+ scan_entry->tag == PGPA_TAG_INDEX_ONLY_SCAN))
+ {
+ pgpa_index_target *itarget = scan_entry->target->itarget;
+ IndexOptInfo *matched_index = NULL;
+
+ foreach_node(IndexOptInfo, index, rel->indexlist)
+ {
+ char *relname = get_rel_name(index->indexoid);
+ Oid nspoid = get_rel_namespace(index->indexoid);
+ char *relnamespace = get_namespace_name_or_temp(nspoid);
+
+ if (strcmp(itarget->indname, relname) == 0 &&
+ (itarget->indnamespace == NULL ||
+ strcmp(itarget->indnamespace, relnamespace) == 0))
+ {
+ matched_index = index;
+ break;
+ }
+ }
+
+ if (matched_index == NULL)
+ {
+ /* Don't force the scan type if the index doesn't exist. */
+ scan_type = 0;
+
+ /* Mark advice as inapplicable. */
+ pgpa_trove_set_flags(scan_entries, scan_type_indexes,
+ PGPA_TE_INAPPLICABLE);
+ }
+ else
+ {
+ /* Disable every other index. */
+ foreach_node(IndexOptInfo, index, rel->indexlist)
+ {
+ if (index != matched_index)
+ index->disabled = true;
+ }
+ }
+ }
+
+ /*
+ * Mark all the scan method entries as fully matched; and if they specify
+ * different things, mark them all as conflicting.
+ */
+ flags = PGPA_TE_MATCH_PARTIAL | PGPA_TE_MATCH_FULL;
+ if (scan_type_conflict)
+ flags |= PGPA_TE_CONFLICTING;
+ pgpa_trove_set_flags(scan_entries, scan_type_indexes, flags);
+ pgpa_trove_set_flags(rel_entries, scan_type_rel_indexes, flags);
+
+ /*
+ * Mark every Gather-related piece of advice as partially matched. Mark
+ * the ones that included this relation as a target by itself as fully
+ * matched. If there was a conflict, mark them all as conflicting.
+ */
+ flags = PGPA_TE_MATCH_PARTIAL;
+ if (gather_conflict)
+ flags |= PGPA_TE_CONFLICTING;
+ pgpa_trove_set_flags(rel_entries, gather_partial_match, flags);
+ flags |= PGPA_TE_MATCH_FULL;
+ pgpa_trove_set_flags(rel_entries, gather_full_match, flags);
+
+ /*
+ * Enforce restrictions on the scan type and use of Gather/Gather Merge.
+ * Only clear bits here, so that we still respect the enable_* GUCs. Do
+ * nothing in cases where the advice on a single topic conflicts.
+ */
+ if (scan_type != 0 && !scan_type_conflict)
+ {
+ uint64 all_scan_mask;
+
+ all_scan_mask = PGS_SCAN_ANY | PGS_APPEND | PGS_MERGE_APPEND |
+ PGS_CONSIDER_INDEXONLY;
+ rel->pgs_mask &= ~(all_scan_mask & ~scan_type);
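+
+		/*
+		 * Illustrative: for SEQ_SCAN(t1), scan_type is PGS_SEQSCAN, so this
+		 * clears the index, index-only, bitmap, TID, and append-related
+		 * bits, leaving a sequential scan as the only non-disabled way to
+		 * scan this rel (still subject to the enable_* GUCs).
+		 */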
+ }
+ if (gather_mask != 0 && !gather_conflict)
+ {
+ uint64 all_gather_mask;
+
+ all_gather_mask =
+ PGS_GATHER | PGS_GATHER_MERGE | PGS_CONSIDER_NONPARTIAL;
+ rel->pgs_mask &= ~(all_gather_mask & ~gather_mask);
+ }
+}
+
+/*
+ * Add feedback entries for one trove slice to the provided list and
+ * return the resulting list.
+ *
+ * Feedback entries are generated from the trove entry's flags. It's assumed
+ * that the caller has already set all relevant flags with the exception of
+ * PGPA_TE_FAILED. We set that flag here if appropriate.
+ */
+static List *
+pgpa_planner_append_feedback(List *list, pgpa_trove *trove,
+ pgpa_trove_lookup_type type,
+ pgpa_identifier *rt_identifiers,
+ pgpa_plan_walker_context *walker)
+{
+ pgpa_trove_entry *entries;
+ int nentries;
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+ pgpa_trove_lookup_all(trove, type, &entries, &nentries);
+ for (int i = 0; i < nentries; ++i)
+ {
+ pgpa_trove_entry *entry = &entries[i];
+ DefElem *item;
+
+ /*
+ * If this entry was fully matched, check whether generating advice
+ * from this plan would produce such an entry. If not, label the entry
+ * as failed.
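+		 *
+		 * For example, if HASH_JOIN((x y)) was fully matched during
+		 * planning but the final plan nonetheless implements that join as
+		 * a merge join, the walker would not regenerate the advice, and we
+		 * label the entry as having failed.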
+ */
+ if ((entry->flags & PGPA_TE_MATCH_FULL) != 0 &&
+ !pgpa_walker_would_advise(walker, rt_identifiers,
+ entry->tag, entry->target))
+ entry->flags |= PGPA_TE_FAILED;
+
+ item = makeDefElem(pgpa_cstring_trove_entry(entry),
+ (Node *) makeInteger(entry->flags), -1);
+ list = lappend(list, item);
+ }
+
+ return list;
+}
+
+/*
+ * Emit a WARNING telling the user about a problem with the supplied plan advice.
+ */
+static void
+pgpa_planner_feedback_warning(List *feedback)
+{
+ StringInfoData detailbuf;
+ StringInfoData flagbuf;
+
+ /* Quick exit if there's no feedback. */
+ if (feedback == NIL)
+ return;
+
+ /* Initialize buffers. */
+ initStringInfo(&detailbuf);
+ initStringInfo(&flagbuf);
+
+ /* Main loop. */
+ foreach_node(DefElem, item, feedback)
+ {
+ int flags = defGetInt32(item);
+
+ /*
+ * Don't emit anything if it was fully matched with no problems found.
+ *
+ * NB: Feedback should never be marked fully matched without also
+ * being marked partially matched.
+ */
+ if (flags == (PGPA_TE_MATCH_PARTIAL | PGPA_TE_MATCH_FULL))
+ continue;
+
+ /*
+ * Terminate each detail line except the last with a newline. This is
+ * also a convenient place to reset flagbuf.
+ */
+ if (detailbuf.len > 0)
+ {
+ appendStringInfoChar(&detailbuf, '\n');
+ resetStringInfo(&flagbuf);
+ }
+
+ /* Generate output. */
+ pgpa_trove_append_flags(&flagbuf, flags);
+ appendStringInfo(&detailbuf, _("advice %s feedback is \"%s\""),
+ item->defname, flagbuf.data);
+ }
+
+ /* Emit the warning, if any problems were found. */
+ if (detailbuf.len > 0)
+ ereport(WARNING,
+ errmsg("supplied plan advice was not enforced"),
+ errdetail("%s", detailbuf.data));
+}
+
+#ifdef USE_ASSERT_CHECKING
+
+/*
+ * Fast hash function for a key consisting of an RTI and plan name.
+ */
+static uint32
+pgpa_ri_checker_hash_key(pgpa_ri_checker_key key)
+{
+ fasthash_state hs;
+ int sp_len;
+
+ fasthash_init(&hs, 0);
+
+ hs.accum = key.rti;
+ fasthash_combine(&hs);
+
+ /* plan_name can be NULL */
+ if (key.plan_name == NULL)
+ sp_len = 0;
+ else
+ sp_len = fasthash_accum_cstring(&hs, key.plan_name);
+
+ /* hashfn_unstable.h recommends using string length as tweak */
+ return fasthash_final32(&hs, sp_len);
+}
+
+#endif
+
+/*
+ * Save the range table identifier for one relation for future cross-checking.
+ */
+static void
+pgpa_ri_checker_save(pgpa_planner_state *pps, PlannerInfo *root,
+ RelOptInfo *rel)
+{
+#ifdef USE_ASSERT_CHECKING
+ pgpa_ri_checker_key key;
+ pgpa_ri_checker *check;
+ pgpa_identifier rid;
+ const char *rid_string;
+ bool found;
+
+ key.rti = bms_singleton_member(rel->relids);
+ key.plan_name = root->plan_name;
+ pgpa_compute_identifier_by_rti(root, key.rti, &rid);
+ rid_string = pgpa_identifier_string(&rid);
+ check = pgpa_ri_check_insert(pps->ri_check_hash, key, &found);
+ Assert(!found || strcmp(check->rid_string, rid_string) == 0);
+ check->rid_string = rid_string;
+#endif
+}
+
+/*
+ * Validate that the range table identifiers we were able to generate during
+ * planning match the ones we generated from the final plan.
+ */
+static void
+pgpa_ri_checker_validate(pgpa_planner_state *pps, PlannedStmt *pstmt)
+{
+#ifdef USE_ASSERT_CHECKING
+ pgpa_identifier *rt_identifiers;
+ pgpa_ri_check_iterator it;
+ pgpa_ri_checker *check;
+
+ /* Create identifiers from the planned statement. */
+ rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt);
+
+ /* Iterate over identifiers created during planning, so we can compare. */
+ pgpa_ri_check_start_iterate(pps->ri_check_hash, &it);
+ while ((check = pgpa_ri_check_iterate(pps->ri_check_hash, &it)) != NULL)
+ {
+ int rtoffset = 0;
+ const char *rid_string;
+ Index flat_rti;
+
+ /*
+ * If there's no plan name associated with this entry, then the
+ * rtoffset is 0. Otherwise, we can search the SubPlanRTInfo list to
+ * find the rtoffset.
+ */
+ if (check->key.plan_name != NULL)
+ {
+ foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos)
+ {
+ /*
+ * If rtinfo->dummy is set, then the subquery's range table
+ * will only have been partially copied to the final range
+ * table. Specifically, only RTE_RELATION entries and
+ * RTE_SUBQUERY entries that were once RTE_RELATION entries
+ * will be copied, as per add_rtes_to_flat_rtable. Therefore,
+ * there's no fixed rtoffset that we can apply to the RTIs
+ * used during planning to locate the corresponding relations
+ * in the final rtable.
+ *
+ * With more complex logic, we could work around that problem
+ * by remembering the whole contents of the subquery's rtable
+ * during planning, determining which of those would have been
+ * copied to the final rtable, and matching them up. But it
+ * doesn't seem like a worthwhile endeavor for right now,
+ * because RTIs from such subqueries won't appear in the plan
+ * tree itself, just in the range table. Hence, we can neither
+ * generate nor accept advice for them.
+ */
+ if (strcmp(check->key.plan_name, rtinfo->plan_name) == 0
+ && !rtinfo->dummy)
+ {
+ rtoffset = rtinfo->rtoffset;
+ Assert(rtoffset > 0);
+ break;
+ }
+ }
+
+ /*
+ * It's not an error if we don't find the plan name: that just
+ * means that we planned a subplan by this name but it ended up
+ * being a dummy subplan and so wasn't included in the final plan
+ * tree.
+ */
+ if (rtoffset == 0)
+ continue;
+ }
+
+ /*
+ * check->key.rti is the RTI that we saw prior to range-table
+ * flattening, so we must add the appropriate RT offset to get the
+ * final RTI.
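+		 *
+		 * For example, if this subplan's range table entries were appended
+		 * to the flat range table at rtoffset 4, planning-time RTI 2
+		 * corresponds to flat RTI 6.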
+ */
+ flat_rti = check->key.rti + rtoffset;
+ Assert(flat_rti <= list_length(pstmt->rtable));
+
+ /* Assert that the string we compute now matches the previous one. */
+ rid_string = pgpa_identifier_string(&rt_identifiers[flat_rti - 1]);
+ Assert(strcmp(rid_string, check->rid_string) == 0);
+ }
+#endif
+}
+
+/*
+ * Convert a bitmapset to a C string of comma-separated integers.
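+ *
+ * For example, a set containing 1, 3, and 4 yields "1, 3, 4", while an
+ * empty set yields "none". The result is used only in trace messages.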
+ */
+static char *
+pgpa_bms_to_cstring(Bitmapset *bms)
+{
+ StringInfoData buf;
+ int x = -1;
+
+	if (bms_is_empty(bms))
+		return pstrdup("none");
+
+ initStringInfo(&buf);
+ while ((x = bms_next_member(bms, x)) >= 0)
+ {
+ if (buf.len > 0)
+ appendStringInfo(&buf, ", %d", x);
+ else
+ appendStringInfo(&buf, "%d", x);
+ }
+
+ return buf.data;
+}
+
+/*
+ * Convert a JoinType to a C string.
+ */
+static const char *
+pgpa_jointype_to_cstring(JoinType jointype)
+{
+ switch (jointype)
+ {
+ case JOIN_INNER:
+ return "inner";
+ case JOIN_LEFT:
+ return "left";
+ case JOIN_FULL:
+ return "full";
+ case JOIN_RIGHT:
+ return "right";
+ case JOIN_SEMI:
+ return "semi";
+ case JOIN_ANTI:
+ return "anti";
+ case JOIN_RIGHT_SEMI:
+ return "right semi";
+ case JOIN_RIGHT_ANTI:
+ return "right anti";
+ case JOIN_UNIQUE_OUTER:
+ return "unique outer";
+ case JOIN_UNIQUE_INNER:
+ return "unique inner";
+ }
+ return "???";
+}
diff --git a/contrib/pg_plan_advice/pgpa_planner.h b/contrib/pg_plan_advice/pgpa_planner.h
new file mode 100644
index 00000000000..7d40b910b00
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_planner.h
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_planner.h
+ * planner hooks
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_planner.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_PLANNER_H
+#define PGPA_PLANNER_H
+
+extern void pgpa_planner_install_hooks(void);
+
+#endif							/* PGPA_PLANNER_H */
diff --git a/contrib/pg_plan_advice/pgpa_scan.c b/contrib/pg_plan_advice/pgpa_scan.c
new file mode 100644
index 00000000000..a04f9eca8e1
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_scan.c
@@ -0,0 +1,269 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_scan.c
+ * analysis of scans in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_scan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/parsenodes.h"
+#include "parser/parsetree.h"
+
+static pgpa_scan *pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan,
+ pgpa_scan_strategy strategy,
+ Bitmapset *relids);
+
+
+static RTEKind unique_nonjoin_rtekind(Bitmapset *relids, List *rtable);
+
+/*
+ * Build a pgpa_scan object for a Plan node and update the plan walker
+ * context as appopriate. If this is an Append or MergeAppend scan, also
+ * build pgpa_scan for any scans that were consolidated into this one by
+ * Append/MergeAppend pull-up.
+ *
+ * If there is at least one ElidedNode for this plan node, pass the uppermost
+ * one as elided_node, else pass NULL.
+ *
+ * Set the 'beneath_any_gather' flag if we are underneath a Gather or
+ * Gather Merge node (except for a single-copy Gather node, for which
+ * GATHER or GATHER_MERGE advice should not be emitted).
+ *
+ * Set the 'within_join_problem' flag if we're inside a join problem, and not
+ * otherwise.
+ */
+pgpa_scan *
+pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan,
+ ElidedNode *elided_node,
+ bool beneath_any_gather, bool within_join_problem)
+{
+ pgpa_scan_strategy strategy = PGPA_SCAN_ORDINARY;
+ Bitmapset *relids = NULL;
+ int rti = -1;
+ List *child_append_relid_sets = NIL;
+ NodeTag nodetype = nodeTag(plan);
+
+ if (elided_node != NULL)
+ {
+ nodetype = elided_node->elided_type;
+ relids = elided_node->relids;
+
+ /*
+		 * If setrefs processing elided an Append or MergeAppend node that had
+		 * only one surviving child, then this is either a setop over
+		 * subqueries or a partitionwise operation (which might be a scan or a
+		 * join in reality, but here we don't care about the distinction and
+		 * consider it simply a scan).
+		 *
+		 * A setop over subqueries, like a trivial SubqueryScan that was
+		 * elided, is an "ordinary" scan, i.e. one for which we need not
+		 * generate advice because the planner has not made any meaningful
+		 * choice.
+ */
+ if ((nodetype == T_Append || nodetype == T_MergeAppend) &&
+ unique_nonjoin_rtekind(relids,
+ walker->pstmt->rtable) == RTE_RELATION)
+ strategy = PGPA_SCAN_PARTITIONWISE;
+ else
+ strategy = PGPA_SCAN_ORDINARY;
+
+ /* Join RTIs can be present, but advice never refers to them. */
+ relids = pgpa_filter_out_join_relids(relids, walker->pstmt->rtable);
+ }
+ else if ((rti = pgpa_scanrelid(plan)) != 0)
+ {
+ relids = bms_make_singleton(rti);
+
+ switch (nodeTag(plan))
+ {
+ case T_SeqScan:
+ strategy = PGPA_SCAN_SEQ;
+ break;
+ case T_BitmapHeapScan:
+ strategy = PGPA_SCAN_BITMAP_HEAP;
+ break;
+ case T_IndexScan:
+ strategy = PGPA_SCAN_INDEX;
+ break;
+ case T_IndexOnlyScan:
+ strategy = PGPA_SCAN_INDEX_ONLY;
+ break;
+ case T_TidScan:
+ case T_TidRangeScan:
+ strategy = PGPA_SCAN_TID;
+ break;
+ default:
+
+ /*
+ * This case includes a ForeignScan targeting a single
+ * relation; no other strategy is possible in that case, but
+ * see below, where things are different in multi-relation
+ * cases.
+ */
+ strategy = PGPA_SCAN_ORDINARY;
+ break;
+ }
+ }
+ else if ((relids = pgpa_relids(plan)) != NULL)
+ {
+ switch (nodeTag(plan))
+ {
+ case T_ForeignScan:
+
+ /*
+ * If multiple relations are being targeted by a single
+ * foreign scan, then the foreign join has been pushed to the
+ * remote side, and we want that to be reflected in the
+ * generated advice.
+ */
+ strategy = PGPA_SCAN_FOREIGN;
+ break;
+ case T_Append:
+
+ /*
+				 * Append nodes can represent partitionwise scans of a
+				 * partitioned relation, but when they implement a set
+				 * operation, they are just ordinary scans.
+ */
+ if (unique_nonjoin_rtekind(relids, walker->pstmt->rtable)
+ == RTE_RELATION)
+ strategy = PGPA_SCAN_PARTITIONWISE;
+ else
+ strategy = PGPA_SCAN_ORDINARY;
+
+ /* Be sure to account for pulled-up scans. */
+ child_append_relid_sets =
+ ((Append *) plan)->child_append_relid_sets;
+ break;
+ case T_MergeAppend:
+				/* Same logic here as for Append, above. */
+ if (unique_nonjoin_rtekind(relids, walker->pstmt->rtable)
+ == RTE_RELATION)
+ strategy = PGPA_SCAN_PARTITIONWISE;
+ else
+ strategy = PGPA_SCAN_ORDINARY;
+
+ /* Be sure to account for pulled-up scans. */
+ child_append_relid_sets =
+ ((MergeAppend *) plan)->child_append_relid_sets;
+ break;
+ default:
+ strategy = PGPA_SCAN_ORDINARY;
+ break;
+ }
+
+ /* Join RTIs can be present, but advice never refers to them. */
+ relids = pgpa_filter_out_join_relids(relids, walker->pstmt->rtable);
+ }
+
+ /*
+ * If this is an Append or MergeAppend node into which subordinate Append
+ * or MergeAppend paths were merged, each of those merged paths is
+ * effectively another scan for which we need to account.
+ */
+ foreach_node(Bitmapset, child_relids, child_append_relid_sets)
+ {
+ Bitmapset *child_nonjoin_relids;
+
+ child_nonjoin_relids =
+ pgpa_filter_out_join_relids(child_relids,
+ walker->pstmt->rtable);
+ (void) pgpa_make_scan(walker, plan, strategy,
+ child_nonjoin_relids);
+ }
+
+ /*
+ * If this plan node has no associated RTIs, it's not a scan. When the
+ * 'within_join_problem' flag is set, that's unexpected, so throw an
+ * error, else return quietly.
+ */
+ if (relids == NULL)
+ {
+ if (within_join_problem)
+ elog(ERROR, "plan node has no RTIs: %d", (int) nodeTag(plan));
+ return NULL;
+ }
+
+ /*
+ * Add the appropriate set of RTIs to walker->no_gather_scans.
+ *
+ * Add nothing if we're beneath a Gather or Gather Merge node, since
+ * NO_GATHER advice is clearly inappropriate in that situation.
+ *
+ * Add nothing if this is an Append or MergeAppend node, whether or not
+ * elided. We'll emit NO_GATHER() for the underlying scan, which is good
+ * enough.
+ */
+ if (!beneath_any_gather && nodetype != T_Append &&
+ nodetype != T_MergeAppend)
+ walker->no_gather_scans =
+ bms_add_members(walker->no_gather_scans, relids);
+
+	/* Finally, create the scan object itself and return it. */
+ return pgpa_make_scan(walker, plan, strategy, relids);
+}
+
+/*
+ * Create a single pgpa_scan object and update the pgpa_plan_walker_context.
+ */
+static pgpa_scan *
+pgpa_make_scan(pgpa_plan_walker_context *walker, Plan *plan,
+ pgpa_scan_strategy strategy, Bitmapset *relids)
+{
+ pgpa_scan *scan;
+
+ /* Create the scan object. */
+ scan = palloc(sizeof(pgpa_scan));
+ scan->plan = plan;
+ scan->strategy = strategy;
+ scan->relids = relids;
+
+ /* Add it to the appropriate list. */
+ walker->scans[scan->strategy] = lappend(walker->scans[scan->strategy],
+ scan);
+
+ return scan;
+}
+
+/*
+ * Determine the unique rtekind of a set of relids.
+ */
+static RTEKind
+unique_nonjoin_rtekind(Bitmapset *relids, List *rtable)
+{
+ int rti = -1;
+ bool first = true;
+	RTEKind		rtekind = RTE_RELATION; /* placate compiler */
+
+ Assert(relids != NULL);
+
+ while ((rti = bms_next_member(relids, rti)) >= 0)
+ {
+ RangeTblEntry *rte = rt_fetch(rti, rtable);
+
+ if (rte->rtekind == RTE_JOIN)
+ continue;
+
+ if (first)
+ {
+ rtekind = rte->rtekind;
+ first = false;
+ }
+ else if (rtekind != rte->rtekind)
+			elog(ERROR, "rtekind mismatch: %d vs. %d",
+				 (int) rtekind, (int) rte->rtekind);
+ }
+
+ if (first)
+ elog(ERROR, "no non-RTE_JOIN RTEs found");
+
+ return rtekind;
+}
diff --git a/contrib/pg_plan_advice/pgpa_scan.h b/contrib/pg_plan_advice/pgpa_scan.h
new file mode 100644
index 00000000000..3bb8726ff1e
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_scan.h
@@ -0,0 +1,85 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_scan.h
+ * analysis of scans in Plan trees
+ *
+ * For purposes of this module, a "scan" includes (1) single plan nodes that
+ * scan multiple RTIs, such as a degenerate Result node that replaces what
+ * would otherwise have been a join, and (2) Append and MergeAppend nodes
+ * implementing a partitionwise scan or a partitionwise join. Said
+ * differently, scans are the leaves of the join tree for a single join
+ * problem.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_scan.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_SCAN_H
+#define PGPA_SCAN_H
+
+#include "nodes/plannodes.h"
+
+typedef struct pgpa_plan_walker_context pgpa_plan_walker_context;
+
+/*
+ * Scan strategies.
+ *
+ * PGPA_SCAN_ORDINARY is any scan strategy that isn't interesting to us
+ * because there is no meaningful planner decision involved. For example,
+ * the only way to scan a subquery is a SubqueryScan, and the only way to
+ * scan a VALUES construct is a ValuesScan. We need not care exactly which
+ * type of planner node was used in such cases, because the same thing will
+ * happen when replanning.
+ *
+ * PGPA_SCAN_ORDINARY also includes Result nodes that correspond to scans
+ * or even joins that are proved empty. We don't know whether or not the scan
+ * or join will still be provably empty at replanning time, but if it is,
+ * then no scan-type advice is needed, and if it's not, we can't recommend
+ * a scan type based on the current plan.
+ *
+ * PGPA_SCAN_PARTITIONWISE also lumps together scans and joins: this can
+ * be either a partitionwise scan of a partitioned table or a partitionwise
+ * join between several partitioned tables. Note that all decisions about
+ * whether or not to use partitionwise join are meaningful: no matter what
+ * we decided this time, we could do more or fewer things partitionwise the
+ * next time.
+ *
+ * PGPA_SCAN_FOREIGN is only used when there's more than one relation involved;
+ * a single-table foreign scan is classified as ordinary, since there is no
+ * decision to make in that case.
+ *
+ * Other scan strategies map one-to-one to plan nodes.
+ */
+typedef enum
+{
+ PGPA_SCAN_ORDINARY = 0,
+ PGPA_SCAN_SEQ,
+ PGPA_SCAN_BITMAP_HEAP,
+ PGPA_SCAN_FOREIGN,
+ PGPA_SCAN_INDEX,
+ PGPA_SCAN_INDEX_ONLY,
+ PGPA_SCAN_PARTITIONWISE,
+ PGPA_SCAN_TID
+ /* update NUM_PGPA_SCAN_STRATEGY if you add anything here */
+} pgpa_scan_strategy;
+
+#define NUM_PGPA_SCAN_STRATEGY ((int) PGPA_SCAN_TID + 1)
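+
+/*
+ * A minimal consumption sketch (hypothetical caller): the plan walker
+ * context (see pgpa_walker.h) keeps one list of pgpa_scan objects per
+ * strategy, so all recorded scans can be visited with something like
+ *
+ *		for (int i = 0; i < NUM_PGPA_SCAN_STRATEGY; ++i)
+ *			foreach_ptr(pgpa_scan, scan, walker->scans[i])
+ *				process_scan(scan);
+ *
+ * where process_scan() stands in for whatever the caller wants to do with
+ * each scan's strategy and relids.
+ */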
+
+/*
+ * All of the details we need regarding a scan.
+ */
+typedef struct pgpa_scan
+{
+ Plan *plan;
+ pgpa_scan_strategy strategy;
+ Bitmapset *relids;
+} pgpa_scan;
+
+extern pgpa_scan *pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan,
+ ElidedNode *elided_node,
+ bool beneath_any_gather,
+ bool within_join_problem);
+
+#endif							/* PGPA_SCAN_H */
diff --git a/contrib/pg_plan_advice/pgpa_scanner.l b/contrib/pg_plan_advice/pgpa_scanner.l
new file mode 100644
index 00000000000..a887735f314
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_scanner.l
@@ -0,0 +1,297 @@
+%top{
+/*
+ * Scanner for plan advice
+ *
+ * Copyright (c) 2000-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_scanner.l
+ */
+#include "postgres.h"
+
+#include "common/string.h"
+#include "nodes/miscnodes.h"
+#include "parser/scansup.h"
+
+#include "pgpa_ast.h"
+#include "pgpa_parser.h"
+
+/*
+ * Extra data that we pass around during scanning.
+ *
+ * 'litbuf' is used to implement the exclusive 'xd' state, which handles
+ * double-quoted identifiers.
+ */
+typedef struct pgpa_yy_extra_type
+{
+ StringInfoData litbuf;
+} pgpa_yy_extra_type;
+
+}
+
+%{
+/* LCOV_EXCL_START */
+
+#define YY_DECL \
+ extern int pgpa_yylex(union YYSTYPE *yylval_param, List **result, \
+ char **parse_error_msg_p, yyscan_t yyscanner)
+
+/* No reason to constrain amount of data slurped */
+#define YY_READ_BUF_SIZE 16777216
+
+/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
+#undef fprintf
+#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
+
+static void
+fprintf_to_ereport(const char *fmt, const char *msg)
+{
+ ereport(ERROR, (errmsg_internal("%s", msg)));
+}
+%}
+
+%option reentrant
+%option bison-bridge
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option noyyalloc
+%option noyyrealloc
+%option noyyfree
+%option warn
+%option prefix="pgpa_yy"
+%option extra-type="pgpa_yy_extra_type *"
+
+/*
+ * What follows is a severely stripped-down version of the core scanner. We
+ * only care about recognizing identifiers with or without identifier quoting
+ * (i.e. double-quoting), decimal integers, and a small handful of other
+ * things. Keep these rules in sync with src/backend/parser/scan.l. As in that
+ * file, we use an exclusive state called 'xc' for C-style comments, and an
+ * exclusive state called 'xd' for double-quoted identifiers.
+ */
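+
+/*
+ * For example (hypothetical advice fragment), the input
+ *
+ *		SEQ_SCAN(foo) INDEX_SCAN("Foo" foo_idx)
+ *
+ * is returned to the parser as TOK_TAG_SIMPLE '(' TOK_IDENT ')'
+ * TOK_TAG_INDEX '(' TOK_IDENT TOK_IDENT ')'. The double-quoted identifier
+ * passes through the 'xd' state, so its case is preserved rather than being
+ * downcased.
+ */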
+%x xc
+%x xd
+
+ident_start [A-Za-z\200-\377_]
+ident_cont [A-Za-z\200-\377_0-9\$]
+
+identifier {ident_start}{ident_cont}*
+
+decdigit [0-9]
+decinteger {decdigit}(_?{decdigit})*
+
+space [ \t\n\r\f\v]
+whitespace {space}+
+
+dquote \"
+xdstart {dquote}
+xdstop {dquote}
+xddouble {dquote}{dquote}
+xdinside [^"]+
+
+xcstart \/\*
+xcstop \*+\/
+xcinside [^*/]+
+
+%%
+
+{whitespace} { /* ignore */ }
+
+{identifier} {
+ char *str;
+ bool fail;
+ pgpa_advice_tag_type tag;
+
+ /*
+ * Unlike the core scanner, we don't truncate identifiers
+ * here. There is no obvious reason to do so.
+ */
+ str = downcase_identifier(yytext, yyleng, false, false);
+ yylval->str = str;
+
+ /*
+ * If it's not a tag, just return TOK_IDENT; else, return
+ * a token type based on how further parsing should
+ * proceed.
+ */
+ tag = pgpa_parse_advice_tag(str, &fail);
+ if (fail)
+ return TOK_IDENT;
+ else if (tag == PGPA_TAG_JOIN_ORDER)
+ return TOK_TAG_JOIN_ORDER;
+ else if (tag == PGPA_TAG_INDEX_SCAN ||
+ tag == PGPA_TAG_INDEX_ONLY_SCAN)
+ return TOK_TAG_INDEX;
+ else if (tag == PGPA_TAG_SEQ_SCAN ||
+ tag == PGPA_TAG_TID_SCAN ||
+ tag == PGPA_TAG_BITMAP_HEAP_SCAN ||
+ tag == PGPA_TAG_NO_GATHER)
+ return TOK_TAG_SIMPLE;
+ else
+ return TOK_TAG_GENERIC;
+ }
+
+{decinteger} {
+ char *endptr;
+
+ errno = 0;
+ yylval->integer = strtoint(yytext, &endptr, 10);
+ if (*endptr != '\0' || errno == ERANGE)
+ pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+ "integer out of range");
+ return TOK_INTEGER;
+ }
+
+{xcstart} {
+ BEGIN(xc);
+ }
+
+{xdstart} {
+ BEGIN(xd);
+ resetStringInfo(&yyextra->litbuf);
+ }
+
+. { return yytext[0]; }
+
+<xc>{xcstop}	{
+ BEGIN(INITIAL);
+ }
+
+<xc>{xcinside}	{
+ /* discard multiple characters without slash or asterisk */
+ }
+
+<xc>.			{
+ /*
+ * Discard any single character. flex prefers longer
+ * matches, so this rule will never be picked when we could
+ * have matched xcstop.
+ *
+ * NB: At present, we don't bother to support nested
+ * C-style comments here, but this logic could be extended
+ * if that restriction poses a problem.
+ */
+ }
+
+<xc><<EOF>>		{
+ BEGIN(INITIAL);
+ pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+ "unterminated comment");
+ }
+
+<xd>{xdstop}	{
+ BEGIN(INITIAL);
+ if (yyextra->litbuf.len == 0)
+ pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+ "zero-length delimited identifier");
+ yylval->str = pstrdup(yyextra->litbuf.data);
+ return TOK_IDENT;
+ }
+
+<xd>{xddouble}	{
+ appendStringInfoChar(&yyextra->litbuf, '"');
+ }
+
+<xd>{xdinside}	{
+ appendBinaryStringInfo(&yyextra->litbuf, yytext, yyleng);
+ }
+
+<xd><<EOF>>		{
+ BEGIN(INITIAL);
+ pgpa_yyerror(result, parse_error_msg_p, yyscanner,
+ "unterminated quoted identifier");
+ }
+
+%%
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * Handler for errors while scanning or parsing advice.
+ *
+ * bison passes the error message to us via 'message', and the context is
+ * available via the 'yytext' macro. We assemble those values into a final
+ * error text and then arrange to pass it back to the caller of pgpa_yyparse()
+ * by storing it into *parse_error_msg_p.
+ */
+void
+pgpa_yyerror(List **result, char **parse_error_msg_p, yyscan_t yyscanner,
+ const char *message)
+{
+ struct yyguts_t *yyg = (struct yyguts_t *) yyscanner; /* needed for yytext
+ * macro */
+
+ /* report only the first error in a parse operation */
+ if (*parse_error_msg_p)
+ return;
+
+ if (yytext[0])
+ *parse_error_msg_p = psprintf("%s at or near \"%s\"", message, yytext);
+ else
+ *parse_error_msg_p = psprintf("%s at end of input", message);
+}
+
+/*
+ * Initialize the advice scanner.
+ *
+ * This should be called before parsing begins.
+ */
+void
+pgpa_scanner_init(const char *str, yyscan_t *yyscannerp)
+{
+ yyscan_t yyscanner;
+ pgpa_yy_extra_type *yyext = palloc0_object(pgpa_yy_extra_type);
+
+ if (yylex_init(yyscannerp) != 0)
+ elog(ERROR, "yylex_init() failed: %m");
+
+ yyscanner = *yyscannerp;
+
+ initStringInfo(&yyext->litbuf);
+ pgpa_yyset_extra(yyext, yyscanner);
+
+ yy_scan_string(str, yyscanner);
+}
+
+
+/*
+ * Shut down the advice scanner.
+ *
+ * This should be called after parsing is complete.
+ */
+void
+pgpa_scanner_finish(yyscan_t yyscanner)
+{
+ yylex_destroy(yyscanner);
+}
+
+/*
+ * Interface functions to make flex use palloc() instead of malloc().
+ * It'd be better to make these static, but flex insists otherwise.
+ */
+
+void *
+yyalloc(yy_size_t size, yyscan_t yyscanner)
+{
+ return palloc(size);
+}
+
+void *
+yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner)
+{
+ if (ptr)
+ return repalloc(ptr, size);
+ else
+ return palloc(size);
+}
+
+void
+yyfree(void *ptr, yyscan_t yyscanner)
+{
+ if (ptr)
+ pfree(ptr);
+}
diff --git a/contrib/pg_plan_advice/pgpa_trove.c b/contrib/pg_plan_advice/pgpa_trove.c
new file mode 100644
index 00000000000..e924959c010
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_trove.c
@@ -0,0 +1,516 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_trove.c
+ * All of the advice given for a particular query, appropriately
+ * organized for convenient access.
+ *
+ * This name comes from the English expression "trove of advice", which
+ * means a collection of wisdom. This slightly unusual term is chosen to
+ * avoid naming confusion; for example, "collection of advice" would
+ * invite confusion with pgpa_collector.c. Note that, while we don't know
+ * whether the provided advice is actually wise, it's not our job to
+ * question the user's choices.
+ *
+ * The goal of this module is to make it easy to locate the specific
+ * bits of advice that pertain to any given part of a query, or to
+ * determine that there are none.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_trove.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_trove.h"
+
+#include "common/hashfn_unstable.h"
+
+/*
+ * An advice trove is organized into a series of "slices", each of which
+ * contains information about one topic e.g. scan methods. Each slice consists
+ * of an array of trove entries plus a hash table that we can use to determine
+ * which ones are relevant to a particular part of the query.
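+ *
+ * For example, SEQ_SCAN advice lands in the "scan" slice, HASH_JOIN advice
+ * in the "join" slice, and NO_GATHER advice in the "rel" slice; see
+ * pgpa_build_trove().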
+ */
+typedef struct pgpa_trove_slice
+{
+ unsigned nallocated;
+ unsigned nused;
+ pgpa_trove_entry *entries;
+ struct pgpa_trove_entry_hash *hash;
+} pgpa_trove_slice;
+
+/*
+ * Scan advice is stored into 'scan'; join advice is stored into 'join'; and
+ * advice that can apply to both cases is stored into 'rel'. This lets callers
+ * ask just for what's relevant. These slices correspond to the possible values
+ * of pgpa_trove_lookup_type.
+ */
+struct pgpa_trove
+{
+ pgpa_trove_slice join;
+ pgpa_trove_slice rel;
+ pgpa_trove_slice scan;
+};
+
+/*
+ * We're going to build a hash table to allow clients of this module to find
+ * relevant advice for a given part of the query quickly. However, we're going
+ * to use only three of the five key fields as hash keys. There are two reasons
+ * for this.
+ *
+ * First, it's allowable to set partition_schema to NULL to match a partition
+ * with the correct name in any schema.
+ *
+ * Second, we expect the "occurrence" and "partition_schema" portions of the
+ * relation identifiers to be mostly uninteresting. Most of the time, the
+ * occurrence field will be 1 and the partition_schema values will all be the
+ * same. Even when there is some variation, the absolute number of entries
+ * that have the same values for all three of these key fields should be
+ * quite small.
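+ *
+ * For example, two identifiers that differ only in occurrence (say, the
+ * first and second references to alias "x") hash to the same bucket; they
+ * are disambiguated later by pgpa_identifier_matches_target().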
+ */
+typedef struct
+{
+ const char *alias_name;
+ const char *partition_name;
+ const char *plan_name;
+} pgpa_trove_entry_key;
+
+typedef struct
+{
+ pgpa_trove_entry_key key;
+ int status;
+ Bitmapset *indexes;
+} pgpa_trove_entry_element;
+
+static uint32 pgpa_trove_entry_hash_key(pgpa_trove_entry_key key);
+
+static inline bool
+pgpa_trove_entry_compare_key(pgpa_trove_entry_key a, pgpa_trove_entry_key b)
+{
+ if (strcmp(a.alias_name, b.alias_name) != 0)
+ return false;
+
+ if (!strings_equal_or_both_null(a.partition_name, b.partition_name))
+ return false;
+
+ if (!strings_equal_or_both_null(a.plan_name, b.plan_name))
+ return false;
+
+ return true;
+}
+
+#define SH_PREFIX pgpa_trove_entry
+#define SH_ELEMENT_TYPE pgpa_trove_entry_element
+#define SH_KEY_TYPE pgpa_trove_entry_key
+#define SH_KEY key
+#define SH_HASH_KEY(tb, key) pgpa_trove_entry_hash_key(key)
+#define SH_EQUAL(tb, a, b) pgpa_trove_entry_compare_key(a, b)
+#define SH_SCOPE static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static void pgpa_init_trove_slice(pgpa_trove_slice *tslice);
+static void pgpa_trove_add_to_slice(pgpa_trove_slice *tslice,
+ pgpa_advice_tag_type tag,
+ pgpa_advice_target *target);
+static void pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash,
+ pgpa_advice_target *target,
+ int index);
+static Bitmapset *pgpa_trove_slice_lookup(pgpa_trove_slice *tslice,
+ pgpa_identifier *rid);
+
+/*
+ * Build a trove of advice from a list of advice items.
+ *
+ * Caller can obtain a list of advice items to pass to this function by
+ * calling pgpa_parse().
+ */
+pgpa_trove *
+pgpa_build_trove(List *advice_items)
+{
+ pgpa_trove *trove = palloc_object(pgpa_trove);
+
+ pgpa_init_trove_slice(&trove->join);
+ pgpa_init_trove_slice(&trove->rel);
+ pgpa_init_trove_slice(&trove->scan);
+
+ foreach_ptr(pgpa_advice_item, item, advice_items)
+ {
+ switch (item->tag)
+ {
+ case PGPA_TAG_JOIN_ORDER:
+ {
+ pgpa_advice_target *target;
+
+ /*
+ * For most advice types, each element in the top-level
+ * list is a separate target, but it's most convenient to
+ * regard the entirety of a JOIN_ORDER specification as a
+ * single target. Since it wasn't represented that way
+ * during parsing, build a surrogate object now.
+ */
+ target = palloc0_object(pgpa_advice_target);
+ target->ttype = PGPA_TARGET_ORDERED_LIST;
+ target->children = item->targets;
+
+ pgpa_trove_add_to_slice(&trove->join,
+ item->tag, target);
+ }
+ break;
+
+ case PGPA_TAG_BITMAP_HEAP_SCAN:
+ case PGPA_TAG_INDEX_ONLY_SCAN:
+ case PGPA_TAG_INDEX_SCAN:
+ case PGPA_TAG_SEQ_SCAN:
+ case PGPA_TAG_TID_SCAN:
+
+ /*
+ * Scan advice.
+ */
+ foreach_ptr(pgpa_advice_target, target, item->targets)
+ {
+ /*
+ * For now, all of our scan types target single relations,
+ * but in the future this might not be true, e.g. a custom
+ * scan could replace a join.
+ */
+ Assert(target->ttype == PGPA_TARGET_IDENTIFIER);
+ pgpa_trove_add_to_slice(&trove->scan,
+ item->tag, target);
+ }
+ break;
+
+ case PGPA_TAG_FOREIGN_JOIN:
+ case PGPA_TAG_HASH_JOIN:
+ case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+ case PGPA_TAG_MERGE_JOIN_PLAIN:
+ case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+ case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+ case PGPA_TAG_NESTED_LOOP_PLAIN:
+ case PGPA_TAG_SEMIJOIN_NON_UNIQUE:
+ case PGPA_TAG_SEMIJOIN_UNIQUE:
+
+ /*
+ * Join strategy advice.
+ */
+ foreach_ptr(pgpa_advice_target, target, item->targets)
+ {
+ pgpa_trove_add_to_slice(&trove->join,
+ item->tag, target);
+ }
+ break;
+
+ case PGPA_TAG_PARTITIONWISE:
+ case PGPA_TAG_GATHER:
+ case PGPA_TAG_GATHER_MERGE:
+ case PGPA_TAG_NO_GATHER:
+
+ /*
+ * Advice about a RelOptInfo relevant to both scans and joins.
+ */
+ foreach_ptr(pgpa_advice_target, target, item->targets)
+ {
+ pgpa_trove_add_to_slice(&trove->rel,
+ item->tag, target);
+ }
+ break;
+ }
+ }
+
+ return trove;
+}
+
+/*
+ * Search a trove of advice for relevant entries.
+ *
+ * All parameters are input parameters except for *result, which is an output
+ * parameter used to return results to the caller.
+ */
+void
+pgpa_trove_lookup(pgpa_trove *trove, pgpa_trove_lookup_type type,
+ int nrids, pgpa_identifier *rids, pgpa_trove_result *result)
+{
+ pgpa_trove_slice *tslice;
+ Bitmapset *indexes;
+
+ Assert(nrids > 0);
+
+ if (type == PGPA_TROVE_LOOKUP_SCAN)
+ tslice = &trove->scan;
+ else if (type == PGPA_TROVE_LOOKUP_JOIN)
+ tslice = &trove->join;
+ else
+ tslice = &trove->rel;
+
+ indexes = pgpa_trove_slice_lookup(tslice, &rids[0]);
+ for (int i = 1; i < nrids; ++i)
+ {
+ Bitmapset *other_indexes;
+
+ /*
+ * If the caller is asking about two relations that aren't part of the
+ * same subquery, they've messed up.
+ */
+ Assert(strings_equal_or_both_null(rids[0].plan_name,
+ rids[i].plan_name));
+
+ other_indexes = pgpa_trove_slice_lookup(tslice, &rids[i]);
+ indexes = bms_union(indexes, other_indexes);
+ }
+
+ result->entries = tslice->entries;
+ result->indexes = indexes;
+}
+
+/*
+ * Return all entries in a trove slice to the caller.
+ *
+ * The first two arguments are input arguments, and the remainder are output
+ * arguments.
+ */
+void
+pgpa_trove_lookup_all(pgpa_trove *trove, pgpa_trove_lookup_type type,
+ pgpa_trove_entry **entries, int *nentries)
+{
+ pgpa_trove_slice *tslice;
+
+ if (type == PGPA_TROVE_LOOKUP_SCAN)
+ tslice = &trove->scan;
+ else if (type == PGPA_TROVE_LOOKUP_JOIN)
+ tslice = &trove->join;
+ else
+ tslice = &trove->rel;
+
+ *entries = tslice->entries;
+ *nentries = tslice->nused;
+}
+
+/*
+ * Convert a trove entry to an item of plan advice that would produce it.
+ */
+char *
+pgpa_cstring_trove_entry(pgpa_trove_entry *entry)
+{
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+	appendStringInfoString(&buf, pgpa_cstring_advice_tag(entry->tag));
+
+ /* JOIN_ORDER tags are transformed by pgpa_build_trove; undo that here */
+ if (entry->tag != PGPA_TAG_JOIN_ORDER)
+ appendStringInfoChar(&buf, '(');
+ else
+ Assert(entry->target->ttype == PGPA_TARGET_ORDERED_LIST);
+
+ pgpa_format_advice_target(&buf, entry->target);
+
+ if (entry->target->itarget != NULL)
+ {
+ appendStringInfoChar(&buf, ' ');
+ pgpa_format_index_target(&buf, entry->target->itarget);
+ }
+
+ if (entry->tag != PGPA_TAG_JOIN_ORDER)
+ appendStringInfoChar(&buf, ')');
+
+ return buf.data;
+}
+
+/*
+ * Set PGPA_TE_* flags on a set of trove entries.
+ */
+void
+pgpa_trove_set_flags(pgpa_trove_entry *entries, Bitmapset *indexes, int flags)
+{
+ int i = -1;
+
+ while ((i = bms_next_member(indexes, i)) >= 0)
+ {
+ pgpa_trove_entry *entry = &entries[i];
+
+ entry->flags |= flags;
+ }
+}
+
+/*
+ * Append a string representation of the specified PGPA_TE_* flags to the
+ * given StringInfo.
+ */
+void
+pgpa_trove_append_flags(StringInfo buf, int flags)
+{
+	if ((flags & PGPA_TE_MATCH_FULL) != 0)
+	{
+		Assert((flags & PGPA_TE_MATCH_PARTIAL) != 0);
+		appendStringInfoString(buf, "matched");
+	}
+	else if ((flags & PGPA_TE_MATCH_PARTIAL) != 0)
+		appendStringInfoString(buf, "partially matched");
+	else
+		appendStringInfoString(buf, "not matched");
+	if ((flags & PGPA_TE_INAPPLICABLE) != 0)
+		appendStringInfoString(buf, ", inapplicable");
+	if ((flags & PGPA_TE_CONFLICTING) != 0)
+		appendStringInfoString(buf, ", conflicting");
+	if ((flags & PGPA_TE_FAILED) != 0)
+		appendStringInfoString(buf, ", failed");
+}
+
+/*
+ * Add a new advice target to an existing pgpa_trove_slice object.
+ */
+static void
+pgpa_trove_add_to_slice(pgpa_trove_slice *tslice,
+ pgpa_advice_tag_type tag,
+ pgpa_advice_target *target)
+{
+ pgpa_trove_entry *entry;
+
+ if (tslice->nused >= tslice->nallocated)
+ {
+ int new_allocated;
+
+ new_allocated = tslice->nallocated * 2;
+ tslice->entries = repalloc_array(tslice->entries, pgpa_trove_entry,
+ new_allocated);
+ tslice->nallocated = new_allocated;
+ }
+
+ entry = &tslice->entries[tslice->nused];
+ entry->tag = tag;
+ entry->target = target;
+ entry->flags = 0;
+
+ pgpa_trove_add_to_hash(tslice->hash, target, tslice->nused);
+
+ tslice->nused++;
+}
+
+/*
+ * Update the hash table for a newly-added advice target.
+ */
+static void
+pgpa_trove_add_to_hash(pgpa_trove_entry_hash *hash, pgpa_advice_target *target,
+ int index)
+{
+ pgpa_trove_entry_key key;
+ pgpa_trove_entry_element *element;
+ bool found;
+
+	/* For non-identifiers, add entries for all descendants. */
+ if (target->ttype != PGPA_TARGET_IDENTIFIER)
+ {
+ foreach_ptr(pgpa_advice_target, child_target, target->children)
+ {
+ pgpa_trove_add_to_hash(hash, child_target, index);
+ }
+ return;
+ }
+
+ /* Sanity checks. */
+ Assert(target->rid.occurrence > 0);
+ Assert(target->rid.alias_name != NULL);
+
+ /* Add an entry for this relation identifier. */
+ key.alias_name = target->rid.alias_name;
+ key.partition_name = target->rid.partrel;
+ key.plan_name = target->rid.plan_name;
+ element = pgpa_trove_entry_insert(hash, key, &found);
+ if (!found)
+ element->indexes = NULL;
+ element->indexes = bms_add_member(element->indexes, index);
+}
+
+/*
+ * Create and initialize a new pgpa_trove_slice object.
+ */
+static void
+pgpa_init_trove_slice(pgpa_trove_slice *tslice)
+{
+ /*
+ * In an ideal world, we'll make tslice->nallocated big enough that the
+ * array and hash table will be large enough to contain the number of
+ * advice items in this trove slice, but a generous default value is not
+ * good for performance, because pgpa_init_trove_slice() has to zero an
+ * amount of memory proportional to tslice->nallocated. Hence, we keep the
+ * starting value quite small, on the theory that advice strings will
+ * often be relatively short.
+ */
+ tslice->nallocated = 16;
+ tslice->nused = 0;
+ tslice->entries = palloc_array(pgpa_trove_entry, tslice->nallocated);
+ tslice->hash = pgpa_trove_entry_create(CurrentMemoryContext,
+ tslice->nallocated, NULL);
+}
+
+/*
+ * Fast hash function for a key consisting of alias_name, partition_name,
+ * and plan_name.
+ */
+static uint32
+pgpa_trove_entry_hash_key(pgpa_trove_entry_key key)
+{
+ fasthash_state hs;
+ int sp_len;
+
+ fasthash_init(&hs, 0);
+
+ /* alias_name may not be NULL */
+ sp_len = fasthash_accum_cstring(&hs, key.alias_name);
+
+ /* partition_name and plan_name, however, can be NULL */
+ if (key.partition_name != NULL)
+ sp_len += fasthash_accum_cstring(&hs, key.partition_name);
+ if (key.plan_name != NULL)
+ sp_len += fasthash_accum_cstring(&hs, key.plan_name);
+
+ /*
+ * hashfn_unstable.h recommends using string length as tweak. It's not
+ * clear to me what to do if there are multiple strings, so for now I'm
+ * just using the total of all of the lengths.
+ */
+ return fasthash_final32(&hs, sp_len);
+}
+
+/*
+ * Look for matching entries.
+ */
+static Bitmapset *
+pgpa_trove_slice_lookup(pgpa_trove_slice *tslice, pgpa_identifier *rid)
+{
+ pgpa_trove_entry_key key;
+ pgpa_trove_entry_element *element;
+ Bitmapset *result = NULL;
+
+ Assert(rid->occurrence >= 1);
+
+ key.alias_name = rid->alias_name;
+ key.partition_name = rid->partrel;
+ key.plan_name = rid->plan_name;
+
+ element = pgpa_trove_entry_lookup(tslice->hash, key);
+
+ if (element != NULL)
+ {
+ int i = -1;
+
+ while ((i = bms_next_member(element->indexes, i)) >= 0)
+ {
+ pgpa_trove_entry *entry = &tslice->entries[i];
+
+ /*
+ * We know that this target or one of its descendents matches the
+			 * We know that this target or one of its descendants matches the
+			 * identifier on the three key fields above, but we don't know
+			 * which descendant or whether the occurrence and schema also
+ */
+ if (pgpa_identifier_matches_target(rid, entry->target))
+ result = bms_add_member(result, i);
+ }
+ }
+
+ return result;
+}
diff --git a/contrib/pg_plan_advice/pgpa_trove.h b/contrib/pg_plan_advice/pgpa_trove.h
new file mode 100644
index 00000000000..a1b75af724a
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_trove.h
@@ -0,0 +1,114 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_trove.h
+ * All of the advice given for a particular query, appropriately
+ * organized for convenient access.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_trove.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_TROVE_H
+#define PGPA_TROVE_H
+
+#include "pgpa_ast.h"
+
+#include "nodes/bitmapset.h"
+
+typedef struct pgpa_trove pgpa_trove;
+
+/*
+ * Flags that can be set on a pgpa_trove_entry to indicate what happened when
+ * trying to plan using advice.
+ *
+ * PGPA_TE_MATCH_PARTIAL means that we found some part of the query that at
+ * least partially matched the target; e.g. given JOIN_ORDER(a b), this would
+ * be set if we ever saw any joinrel including either "a" or "b".
+ *
+ * PGPA_TE_MATCH_FULL means that we found an exact match for the target; e.g.
+ * given JOIN_ORDER(a b), this would be set if we saw a joinrel containing
+ * exactly "a" and "b" and nothing else.
+ *
+ * PGPA_TE_INAPPLICABLE means that the advice doesn't properly apply to the
+ * target; e.g. INDEX_SCAN(foo bar_idx) would be so marked if bar_idx does not
+ * exist on foo. The fact that this bit has been set does not mean that the
+ * advice had no effect.
+ *
+ * PGPA_TE_CONFLICTING means that a conflict was detected between what this
+ * advice wants and what some other plan advice wants; e.g. JOIN_ORDER(a b)
+ * would conflict with HASH_JOIN(a), because the former requires "a" to be the
+ * outer table while the latter requires it to be the inner table.
+ *
+ * PGPA_TE_FAILED means that the resulting plan did not conform to the advice.
+ */
+#define PGPA_TE_MATCH_PARTIAL 0x0001
+#define PGPA_TE_MATCH_FULL 0x0002
+#define PGPA_TE_INAPPLICABLE 0x0004
+#define PGPA_TE_CONFLICTING 0x0008
+#define PGPA_TE_FAILED 0x0010
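+
+/*
+ * Note that PGPA_TE_MATCH_FULL implies PGPA_TE_MATCH_PARTIAL: for example,
+ * if JOIN_ORDER(a b) is matched exactly, both bits get set, and
+ * pgpa_trove_append_flags() reports the combination simply as "matched".
+ */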
+
+/*
+ * Each entry in a trove of advice represents the application of a tag to
+ * a single target.
+ */
+typedef struct pgpa_trove_entry
+{
+ pgpa_advice_tag_type tag;
+ pgpa_advice_target *target;
+ int flags;
+} pgpa_trove_entry;
+
+/*
+ * What kind of information does the caller want to find in a trove?
+ *
+ * PGPA_TROVE_LOOKUP_SCAN means we're looking for scan advice.
+ *
+ * PGPA_TROVE_LOOKUP_JOIN means we're looking for join-related advice.
+ * This includes join order advice, join method advice, and semijoin-uniqueness
+ * advice.
+ *
+ * PGPA_TROVE_LOOKUP_REL means we're looking for general advice about a
+ * RelOptInfo that may correspond to either a scan or a join. This includes
+ * gather-related advice and partitionwise advice. Note that partitionwise
+ * advice might seem like join advice, but that's not a helpful way of viewing
+ * the matter because (1) partitionwise advice is also relevant at the scan
+ * level and (2) other types of join advice affect only what to do from
+ * join_path_setup_hook, but partitionwise advice affects what to do in
+ * joinrel_setup_hook.
+ */
+typedef enum pgpa_trove_lookup_type
+{
+ PGPA_TROVE_LOOKUP_JOIN,
+ PGPA_TROVE_LOOKUP_REL,
+ PGPA_TROVE_LOOKUP_SCAN
+} pgpa_trove_lookup_type;
+
+/*
+ * This struct is used to store the result of a trove lookup. For each member
+ * of "indexes", the entry at the corresponding offset within "entries" is one
+ * of the results.
+ */
+typedef struct pgpa_trove_result
+{
+ pgpa_trove_entry *entries;
+ Bitmapset *indexes;
+} pgpa_trove_result;
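+
+/*
+ * A minimal lookup sketch (hypothetical caller; handle_entry() stands in
+ * for whatever the caller does with each matching entry):
+ *
+ *		pgpa_trove_result result;
+ *		int			i = -1;
+ *
+ *		pgpa_trove_lookup(trove, PGPA_TROVE_LOOKUP_SCAN, nrids, rids, &result);
+ *		while ((i = bms_next_member(result.indexes, i)) >= 0)
+ *			handle_entry(&result.entries[i]);
+ */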
+
+extern pgpa_trove *pgpa_build_trove(List *advice_items);
+extern void pgpa_trove_lookup(pgpa_trove *trove,
+ pgpa_trove_lookup_type type,
+ int nrids,
+ pgpa_identifier *rids,
+ pgpa_trove_result *result);
+extern void pgpa_trove_lookup_all(pgpa_trove *trove,
+ pgpa_trove_lookup_type type,
+ pgpa_trove_entry **entries,
+ int *nentries);
+extern char *pgpa_cstring_trove_entry(pgpa_trove_entry *entry);
+extern void pgpa_trove_set_flags(pgpa_trove_entry *entries,
+ Bitmapset *indexes, int flags);
+extern void pgpa_trove_append_flags(StringInfo buf, int flags);
+
+#endif							/* PGPA_TROVE_H */
diff --git a/contrib/pg_plan_advice/pgpa_walker.c b/contrib/pg_plan_advice/pgpa_walker.c
new file mode 100644
index 00000000000..86a6c921f16
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_walker.c
@@ -0,0 +1,1029 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_walker.c
+ * Main entrypoints for analyzing a plan to generate an advice string
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_walker.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/plannodes.h"
+#include "parser/parsetree.h"
+#include "utils/lsyscache.h"
+
+static void pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan,
+ bool within_join_problem,
+ pgpa_join_unroller *join_unroller,
+ List *active_query_features,
+ bool beneath_any_gather);
+static Bitmapset *pgpa_process_unrolled_join(pgpa_plan_walker_context *walker,
+ pgpa_unrolled_join *ujoin);
+
+static pgpa_query_feature *pgpa_add_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type,
+ Plan *plan);
+
+static void pgpa_qf_add_rti(List *active_query_features, Index rti);
+static void pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids);
+static void pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan,
+ List *rtable);
+
+static bool pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin,
+ Index rtable_length,
+ pgpa_identifier *rt_identifiers,
+ pgpa_advice_target *target,
+ bool toplevel);
+static bool pgpa_walker_join_order_matches_member(pgpa_join_member *member,
+ Index rtable_length,
+ pgpa_identifier *rt_identifiers,
+ pgpa_advice_target *target);
+static pgpa_scan *pgpa_walker_find_scan(pgpa_plan_walker_context *walker,
+ pgpa_scan_strategy strategy,
+ Bitmapset *relids);
+static bool pgpa_walker_index_target_matches_plan(pgpa_index_target *itarget,
+ Plan *plan);
+static bool pgpa_walker_contains_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type,
+ Bitmapset *relids);
+static bool pgpa_walker_contains_join(pgpa_plan_walker_context *walker,
+ pgpa_join_strategy strategy,
+ Bitmapset *relids);
+static bool pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker,
+ Bitmapset *relids);
+
+/*
+ * Top-level entrypoint for the plan tree walk.
+ *
+ * Populates walker based on a traversal of the Plan trees in pstmt.
+ *
+ * sj_unique_rels is a list of pgpa_sj_unique_rel objects, one for each
+ * relation we considered making unique as part of semijoin planning.
+ */
+void
+pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt,
+ List *sj_unique_rels)
+{
+ ListCell *lc;
+	List	   *sj_unique_rtis = NIL;
+	List	   *sj_nonunique_qfs = NIL;
+
+ /* Initialization. */
+ memset(walker, 0, sizeof(pgpa_plan_walker_context));
+ walker->pstmt = pstmt;
+
+ /* Walk the main plan tree. */
+	pgpa_walk_recursively(walker, pstmt->planTree, false, NULL, NIL, false);
+
+ /* Main plan tree walk won't reach subplans, so walk those. */
+ foreach(lc, pstmt->subplans)
+ {
+ Plan *plan = lfirst(lc);
+
+ if (plan != NULL)
+			pgpa_walk_recursively(walker, plan, false, NULL, NIL, false);
+ }
+
+ /* Adjust RTIs from sj_unique_rels for the flattened range table. */
+ foreach_ptr(pgpa_sj_unique_rel, ur, sj_unique_rels)
+ {
+ int rtindex = -1;
+ int rtoffset = 0;
+ bool dummy = false;
+ Bitmapset *relids = NULL;
+
+ /* If this is a subplan, find the range table offset. */
+ if (ur->plan_name != NULL)
+ {
+ foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos)
+ {
+ if (strcmp(ur->plan_name, rtinfo->plan_name) == 0)
+ {
+ rtoffset = rtinfo->rtoffset;
+ dummy = rtinfo->dummy;
+ break;
+ }
+ }
+
+ if (rtoffset == 0)
+ elog(ERROR, "no rtoffset for plan %s", ur->plan_name);
+ }
+
+ /* If this entry pertains to a dummy subquery, ignore it. */
+ if (dummy)
+ continue;
+
+ /* Offset each entry from the original set. */
+ while ((rtindex = bms_next_member(ur->relids, rtindex)) >= 0)
+ relids = bms_add_member(relids, rtindex + rtoffset);
+
+ /* Store the resulting set. */
+ sj_unique_rtis = lappend(sj_unique_rtis, relids);
+ }
+
+ /*
+	 * Remove any non-unique semijoin query features for which making the rel
+ * unique wasn't considered.
+ */
+ foreach_ptr(pgpa_query_feature, qf,
+ walker->query_features[PGPAQF_SEMIJOIN_NON_UNIQUE])
+ {
+ if (list_member(sj_unique_rtis, qf->relids))
+ sj_nonunique_qfs = lappend(sj_nonunique_qfs, qf);
+ }
+ walker->query_features[PGPAQF_SEMIJOIN_NON_UNIQUE] = sj_nonunique_qfs;
+
+ /*
+ * If we find any cases where analysis of the Plan tree shows that the
+ * semijoin was made unique but this possibility was never observed to be
+ * considered during planning, then we have a bug somewhere.
+ */
+ foreach_ptr(pgpa_query_feature, qf,
+ walker->query_features[PGPAQF_SEMIJOIN_UNIQUE])
+ {
+ if (!list_member(sj_unique_rtis, qf->relids))
+ {
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+ outBitmapset(&buf, qf->relids);
+ elog(ERROR,
+ "unique semijoin found for relids %s but not observed during planning",
+ buf.data);
+ }
+ }
+
+ /*
+ * It's possible for a Gather or Gather Merge query feature to find no
+ * RTIs when partitionwise aggregation is in use. We shouldn't emit
+ * something like GATHER_MERGE(()), so instead emit nothing. This means
+ * that we won't advise either GATHER or GATHER_MERGE or NO_GATHER in such
+ * cases, which might be something we want to improve in the future.
+ *
+ * (Should the Partial Aggregates in such a case be created in an
+ * UPPERREL_GROUP_AGG with a non-empty relid set? Right now that doesn't
+ * happen, but it seems like it would make life easier for us if it did.)
+ */
+ for (int t = 0; t < NUM_PGPA_QF_TYPES; ++t)
+ {
+ List *query_features = NIL;
+
+ foreach_ptr(pgpa_query_feature, qf, walker->query_features[t])
+ {
+ if (qf->relids != NULL)
+ query_features = lappend(query_features, qf);
+ else
+ Assert(t == PGPAQF_GATHER || t == PGPAQF_GATHER_MERGE);
+ }
+
+ walker->query_features[t] = query_features;
+ }
+}
+
+/*
+ * Main workhorse for the plan tree walk.
+ *
+ * If within_join_problem is true, we encountered a join at some higher level
+ * of the tree walk and haven't yet descended out of the portion of the plan
+ * tree that is part of that same join problem. We're no longer in the same
+ * join problem if (1) we cross into a different subquery or (2) we descend
+ * through an Append or MergeAppend node, below which any further joins would
+ * be partitionwise joins planned separately from the outer join problem.
+ *
+ * If join_unroller != NULL, the join unroller code expects us to find a join
+ * that should be unrolled into that object. This implies that we're within a
+ * join problem, but the reverse is not true: when we've traversed all the
+ * joins but are still looking for the scan that is the leaf of the join tree,
+ * join_unroller will be NULL but within_join_problem will be true.
+ *
+ * Each element of active_query_features corresponds to some item of advice
+ * that needs to enumerate all the relations it affects. We add RTIs we find
+ * during tree traversal to each of these query features.
+ *
+ * If beneath_any_gather == true, some higher level of the tree traversal found
+ * a Gather or Gather Merge node.
+ */
+static void
+pgpa_walk_recursively(pgpa_plan_walker_context *walker, Plan *plan,
+ bool within_join_problem,
+ pgpa_join_unroller *join_unroller,
+ List *active_query_features,
+ bool beneath_any_gather)
+{
+ pgpa_join_unroller *outer_join_unroller = NULL;
+ pgpa_join_unroller *inner_join_unroller = NULL;
+ bool join_unroller_toplevel = false;
+ ListCell *lc;
+ List *extraplans = NIL;
+ List *elided_nodes = NIL;
+
+ Assert(within_join_problem || join_unroller == NULL);
+
+ /*
+ * Check the future_query_features list to see whether this was previously
+ * identified as a plan node that needs to be treated as a query feature.
+ * We must do this before handling elided nodes, because if there's an
+ * elided node associated with a future query feature, the RTIs associated
+ * with the elided node should be the only ones attributed to the query
+ * feature.
+ */
+ foreach_ptr(pgpa_query_feature, qf, walker->future_query_features)
+ {
+ if (qf->plan == plan)
+ {
+ active_query_features = list_copy(active_query_features);
+ active_query_features = lappend(active_query_features, qf);
+			walker->future_query_features =
+				list_delete_ptr(walker->future_query_features, qf);
+ break;
+ }
+ }
+
+ /*
+ * Find all elided nodes for this Plan node.
+ */
+ foreach_node(ElidedNode, n, walker->pstmt->elidedNodes)
+ {
+ if (n->plan_node_id == plan->plan_node_id)
+ elided_nodes = lappend(elided_nodes, n);
+ }
+
+ /* If we found any elided_nodes, handle them. */
+ if (elided_nodes != NIL)
+ {
+ int num_elided_nodes = list_length(elided_nodes);
+ ElidedNode *last_elided_node;
+
+ /*
+ * RTIs for the final -- and thus logically uppermost -- elided node
+ * should be collected for query features passed down by the caller.
+ * However, elided nodes act as barriers to query features, which
+ * means that (1) the remaining elided nodes, if any, should be
+ * ignored for purposes of query features and (2) the list of active
+ * query features should be reset to empty so that we do not add RTIs
+ * from the plan node that is logically beneath the elided node to the
+ * query features passed down from the caller.
+ */
+ last_elided_node = list_nth(elided_nodes, num_elided_nodes - 1);
+ pgpa_qf_add_rtis(active_query_features,
+ pgpa_filter_out_join_relids(last_elided_node->relids,
+ walker->pstmt->rtable));
+ active_query_features = NIL;
+
+ /*
+ * If we're within a join problem, the join_unroller is responsible
+ * for building the scan for the final elided node, so throw it out.
+ */
+ if (within_join_problem)
+ elided_nodes = list_truncate(elided_nodes, num_elided_nodes - 1);
+
+ /* Build scans for all (or the remaining) elided nodes. */
+ foreach_node(ElidedNode, elided_node, elided_nodes)
+ {
+ (void) pgpa_build_scan(walker, plan, elided_node,
+ beneath_any_gather, within_join_problem);
+ }
+
+ /*
+ * If there were any elided nodes, then everything beneath those nodes
+ * is not part of the same join problem.
+ *
+ * In more detail, if an Append or MergeAppend was elided, then a
+ * partitionwise join was chosen and only a single child survived; if
+ * a SubqueryScan was elided, the subquery was planned without
+ * flattening it into the parent.
+ */
+ within_join_problem = false;
+ join_unroller = NULL;
+ }
+
+ /*
+ * If this is a Gather or Gather Merge node, directly add it to the list
+ * of currently-active query features. We must do this after handling
+ * elided nodes, since the Gather or Gather Merge node occurs logically
+ * beneath any associated elided nodes.
+ *
+ * Exception: We disregard any single_copy Gather nodes. These are created
+ * by debug_parallel_query, and having them affect the plan advice is
+ * counterproductive, as the result will be to advise the use of a real
+ * Gather node, rather than a single copy one.
+ */
+ if (IsA(plan, Gather) && !((Gather *) plan)->single_copy)
+ {
+ active_query_features =
+ lappend(list_copy(active_query_features),
+ pgpa_add_feature(walker, PGPAQF_GATHER, plan));
+ beneath_any_gather = true;
+ }
+ else if (IsA(plan, GatherMerge))
+ {
+ active_query_features =
+ lappend(list_copy(active_query_features),
+ pgpa_add_feature(walker, PGPAQF_GATHER_MERGE, plan));
+ beneath_any_gather = true;
+ }
+
+ /*
+ * If we're within a join problem, the join unroller is responsible for
+ * building any required scan for this node. If not, we do it here.
+ */
+ if (!within_join_problem)
+ (void) pgpa_build_scan(walker, plan, NULL, beneath_any_gather, false);
+
+ /*
+	 * If this join needs to be unrolled but there's no join unroller already
+ * available, create one.
+ */
+ if (join_unroller == NULL && pgpa_is_join(plan))
+ {
+ join_unroller = pgpa_create_join_unroller();
+ join_unroller_toplevel = true;
+ within_join_problem = true;
+ }
+
+ /*
+ * If this join is to be unrolled, pgpa_unroll_join() will return the join
+ * unroller object that should be passed down when we recurse into the
+ * outer and inner sides of the plan.
+ */
+ if (join_unroller != NULL)
+ pgpa_unroll_join(walker, plan, beneath_any_gather, join_unroller,
+ &outer_join_unroller, &inner_join_unroller);
+
+ /* Add RTIs from the plan node to all active query features. */
+ pgpa_qf_add_plan_rtis(active_query_features, plan, walker->pstmt->rtable);
+
+ /*
+ * Recurse into the outer and inner subtrees.
+ *
+ * As an exception, if this is a ForeignScan, don't recurse. postgres_fdw
+	 * sometimes stores an EPQ recheck plan in plan->lefttree, but that's going
+ * to mention the same set of relations as the ForeignScan itself, and we
+ * have no way to emit advice targeting the EPQ case vs. the non-EPQ case.
+ * Moreover, it's not entirely clear what other FDWs might do with the
+ * left and right subtrees. Maybe some better handling is needed here, but
+ * for now, we just punt.
+ */
+ if (!IsA(plan, ForeignScan))
+ {
+ if (plan->lefttree != NULL)
+ pgpa_walk_recursively(walker, plan->lefttree, within_join_problem,
+ outer_join_unroller, active_query_features,
+ beneath_any_gather);
+ if (plan->righttree != NULL)
+ pgpa_walk_recursively(walker, plan->righttree, within_join_problem,
+ inner_join_unroller, active_query_features,
+ beneath_any_gather);
+ }
+
+ /*
+	 * If we created a join unroller up above, then it's also our job to use
+	 * it to build the final pgpa_unrolled_join, and to destroy the object.
+ */
+ if (join_unroller_toplevel)
+ {
+ pgpa_unrolled_join *ujoin;
+
+ ujoin = pgpa_build_unrolled_join(walker, join_unroller);
+ walker->toplevel_unrolled_joins =
+ lappend(walker->toplevel_unrolled_joins, ujoin);
+ pgpa_destroy_join_unroller(join_unroller);
+ (void) pgpa_process_unrolled_join(walker, ujoin);
+ }
+
+ /*
+ * Some plan types can have additional children. Nodes like Append that
+ * can have any number of children store them in a List; a SubqueryScan
+ * just has a field for a single additional Plan.
+ */
+ switch (nodeTag(plan))
+ {
+ case T_Append:
+ {
+ Append *aplan = (Append *) plan;
+
+ extraplans = aplan->appendplans;
+ }
+ break;
+ case T_MergeAppend:
+ {
+ MergeAppend *maplan = (MergeAppend *) plan;
+
+ extraplans = maplan->mergeplans;
+ }
+ break;
+ case T_BitmapAnd:
+ extraplans = ((BitmapAnd *) plan)->bitmapplans;
+ break;
+ case T_BitmapOr:
+ extraplans = ((BitmapOr *) plan)->bitmapplans;
+ break;
+ case T_SubqueryScan:
+
+ /*
+			 * We don't pass active_query_features down across the subquery
+			 * boundary, because they are specific to a single subquery level.
+ */
+ pgpa_walk_recursively(walker, ((SubqueryScan *) plan)->subplan,
+								  false, NULL, NIL, beneath_any_gather);
+ break;
+ case T_CustomScan:
+ extraplans = ((CustomScan *) plan)->custom_plans;
+ break;
+ default:
+ break;
+ }
+
+ /* If we found a list of extra children, iterate over it. */
+ foreach(lc, extraplans)
+ {
+ Plan *subplan = lfirst(lc);
+
+		pgpa_walk_recursively(walker, subplan, false, NULL, NIL,
+ beneath_any_gather);
+ }
+}
+
+/*
+ * Perform final processing of a newly-constructed pgpa_unrolled_join. This
+ * only needs to be called for toplevel pgpa_unrolled_join objects, since it
+ * recurses to sub-joins as needed.
+ *
+ * Our goal is to add the set of inner relids to the relevant join_strategies
+ * list, and to do the same for any sub-joins. To that end, the return value
+ * is the set of relids found beneath the the join, but it is expected that
+ * the toplevel caller will ignore this.
+ */
+static Bitmapset *
+pgpa_process_unrolled_join(pgpa_plan_walker_context *walker,
+ pgpa_unrolled_join *ujoin)
+{
+ Bitmapset *all_relids = bms_copy(ujoin->outer.scan->relids);
+
+ /* If this fails, we didn't unroll properly. */
+ Assert(ujoin->outer.unrolled_join == NULL);
+
+ for (int k = 0; k < ujoin->ninner; ++k)
+ {
+ pgpa_join_member *member = &ujoin->inner[k];
+ Bitmapset *relids;
+
+ if (member->unrolled_join != NULL)
+ relids = pgpa_process_unrolled_join(walker,
+ member->unrolled_join);
+ else
+ {
+ Assert(member->scan != NULL);
+ relids = member->scan->relids;
+ }
+ walker->join_strategies[ujoin->strategy[k]] =
+ lappend(walker->join_strategies[ujoin->strategy[k]], relids);
+ all_relids = bms_add_members(all_relids, relids);
+ }
+
+ return all_relids;
+}
+
+/*
+ * Arrange for the given plan node to be treated as a query feature when the
+ * tree walk reaches it.
+ *
+ * Make sure to only use this for nodes that the tree walk can't have reached
+ * yet!
+ */
+void
+pgpa_add_future_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type, Plan *plan)
+{
+ pgpa_query_feature *qf = pgpa_add_feature(walker, type, plan);
+
+ walker->future_query_features =
+ lappend(walker->future_query_features, qf);
+}
+
+/*
+ * Return the last of any elided nodes associated with this plan node ID.
+ *
+ * The last elided node is the one that would have been uppermost in the plan
+ * tree had it not been removed during setrefs processing.
+ */
+ElidedNode *
+pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan)
+{
+ ElidedNode *elided_node = NULL;
+
+ foreach_node(ElidedNode, n, pstmt->elidedNodes)
+ {
+ if (n->plan_node_id == plan->plan_node_id)
+ elided_node = n;
+ }
+
+ return elided_node;
+}
+
+/*
+ * Certain plan nodes can refer to a set of RTIs. Extract and return the set.
+ */
+Bitmapset *
+pgpa_relids(Plan *plan)
+{
+ if (IsA(plan, Result))
+ return ((Result *) plan)->relids;
+ else if (IsA(plan, ForeignScan))
+ return ((ForeignScan *) plan)->fs_relids;
+ else if (IsA(plan, Append))
+ return ((Append *) plan)->apprelids;
+ else if (IsA(plan, MergeAppend))
+ return ((MergeAppend *) plan)->apprelids;
+
+ return NULL;
+}
+
+/*
+ * Extract the scanned RTI from a plan node.
+ *
+ * Returns 0 if there isn't one.
+ */
+Index
+pgpa_scanrelid(Plan *plan)
+{
+ switch (nodeTag(plan))
+ {
+ case T_SeqScan:
+ case T_SampleScan:
+ case T_BitmapHeapScan:
+ case T_TidScan:
+ case T_TidRangeScan:
+ case T_SubqueryScan:
+ case T_FunctionScan:
+ case T_TableFuncScan:
+ case T_ValuesScan:
+ case T_CteScan:
+ case T_NamedTuplestoreScan:
+ case T_WorkTableScan:
+ case T_ForeignScan:
+ case T_CustomScan:
+ case T_IndexScan:
+ case T_IndexOnlyScan:
+ return ((Scan *) plan)->scanrelid;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Construct a new Bitmapset containing non-RTE_JOIN members of 'relids'.
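+ *
+ * For example, if RTI 2 is an RTE_JOIN entry in 'rtable', filtering the set
+ * {1, 2, 3} yields a newly-allocated set {1, 3}.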
+ */
+Bitmapset *
+pgpa_filter_out_join_relids(Bitmapset *relids, List *rtable)
+{
+ int rti = -1;
+ Bitmapset *result = NULL;
+
+ while ((rti = bms_next_member(relids, rti)) >= 0)
+ {
+ RangeTblEntry *rte = rt_fetch(rti, rtable);
+
+ if (rte->rtekind != RTE_JOIN)
+ result = bms_add_member(result, rti);
+ }
+
+ return result;
+}
+
+/*
+ * Create a pgpa_query_feature and add it to the list of all query features
+ * for this plan.
+ */
+static pgpa_query_feature *
+pgpa_add_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type, Plan *plan)
+{
+ pgpa_query_feature *qf = palloc0_object(pgpa_query_feature);
+
+ qf->type = type;
+ qf->plan = plan;
+
+ walker->query_features[qf->type] =
+ lappend(walker->query_features[qf->type], qf);
+
+ return qf;
+}
+
+/*
+ * Add a single RTI to each active query feature.
+ */
+static void
+pgpa_qf_add_rti(List *active_query_features, Index rti)
+{
+ foreach_ptr(pgpa_query_feature, qf, active_query_features)
+ {
+ qf->relids = bms_add_member(qf->relids, rti);
+ }
+}
+
+/*
+ * Add a set of RTIs to each active query feature.
+ */
+static void
+pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids)
+{
+ foreach_ptr(pgpa_query_feature, qf, active_query_features)
+ {
+ qf->relids = bms_add_members(qf->relids, relids);
+ }
+}
+
+/*
+ * Add RTIs directly contained in a plan node to each active query feature,
+ * but filter out any join RTIs, since advice doesn't mention those.
+ */
+static void
+pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan, List *rtable)
+{
+ Bitmapset *relids;
+ Index rti;
+
+ if ((relids = pgpa_relids(plan)) != NULL)
+ {
+ relids = pgpa_filter_out_join_relids(relids, rtable);
+ pgpa_qf_add_rtis(active_query_features, relids);
+ }
+ else if ((rti = pgpa_scanrelid(plan)) != 0)
+ pgpa_qf_add_rti(active_query_features, rti);
+}
+
+/*
+ * If we generated plan advice using the provided walker object and array
+ * of identifiers, would we generate the specified tag/target combination?
+ *
+ * If yes, the plan conforms to the advice; if no, it does not. Note that
+ * we have no way of knowing whether the planner was forced to emit a plan
+ * that conformed to the advice or just happened to do so.
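+ *
+ * For example, if the walker recorded a hash join whose inner side is
+ * exactly the set of relids that target "f" resolves to, then the
+ * combination of PGPA_TAG_HASH_JOIN and that target returns true.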
+ */
+bool
+pgpa_walker_would_advise(pgpa_plan_walker_context *walker,
+ pgpa_identifier *rt_identifiers,
+ pgpa_advice_tag_type tag,
+ pgpa_advice_target *target)
+{
+ Index rtable_length = list_length(walker->pstmt->rtable);
+ Bitmapset *relids = NULL;
+
+ if (tag == PGPA_TAG_JOIN_ORDER)
+ {
+ foreach_ptr(pgpa_unrolled_join, ujoin, walker->toplevel_unrolled_joins)
+ {
+ if (pgpa_walker_join_order_matches(ujoin, rtable_length,
+ rt_identifiers, target, true))
+ return true;
+ }
+
+ return false;
+ }
+
+ if (target->ttype == PGPA_TARGET_IDENTIFIER)
+ {
+ Index rti;
+
+ rti = pgpa_compute_rti_from_identifier(rtable_length, rt_identifiers,
+ &target->rid);
+ if (rti == 0)
+ return false;
+ relids = bms_make_singleton(rti);
+ }
+ else
+ {
+ Assert(target->ttype == PGPA_TARGET_ORDERED_LIST);
+ foreach_ptr(pgpa_advice_target, child_target, target->children)
+ {
+ Index rti;
+
+ Assert(child_target->ttype == PGPA_TARGET_IDENTIFIER);
+ rti = pgpa_compute_rti_from_identifier(rtable_length,
+ rt_identifiers,
+ &child_target->rid);
+ if (rti == 0)
+ return false;
+ relids = bms_add_member(relids, rti);
+ }
+ }
+
+ switch (tag)
+ {
+ case PGPA_TAG_JOIN_ORDER:
+ /* should have been handled above */
+ pg_unreachable();
+ break;
+ case PGPA_TAG_BITMAP_HEAP_SCAN:
+ return pgpa_walker_find_scan(walker,
+ PGPA_SCAN_BITMAP_HEAP,
+ relids) != NULL;
+ case PGPA_TAG_FOREIGN_JOIN:
+ return pgpa_walker_find_scan(walker,
+ PGPA_SCAN_FOREIGN,
+ relids) != NULL;
+ case PGPA_TAG_INDEX_ONLY_SCAN:
+ {
+ pgpa_scan *scan;
+
+ scan = pgpa_walker_find_scan(walker, PGPA_SCAN_INDEX_ONLY,
+ relids);
+ if (scan == NULL)
+ return false;
+
+ return pgpa_walker_index_target_matches_plan(target->itarget, scan->plan);
+ }
+ case PGPA_TAG_INDEX_SCAN:
+ {
+ pgpa_scan *scan;
+
+ scan = pgpa_walker_find_scan(walker, PGPA_SCAN_INDEX,
+ relids);
+ if (scan == NULL)
+ return false;
+
+ return pgpa_walker_index_target_matches_plan(target->itarget, scan->plan);
+ }
+ case PGPA_TAG_PARTITIONWISE:
+ return pgpa_walker_find_scan(walker,
+ PGPA_SCAN_PARTITIONWISE,
+ relids) != NULL;
+ case PGPA_TAG_SEQ_SCAN:
+ return pgpa_walker_find_scan(walker,
+ PGPA_SCAN_SEQ,
+ relids) != NULL;
+ case PGPA_TAG_TID_SCAN:
+ return pgpa_walker_find_scan(walker,
+ PGPA_SCAN_TID,
+ relids) != NULL;
+ case PGPA_TAG_GATHER:
+ return pgpa_walker_contains_feature(walker,
+ PGPAQF_GATHER,
+ relids);
+ case PGPA_TAG_GATHER_MERGE:
+ return pgpa_walker_contains_feature(walker,
+ PGPAQF_GATHER_MERGE,
+ relids);
+ case PGPA_TAG_SEMIJOIN_NON_UNIQUE:
+ return pgpa_walker_contains_feature(walker,
+ PGPAQF_SEMIJOIN_NON_UNIQUE,
+ relids);
+ case PGPA_TAG_SEMIJOIN_UNIQUE:
+ return pgpa_walker_contains_feature(walker,
+ PGPAQF_SEMIJOIN_UNIQUE,
+ relids);
+ case PGPA_TAG_HASH_JOIN:
+ return pgpa_walker_contains_join(walker,
+ JSTRAT_HASH_JOIN,
+ relids);
+ case PGPA_TAG_MERGE_JOIN_MATERIALIZE:
+ return pgpa_walker_contains_join(walker,
+ JSTRAT_MERGE_JOIN_MATERIALIZE,
+ relids);
+ case PGPA_TAG_MERGE_JOIN_PLAIN:
+ return pgpa_walker_contains_join(walker,
+ JSTRAT_MERGE_JOIN_PLAIN,
+ relids);
+ case PGPA_TAG_NESTED_LOOP_MATERIALIZE:
+ return pgpa_walker_contains_join(walker,
+ JSTRAT_NESTED_LOOP_MATERIALIZE,
+ relids);
+ case PGPA_TAG_NESTED_LOOP_MEMOIZE:
+ return pgpa_walker_contains_join(walker,
+ JSTRAT_NESTED_LOOP_MEMOIZE,
+ relids);
+ case PGPA_TAG_NESTED_LOOP_PLAIN:
+ return pgpa_walker_contains_join(walker,
+ JSTRAT_NESTED_LOOP_PLAIN,
+ relids);
+ case PGPA_TAG_NO_GATHER:
+ return pgpa_walker_contains_no_gather(walker, relids);
+ }
+
+ /* should not get here */
+ return false;
+}
+
+/*
+ * Does the index target match the Plan?
+ *
+ * Should only be called when we know that itarget mandates an Index Scan or
+ * Index Only Scan and that the Plan node is of the corresponding type. Here,
+ * our job is just to check whether it's the same index.
+ */
+static bool
+pgpa_walker_index_target_matches_plan(pgpa_index_target *itarget, Plan *plan)
+{
+ Oid indexoid = InvalidOid;
+
+ /* Retrieve the index OID from the plan. */
+ if (IsA(plan, IndexScan))
+ indexoid = ((IndexScan *) plan)->indexid;
+ else if (IsA(plan, IndexOnlyScan))
+ indexoid = ((IndexOnlyScan *) plan)->indexid;
+ else
+ elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan));
+
+ /* Check whether schema name matches, if specified in index target. */
+ if (itarget->indnamespace != NULL)
+ {
+ Oid nspoid = get_rel_namespace(indexoid);
+ char *relnamespace = get_namespace_name_or_temp(nspoid);
+
+ if (strcmp(itarget->indnamespace, relnamespace) != 0)
+ return false;
+ }
+
+	/* Check whether index name matches. */
+ return (strcmp(itarget->indname, get_rel_name(indexoid)) == 0);
+}
+
+/*
+ * Does an unrolled join match the join order specified by an advice target?
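+ *
+ * For example, the target parsed from JOIN_ORDER(f (d1 d2)) is an ordered
+ * list whose second element is itself an ordered list; it matches an
+ * unrolled join whose outer member is a scan of f and whose single inner
+ * member is itself an unrolled join over d1 and d2.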
+ */
+static bool
+pgpa_walker_join_order_matches(pgpa_unrolled_join *ujoin,
+ Index rtable_length,
+ pgpa_identifier *rt_identifiers,
+ pgpa_advice_target *target,
+ bool toplevel)
+{
+ int nchildren = list_length(target->children);
+
+ Assert(target->ttype == PGPA_TARGET_ORDERED_LIST);
+
+ /* At toplevel, we allow a prefix match. */
+ if (toplevel)
+ {
+ if (nchildren > ujoin->ninner + 1)
+ return false;
+ }
+ else
+ {
+ if (nchildren != ujoin->ninner + 1)
+ return false;
+ }
+
+ /* Outermost rel must match. */
+ if (!pgpa_walker_join_order_matches_member(&ujoin->outer,
+ rtable_length,
+ rt_identifiers,
+ linitial(target->children)))
+ return false;
+
+ /* Each inner rel must match. */
+ for (int n = 0; n < nchildren - 1; ++n)
+ {
+ pgpa_advice_target *child_target = list_nth(target->children, n + 1);
+
+ if (!pgpa_walker_join_order_matches_member(&ujoin->inner[n],
+ rtable_length,
+ rt_identifiers,
+ child_target))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Does one member of an unrolled join match an advice target?
+ */
+static bool
+pgpa_walker_join_order_matches_member(pgpa_join_member *member,
+ Index rtable_length,
+ pgpa_identifier *rt_identifiers,
+ pgpa_advice_target *target)
+{
+ Bitmapset *relids = NULL;
+
+ if (member->unrolled_join != NULL)
+ {
+ if (target->ttype != PGPA_TARGET_ORDERED_LIST)
+ return false;
+ return pgpa_walker_join_order_matches(member->unrolled_join,
+ rtable_length,
+ rt_identifiers,
+ target,
+ false);
+ }
+
+ Assert(member->scan != NULL);
+ switch (target->ttype)
+ {
+ case PGPA_TARGET_ORDERED_LIST:
+ /* Could only match an unrolled join */
+ return false;
+
+ case PGPA_TARGET_UNORDERED_LIST:
+ {
+ foreach_ptr(pgpa_advice_target, child_target, target->children)
+ {
+ Index rti;
+
+ rti = pgpa_compute_rti_from_identifier(rtable_length,
+ rt_identifiers,
+ &child_target->rid);
+ if (rti == 0)
+ return false;
+ relids = bms_add_member(relids, rti);
+ }
+ break;
+ }
+
+ case PGPA_TARGET_IDENTIFIER:
+ {
+ Index rti;
+
+ rti = pgpa_compute_rti_from_identifier(rtable_length,
+ rt_identifiers,
+ &target->rid);
+ if (rti == 0)
+ return false;
+ relids = bms_make_singleton(rti);
+ break;
+ }
+ }
+
+ return bms_equal(member->scan->relids, relids);
+}
+
+/*
+ * Find the scan where the walker says that the given scan strategy should be
+ * used for the given relid set, if one exists.
+ *
+ * Returns the pgpa_scan object, or NULL if none was found.
+ */
+static pgpa_scan *
+pgpa_walker_find_scan(pgpa_plan_walker_context *walker,
+ pgpa_scan_strategy strategy,
+ Bitmapset *relids)
+{
+ List *scans = walker->scans[strategy];
+
+ foreach_ptr(pgpa_scan, scan, scans)
+ {
+ if (bms_equal(scan->relids, relids))
+ return scan;
+ }
+
+ return NULL;
+}
+
+/*
+ * Does this walker say that the given query feature applies to the given
+ * relid set?
+ */
+static bool
+pgpa_walker_contains_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type,
+ Bitmapset *relids)
+{
+ List *query_features = walker->query_features[type];
+
+ foreach_ptr(pgpa_query_feature, qf, query_features)
+ {
+ if (bms_equal(qf->relids, relids))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Does the walker say that the given join strategy should be used for the
+ * given relid set?
+ */
+static bool
+pgpa_walker_contains_join(pgpa_plan_walker_context *walker,
+ pgpa_join_strategy strategy,
+ Bitmapset *relids)
+{
+ List *join_strategies = walker->join_strategies[strategy];
+
+ foreach_ptr(Bitmapset, jsrelids, join_strategies)
+ {
+ if (bms_equal(jsrelids, relids))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Does the walker say that the given relids should be marked as NO_GATHER?
+ */
+static bool
+pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker,
+ Bitmapset *relids)
+{
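+	/*
+	 * Unlike the other lookups above, a subset test suffices here: every
+	 * scan RTI in no_gather_scans is outside any Gather or Gather Merge
+	 * node, so the same is true of any subset of that set.
+	 */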
+ return bms_is_subset(relids, walker->no_gather_scans);
+}
diff --git a/contrib/pg_plan_advice/pgpa_walker.h b/contrib/pg_plan_advice/pgpa_walker.h
new file mode 100644
index 00000000000..b37e209dcc5
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_walker.h
@@ -0,0 +1,141 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_walker.h
+ * Main entrypoints for analyzing a plan to generate an advice string
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_walker.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_WALKER_H
+#define PGPA_WALKER_H
+
+#include "pgpa_ast.h"
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+
+/*
+ * When generating advice, we should emit either SEMIJOIN_UNIQUE advice or
+ * SEMIJOIN_NON_UNIQUE advice for each semijoin depending on whether we chose
+ * to implement it as a semijoin or whether we instead chose to make the
+ * nullable side unique and then perform an inner join. When the make-unique
+ * strategy is not chosen, it's not easy to tell from the final plan tree
+ * whether it was considered. That's awkward, because we don't want to emit
+ * useless SEMIJOIN_NON_UNIQUE advice when there was no decision to be made.
+ *
+ * To avoid that, during planning, we create a pgpa_sj_unique_rel for each
+ * relation that we considered making unique for purposes of semijoin planning.
+ */
+typedef struct pgpa_sj_unique_rel
+{
+ char *plan_name;
+ Bitmapset *relids;
+} pgpa_sj_unique_rel;
+
+/*
+ * We use the term "query feature" to refer to plan nodes that are interesting
+ * in the following way: to generate advice, we'll need to know the set of
+ * same-subquery, non-join RTIs occurring at or below that plan node, without
+ * admixture of parent and child RTIs.
+ *
+ * For example, Gather nodes, designated by PGPAQF_GATHER, and Gather Merge
+ * nodes, designated by PGPAQF_GATHER_MERGE, are query features, because we'll
+ * want to admit some kind of advice that describes the portion of the plan
+ * tree that appears beneath those nodes.
+ *
+ * Each semijoin can be implemented either by directly performing a semijoin,
+ * or by making one side unique and then performing a normal join. Either way,
+ * we use a query feature to notice what decision was made, so that we can
+ * describe it by enumerating the RTIs on that side of the join.
+ *
+ * To elaborate on the "no admixture of parent and child RTIs" rule, in all of
+ * these cases, if the entirety of an inheritance hierarchy appears beneath
+ * the query feature, we only want to name the parent table. But it's also
+ * possible to have cases where we must name child tables. This is particularly
+ * likely to happen when partitionwise join is in use, but could happen for
+ * Gather or Gather Merge even without that, if one of those appears below
+ * an Append or MergeAppend node for a single table.
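+ *
+ * Concretely, a Gather that sits above a join of f and d ends up with the
+ * relid set {f, d}, which advice strings render in a form like
+ * GATHER((f d)) (see sql/gather.sql).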
+ */
+typedef enum pgpa_qf_type
+{
+ PGPAQF_GATHER,
+ PGPAQF_GATHER_MERGE,
+ PGPAQF_SEMIJOIN_NON_UNIQUE,
+ PGPAQF_SEMIJOIN_UNIQUE
+ /* update NUM_PGPA_QF_TYPES if you add anything here */
+} pgpa_qf_type;
+
+#define NUM_PGPA_QF_TYPES ((int) PGPAQF_SEMIJOIN_UNIQUE + 1)
+
+/*
+ * For each query feature, we keep track of the feature type and the set of
+ * relids that we found underneath the relevant plan node. See the comments
+ * on pgpa_qf_type, above, for additional details.
+ */
+typedef struct pgpa_query_feature
+{
+ pgpa_qf_type type;
+ Plan *plan;
+ Bitmapset *relids;
+} pgpa_query_feature;
+
+/*
+ * Context object for plan tree walk.
+ *
+ * pstmt is the PlannedStmt we're studying.
+ *
+ * scans is an array of lists of pgpa_scan objects. The array is indexed by
+ * the scan's pgpa_scan_strategy.
+ *
+ * no_gather_scans is the set of scan RTIs that do not appear beneath any
+ * Gather or Gather Merge node.
+ *
+ * toplevel_unrolled_joins is a list of all pgpa_unrolled_join objects that
+ * are not a child of some other pgpa_unrolled_join.
+ *
+ * join_strategies is an array of lists of Bitmapset objects. Each Bitmapset
+ * is the set of relids that appears on the inner side of some join (excluding
+ * RTIs from partition children and subqueries). The array is indexed by
+ * pgpa_join_strategy.
+ *
+ * query_features is an array of lists of pgpa_query_feature objects, indexed
+ * by pgpa_qf_type.
+ *
+ * future_query_features is only used during the plan tree walk and should
+ * be empty when the tree walk concludes. It is a list of pgpa_query_feature
+ * objects for Plan nodes that the plan tree walk has not yet encountered;
+ * when encountered, they will be moved to the list of active query features
+ * that is propagated via the call stack.
+ */
+typedef struct pgpa_plan_walker_context
+{
+ PlannedStmt *pstmt;
+ List *scans[NUM_PGPA_SCAN_STRATEGY];
+ Bitmapset *no_gather_scans;
+ List *toplevel_unrolled_joins;
+ List *join_strategies[NUM_PGPA_JOIN_STRATEGY];
+ List *query_features[NUM_PGPA_QF_TYPES];
+ List *future_query_features;
+} pgpa_plan_walker_context;
+
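+/*
+ * Expected usage, in sketch form: zero-initialize a pgpa_plan_walker_context,
+ * call pgpa_plan_walker() on the PlannedStmt of interest, and then either
+ * inspect the per-strategy lists above or probe the result with
+ * pgpa_walker_would_advise().
+ */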
+extern void pgpa_plan_walker(pgpa_plan_walker_context *walker,
+ PlannedStmt *pstmt,
+ List *sj_unique_rels);
+
+extern void pgpa_add_future_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type,
+ Plan *plan);
+
+extern ElidedNode *pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan);
+extern Bitmapset *pgpa_relids(Plan *plan);
+extern Index pgpa_scanrelid(Plan *plan);
+extern Bitmapset *pgpa_filter_out_join_relids(Bitmapset *relids, List *rtable);
+
+extern bool pgpa_walker_would_advise(pgpa_plan_walker_context *walker,
+ pgpa_identifier *rt_identifiers,
+ pgpa_advice_tag_type tag,
+ pgpa_advice_target *target);
+
+#endif							/* PGPA_WALKER_H */
diff --git a/contrib/pg_plan_advice/sql/gather.sql b/contrib/pg_plan_advice/sql/gather.sql
new file mode 100644
index 00000000000..776666bf196
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/gather.sql
@@ -0,0 +1,86 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 1;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+SET min_parallel_table_scan_size = 0;
+SET debug_parallel_query = off;
+
+CREATE TABLE gt_dim (id serial primary key, dim text)
+ WITH (autovacuum_enabled = false);
+INSERT INTO gt_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE gt_dim;
+
+CREATE TABLE gt_fact (
+ id int not null,
+ dim_id integer not null references gt_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO gt_fact
+ SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE gt_fact;
+
+-- By default, we expect Gather Merge with a parallel hash join.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+
+-- Force Gather or Gather Merge of both relations together.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Force a separate Gather or Gather Merge operation for each relation.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather((d d/d.d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Force a Gather or Gather Merge on one relation but no parallelism on the
+-- other.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather_merge(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather(f) no_gather(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+SET LOCAL pg_plan_advice.advice = 'gather(d) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Force no Gather or Gather Merge use at all.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'no_gather(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
+
+-- Can't force Gather Merge without the ORDER BY clause, but just Gather is OK.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather_merge((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'gather((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id;
+COMMIT;
+
+-- Test conflicting advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'gather((f d)) no_gather(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM gt_fact f JOIN gt_dim d ON f.dim_id = d.id ORDER BY d.id;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/join_order.sql b/contrib/pg_plan_advice/sql/join_order.sql
new file mode 100644
index 00000000000..5e16e54efad
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/join_order.sql
@@ -0,0 +1,145 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+
+CREATE TABLE jo_dim1 (id integer primary key, dim1 text, val1 int)
+ WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim1 (id, dim1, val1)
+ SELECT g, 'some filler text ' || g, (g % 3) + 1
+ FROM generate_series(1,100) g;
+VACUUM ANALYZE jo_dim1;
+CREATE TABLE jo_dim2 (id integer primary key, dim2 text, val2 int)
+ WITH (autovacuum_enabled = false);
+INSERT INTO jo_dim2 (id, dim2, val2)
+ SELECT g, 'some filler text ' || g, (g % 7) + 1
+ FROM generate_series(1,1000) g;
+VACUUM ANALYZE jo_dim2;
+
+CREATE TABLE jo_fact (
+ id int primary key,
+ dim1_id integer not null references jo_dim1 (id),
+ dim2_id integer not null references jo_dim2 (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO jo_fact
+ SELECT g, (g%100)+1, (g%100)+1 FROM generate_series(1,100000) g;
+VACUUM ANALYZE jo_fact;
+
+-- We expect to join to d2 first and then d1, since the condition on d2
+-- is more selective.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+
+-- Force a few different join orders. Some of these are very inefficient,
+-- but the planner considers them all viable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(d1 f d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(f (d1 d2))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(f {d1 d2})';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+COMMIT;
+
+-- Force a join order by mentioning just a prefix of the join list.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+COMMIT;
+
+-- jo_fact is not partitioned, but let's try pretending that it is and
+-- verifying that the advice does not apply.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+SET LOCAL pg_plan_advice.advice = 'join_order(f/d1 (d1 d2))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+COMMIT;
+
+-- The unusual formulation of this query is intended to prevent the query
+-- planner from reducing the FULL JOIN to some other join type, so that we
+-- can test what happens with a join type that cannot be reordered.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+
+-- We should not be able to force the planner to join f to d1 first, because
+-- that is not a valid join order, but we should be able to force the planner
+-- to make either d2 or f the driving table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d1 d2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+SET LOCAL pg_plan_advice.advice = 'join_order(f d2 d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+SET LOCAL pg_plan_advice.advice = 'join_order(d2 f d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+COMMIT;
+
+-- Two incompatible join orders should conflict. In the second case,
+-- the conflict is implicit: if d1 is on the inner side of a join of any
+-- type, it cannot also be the driving table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'join_order(f) join_order(d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+SET LOCAL pg_plan_advice.advice = 'join_order(d1) hash_join(d1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM jo_dim1 d1
+ INNER JOIN (jo_fact f FULL JOIN jo_dim2 d2 ON f.dim2_id + 0 = d2.id + 0)
+ ON d1.id = f.dim1_id OR f.dim1_id IS NULL;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/join_strategy.sql b/contrib/pg_plan_advice/sql/join_strategy.sql
new file mode 100644
index 00000000000..edd5c4c0e14
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/join_strategy.sql
@@ -0,0 +1,84 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+
+CREATE TABLE join_dim (id serial primary key, dim text)
+ WITH (autovacuum_enabled = false);
+INSERT INTO join_dim (dim) SELECT random()::text FROM generate_series(1,100) g;
+VACUUM ANALYZE join_dim;
+
+CREATE TABLE join_fact (
+ id int primary key,
+ dim_id integer not null references join_dim (id)
+) WITH (autovacuum_enabled = false);
+INSERT INTO join_fact
+ SELECT g, (g%3)+1 FROM generate_series(1,100000) g;
+CREATE INDEX join_fact_dim_id ON join_fact (dim_id);
+VACUUM ANALYZE join_fact;
+
+-- We expect a hash join by default.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+
+-- Try forcing each join method in turn with join_dim as the inner table.
+-- All of these should work except for MERGE_JOIN_MATERIALIZE; that will
+-- fail, because the planner knows that join_dim (id) is unique, and will
+-- refuse to add mark/restore overhead.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+COMMIT;
+
+-- Now try forcing each join method in turn with join_fact as the inner
+-- table. All of these should work.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'HASH_JOIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'MERGE_JOIN_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_MEMOIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+COMMIT;
+
+-- Non-working cases. We can't force a foreign join between these tables,
+-- because they aren't foreign tables. We also can't use two different
+-- strategies on the same table, nor can we put both tables on the inner
+-- side of the same join.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'FOREIGN_JOIN((f d))';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f) NESTED_LOOP_MATERIALIZE(f)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+SET LOCAL pg_plan_advice.advice = 'NESTED_LOOP_PLAIN(f d)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/local_collector.sql b/contrib/pg_plan_advice/sql/local_collector.sql
new file mode 100644
index 00000000000..3225dd99058
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/local_collector.sql
@@ -0,0 +1,46 @@
+CREATE EXTENSION pg_plan_advice;
+SET debug_parallel_query = off;
+
+-- Try clearing advice before we've collected any.
+SELECT pg_clear_collected_local_advice();
+
+-- Set a small advice collection limit so that we'll exceed it.
+SET pg_plan_advice.local_collection_limit = 2;
+
+-- Enable the collector.
+SET pg_plan_advice.local_collector = on;
+
+-- Set up a dummy table.
+CREATE TABLE dummy_table (a int primary key, b text)
+ WITH (autovacuum_enabled = false, parallel_workers = 0);
+
+-- Test queries.
+SELECT * FROM dummy_table a, dummy_table b;
+SELECT * FROM dummy_table;
+
+-- Should return the advice from the second test query.
+SET pg_plan_advice.local_collector = off;
+SELECT advice FROM pg_get_collected_local_advice() ORDER BY id DESC LIMIT 1;
+
+-- Now try clearing advice again.
+SELECT pg_clear_collected_local_advice();
+
+-- Raise the collection limit so that the collector uses multiple chunks.
+SET pg_plan_advice.local_collection_limit = 2000;
+SET pg_plan_advice.local_collector = on;
+
+-- Push a bunch of queries through the collector.
+DO $$
+BEGIN
+ FOR x IN 1..2000 LOOP
+ EXECUTE 'SELECT * FROM dummy_table';
+ END LOOP;
+END
+$$;
+
+-- Check that the collector worked.
+SELECT COUNT(*) FROM pg_get_collected_local_advice();
+
+-- And clear one more time, to verify that this doesn't cause a problem
+-- even with a larger number of entries.
+SELECT pg_clear_collected_local_advice();
diff --git a/contrib/pg_plan_advice/sql/partitionwise.sql b/contrib/pg_plan_advice/sql/partitionwise.sql
new file mode 100644
index 00000000000..c51456dbbb5
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/partitionwise.sql
@@ -0,0 +1,99 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET enable_partitionwise_join = true;
+
+CREATE TABLE pt1 (id integer primary key, dim1 text, val1 int)
+ PARTITION BY RANGE (id);
+CREATE TABLE pt1a PARTITION OF pt1 FOR VALUES FROM (1) to (1001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt1b PARTITION OF pt1 FOR VALUES FROM (1001) to (2001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt1c PARTITION OF pt1 FOR VALUES FROM (2001) to (3001)
+ WITH (autovacuum_enabled = false);
+INSERT INTO pt1 (id, dim1, val1)
+ SELECT g, 'some filler text ' || g, (g % 3) + 1
+ FROM generate_series(1,3000) g;
+VACUUM ANALYZE pt1;
+
+CREATE TABLE pt2 (id integer primary key, dim2 text, val2 int)
+ PARTITION BY RANGE (id);
+CREATE TABLE pt2a PARTITION OF pt2 FOR VALUES FROM (1) to (1001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt2b PARTITION OF pt2 FOR VALUES FROM (1001) to (2001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt2c PARTITION OF pt2 FOR VALUES FROM (2001) to (3001)
+ WITH (autovacuum_enabled = false);
+INSERT INTO pt2 (id, dim2, val2)
+ SELECT g, 'some other text ' || g, (g % 5) + 1
+ FROM generate_series(1,3000,2) g;
+VACUUM ANALYZE pt2;
+
+CREATE TABLE pt3 (id integer primary key, dim3 text, val3 int)
+ PARTITION BY RANGE (id);
+CREATE TABLE pt3a PARTITION OF pt3 FOR VALUES FROM (1) to (1001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt3b PARTITION OF pt3 FOR VALUES FROM (1001) to (2001)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE pt3c PARTITION OF pt3 FOR VALUES FROM (2001) to (3001)
+ WITH (autovacuum_enabled = false);
+INSERT INTO pt3 (id, dim3, val3)
+ SELECT g, 'a third random text ' || g, (g % 7) + 1
+ FROM generate_series(1,3000,3) g;
+VACUUM ANALYZE pt3;
+
+CREATE TABLE ptmismatch (id integer primary key, dimm text, valm int)
+ PARTITION BY RANGE (id);
+CREATE TABLE ptmismatcha PARTITION OF ptmismatch
+ FOR VALUES FROM (1) to (1501)
+ WITH (autovacuum_enabled = false);
+CREATE TABLE ptmismatchb PARTITION OF ptmismatch
+ FOR VALUES FROM (1501) to (3001)
+ WITH (autovacuum_enabled = false);
+INSERT INTO ptmismatch (id, dimm, valm)
+ SELECT g, 'yet another text ' || g, (g % 2) + 1
+ FROM generate_series(1,3000) g;
+VACUUM ANALYZE ptmismatch;
+
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+
+-- Suppress partitionwise join, or do it just partially.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE(pt1 pt2 pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) pt3)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+COMMIT;
+
+-- Test conflicting advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 pt2) (pt1 pt3))';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+COMMIT;
+
+-- Can't force a partitionwise join with a mismatched table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'PARTITIONWISE((pt1 ptmismatch))';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, ptmismatch WHERE pt1.id = ptmismatch.id;
+COMMIT;
+
+-- Force join order for a particular branch of the partitionwise join with
+-- and without mentioning the schema name.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'JOIN_ORDER(pt3/public.pt3a pt2/public.pt2a pt1/public.pt1a)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+SET LOCAL pg_plan_advice.advice = 'JOIN_ORDER(pt3/pt3a pt2/pt2a pt1/pt1a)';
+EXPLAIN (PLAN_ADVICE, COSTS OFF)
+SELECT * FROM pt1, pt2, pt3 WHERE pt1.id = pt2.id AND pt2.id = pt3.id
+ AND val1 = 1 AND val2 = 1 AND val3 = 1;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/prepared.sql b/contrib/pg_plan_advice/sql/prepared.sql
new file mode 100644
index 00000000000..3ec30eedee5
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/prepared.sql
@@ -0,0 +1,37 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+
+CREATE TABLE ptab (id integer, val text) WITH (autovacuum_enabled = false);
+
+SET pg_plan_advice.always_store_advice_details = false;
+
+-- Not prepared, so advice should be generated.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM ptab;
+
+-- Prepared, so advice should not be generated.
+PREPARE pt1 AS SELECT * FROM ptab;
+EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt1;
+
+SET pg_plan_advice.always_store_advice_details = true;
+
+-- Prepared, but always_store_advice_details = true, so should show advice.
+PREPARE pt2 AS SELECT * FROM ptab;
+EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt2;
+
+-- Not prepared, so feedback should be generated.
+SET pg_plan_advice.always_store_advice_details = false;
+SET pg_plan_advice.advice = 'SEQ_SCAN(ptab)';
+EXPLAIN (COSTS OFF)
+SELECT * FROM ptab;
+
+-- Prepared, so advice should not be generated.
+PREPARE pt3 AS SELECT * FROM ptab;
+EXPLAIN (COSTS OFF) EXECUTE pt3;
+
+SET pg_plan_advice.always_store_advice_details = true;
+
+-- Prepared, but always_store_advice_details = true, so should show feedback.
+PREPARE pt4 AS SELECT * FROM ptab;
+EXPLAIN (COSTS OFF, PLAN_ADVICE) EXECUTE pt4;
+
diff --git a/contrib/pg_plan_advice/sql/scan.sql b/contrib/pg_plan_advice/sql/scan.sql
new file mode 100644
index 00000000000..4fc494c7d8e
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/scan.sql
@@ -0,0 +1,195 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+SET seq_page_cost = 0.1;
+SET random_page_cost = 0.1;
+SET cpu_tuple_cost = 0;
+SET cpu_index_tuple_cost = 0;
+
+CREATE TABLE scan_table (a int primary key, b text)
+ WITH (autovacuum_enabled = false);
+INSERT INTO scan_table
+ SELECT g, 'some text ' || g FROM generate_series(1, 100000) g;
+CREATE INDEX scan_table_b ON scan_table USING brin (b);
+VACUUM ANALYZE scan_table;
+
+-- Sequential scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+
+-- Index scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+
+-- Index-only scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+
+-- Bitmap heap scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE b > 'some text 8';
+
+-- TID scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+
+-- TID range scan
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+
+-- Try forcing each of our test queries to use the scan type they
+-- wanted to use anyway. This should succeed.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE b > 'some text 8';
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+COMMIT;
+
+-- Try to force a full scan of the table to use some other scan type. All
+-- of these will fail. An index scan or bitmap heap scan could potentially
+-- generate the correct answer, but the planner does not even consider these
+-- possibilities due to the lack of a WHERE clause.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table;
+COMMIT;
+
+-- Try again to force index use. This should now succeed for the INDEX_SCAN
+-- and BITMAP_HEAP_SCAN, but the INDEX_ONLY_SCAN can't be forced because the
+-- query fetches columns not included in the index.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+SET LOCAL pg_plan_advice.advice = 'BITMAP_HEAP_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a > 0;
+COMMIT;
+
+-- We can force a primary key lookup to use a sequential scan, but we
+-- can't force it to use an index-only scan (due to the column list)
+-- or a TID scan (due to the absence of a TID qual).
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'TID_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- We can forcibly downgrade an index-only scan to an index scan, but we can't
+-- force the use of an index that the planner thinks is inapplicable.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- We can force the use of a sequential scan in place of a bitmap heap scan,
+-- but a plain index scan on a BRIN index is not possible.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE b > 'some text 8';
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_b)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE b > 'some text 8';
+COMMIT;
+
+-- We can force the use of a sequential scan rather than a TID scan or
+-- TID range scan.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(scan_table)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE ctid = '(0,1)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table
+ WHERE ctid > '(1,1)' AND ctid < '(2,1)';
+COMMIT;
+
+-- Test more complex scenarios with index scans.
+BEGIN;
+-- Should still work if we mention the schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+-- But not if we mention the wrong schema.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table cilbup.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+-- It's OK to repeat the same advice.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+-- But it doesn't work if the index target is even notionally different.
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table scan_table_pkey scan_table public.scan_table_pkey)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT * FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- Test assorted incorrect advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(nothing)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(nothing whatsoever)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+SET LOCAL pg_plan_advice.advice = 'INDEX_ONLY_SCAN(scan_table bogus)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE) SELECT a FROM scan_table WHERE a = 1;
+COMMIT;
+
+-- Test our ability to refer to multiple instances of the same alias.
+BEGIN;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+ LEFT JOIN scan_table s ON g = s.a;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+ LEFT JOIN scan_table s ON g = s.a;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+ LEFT JOIN scan_table s ON g = s.a;
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s) SEQ_SCAN(s#2)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (generate_series(1,10) g LEFT JOIN scan_table s ON g = s.a) x
+ LEFT JOIN scan_table s ON g = s.a;
+COMMIT;
+
+-- Test our ability to refer to scans within a subquery.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+BEGIN;
+-- Should not match.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+-- Should match first query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@x)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+-- Should match second query only.
+SET LOCAL pg_plan_advice.advice = 'SEQ_SCAN(s@unnamed_subquery)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/semijoin.sql b/contrib/pg_plan_advice/sql/semijoin.sql
new file mode 100644
index 00000000000..5a4ae52d1d9
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/semijoin.sql
@@ -0,0 +1,118 @@
+LOAD 'pg_plan_advice';
+SET max_parallel_workers_per_gather = 0;
+
+CREATE TABLE sj_wide (
+ id integer primary key,
+ val1 integer,
+ padding text storage plain
+) WITH (autovacuum_enabled = false);
+INSERT INTO sj_wide
+ SELECT g, g%10+1, repeat(' ', 300) FROM generate_series(1, 1000) g;
+CREATE INDEX ON sj_wide (val1);
+VACUUM ANALYZE sj_wide;
+
+CREATE TABLE sj_narrow (
+ id integer primary key,
+ val1 integer
+) WITH (autovacuum_enabled = false);
+INSERT INTO sj_narrow
+ SELECT g, g%10+1 FROM generate_series(1, 1000) g;
+CREATE INDEX ON sj_narrow (val1);
+VACUUM ANALYZE sj_narrow;
+
+-- We expect this to make the VALUES list unique and use index lookups to
+-- find the rows in sj_wide, so as to avoid a full scan of sj_wide.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_wide
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+
+-- If we ask for a unique semijoin, we should get the same plan as with
+-- no advice. If we ask for a non-unique semijoin, we should see a Semi
+-- Join operation in the plan tree.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_wide
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_wide
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+COMMIT;
+
+-- Because this table is narrower than the previous one, a sequential scan
+-- is less expensive, and we choose a straightforward Semi Join plan by
+-- default. (Note that this is also very sensitive to the length of the IN
+-- list, which affects how many index lookups the alternative plan will need.)
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_narrow
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+
+-- Here, we expect advising a unique semijoin to switch to the same plan that
+-- we got with sj_wide, and advising a non-unique semijoin should not change
+-- the plan.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique("*VALUES*")';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_narrow
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique("*VALUES*")';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM sj_narrow
+ WHERE (id, val1) IN (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+COMMIT;
+
+-- In the above example, we made the outer side of the join unique, but here,
+-- we should make the inner side unique.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+
+-- We should be able to force a plan with or without the make-unique strategy,
+-- with either side as the driving table.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) join_order(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(sj_narrow) join_order(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+COMMIT;
+
+-- However, mentioning the wrong side of the join should result in an advice
+-- failure.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+SET LOCAL pg_plan_advice.advice = 'semijoin_non_unique(g)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+COMMIT;
+
+-- Test conflicting advice.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(sj_narrow) semijoin_non_unique(sj_narrow)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g
+ WHERE g in (select val1 from sj_narrow);
+COMMIT;
+
+-- Try applying SEMIJOIN_UNIQUE() to a non-semijoin.
+BEGIN;
+SET LOCAL pg_plan_advice.advice = 'semijoin_unique(g)';
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT * FROM generate_series(1,1000) g, sj_narrow s WHERE g = s.val1;
+COMMIT;
diff --git a/contrib/pg_plan_advice/sql/syntax.sql b/contrib/pg_plan_advice/sql/syntax.sql
new file mode 100644
index 00000000000..56a5d54e2b5
--- /dev/null
+++ b/contrib/pg_plan_advice/sql/syntax.sql
@@ -0,0 +1,68 @@
+LOAD 'pg_plan_advice';
+
+-- An empty string is allowed. Empty target lists are allowed for most advice
+-- tags, but not for JOIN_ORDER. "Supplied Plan Advice" should be omitted in
+-- text format when there is no actual advice, but not in non-text format.
+SET pg_plan_advice.advice = '';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = 'SEQ_SCAN()';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = 'NESTED_LOOP_PLAIN()';
+EXPLAIN (COSTS OFF, FORMAT JSON) SELECT 1;
+SET pg_plan_advice.advice = 'JOIN_ORDER()';
+
+-- Test assorted variations in capitalization, whitespace, and which parts of
+-- the relation identifier are included. These should all work.
+SET pg_plan_advice.advice = 'SEQ_SCAN(x)';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = 'seq_scan(x@y)';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = 'SEQ_scan(x#2)';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = 'SEQ_SCAN (x/y)';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = ' SEQ_SCAN ( x / y . z ) ';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = 'SEQ_SCAN("x"#2/"y"."z"@"t")';
+EXPLAIN (COSTS OFF) SELECT 1;
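+
+-- Taken together, the examples above exercise the general identifier shape
+-- alias[#instance][/[schema.]child][@subquery], where any individual name
+-- may be double-quoted.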
+
+-- Syntax errors.
+SET pg_plan_advice.advice = 'SEQUENTIAL_SCAN(x)';
+SET pg_plan_advice.advice = 'SEQ_SCAN';
+SET pg_plan_advice.advice = 'SEQ_SCAN(';
+SET pg_plan_advice.advice = 'SEQ_SCAN("';
+SET pg_plan_advice.advice = 'SEQ_SCAN("")';
+SET pg_plan_advice.advice = 'SEQ_SCAN("a"';
+SET pg_plan_advice.advice = 'SEQ_SCAN(#';
+SET pg_plan_advice.advice = '()';
+SET pg_plan_advice.advice = '123';
+
+-- Tags like SEQ_SCAN and NO_GATHER don't allow sublists at all; other tags,
+-- except for JOIN_ORDER, allow at most one level of sublist. Hence, these
+-- examples should error out.
+SET pg_plan_advice.advice = 'SEQ_SCAN((x))';
+SET pg_plan_advice.advice = 'GATHER(((x)))';
+
+-- Legal comments.
+SET pg_plan_advice.advice = '/**/';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = 'HASH_JOIN(_)/***/';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(/*x*/y)';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = '/* comment */ HASH_JOIN(y//*x*/z)';
+EXPLAIN (COSTS OFF) SELECT 1;
+
+-- Unterminated comments.
+SET pg_plan_advice.advice = '/*';
+SET pg_plan_advice.advice = 'JOIN_ORDER("fOO") /* oops';
+
+-- Nested comments are not supported, so the first of these is legal and
+-- the second is not.
+SET pg_plan_advice.advice = '/*/*/';
+EXPLAIN (COSTS OFF) SELECT 1;
+SET pg_plan_advice.advice = '/*/* stuff */*/';
+
+-- Foreign join requires multiple relation identifiers.
+SET pg_plan_advice.advice = 'FOREIGN_JOIN(a)';
+SET pg_plan_advice.advice = 'FOREIGN_JOIN((a))';
diff --git a/contrib/pg_plan_advice/t/001_regress.pl b/contrib/pg_plan_advice/t/001_regress.pl
new file mode 100644
index 00000000000..67595cddf75
--- /dev/null
+++ b/contrib/pg_plan_advice/t/001_regress.pl
@@ -0,0 +1,148 @@
+# Copyright (c) 2021-2025, PostgreSQL Global Development Group
+
+# Run the core regression tests under pg_plan_advice to check for problems.
+use strict;
+use warnings FATAL => 'all';
+
+use Cwd qw(abs_path);
+use File::Basename qw(dirname);
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Initialize the primary node
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init();
+
+# Set up our desired configuration.
+#
+# We run with pg_plan_advice.shared_collection_limit set to ensure that the
+# plan tree walker code runs against every query in the regression tests. If
+# we're unable to properly analyze any of those plan trees, this test should fail.
+#
+# We set pg_plan_advice.advice to an advice string that will cause the advice
+# trove to be populated with a few entries of various sorts, but which we do
+# not expect to match anything in the regression test queries. This way, the
+# planner hooks will be called, improving code coverage, but no plans should
+# actually change.
+#
+# pg_plan_advice.always_explain_supplied_advice=false is needed to avoid breaking
+# regression test queries that use EXPLAIN. In the real world, it seems like
+# users will want EXPLAIN output to show supplied advice so that it's clear
+# whether normal planner behavior has been altered, but here that's undesirable.
+# The settings below follow the plan described above; the exact limit and
+# dummy advice values are illustrative.
+$node->append_conf('postgresql.conf', <<EOF);
+shared_preload_libraries = 'pg_plan_advice'
+pg_plan_advice.shared_collection_limit = 100000
+pg_plan_advice.advice = 'SEQ_SCAN(no_such_alias) HASH_JOIN(no_such_alias)'
+pg_plan_advice.always_explain_supplied_advice = false
+EOF
+
+$node->start;
+
+my $srcdir = abs_path("../..");
+
+# --dlpath is needed to be able to find the location of regress.so
+# and any libraries the regression tests require.
+my $dlpath = dirname($ENV{REGRESS_SHLIB});
+
+# --outputdir points to the path where to place the output files.
+my $outputdir = $PostgreSQL::Test::Utils::tmp_check;
+
+# --inputdir points to the path of the input files.
+my $inputdir = "$srcdir/src/test/regress";
+
+# Run the tests.
+my $rc =
+ system($ENV{PG_REGRESS} . " "
+ . "--bindir= "
+ . "--dlpath=\"$dlpath\" "
+ . "--host=" . $node->host . " "
+ . "--port=" . $node->port . " "
+ . "--schedule=$srcdir/src/test/regress/parallel_schedule "
+ . "--max-concurrent-tests=20 "
+ . "--inputdir=\"$inputdir\" "
+ . "--outputdir=\"$outputdir\"");
+
+# Dump out the regression diffs file, if there is one
+if ($rc != 0)
+{
+ my $diffs = "$outputdir/regression.diffs";
+ if (-e $diffs)
+ {
+ print "=== dumping $diffs ===\n";
+ print slurp_file($diffs);
+ print "=== EOF ===\n";
+ }
+}
+
+# Report results
+is($rc, 0, 'regression tests pass');
+
+# Create the extension so we can access the collector
+$node->safe_psql('postgres', 'CREATE EXTENSION pg_plan_advice');
+
+# Verify that a large amount of advice was collected
+# (Function name assumed by analogy with pg_get_collected_local_advice.)
+my $all_query_count = $node->safe_psql('postgres', <<EOSQL);
+SELECT COUNT(*) FROM pg_get_collected_shared_advice()
+EOSQL
+cmp_ok($all_query_count, '>', 20000, "copious advice collected");
+
+# Verify that lots of different advice strings were collected
+my $distinct_query_count = $node->safe_psql('postgres', <<EOM);
+SELECT COUNT(DISTINCT advice) FROM pg_plan_advice_collected_queries()
+EOM
+cmp_ok($distinct_query_count, '>', 3000, "diverse advice collected");
+
+# We want to test for the presence of our known tags in the collected advice.
+# Put all tags into the hash that follows; map any tags that aren't tested
+# by the core regression tests to 0, and others to 1.
+my %tag_map = (
+ BITMAP_HEAP_SCAN => 1,
+ FOREIGN_JOIN => 0,
+ GATHER => 1,
+ GATHER_MERGE => 1,
+ HASH_JOIN => 1,
+ INDEX_ONLY_SCAN => 1,
+ INDEX_SCAN => 1,
+ JOIN_ORDER => 1,
+ MERGE_JOIN_MATERIALIZE => 1,
+ MERGE_JOIN_PLAIN => 1,
+ NESTED_LOOP_MATERIALIZE => 1,
+ NESTED_LOOP_MEMOIZE => 1,
+ NESTED_LOOP_PLAIN => 1,
+ NO_GATHER => 1,
+ PARTITIONWISE => 1,
+ SEMIJOIN_NON_UNIQUE => 1,
+ SEMIJOIN_UNIQUE => 1,
+ SEQ_SCAN => 1,
+ TID_SCAN => 1,
+);
+for my $tag (sort keys %tag_map)
+{
+ my $checkit = $tag_map{$tag};
+
+ # Search for the given tag. This is not entirely robust: it could get thrown
+ # off by a table alias such as "FOREIGN_JOIN(", but that probably won't
+ # happen in the core regression tests.
+	my $tag_count = $node->safe_psql('postgres', <<EOM);
+SELECT COUNT(*) FROM pg_plan_advice_collected_queries() WHERE advice LIKE '%$tag(%'
+EOM
+	cmp_ok($tag_count, '>', 10, "multiple uses of $tag") if $checkit;
+
+ # Regardless, note the exact count in the log, for human consumption.
+ note("found $tag_count advice strings containing $tag");
+}
+
+# Trigger a partial cleanup of the shared advice collector, and then a full
+# cleanup.
+$node->safe_psql('postgres', <<EOM);
+SELECT pg_plan_advice_partial_cleanup();
+SELECT pg_plan_advice_full_cleanup();
+EOM
+
+done_testing();
diff --git a/contrib/pg_trgm/expected/pg_utf8_trgm.out b/contrib/pg_trgm/expected/pg_utf8_trgm.out
new file mode 100644
--- /dev/null
+++ b/contrib/pg_trgm/expected/pg_utf8_trgm.out
@@ -0,0 +1,8 @@
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+-- Index 50 translations of the word "Mathematics"
+CREATE TEMP TABLE mb (s text);
+\copy mb from 'data/trgm_utf8.data'
+CREATE INDEX ON mb USING gist(s gist_trgm_ops);
diff --git a/contrib/pg_trgm/expected/pg_utf8_trgm_1.out b/contrib/pg_trgm/expected/pg_utf8_trgm_1.out
new file mode 100644
index 00000000000..8505c4fa552
--- /dev/null
+++ b/contrib/pg_trgm/expected/pg_utf8_trgm_1.out
@@ -0,0 +1,3 @@
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/contrib/pg_trgm/meson.build b/contrib/pg_trgm/meson.build
index 3cc299d5eaa..3ecf95ba862 100644
--- a/contrib/pg_trgm/meson.build
+++ b/contrib/pg_trgm/meson.build
@@ -39,6 +39,7 @@ tests += {
'regress': {
'sql': [
'pg_trgm',
+ 'pg_utf8_trgm',
'pg_word_trgm',
'pg_strict_word_trgm',
],
diff --git a/contrib/pg_trgm/sql/pg_utf8_trgm.sql b/contrib/pg_trgm/sql/pg_utf8_trgm.sql
new file mode 100644
index 00000000000..0dd962ced83
--- /dev/null
+++ b/contrib/pg_trgm/sql/pg_utf8_trgm.sql
@@ -0,0 +1,9 @@
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+-- Index 50 translations of the word "Mathematics"
+CREATE TEMP TABLE mb (s text);
+\copy mb from 'data/trgm_utf8.data'
+CREATE INDEX ON mb USING gist(s gist_trgm_ops);
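
The new test indexes multibyte words, which is a useful reminder of how pg_trgm fits wide characters into its fixed three-byte trgm slots: trigrams wider than three bytes are compacted by hashing. A sketch of that pre-existing rule, assuming the trgm.h and pg_crc.h definitions that the surrounding code already uses (the exact macro names are an assumption):

    /* sketch of compact_trigram(): hash wide trigrams down to 3 bytes */
    static void
    compact_trigram_sketch(trgm *tptr, char *str, int bytelen)
    {
        if (bytelen == 3)
            CPTRGM(tptr, str);      /* single-byte characters: copy verbatim */
        else
        {
            pg_crc32    crc;

            INIT_LEGACY_CRC32(crc);
            COMP_LEGACY_CRC32(crc, str, bytelen);
            FIN_LEGACY_CRC32(crc);

            /* keep only three bytes of the hash */
            memcpy(tptr, &crc, 3);
        }
    }
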
diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h
index ca017585369..ca23aad4dd9 100644
--- a/contrib/pg_trgm/trgm.h
+++ b/contrib/pg_trgm/trgm.h
@@ -47,7 +47,7 @@ typedef char trgm[3];
} while(0)
extern int (*CMPTRGM) (const void *a, const void *b);
-#define ISWORDCHR(c) (t_isalnum(c))
+#define ISWORDCHR(c, len) (t_isalnum_with_len(c, len))
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
#define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
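
ISWORDCHR now takes the character's byte length alongside its start pointer, so callers compute the length once with the range-bounded helpers and pass it down. For intuition, a UTF-8 sequence's length is fully determined by its first byte, which is what makes a range-bounded variant cheap; a rough sketch (the real pg_mblen_range() is defined elsewhere in this patch series and may differ):

    /* UTF-8 sequence length from the first byte alone */
    static int
    utf8_seq_len(unsigned char b)
    {
        if (b < 0x80)
            return 1;
        if ((b & 0xE0) == 0xC0)
            return 2;
        if ((b & 0xF0) == 0xE0)
            return 3;
        if ((b & 0xF8) == 0xF0)
            return 4;
        return 1;               /* treat invalid lead bytes as length 1 */
    }

    /* range-bounded variant: never report a length that runs past 'end' */
    static int
    utf8_seq_len_range(const char *p, const char *end)
    {
        int         len = utf8_seq_len((unsigned char) *p);

        return (p + len <= end) ? len : (int) (end - p);
    }
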
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c
index 014bb3c848c..5766b3e9955 100644
--- a/contrib/pg_trgm/trgm_gin.c
+++ b/contrib/pg_trgm/trgm_gin.c
@@ -99,7 +99,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case LikeStrategyNumber:
/*
@@ -113,7 +113,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case RegExpStrategyNumber:
trg = createTrgmNFA(val, PG_GET_COLLATION(),
&graph, CurrentMemoryContext);
@@ -224,7 +224,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case LikeStrategyNumber:
case EqualStrategyNumber:
/* Check if all extracted trigrams are presented. */
@@ -242,7 +242,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case RegExpStrategyNumber:
if (nkeys < 1)
{
@@ -310,7 +310,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case LikeStrategyNumber:
case EqualStrategyNumber:
/* Check if all extracted trigrams are presented. */
@@ -328,7 +328,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case RegExpStrategyNumber:
if (nkeys < 1)
{
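
Replacing the FALL THRU comments with a pg_fallthrough statement lets the compiler verify intentional fall-through itself, instead of having to recognize comment text. One plausible definition, stated only as an assumption since the real macro is expected to be supplied centrally by this patch:

    #if (defined(__GNUC__) && __GNUC__ >= 7) || defined(__clang__)
    #define pg_fallthrough __attribute__((fallthrough))
    #else
    #define pg_fallthrough ((void) 0)
    #endif

With a statement rather than a comment, a case label that falls through without either a break or a pg_fallthrough can be diagnosed reliably.
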
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 2f0d61985a5..11812b2984e 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -248,7 +248,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case LikeStrategyNumber:
qtrg = generate_wildcard_trgm(VARDATA(query),
querysize - VARHDRSZ);
@@ -257,7 +257,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case RegExpStrategyNumber:
qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
&graph, fcinfo->flinfo->fn_mcxt);
@@ -345,7 +345,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case LikeStrategyNumber:
case EqualStrategyNumber:
/* Wildcard and equal search are inexact */
@@ -387,7 +387,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
#ifndef IGNORECASE
elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
#endif
- /* FALL THRU */
+ pg_fallthrough;
case RegExpStrategyNumber:
/* Regexp search is inexact */
*recheck = true;
@@ -701,10 +701,13 @@ gtrgm_penalty(PG_FUNCTION_ARGS)
if (ISARRKEY(newval))
{
char *cache = (char *) fcinfo->flinfo->fn_extra;
- TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(siglen));
+ TRGM *cachedVal = NULL;
Size newvalsize = VARSIZE(newval);
BITVECP sign;
+ if (cache != NULL)
+ cachedVal = (TRGM *) (cache + MAXALIGN(siglen));
+
/*
* Cache the sign data across multiple calls with the same newval.
*/
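
The gtrgm_penalty() hunk above is more than tidying: computing cache + MAXALIGN(siglen) while cache is still NULL is undefined behavior in C, even if the resulting pointer is never dereferenced on that path. A contrived standalone illustration:

    #include <stddef.h>

    int
    main(void)
    {
        char       *cache = NULL;
        size_t      offset = 64;
        char       *derived;

        /*
         * Undefined: a non-zero offset applied to a null pointer, even with
         * no dereference; clang's -fsanitize=pointer-overflow reports it.
         */
        derived = cache + offset;

        /* Well-defined: derive the pointer only from a non-null base. */
        derived = (cache != NULL) ? cache + offset : NULL;

        return derived != NULL;
    }
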
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index 81182a15e07..5fba594b61f 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -66,6 +66,78 @@ typedef uint8 TrgmBound;
#define WORD_SIMILARITY_STRICT 0x02 /* force bounds of extent to match
* word bounds */
+/*
+ * A growable array of trigrams
+ *
+ * The actual array of trigrams is in 'datum'. Note that the other fields in
+ * 'datum', i.e. datum->flags and the varlena length, are not kept up to date
+ * when items are added to the growable array. We merely reserve the space
+ * for them here. You must fill those other fields before using 'datum' as a
+ * proper TRGM datum.
+ */
+typedef struct
+{
+ TRGM *datum; /* trigram array */
+ int length; /* number of trigrams in the array */
+ int allocated; /* allocated size of 'datum' (# of trigrams) */
+} growable_trgm_array;
+
+/*
+ * Initialize a growable trigram array, allocating its initial storage.
+ *
+ * 'slen' is the size of the source string that we're extracting the trigrams
+ * from. It is used to choose the initial size of the array.
+ */
+static void
+init_trgm_array(growable_trgm_array *arr, int slen)
+{
+ size_t init_size;
+
+ /*
+	 * In the extreme case, the input string consists entirely of
+	 * one-character words, like "a b c", where each word is expanded to two
+ * trigrams. This is not a strict upper bound though, because when
+ * IGNORECASE is defined, we convert the input string to lowercase before
+ * extracting the trigrams, which in rare cases can expand one input
+ * character into multiple characters.
+ */
+ init_size = (size_t) slen + 1;
+
+ /*
+ * Guard against possible overflow in the palloc request. (We don't worry
+ * about the additive constants, since palloc can detect requests that are
+ * a little above MaxAllocSize --- we just need to prevent integer
+ * overflow in the multiplications.)
+ */
+ if (init_size > MaxAllocSize / sizeof(trgm))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("out of memory")));
+
+ arr->datum = palloc(CALCGTSIZE(ARRKEY, init_size));
+ arr->allocated = init_size;
+ arr->length = 0;
+}
+
+/* Make sure the array can hold at least 'needed' more trigrams */
+static void
+enlarge_trgm_array(growable_trgm_array *arr, int needed)
+{
+ size_t new_needed = (size_t) arr->length + needed;
+
+ if (new_needed > arr->allocated)
+ {
+ /* Guard against possible overflow, like in init_trgm_array */
+ if (new_needed > MaxAllocSize / sizeof(trgm))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("out of memory")));
+
+ arr->datum = repalloc(arr->datum, CALCGTSIZE(ARRKEY, new_needed));
+ arr->allocated = new_needed;
+ }
+}
+
/*
* Module load callback
*/
@@ -220,22 +292,31 @@ comp_trgm(const void *a, const void *b)
* endword points to the character after word
*/
static char *
-find_word(char *str, int lenstr, char **endword, int *charlen)
+find_word(char *str, int lenstr, char **endword)
{
char *beginword = str;
+ const char *endstr = str + lenstr;
- while (beginword - str < lenstr && !ISWORDCHR(beginword))
- beginword += pg_mblen(beginword);
+ while (beginword < endstr)
+ {
+ int clen = pg_mblen_range(beginword, endstr);
- if (beginword - str >= lenstr)
+ if (ISWORDCHR(beginword, clen))
+ break;
+ beginword += clen;
+ }
+
+ if (beginword >= endstr)
return NULL;
*endword = beginword;
- *charlen = 0;
- while (*endword - str < lenstr && ISWORDCHR(*endword))
+ while (*endword < endstr)
{
- *endword += pg_mblen(*endword);
- (*charlen)++;
+ int clen = pg_mblen_range(*endword, endstr);
+
+ if (!ISWORDCHR(*endword, clen))
+ break;
+ *endword += clen;
}
return beginword;
@@ -269,78 +350,138 @@ compact_trigram(trgm *tptr, char *str, int bytelen)
}
/*
- * Adds trigrams from words (already padded).
+ * Adds trigrams from the word in 'str' (already padded if necessary).
*/
-static trgm *
-make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
+static void
+make_trigrams(growable_trgm_array *dst, char *str, int bytelen)
{
+ trgm *tptr;
char *ptr = str;
- if (charlen < 3)
- return tptr;
+ if (bytelen < 3)
+ return;
- if (bytelen > charlen)
- {
- /* Find multibyte character boundaries and apply compact_trigram */
- int lenfirst = pg_mblen(str),
- lenmiddle = pg_mblen(str + lenfirst),
- lenlast = pg_mblen(str + lenfirst + lenmiddle);
+ /* max number of trigrams = strlen - 2 */
+ enlarge_trgm_array(dst, bytelen - 2);
+ tptr = GETARR(dst->datum) + dst->length;
- while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
+ if (pg_encoding_max_length(GetDatabaseEncoding()) == 1)
+ {
+ while (ptr < str + bytelen - 2)
{
- compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);
-
- ptr += lenfirst;
+ CPTRGM(tptr, ptr);
+ ptr++;
tptr++;
-
- lenfirst = lenmiddle;
- lenmiddle = lenlast;
- lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
}
}
else
{
- /* Fast path when there are no multibyte characters */
- Assert(bytelen == charlen);
+ int lenfirst,
+ lenmiddle,
+ lenlast;
+ char *endptr;
- while (ptr - str < bytelen - 2 /* number of trigrams = strlen - 2 */ )
+ /*
+ * Fast path as long as there are no multibyte characters
+ */
+ if (!IS_HIGHBIT_SET(ptr[0]) && !IS_HIGHBIT_SET(ptr[1]))
{
- CPTRGM(tptr, ptr);
- ptr++;
+ while (!IS_HIGHBIT_SET(ptr[2]))
+ {
+ CPTRGM(tptr, ptr);
+ ptr++;
+ tptr++;
+
+ if (ptr == str + bytelen - 2)
+ goto done;
+ }
+
+ lenfirst = 1;
+ lenmiddle = 1;
+ lenlast = pg_mblen_unbounded(ptr + 2);
+ }
+ else
+ {
+ lenfirst = pg_mblen_unbounded(ptr);
+ if (ptr + lenfirst >= str + bytelen)
+ goto done;
+ lenmiddle = pg_mblen_unbounded(ptr + lenfirst);
+ if (ptr + lenfirst + lenmiddle >= str + bytelen)
+ goto done;
+ lenlast = pg_mblen_unbounded(ptr + lenfirst + lenmiddle);
+ }
+
+ /*
+ * Slow path to handle any remaining multibyte characters
+ *
+ * As we go, 'ptr' points to the beginning of the current
+ * three-character string and 'endptr' points to just past it.
+ */
+ endptr = ptr + lenfirst + lenmiddle + lenlast;
+ while (endptr <= str + bytelen)
+ {
+ compact_trigram(tptr, ptr, endptr - ptr);
tptr++;
+
+ /* Advance to the next character */
+ if (endptr == str + bytelen)
+ break;
+ ptr += lenfirst;
+ lenfirst = lenmiddle;
+ lenmiddle = lenlast;
+ lenlast = pg_mblen_unbounded(endptr);
+ endptr += lenlast;
}
}
- return tptr;
+done:
+ dst->length = tptr - GETARR(dst->datum);
+ Assert(dst->length <= dst->allocated);
}
/*
* Make array of trigrams without sorting and removing duplicate items.
*
- * trg: where to return the array of trigrams.
+ * dst: where to return the array of trigrams.
* str: source string, of length slen bytes.
- * bounds: where to return bounds of trigrams (if needed).
- *
- * Returns length of the generated array.
+ * bounds_p: where to return bounds of trigrams (if needed).
*/
-static int
-generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
+static void
+generate_trgm_only(growable_trgm_array *dst, char *str, int slen, TrgmBound **bounds_p)
{
- trgm *tptr;
+ size_t buflen;
char *buf;
- int charlen,
- bytelen;
+ int bytelen;
char *bword,
*eword;
+ TrgmBound *bounds = NULL;
+ int bounds_allocated = 0;
- if (slen + LPADDING + RPADDING < 3 || slen == 0)
- return 0;
+ init_trgm_array(dst, slen);
- tptr = trg;
+ /*
+ * If requested, allocate an array for the bounds, with the same size as
+ * the trigram array.
+ */
+ if (bounds_p)
+ {
+ bounds_allocated = dst->allocated;
+ bounds = *bounds_p = palloc0_array(TrgmBound, bounds_allocated);
+ }
- /* Allocate a buffer for case-folded, blank-padded words */
- buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4);
+ if (slen + LPADDING + RPADDING < 3 || slen == 0)
+ return;
+ /*
+ * Allocate a buffer for case-folded, blank-padded words.
+ *
+ * As an initial guess, allocate a buffer large enough to hold the
+ * original string with padding, which is always enough when compiled with
+ * !IGNORECASE. If the case-folding produces a string longer than the
+ * original, we'll grow the buffer.
+ */
+ buflen = (size_t) slen + 4;
+ buf = (char *) palloc(buflen);
if (LPADDING > 0)
{
*buf = ' ';
@@ -349,52 +490,59 @@ generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
}
eword = str;
- while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
+ while ((bword = find_word(eword, slen - (eword - str), &eword)) != NULL)
{
+ int oldlen;
+
+ /* Convert word to lower case before extracting trigrams from it */
#ifdef IGNORECASE
- bword = str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
- bytelen = strlen(bword);
+ {
+ char *lowered;
+
+ lowered = str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
+ bytelen = strlen(lowered);
+
+ /* grow the buffer if necessary */
+ if (bytelen > buflen - 4)
+ {
+ pfree(buf);
+ buflen = (size_t) bytelen + 4;
+ buf = (char *) palloc(buflen);
+ if (LPADDING > 0)
+ {
+ *buf = ' ';
+ if (LPADDING > 1)
+ *(buf + 1) = ' ';
+ }
+ }
+ memcpy(buf + LPADDING, lowered, bytelen);
+ pfree(lowered);
+ }
#else
bytelen = eword - bword;
-#endif
-
memcpy(buf + LPADDING, bword, bytelen);
-
-#ifdef IGNORECASE
- pfree(bword);
#endif
buf[LPADDING + bytelen] = ' ';
buf[LPADDING + bytelen + 1] = ' ';
/* Calculate trigrams marking their bounds if needed */
+ oldlen = dst->length;
+ make_trigrams(dst, buf, bytelen + LPADDING + RPADDING);
if (bounds)
- bounds[tptr - trg] |= TRGM_BOUND_LEFT;
- tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
- charlen + LPADDING + RPADDING);
- if (bounds)
- bounds[tptr - trg - 1] |= TRGM_BOUND_RIGHT;
+ {
+ if (bounds_allocated < dst->length)
+ {
+ bounds = repalloc0_array(bounds, TrgmBound, bounds_allocated, dst->allocated);
+ bounds_allocated = dst->allocated;
+ }
+
+ bounds[oldlen] |= TRGM_BOUND_LEFT;
+ bounds[dst->length - 1] |= TRGM_BOUND_RIGHT;
+ }
}
pfree(buf);
-
- return tptr - trg;
-}
-
-/*
- * Guard against possible overflow in the palloc requests below. (We
- * don't worry about the additive constants, since palloc can detect
- * requests that are a little above MaxAllocSize --- we just need to
- * prevent integer overflow in the multiplications.)
- */
-static void
-protect_out_of_mem(int slen)
-{
- if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) ||
- (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length()))
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("out of memory")));
}
/*
@@ -408,19 +556,14 @@ TRGM *
generate_trgm(char *str, int slen)
{
TRGM *trg;
+ growable_trgm_array arr;
int len;
- protect_out_of_mem(slen);
-
- trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
+ generate_trgm_only(&arr, str, slen, NULL);
+ len = arr.length;
+ trg = arr.datum;
trg->flag = ARRKEY;
- len = generate_trgm_only(GETARR(trg), str, slen, NULL);
- SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
-
- if (len == 0)
- return trg;
-
/*
* Make trigrams unique.
*/
@@ -675,8 +818,8 @@ calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
{
bool *found;
pos_trgm *ptrg;
- trgm *trg1;
- trgm *trg2;
+ growable_trgm_array trg1;
+ growable_trgm_array trg2;
int len1,
len2,
len,
@@ -685,27 +828,21 @@ calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
ulen1;
int *trg2indexes;
float4 result;
- TrgmBound *bounds;
-
- protect_out_of_mem(slen1 + slen2);
+ TrgmBound *bounds = NULL;
/* Make positional trigrams */
- trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3);
- trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3);
- if (flags & WORD_SIMILARITY_STRICT)
- bounds = (TrgmBound *) palloc0(sizeof(TrgmBound) * (slen2 / 2 + 1) * 3);
- else
- bounds = NULL;
- len1 = generate_trgm_only(trg1, str1, slen1, NULL);
- len2 = generate_trgm_only(trg2, str2, slen2, bounds);
+ generate_trgm_only(&trg1, str1, slen1, NULL);
+ len1 = trg1.length;
+ generate_trgm_only(&trg2, str2, slen2, (flags & WORD_SIMILARITY_STRICT) ? &bounds : NULL);
+ len2 = trg2.length;
- ptrg = make_positional_trgm(trg1, len1, trg2, len2);
+ ptrg = make_positional_trgm(GETARR(trg1.datum), len1, GETARR(trg2.datum), len2);
len = len1 + len2;
qsort(ptrg, len, sizeof(pos_trgm), comp_ptrgm);
- pfree(trg1);
- pfree(trg2);
+ pfree(trg1.datum);
+ pfree(trg2.datum);
/*
* Merge positional trigrams array: enumerate each trigram and find its
@@ -761,20 +898,20 @@ calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
* str: source string, of length lenstr bytes (need not be null-terminated)
* buf: where to return the substring (must be long enough)
* *bytelen: receives byte length of the found substring
- * *charlen: receives character length of the found substring
*
* Returns pointer to end+1 of the found substring in the source string.
- * Returns NULL if no word found (in which case buf, bytelen, charlen not set)
+ * Returns NULL if no word found (in which case buf and *bytelen are not set)
*
* If the found word is bounded by non-word characters or string boundaries
* then this function will include corresponding padding spaces into buf.
*/
static const char *
get_wildcard_part(const char *str, int lenstr,
- char *buf, int *bytelen, int *charlen)
+ char *buf, int *bytelen)
{
const char *beginword = str;
const char *endword;
+ const char *endstr = str + lenstr;
char *s = buf;
bool in_leading_wildcard_meta = false;
bool in_trailing_wildcard_meta = false;
@@ -787,11 +924,13 @@ get_wildcard_part(const char *str, int lenstr,
* from this loop to the next one, since we may exit at a word character
* that is in_escape.
*/
- while (beginword - str < lenstr)
+ while (beginword < endstr)
{
+ clen = pg_mblen_range(beginword, endstr);
+
if (in_escape)
{
- if (ISWORDCHR(beginword))
+ if (ISWORDCHR(beginword, clen))
break;
in_escape = false;
in_leading_wildcard_meta = false;
@@ -802,12 +941,12 @@ get_wildcard_part(const char *str, int lenstr,
in_escape = true;
else if (ISWILDCARDCHAR(beginword))
in_leading_wildcard_meta = true;
- else if (ISWORDCHR(beginword))
+ else if (ISWORDCHR(beginword, clen))
break;
else
in_leading_wildcard_meta = false;
}
- beginword += pg_mblen(beginword);
+ beginword += clen;
}
/*
@@ -820,18 +959,13 @@ get_wildcard_part(const char *str, int lenstr,
* Add left padding spaces if preceding character wasn't wildcard
* meta-character.
*/
- *charlen = 0;
if (!in_leading_wildcard_meta)
{
if (LPADDING > 0)
{
*s++ = ' ';
- (*charlen)++;
if (LPADDING > 1)
- {
*s++ = ' ';
- (*charlen)++;
- }
}
}
@@ -840,15 +974,14 @@ get_wildcard_part(const char *str, int lenstr,
* string boundary. Strip escapes during copy.
*/
endword = beginword;
- while (endword - str < lenstr)
+ while (endword < endstr)
{
- clen = pg_mblen(endword);
+ clen = pg_mblen_range(endword, endstr);
if (in_escape)
{
- if (ISWORDCHR(endword))
+ if (ISWORDCHR(endword, clen))
{
memcpy(s, endword, clen);
- (*charlen)++;
s += clen;
}
else
@@ -873,10 +1006,9 @@ get_wildcard_part(const char *str, int lenstr,
in_trailing_wildcard_meta = true;
break;
}
- else if (ISWORDCHR(endword))
+ else if (ISWORDCHR(endword, clen))
{
memcpy(s, endword, clen);
- (*charlen)++;
s += clen;
}
else
@@ -894,12 +1026,8 @@ get_wildcard_part(const char *str, int lenstr,
if (RPADDING > 0)
{
*s++ = ' ';
- (*charlen)++;
if (RPADDING > 1)
- {
*s++ = ' ';
- (*charlen)++;
- }
}
}
@@ -918,24 +1046,21 @@ TRGM *
generate_wildcard_trgm(const char *str, int slen)
{
TRGM *trg;
- char *buf,
- *buf2;
- trgm *tptr;
+ growable_trgm_array arr;
+ char *buf;
int len,
- charlen,
bytelen;
const char *eword;
- protect_out_of_mem(slen);
-
- trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
- trg->flag = ARRKEY;
- SET_VARSIZE(trg, TRGMHDRSIZE);
-
if (slen + LPADDING + RPADDING < 3 || slen == 0)
+ {
+ trg = (TRGM *) palloc(TRGMHDRSIZE);
+ trg->flag = ARRKEY;
+ SET_VARSIZE(trg, TRGMHDRSIZE);
return trg;
+ }
- tptr = GETARR(trg);
+ init_trgm_array(&arr, slen);
/* Allocate a buffer for blank-padded, but not yet case-folded, words */
buf = palloc_array(char, slen + 4);
@@ -945,39 +1070,41 @@ generate_wildcard_trgm(const char *str, int slen)
*/
eword = str;
while ((eword = get_wildcard_part(eword, slen - (eword - str),
- buf, &bytelen, &charlen)) != NULL)
+ buf, &bytelen)) != NULL)
{
+ char *word;
+
#ifdef IGNORECASE
- buf2 = str_tolower(buf, bytelen, DEFAULT_COLLATION_OID);
- bytelen = strlen(buf2);
+ word = str_tolower(buf, bytelen, DEFAULT_COLLATION_OID);
+ bytelen = strlen(word);
#else
- buf2 = buf;
+ word = buf;
#endif
/*
* count trigrams
*/
- tptr = make_trigrams(tptr, buf2, bytelen, charlen);
+ make_trigrams(&arr, word, bytelen);
#ifdef IGNORECASE
- pfree(buf2);
+ pfree(word);
#endif
}
pfree(buf);
- if ((len = tptr - GETARR(trg)) == 0)
- return trg;
-
/*
* Make trigrams unique.
*/
+ trg = arr.datum;
+ len = arr.length;
if (len > 1)
{
qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
}
+ trg->flag = ARRKEY;
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
return trg;
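
The sizing argument in init_trgm_array() is easy to check by hand: with the default padding of two leading blanks and one trailing blank, a one-character word yields exactly two trigrams. A small sketch, with the padding widths assumed from pg_trgm's defaults:

    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        const char *word = "a";
        char        buf[16];
        int         len;

        /* LPADDING = 2, RPADDING = 1 (assumed defaults) */
        snprintf(buf, sizeof(buf), "  %s ", word);
        len = (int) strlen(buf);            /* 4 */

        /* number of trigrams = padded length - 2 */
        for (int i = 0; i <= len - 3; i++)
            printf("[%.3s]\n", buf + i);    /* prints "[  a]" and "[ a ]" */

        return 0;
    }

For "a b c" (slen = 5) that is three words times two trigrams, exactly the (size_t) slen + 1 allotment; only IGNORECASE case-folding that lengthens the string can push past it, which is why enlarge_trgm_array() exists.
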
diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c
index 1d1b5fe304d..efee4cf5fb4 100644
--- a/contrib/pg_trgm/trgm_regexp.c
+++ b/contrib/pg_trgm/trgm_regexp.c
@@ -483,7 +483,7 @@ static TRGM *createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph,
static void RE_compile(regex_t *regex, text *text_re,
int cflags, Oid collation);
static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA);
-static bool convertPgWchar(pg_wchar c, trgm_mb_char *result);
+static int convertPgWchar(pg_wchar c, trgm_mb_char *result);
static void transformGraph(TrgmNFA *trgmNFA);
static void processState(TrgmNFA *trgmNFA, TrgmState *state);
static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key);
@@ -807,10 +807,11 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
for (j = 0; j < charsCount; j++)
{
trgm_mb_char c;
+ int clen = convertPgWchar(chars[j], &c);
- if (!convertPgWchar(chars[j], &c))
+ if (!clen)
continue; /* ok to ignore it altogether */
- if (ISWORDCHR(c.bytes))
+ if (ISWORDCHR(c.bytes, clen))
colorInfo->wordChars[colorInfo->wordCharsCount++] = c;
else
colorInfo->containsNonWord = true;
@@ -822,13 +823,15 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
/*
* Convert pg_wchar to multibyte format.
- * Returns false if the character should be ignored completely.
+ * Returns 0 if the character should be ignored completely, else returns its
+ * byte length.
*/
-static bool
+static int
convertPgWchar(pg_wchar c, trgm_mb_char *result)
{
/* "s" has enough space for a multibyte character and a trailing NUL */
char s[MAX_MULTIBYTE_CHAR_LEN + 1];
+ int clen;
/*
* We can ignore the NUL character, since it can never appear in a PG text
@@ -836,11 +839,11 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
* reconstructing trigrams.
*/
if (c == 0)
- return false;
+ return 0;
/* Do the conversion, making sure the result is NUL-terminated */
memset(s, 0, sizeof(s));
- pg_wchar2mb_with_len(&c, s, 1);
+ clen = pg_wchar2mb_with_len(&c, s, 1);
/*
* In IGNORECASE mode, we can ignore uppercase characters. We assume that
@@ -857,12 +860,12 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
*/
#ifdef IGNORECASE
{
- char *lowerCased = str_tolower(s, strlen(s), DEFAULT_COLLATION_OID);
+ char *lowerCased = str_tolower(s, clen, DEFAULT_COLLATION_OID);
if (strcmp(lowerCased, s) != 0)
{
pfree(lowerCased);
- return false;
+ return 0;
}
pfree(lowerCased);
}
@@ -870,7 +873,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result)
/* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */
memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN);
- return true;
+ return clen;
}
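
convertPgWchar() now reports the encoded byte length so that getColorInfo() can pass it straight to ISWORDCHR. The conversion itself is the usual code-point-to-bytes encoding; a standalone UTF-8 sketch of the same contract (0 means "ignore this character", otherwise the byte length):

    /* Encode a code point as UTF-8; return the byte length, or 0 to ignore. */
    static int
    encode_utf8(unsigned cp, unsigned char *out)
    {
        if (cp == 0)
            return 0;               /* NUL never appears in a PG text value */
        if (cp < 0x80)
        {
            out[0] = (unsigned char) cp;
            return 1;
        }
        if (cp < 0x800)
        {
            out[0] = (unsigned char) (0xC0 | (cp >> 6));
            out[1] = (unsigned char) (0x80 | (cp & 0x3F));
            return 2;
        }
        if (cp < 0x10000)
        {
            out[0] = (unsigned char) (0xE0 | (cp >> 12));
            out[1] = (unsigned char) (0x80 | ((cp >> 6) & 0x3F));
            out[2] = (unsigned char) (0x80 | (cp & 0x3F));
            return 3;
        }
        out[0] = (unsigned char) (0xF0 | (cp >> 18));
        out[1] = (unsigned char) (0x80 | ((cp >> 12) & 0x3F));
        out[2] = (unsigned char) (0x80 | ((cp >> 6) & 0x3F));
        out[3] = (unsigned char) (0x80 | (cp & 0x3F));
        return 4;
    }
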
diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out
index e10f1706015..d26f0ab7589 100644
--- a/contrib/pg_visibility/expected/pg_visibility.out
+++ b/contrib/pg_visibility/expected/pg_visibility.out
@@ -207,7 +207,7 @@ select pg_truncate_visibility_map('test_partition');
-- test the case where vacuum phase I does not need to modify the heap buffer
-- and only needs to set the VM
-create table test_vac_unmodified_heap(a int);
+create temp table test_vac_unmodified_heap(a int);
insert into test_vac_unmodified_heap values (1);
vacuum (freeze) test_vac_unmodified_heap;
select pg_visibility_map_summary('test_vac_unmodified_heap');
diff --git a/contrib/pg_visibility/sql/pg_visibility.sql b/contrib/pg_visibility/sql/pg_visibility.sql
index 57af8a0c5b6..0888adb96a6 100644
--- a/contrib/pg_visibility/sql/pg_visibility.sql
+++ b/contrib/pg_visibility/sql/pg_visibility.sql
@@ -97,7 +97,7 @@ select pg_truncate_visibility_map('test_partition');
-- test the case where vacuum phase I does not need to modify the heap buffer
-- and only needs to set the VM
-create table test_vac_unmodified_heap(a int);
+create temp table test_vac_unmodified_heap(a int);
insert into test_vac_unmodified_heap values (1);
vacuum (freeze) test_vac_unmodified_heap;
select pg_visibility_map_summary('test_vac_unmodified_heap');
diff --git a/contrib/pgcrypto/Makefile b/contrib/pgcrypto/Makefile
index 69afa375011..17d2b0c5ed1 100644
--- a/contrib/pgcrypto/Makefile
+++ b/contrib/pgcrypto/Makefile
@@ -44,7 +44,8 @@ REGRESS = init md5 sha1 hmac-md5 hmac-sha1 blowfish rijndael \
sha2 des 3des cast5 \
crypt-des crypt-md5 crypt-blowfish crypt-xdes \
pgp-armor pgp-decrypt pgp-encrypt pgp-encrypt-md5 $(CF_PGP_TESTS) \
- pgp-pubkey-decrypt pgp-pubkey-encrypt pgp-info crypt-shacrypt
+ pgp-pubkey-decrypt pgp-pubkey-encrypt pgp-pubkey-session \
+ pgp-info crypt-shacrypt
ifdef USE_PGXS
PG_CONFIG = pg_config
diff --git a/contrib/pgcrypto/crypt-sha.c b/contrib/pgcrypto/crypt-sha.c
index 7ec21771a83..e8f32bc3896 100644
--- a/contrib/pgcrypto/crypt-sha.c
+++ b/contrib/pgcrypto/crypt-sha.c
@@ -328,7 +328,7 @@ px_crypt_shacrypt(const char *pw, const char *salt, char *passwd, unsigned dstle
ereport(ERROR,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid character in salt string: \"%.*s\"",
- pg_mblen(ep), ep));
+ pg_mblen_cstr(ep), ep));
}
else
{
diff --git a/contrib/pgcrypto/expected/pgp-decrypt.out b/contrib/pgcrypto/expected/pgp-decrypt.out
index eb049ba9d44..8ce6466f2e9 100644
--- a/contrib/pgcrypto/expected/pgp-decrypt.out
+++ b/contrib/pgcrypto/expected/pgp-decrypt.out
@@ -315,7 +315,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
\xda39a3ee5e6b4b0d3255bfef95601890afd80709
(1 row)
-select digest(pgp_sym_decrypt(dearmor('
+select digest(pgp_sym_decrypt_bytea(dearmor('
-----BEGIN PGP MESSAGE-----
Comment: dat3.aes.sha1.mdc.s2k3.z0
@@ -387,6 +387,28 @@ ERROR: Wrong key or corrupt data
select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
NOTICE: dbg: parse_literal_data: data type=b
ERROR: Not text data
+-- NUL byte in text decrypt. Ciphertext source:
+-- printf 'a\x00\xc' | gpg --homedir /nonexistent \
+-- --personal-compress-preferences uncompressed --textmode \
+-- --personal-cipher-preferences aes --no-emit-version --batch \
+-- --symmetric --passphrase key --armor
+do $$
+begin
+ perform pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+jA0EBwMCXLc8pozB10Fg0jQBVUID59TLvWutJp0j6eh9ZgjqIRzdYaIymFB8y4XH
+vu0YlJP5D5BX7yqZ+Pry7TlDmiFO
+=rV7z
+-----END PGP MESSAGE-----
+'), 'key', 'debug=1');
+exception when others then
+ raise '%',
+ regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]');
+end
+$$;
+ERROR: invalid byte sequence for encoding [REDACTED]: 0x00
+CONTEXT: PL/pgSQL function inline_code_block line 12 at RAISE
-- Decryption with a certain incorrect key yields an apparent BZip2-compressed
-- plaintext. Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
-- until the random prefix gave rise to that property.
diff --git a/contrib/pgcrypto/expected/pgp-decrypt_1.out b/contrib/pgcrypto/expected/pgp-decrypt_1.out
index 80a4c48613d..ee57ad43cb7 100644
--- a/contrib/pgcrypto/expected/pgp-decrypt_1.out
+++ b/contrib/pgcrypto/expected/pgp-decrypt_1.out
@@ -311,7 +311,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
\xda39a3ee5e6b4b0d3255bfef95601890afd80709
(1 row)
-select digest(pgp_sym_decrypt(dearmor('
+select digest(pgp_sym_decrypt_bytea(dearmor('
-----BEGIN PGP MESSAGE-----
Comment: dat3.aes.sha1.mdc.s2k3.z0
@@ -383,6 +383,28 @@ ERROR: Wrong key or corrupt data
select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
NOTICE: dbg: parse_literal_data: data type=b
ERROR: Not text data
+-- NUL byte in text decrypt. Ciphertext source:
+-- printf 'a\x00\xc' | gpg --homedir /nonexistent \
+-- --personal-compress-preferences uncompressed --textmode \
+-- --personal-cipher-preferences aes --no-emit-version --batch \
+-- --symmetric --passphrase key --armor
+do $$
+begin
+ perform pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+jA0EBwMCXLc8pozB10Fg0jQBVUID59TLvWutJp0j6eh9ZgjqIRzdYaIymFB8y4XH
+vu0YlJP5D5BX7yqZ+Pry7TlDmiFO
+=rV7z
+-----END PGP MESSAGE-----
+'), 'key', 'debug=1');
+exception when others then
+ raise '%',
+ regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]');
+end
+$$;
+ERROR: invalid byte sequence for encoding [REDACTED]: 0x00
+CONTEXT: PL/pgSQL function inline_code_block line 12 at RAISE
-- Decryption with a certain incorrect key yields an apparent BZip2-compressed
-- plaintext. Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
-- until the random prefix gave rise to that property.
diff --git a/contrib/pgcrypto/expected/pgp-pubkey-session.out b/contrib/pgcrypto/expected/pgp-pubkey-session.out
new file mode 100644
index 00000000000..e57cb8fab99
--- /dev/null
+++ b/contrib/pgcrypto/expected/pgp-pubkey-session.out
@@ -0,0 +1,47 @@
+-- Test for overflow with session key at decrypt.
+-- Data automatically generated by scripts/pgp_session_data.py.
+-- See this file for details explaining how this data is generated.
+SELECT pgp_pub_decrypt_bytea(
+'\xc1c04c030000000000000000020800a46f5b9b1905b49457a6485474f71ed9b46c2527e1
+da08e1f7871e12c3d38828f2076b984a595bf60f616599ca5729d547de06a258bfbbcd30
+94a321e4668cd43010f0ca8ecf931e5d39bda1152c50c367b11c723f270729245d3ebdbd
+0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5060af7603cfd9ed186ebadd616
+3b50ae42bea5f6d14dda24e6d4687b434c175084515d562e896742b0ba9a1c87d5642e10
+a5550379c71cc490a052ada483b5d96526c0a600fc51755052aa77fdf72f7b4989b920e7
+b90f4b30787a46482670d5caecc7a515a926055ad5509d135702ce51a0e4c1033f2d939d
+8f0075ec3428e17310da37d3d2d7ad1ce99adcc91cd446c366c402ae1ee38250343a7fcc
+0f8bc28020e603d7a4795ef0dcc1c04c030000000000000000020800a46f5b9b1905b494
+57a6485474f71ed9b46c2527e1da08e1f7871e12c3d38828f2076b984a595bf60f616599
+ca5729d547de06a258bfbbcd3094a321e4668cd43010f0ca8ecf931e5d39bda1152c50c3
+67b11c723f270729245d3ebdbd0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5
+060af7603cfd9ed186ebadd6163b50ae42bea5f6d14dda24e6d4687b434c175084515d56
+2e896742b0ba9a1c87d5642e10a5550379c71cc490a052ada483b5d96526c0a600fc5175
+5052aa77fdf72f7b4989b920e7b90f4b30787a46482670d5caecc7a515a926055ad5509d
+135702ce51a0e4c1033f2d939d8f0075ec3428e17310da37d3d2d7ad1ce99adc'::bytea,
+'\xc7c2d8046965d657020800eef8bf1515adb1a3ee7825f75c668ea8dd3e3f9d13e958f6ad
+9c55adc0c931a4bb00abe1d52cf7bb0c95d537949d277a5292ede375c6b2a67a3bf7d19f
+f975bb7e7be35c2d8300dacba360a0163567372f7dc24000cc7cb6170bedc8f3b1f98c12
+07a6cb4de870a4bc61319b139dcc0e20c368fd68f8fd346d2c0b69c5aed560504e2ec6f1
+23086fe3c5540dc4dd155c0c67257c4ada862f90fe172ace344089da8135e92aca5c2709
+f1c1bc521798bb8c0365841496e709bd184132d387e0c9d5f26dc00fd06c3a76ef66a75c
+138285038684707a847b7bd33cfbefbf1d336be954a8048946af97a66352adef8e8b5ae4
+c4748c6f2510265b7a8267bc370dbb00110100010007ff7e72d4f95d2d39901ac12ca5c5
+18e767e719e72340c3fab51c8c5ab1c40f31db8eaffe43533fa61e2dbca2c3f4396c0847
+e5434756acbb1f68128f4136bb135710c89137d74538908dac77967de9e821c559700dd9
+de5a2727eec1f5d12d5d74869dd1de45ed369d94a8814d23861dd163f8c27744b26b98f0
+239c2e6dd1e3493b8cc976fdc8f9a5e250f715aa4c3d7d5f237f8ee15d242e8fa941d1a0
+ed9550ab632d992a97518d142802cb0a97b251319bf5742db8d9d8cbaa06cdfba2d75bc9
+9d77a51ff20bd5ba7f15d7af6e85b904de2855d19af08d45f39deb85403033c69c767a8e
+74a343b1d6c8911d34ea441ac3850e57808ed3d885835cbe6c79d10400ef16256f3d5c4c
+3341516a2d2aa888df81b603f48a27f3666b40f992a857c1d11ff639cd764a9b42d5a1f8
+58b4aeee36b85508bb5e8b91ef88a7737770b330224479d9b44eae8c631bc43628b69549
+507c0a1af0be0dd7696015abea722b571eb35eefc4ab95595378ec12814727443f625fcd
+183bb9b3bccf53b54dd0e5e7a50400ffe08537b2d4e6074e4a1727b658cfccdec8962302
+25e300c05690de45f7065c3d40d86f544a64d51a3e94424f9851a16d1322ebdb41fa8a45
+3131f3e2dc94e858e6396722643df382680f815e53bcdcde5da622f50530a83b217f1103
+cdd6e5e9babe1e415bbff28d44bd18c95f43bbd04afeb2a2a99af38a571c7540de21df03
+ff62c0a33d9143dd3f639893f47732c11c5a12c6052d1935f4d507b7ae1f76ab0e9a69b8
+7305a7f7c19bd509daf4903bff614bc26d118f03e461469c72c12d3a2bb4f78e4d342ce8
+487723649a01ed2b9eb11c662134502c098d55dfcd361939d8370873422c3da75a515a75
+9ffedfe7df44fb3c20f81650801a30d43b5c90b98b3eee'::bytea);
+ERROR: Session key too big
diff --git a/contrib/pgcrypto/meson.build b/contrib/pgcrypto/meson.build
index c9c48f16f90..4f255c8cb05 100644
--- a/contrib/pgcrypto/meson.build
+++ b/contrib/pgcrypto/meson.build
@@ -52,6 +52,7 @@ pgcrypto_regress = [
'pgp-encrypt-md5',
'pgp-pubkey-decrypt',
'pgp-pubkey-encrypt',
+ 'pgp-pubkey-session',
'pgp-info',
'crypt-shacrypt'
]
diff --git a/contrib/pgcrypto/pgp-info.c b/contrib/pgcrypto/pgp-info.c
index 83dc60486bd..6c2be4713ab 100644
--- a/contrib/pgcrypto/pgp-info.c
+++ b/contrib/pgcrypto/pgp-info.c
@@ -169,7 +169,7 @@ pgp_get_keyid(MBuf *pgp_data, char *dst)
break;
case PGP_PKT_SYMENCRYPTED_SESSKEY:
got_symenc_key++;
- /* fall through */
+ pg_fallthrough;
case PGP_PKT_SIGNATURE:
case PGP_PKT_MARKER:
case PGP_PKT_TRUST:
diff --git a/contrib/pgcrypto/pgp-pgsql.c b/contrib/pgcrypto/pgp-pgsql.c
index 3e47b9364ab..d3e7895b0d9 100644
--- a/contrib/pgcrypto/pgp-pgsql.c
+++ b/contrib/pgcrypto/pgp-pgsql.c
@@ -631,6 +631,7 @@ pgp_sym_decrypt_text(PG_FUNCTION_ARGS)
arg = PG_GETARG_TEXT_PP(2);
res = decrypt_internal(0, 1, data, key, NULL, arg);
+ pg_verifymbstr(VARDATA_ANY(res), VARSIZE_ANY_EXHDR(res), false);
PG_FREE_IF_COPY(data, 0);
PG_FREE_IF_COPY(key, 1);
@@ -732,6 +733,7 @@ pgp_pub_decrypt_text(PG_FUNCTION_ARGS)
arg = PG_GETARG_TEXT_PP(3);
res = decrypt_internal(1, 1, data, key, psw, arg);
+ pg_verifymbstr(VARDATA_ANY(res), VARSIZE_ANY_EXHDR(res), false);
PG_FREE_IF_COPY(data, 0);
PG_FREE_IF_COPY(key, 1);
diff --git a/contrib/pgcrypto/pgp-pubdec.c b/contrib/pgcrypto/pgp-pubdec.c
index a0a5738a40e..2a13aa3e6ad 100644
--- a/contrib/pgcrypto/pgp-pubdec.c
+++ b/contrib/pgcrypto/pgp-pubdec.c
@@ -157,6 +157,7 @@ pgp_parse_pubenc_sesskey(PGP_Context *ctx, PullFilter *pkt)
uint8 *msg;
int msglen;
PGP_MPI *m;
+ unsigned sess_key_len;
pk = ctx->pub_key;
if (pk == NULL)
@@ -220,11 +221,19 @@ pgp_parse_pubenc_sesskey(PGP_Context *ctx, PullFilter *pkt)
if (res < 0)
goto out;
+ sess_key_len = msglen - 3;
+ if (sess_key_len > PGP_MAX_KEY)
+ {
+ px_debug("incorrect session key length=%u", sess_key_len);
+ res = PXE_PGP_KEY_TOO_BIG;
+ goto out;
+ }
+
/*
* got sesskey
*/
ctx->cipher_algo = *msg;
- ctx->sess_key_len = msglen - 3;
+ ctx->sess_key_len = sess_key_len;
memcpy(ctx->sess_key, msg + 1, ctx->sess_key_len);
out:
diff --git a/contrib/pgcrypto/px.c b/contrib/pgcrypto/px.c
index 4d668d4e496..f08bc498ac8 100644
--- a/contrib/pgcrypto/px.c
+++ b/contrib/pgcrypto/px.c
@@ -65,6 +65,7 @@ static const struct error_desc px_err_list[] = {
{PXE_PGP_UNEXPECTED_PKT, "Unexpected packet in key data"},
{PXE_PGP_MATH_FAILED, "Math operation failed"},
{PXE_PGP_SHORT_ELGAMAL_KEY, "Elgamal keys must be at least 1024 bits long"},
+ {PXE_PGP_KEY_TOO_BIG, "Session key too big"},
{PXE_PGP_UNKNOWN_PUBALGO, "Unknown public-key encryption algorithm"},
{PXE_PGP_WRONG_KEY, "Wrong key"},
{PXE_PGP_MULTIPLE_KEYS,
diff --git a/contrib/pgcrypto/px.h b/contrib/pgcrypto/px.h
index 4b81fceab8e..a09533a3582 100644
--- a/contrib/pgcrypto/px.h
+++ b/contrib/pgcrypto/px.h
@@ -75,7 +75,7 @@
/* -108 is unused */
#define PXE_PGP_MATH_FAILED -109
#define PXE_PGP_SHORT_ELGAMAL_KEY -110
-/* -111 is unused */
+#define PXE_PGP_KEY_TOO_BIG -111
#define PXE_PGP_UNKNOWN_PUBALGO -112
#define PXE_PGP_WRONG_KEY -113
#define PXE_PGP_MULTIPLE_KEYS -114
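
One subtlety in the earlier pgp-pubdec.c check: sess_key_len is unsigned, so a malformed message with msglen < 3 wraps around to a huge value and is rejected by the same upper-bound comparison, covering the short-message case for free. A sketch (the PGP_MAX_KEY value shown is an assumption, for illustration only):

    #include <stdio.h>

    #define PGP_MAX_KEY (256 / 8)   /* assumed value, for illustration */

    int
    main(void)
    {
        int         msglen = 2;                 /* too short for algo + checksum */
        unsigned    sess_key_len = msglen - 3;  /* wraps to UINT_MAX */

        if (sess_key_len > PGP_MAX_KEY)
            printf("rejected: length=%u\n", sess_key_len);

        return 0;
    }
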
diff --git a/contrib/pgcrypto/scripts/pgp_session_data.py b/contrib/pgcrypto/scripts/pgp_session_data.py
new file mode 100644
index 00000000000..999350bb2bc
--- /dev/null
+++ b/contrib/pgcrypto/scripts/pgp_session_data.py
@@ -0,0 +1,491 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Generate PGP data to check the session key length of the input data provided
+# to pgp_pub_decrypt_bytea().
+#
+# First, the crafted data is generated from valid RSA data, freshly generated
+# by this script each time it is run, see generate_rsa_keypair().
+# Second, the crafted PGP data is built, see build_message_data() and
+# build_key_data(). Finally, the resulting SQL script is generated.
+#
+# This script writes to stdout the SQL file that is used in the regression
+# tests of pgcrypto. The following command regenerates the file, which
+# should never be edited by hand:
+# python3 scripts/pgp_session_data.py > sql/pgp-pubkey-session.sql
+
+import os
+import re
+import struct
+import secrets
+import sys
+import time
+
+# pwntools, for binary packing (p32)
+from pwn import p32
+
+# Cryptographic libraries, to craft the PGP data.
+from Crypto.Cipher import AES
+from Crypto.PublicKey import RSA
+from Crypto.Util.number import inverse
+
+# AES key used for session key encryption (16 bytes for AES-128)
+AES_KEY = b'\x01' * 16
+
+def generate_rsa_keypair(key_size: int = 2048) -> dict:
+ """
+ Generate a fresh RSA key pair.
+
+ The generated key includes all components needed for PGP operations:
+ - n: public modulus (p * q)
+ - e: public exponent (typically 65537)
+ - d: private exponent (e^-1 mod phi(n))
+ - p, q: prime factors of n
+ - u: coefficient (p^-1 mod q) for CRT optimization
+
+    The caller can pass the desired key size, which defaults to 2048 bits.
+    This function returns the RSA key components, after performing some
+    basic validation on them.
+ """
+
+ start_time = time.time()
+
+ # Generate RSA key
+ key = RSA.generate(key_size)
+
+ # Extract all key components
+ rsa_components = {
+ 'n': key.n, # Public modulus (p * q)
+ 'e': key.e, # Public exponent (typically 65537)
+ 'd': key.d, # Private exponent (e^-1 mod phi(n))
+ 'p': key.p, # First prime factor
+ 'q': key.q, # Second prime factor
+ 'u': inverse(key.p, key.q) # Coefficient for CRT: p^-1 mod q
+ }
+
+ # Validate key components for correctness
+ validate_rsa_key(rsa_components)
+
+ return rsa_components
+
+def validate_rsa_key(rsa: dict) -> None:
+ """
+ Validate a generated RSA key.
+
+ This function performs basic validation to ensure the RSA key is properly
+ constructed and all components are consistent, at least mathematically.
+
+ Validations performed:
+ 1. n = p * q (modulus is product of primes)
+ 2. gcd(e, phi(n)) = 1 (public exponent is coprime to phi(n))
+ 3. (d * e) mod(phi(n)) = 1 (private exponent is multiplicative inverse)
+ 4. (u * p) (mod q) = 1 (coefficient is correct for CRT)
+ """
+
+ n, e, d, p, q, u = rsa['n'], rsa['e'], rsa['d'], rsa['p'], rsa['q'], rsa['u']
+
+ # Check that n = p * q
+ if n != p * q:
+ raise ValueError("RSA validation failed: n <> p * q")
+
+ # Check that p and q are different
+ if p == q:
+ raise ValueError("RSA validation failed: p = q (not allowed)")
+
+ # Calculate phi(n) = (p-1)(q-1)
+ phi_n = (p - 1) * (q - 1)
+
+ # Check that gcd(e, phi(n)) = 1
+ def gcd(a, b):
+ while b:
+ a, b = b, a % b
+ return a
+
+ if gcd(e, phi_n) != 1:
+ raise ValueError("RSA validation failed: gcd(e, phi(n)) <> 1")
+
+ # Check that (d * e) mod(phi(n)) = 1
+ if (d * e) % phi_n != 1:
+ raise ValueError("RSA validation failed: d * e <> 1 (mod phi(n))")
+
+ # Check that (u * p) (mod q) = 1
+ if (u * p) % q != 1:
+ raise ValueError("RSA validation failed: u * p <> 1 (mod q)")
+
+def mpi_encode(x: int) -> bytes:
+ """
+ Encode an integer as an OpenPGP Multi-Precision Integer (MPI).
+
+ Format (RFC 4880, Section 3.2):
+ - 2 bytes: bit length of the integer (big-endian)
+ - N bytes: the integer in big-endian format
+
+ This is used to encode RSA key components (n, e, d, p, q, u) in PGP
+ packets.
+
+    Takes the integer to encode and returns its MPI encoding.
+
+ For example:
+ mpi_encode(65537) -> b'\x00\x11\x01\x00\x01'
+ (17 bits, value 0x010001)
+ """
+ if x < 0:
+ raise ValueError("MPI cannot encode negative integers")
+
+ if x == 0:
+ # Special case: zero has 0 bits and empty magnitude
+ bits = 0
+ mag = b""
+ else:
+ # Calculate bit length and convert to bytes
+ bits = x.bit_length()
+ mag = x.to_bytes((bits + 7) // 8, 'big')
+
+ # Pack: 2-byte bit length + magnitude bytes
+ return struct.pack('>H', bits) + mag
+
+def new_packet(tag: int, payload: bytes) -> bytes:
+ """
+ Create a new OpenPGP packet with a proper header.
+
+ OpenPGP packet format (RFC 4880, Section 4.2):
+ - New packet format: 0xC0 | tag
+ - Length encoding depends on payload size:
+ * 0-191: single byte
+ * 192-8383: two bytes (192 + ((length - 192) >> 8), (length - 192) & 0xFF)
+ * 8384+: five bytes (0xFF + 4-byte big-endian length)
+
+    The packet is built from a packet tag (1-63) and a payload; the result
+    is a complete OpenPGP packet.
+
+ For example:
+ new_packet(1, b'data') -> b'\xC1\x04data'
+ (Tag 1, length 4, payload 'data')
+ """
+ # New packet format: set bit 7 and 6, clear bit 5, tag in bits 0-5
+ first = 0xC0 | (tag & 0x3F)
+ ln = len(payload)
+
+ # Encode length according to OpenPGP specification
+ if ln <= 191:
+ # Single byte length for small packets
+ llen = bytes([ln])
+ elif ln <= 8383:
+ # Two-byte length for medium packets
+ ln2 = ln - 192
+ llen = bytes([192 + (ln2 >> 8), ln2 & 0xFF])
+ else:
+ # Five-byte length for large packets
+ llen = bytes([255]) + struct.pack('>I', ln)
+
+ return bytes([first]) + llen + payload
+
+def build_key_data(rsa: dict) -> bytes:
+ """
+ Build the key data, containing an RSA private key.
+
+ The RSA contents should have been generated previously.
+
+ Format (see RFC 4880, Section 5.5.3):
+ - 1 byte: version (4)
+ - 4 bytes: creation time (current Unix timestamp)
+ - 1 byte: public key algorithm (2 = RSA encrypt)
+ - MPI: RSA public modulus n
+ - MPI: RSA public exponent e
+ - 1 byte: string-to-key usage (0 = no encryption)
+ - MPI: RSA private exponent d
+ - MPI: RSA prime p
+ - MPI: RSA prime q
+ - MPI: RSA coefficient u = p^-1 mod q
+ - 2 bytes: checksum of private key material
+
+    Takes a set of RSA key components (n, e, d, p, q, u) and returns a
+    complete secret key packet.
+ """
+
+ # Public key portion
+ ver = bytes([4]) # Version 4 key
+ ctime = struct.pack('>I', int(time.time())) # Current Unix timestamp
+ algo = bytes([2]) # RSA encrypt algorithm
+ n_mpi = mpi_encode(rsa['n']) # Public modulus
+ e_mpi = mpi_encode(rsa['e']) # Public exponent
+ pub = ver + ctime + algo + n_mpi + e_mpi
+
+ # Private key portion
+ hide_type = bytes([0]) # No string-to-key encryption
+ d_mpi = mpi_encode(rsa['d']) # Private exponent
+ p_mpi = mpi_encode(rsa['p']) # Prime p
+ q_mpi = mpi_encode(rsa['q']) # Prime q
+ u_mpi = mpi_encode(rsa['u']) # Coefficient u = p^-1 mod q
+
+ # Calculate checksum of private key material (simple sum mod 65536)
+ private_data = d_mpi + p_mpi + q_mpi + u_mpi
+ cksum = sum(private_data) & 0xFFFF
+
+ secret = hide_type + private_data + struct.pack('>H', cksum)
+ payload = pub + secret
+
+ return new_packet(7, payload)
+
+def pgp_cfb_encrypt_resync(key, plaintext):
+ """
+ Implement OpenPGP CFB mode with resync.
+
+ OpenPGP CFB mode is a variant of standard CFB with a resync operation
+ after the first two blocks.
+
+ Algorithm (RFC 4880, Section 13.9):
+ 1. Block 1: FR=zeros, encrypt full block_size bytes
+ 2. Block 2: FR=block1, encrypt only 2 bytes
+ 3. Resync: FR = block1[2:] + block2
+ 4. Remaining blocks: standard CFB mode
+
+ This function uses the following arguments:
+ - key: AES encryption key (16 bytes for AES-128)
+ - plaintext: Data to encrypt
+ """
+ block_size = 16 # AES block size
+ cipher = AES.new(key[:16], AES.MODE_ECB) # Use ECB for manual CFB
+ ciphertext = b''
+
+ # Block 1: FR=zeros, encrypt full 16 bytes
+ FR = b'\x00' * block_size
+ FRE = cipher.encrypt(FR) # Encrypt the feedback register
+ block1 = bytes(a ^ b for a, b in zip(FRE, plaintext[0:16]))
+ ciphertext += block1
+
+ # Block 2: FR=block1, encrypt only 2 bytes
+ FR = block1
+ FRE = cipher.encrypt(FR)
+ block2 = bytes(a ^ b for a, b in zip(FRE[0:2], plaintext[16:18]))
+ ciphertext += block2
+
+ # Resync: FR = block1[2:16] + block2[0:2]
+ # This is the key difference from standard CFB mode
+ FR = block1[2:] + block2
+
+ # Block 3+: Continue with standard CFB mode
+ pos = 18
+ while pos < len(plaintext):
+ FRE = cipher.encrypt(FR)
+ chunk_len = min(block_size, len(plaintext) - pos)
+ chunk = plaintext[pos:pos+chunk_len]
+ enc_chunk = bytes(a ^ b for a, b in zip(FRE[:chunk_len], chunk))
+ ciphertext += enc_chunk
+
+ # Update feedback register for next iteration
+ if chunk_len == block_size:
+ FR = enc_chunk
+ else:
+ # Partial block: pad with old FR bytes
+ FR = enc_chunk + FR[chunk_len:]
+ pos += chunk_len
+
+ return ciphertext
+
+def build_literal_data_packet(data: bytes) -> bytes:
+ """
+ Build a literal data packet containing a message.
+
+ Format (RFC 4880, Section 5.9):
+ - 1 byte: data format ('b' = binary, 't' = text, 'u' = UTF-8 text)
+ - 1 byte: filename length (0 = no filename)
+ - N bytes: filename (empty in this case)
+ - 4 bytes: date (current Unix timestamp)
+ - M bytes: literal data
+
+    Takes the data to wrap and returns the complete literal data packet.
+ """
+ body = bytes([
+ ord('b'), # Binary data format
+ 0, # Filename length (0 = no filename)
+ ]) + struct.pack('>I', int(time.time())) + data # Current timestamp + data
+
+ return new_packet(11, body)
+
+def build_symenc_data_packet(sess_key: bytes, cipher_algo: int, payload: bytes) -> bytes:
+ """
+ Build a symmetrically-encrypted data packet using AES-128-CFB.
+
+ This packet contains encrypted data using the session key. The format
+ includes a random prefix, for security (see RFC 4880, Section 5.7).
+
+ Packet structure:
+ - Random prefix (block_size bytes)
+ - Prefix repeat (last 2 bytes of prefix repeated)
+ - Encrypted literal data packet
+
+ This function uses the following set of arguments:
+ - sess_key: Session key for encryption
+ - cipher_algo: Cipher algorithm identifier (7 = AES-128)
+ - payload: Data to encrypt (wrapped in literal data packet)
+ """
+ block_size = 16 # AES-128 block size
+ key = sess_key[:16] # Use first 16 bytes for AES-128
+
+ # Create random prefix + repeat last 2 bytes (total 18 bytes)
+ # This is required by OpenPGP for integrity checking
+ prefix_random = secrets.token_bytes(block_size)
+ prefix = prefix_random + prefix_random[-2:] # 18 bytes total
+
+ # Wrap payload in literal data packet
+ literal_pkt = build_literal_data_packet(payload)
+
+ # Plaintext = prefix + literal data packet
+ plaintext = prefix + literal_pkt
+
+ # Encrypt using OpenPGP CFB mode with resync
+ ciphertext = pgp_cfb_encrypt_resync(key, plaintext)
+
+ return new_packet(9, ciphertext)
+
+def build_tag1_packet(rsa: dict, sess_key: bytes) -> bytes:
+    Build a public-key encrypted session key packet (tag 1).
+ Build a public-key encrypted key.
+
+    This is the central function of the script: it creates the packet that
+    triggers the overflow check, and it can equally be used to create
+    legitimate packet data.
+
+ Format (RFC 4880, Section 5.1):
+ - 1 byte: version (3)
+ - 8 bytes: key ID (0 = any key accepted)
+ - 1 byte: public key algorithm (2 = RSA encrypt)
+ - MPI: RSA-encrypted session key
+
+    Takes the generated RSA key pair and the session key to encrypt; the
+    latter is crafted to trigger the overflow.
+
+    Returns a complete packet containing the RSA-encrypted session key.
+ """
+
+ # Calculate RSA modulus size in bytes
+ n_bytes = (rsa['n'].bit_length() + 7) // 8
+
+ # Session key message format:
+ # - 1 byte: symmetric cipher algorithm (7 = AES-128)
+ # - N bytes: session key
+ # - 2 bytes: checksum (simple sum of session key bytes)
+ algo_byte = bytes([7]) # AES-128 algorithm identifier
+ cksum = sum(sess_key) & 0xFFFF # 16-bit checksum
+ M = algo_byte + sess_key + struct.pack('>H', cksum)
+
+ # PKCS#1 v1.5 padding construction
+ # Format: 0x02 || PS || 0x00 || M
+ # Total padded message must be exactly n_bytes long.
+ total_len = n_bytes # Total length must equal modulus size in bytes
+ ps_len = total_len - len(M) - 2 # Subtract 2 for 0x02 and 0x00 bytes
+
+ if ps_len < 8:
+ raise ValueError(f"Padding string too short ({ps_len} bytes); need at least 8 bytes. "
+ f"Message length: {len(M)}, Modulus size: {n_bytes} bytes")
+
+    # Create a padding string consisting entirely of 0xFF bytes
+ PS = bytes([0xFF]) * ps_len
+
+ # Construct the complete padded message
+ # Normal PKCS#1 v1.5 padding: 0x02 || PS || 0x00 || M
+ padded = bytes([0x02]) + PS + bytes([0x00]) + M
+
+ # Verify padding construction
+ if len(padded) != n_bytes:
+ raise ValueError(f"Padded message length ({len(padded)}) doesn't match RSA modulus size ({n_bytes})")
+
+ # Convert padded message to integer and encrypt with RSA
+ m_int = int.from_bytes(padded, 'big')
+
+ # Ensure message is smaller than modulus (required for RSA)
+ if m_int >= rsa['n']:
+ raise ValueError("Padded message is larger than RSA modulus")
+
+ # RSA encryption: c = m^e mod n
+ c_int = pow(m_int, rsa['e'], rsa['n'])
+
+ # Encode encrypted result as MPI
+ c_mpi = mpi_encode(c_int)
+
+ # Build complete packet
+ ver = bytes([3]) # Version 3 packet
+ key_id = b"\x00" * 8 # Key ID (0 = any key accepted)
+ algo = bytes([2]) # RSA encrypt algorithm
+ payload = ver + key_id + algo + c_mpi
+
+ return new_packet(1, payload)
+
+def build_message_data(rsa: dict) -> bytes:
+ """
+    Create a crafted message with an over-long session key.
+
+    Takes the RSA key components generated previously and returns the
+    concatenated PGP packets crafted for the purpose of this test.
+ """
+
+    # Crafted session key material (AES key + padding + size field),
+    # 36 bytes in total. The over-long length is the important part of
+    # this test.
+ prefix = AES_KEY + b"\x00" * 16 + p32(0x10)
+
+ # Build encrypted data packet, legit.
+ sedata = build_symenc_data_packet(AES_KEY, cipher_algo=7, payload=b"\x0a\x00")
+
+ # Build multiple packets
+ packets = [
+ # First packet, legit.
+ build_tag1_packet(rsa, prefix),
+
+ # Encrypted data packet, legit.
+ sedata,
+
+ # Second packet: information payload.
+ #
+        # This packet contains the crafted, over-long session key that
+        # triggers the overflow check in pgcrypto. This is the critical
+        # part of the test data.
+ build_tag1_packet(rsa, prefix)
+ ]
+
+ return b"".join(packets)
+
+def main():
+    # Default key size, in bits.
+    # This could be raised to 4096 if wanted, but there is no need for
+    # that here.
+ key_size = 2048
+
+ # Generate fresh RSA key pair
+ rsa = generate_rsa_keypair(key_size)
+
+ # Generate the message data.
+ print("### Building message data", file=sys.stderr)
+ message_data = build_message_data(rsa)
+
+ # Build the key containing the RSA private key
+ print("### Building key data", file=sys.stderr)
+ key_data = build_key_data(rsa)
+
+ # Convert to hexadecimal, for the bytea used in the SQL file.
+ message_data = message_data.hex()
+ key_data = key_data.hex()
+
+ # Split each value into lines of 72 characters, for readability.
+ message_data = re.sub("(.{72})", "\\1\n", message_data, 0, re.DOTALL)
+ key_data = re.sub("(.{72})", "\\1\n", key_data, 0, re.DOTALL)
+
+ # Get the script filename for documentation
+ file_basename = os.path.basename(__file__)
+
+ # Output the SQL test case
+ print(f'''-- Test for overflow with session key at decrypt.
+-- Data automatically generated by scripts/{file_basename}.
+-- See this file for details explaining how this data is generated.
+SELECT pgp_pub_decrypt_bytea(
+'\\x{message_data}'::bytea,
+'\\x{key_data}'::bytea);''',
+ file=sys.stdout)
+
+if __name__ == "__main__":
+ main()
diff --git a/contrib/pgcrypto/sql/pgp-decrypt.sql b/contrib/pgcrypto/sql/pgp-decrypt.sql
index 49a0267bbcb..b499bf757b0 100644
--- a/contrib/pgcrypto/sql/pgp-decrypt.sql
+++ b/contrib/pgcrypto/sql/pgp-decrypt.sql
@@ -228,7 +228,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
-----END PGP MESSAGE-----
'), '0123456789abcdefghij'), 'sha1');
-select digest(pgp_sym_decrypt(dearmor('
+select digest(pgp_sym_decrypt_bytea(dearmor('
-----BEGIN PGP MESSAGE-----
Comment: dat3.aes.sha1.mdc.s2k3.z0
@@ -282,6 +282,27 @@ VsxxqLSPzNLAeIspJk5G
-- Routine text/binary mismatch.
select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
+-- NUL byte in text decrypt. Ciphertext source:
+-- printf 'a\x00\xc' | gpg --homedir /nonexistent \
+-- --personal-compress-preferences uncompressed --textmode \
+-- --personal-cipher-preferences aes --no-emit-version --batch \
+-- --symmetric --passphrase key --armor
+do $$
+begin
+ perform pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+jA0EBwMCXLc8pozB10Fg0jQBVUID59TLvWutJp0j6eh9ZgjqIRzdYaIymFB8y4XH
+vu0YlJP5D5BX7yqZ+Pry7TlDmiFO
+=rV7z
+-----END PGP MESSAGE-----
+'), 'key', 'debug=1');
+exception when others then
+ raise '%',
+ regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]');
+end
+$$;
+
-- Decryption with a certain incorrect key yields an apparent BZip2-compressed
-- plaintext. Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
-- until the random prefix gave rise to that property.
diff --git a/contrib/pgcrypto/sql/pgp-pubkey-session.sql b/contrib/pgcrypto/sql/pgp-pubkey-session.sql
new file mode 100644
index 00000000000..51792f1f4d8
--- /dev/null
+++ b/contrib/pgcrypto/sql/pgp-pubkey-session.sql
@@ -0,0 +1,46 @@
+-- Test for overflow with session key at decrypt.
+-- Data automatically generated by scripts/pgp_session_data.py.
+-- See this file for details explaining how this data is generated.
+SELECT pgp_pub_decrypt_bytea(
+'\xc1c04c030000000000000000020800a46f5b9b1905b49457a6485474f71ed9b46c2527e1
+da08e1f7871e12c3d38828f2076b984a595bf60f616599ca5729d547de06a258bfbbcd30
+94a321e4668cd43010f0ca8ecf931e5d39bda1152c50c367b11c723f270729245d3ebdbd
+0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5060af7603cfd9ed186ebadd616
+3b50ae42bea5f6d14dda24e6d4687b434c175084515d562e896742b0ba9a1c87d5642e10
+a5550379c71cc490a052ada483b5d96526c0a600fc51755052aa77fdf72f7b4989b920e7
+b90f4b30787a46482670d5caecc7a515a926055ad5509d135702ce51a0e4c1033f2d939d
+8f0075ec3428e17310da37d3d2d7ad1ce99adcc91cd446c366c402ae1ee38250343a7fcc
+0f8bc28020e603d7a4795ef0dcc1c04c030000000000000000020800a46f5b9b1905b494
+57a6485474f71ed9b46c2527e1da08e1f7871e12c3d38828f2076b984a595bf60f616599
+ca5729d547de06a258bfbbcd3094a321e4668cd43010f0ca8ecf931e5d39bda1152c50c3
+67b11c723f270729245d3ebdbd0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5
+060af7603cfd9ed186ebadd6163b50ae42bea5f6d14dda24e6d4687b434c175084515d56
+2e896742b0ba9a1c87d5642e10a5550379c71cc490a052ada483b5d96526c0a600fc5175
+5052aa77fdf72f7b4989b920e7b90f4b30787a46482670d5caecc7a515a926055ad5509d
+135702ce51a0e4c1033f2d939d8f0075ec3428e17310da37d3d2d7ad1ce99adc'::bytea,
+'\xc7c2d8046965d657020800eef8bf1515adb1a3ee7825f75c668ea8dd3e3f9d13e958f6ad
+9c55adc0c931a4bb00abe1d52cf7bb0c95d537949d277a5292ede375c6b2a67a3bf7d19f
+f975bb7e7be35c2d8300dacba360a0163567372f7dc24000cc7cb6170bedc8f3b1f98c12
+07a6cb4de870a4bc61319b139dcc0e20c368fd68f8fd346d2c0b69c5aed560504e2ec6f1
+23086fe3c5540dc4dd155c0c67257c4ada862f90fe172ace344089da8135e92aca5c2709
+f1c1bc521798bb8c0365841496e709bd184132d387e0c9d5f26dc00fd06c3a76ef66a75c
+138285038684707a847b7bd33cfbefbf1d336be954a8048946af97a66352adef8e8b5ae4
+c4748c6f2510265b7a8267bc370dbb00110100010007ff7e72d4f95d2d39901ac12ca5c5
+18e767e719e72340c3fab51c8c5ab1c40f31db8eaffe43533fa61e2dbca2c3f4396c0847
+e5434756acbb1f68128f4136bb135710c89137d74538908dac77967de9e821c559700dd9
+de5a2727eec1f5d12d5d74869dd1de45ed369d94a8814d23861dd163f8c27744b26b98f0
+239c2e6dd1e3493b8cc976fdc8f9a5e250f715aa4c3d7d5f237f8ee15d242e8fa941d1a0
+ed9550ab632d992a97518d142802cb0a97b251319bf5742db8d9d8cbaa06cdfba2d75bc9
+9d77a51ff20bd5ba7f15d7af6e85b904de2855d19af08d45f39deb85403033c69c767a8e
+74a343b1d6c8911d34ea441ac3850e57808ed3d885835cbe6c79d10400ef16256f3d5c4c
+3341516a2d2aa888df81b603f48a27f3666b40f992a857c1d11ff639cd764a9b42d5a1f8
+58b4aeee36b85508bb5e8b91ef88a7737770b330224479d9b44eae8c631bc43628b69549
+507c0a1af0be0dd7696015abea722b571eb35eefc4ab95595378ec12814727443f625fcd
+183bb9b3bccf53b54dd0e5e7a50400ffe08537b2d4e6074e4a1727b658cfccdec8962302
+25e300c05690de45f7065c3d40d86f544a64d51a3e94424f9851a16d1322ebdb41fa8a45
+3131f3e2dc94e858e6396722643df382680f815e53bcdcde5da622f50530a83b217f1103
+cdd6e5e9babe1e415bbff28d44bd18c95f43bbd04afeb2a2a99af38a571c7540de21df03
+ff62c0a33d9143dd3f639893f47732c11c5a12c6052d1935f4d507b7ae1f76ab0e9a69b8
+7305a7f7c19bd509daf4903bff614bc26d118f03e461469c72c12d3a2bb4f78e4d342ce8
+487723649a01ed2b9eb11c662134502c098d55dfcd361939d8370873422c3da75a515a75
+9ffedfe7df44fb3c20f81650801a30d43b5c90b98b3eee'::bytea);
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index 487a1a23170..add673a4776 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -150,7 +150,8 @@ static void pgfdw_subxact_callback(SubXactEvent event,
SubTransactionId mySubid,
SubTransactionId parentSubid,
void *arg);
-static void pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue);
+static void pgfdw_inval_callback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
static void pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry);
static void pgfdw_reset_xact_state(ConnCacheEntry *entry, bool toplevel);
static bool pgfdw_cancel_query(PGconn *conn);
@@ -1309,7 +1310,7 @@ pgfdw_subxact_callback(SubXactEvent event, SubTransactionId mySubid,
* individual option values, but it seems too much effort for the gain.
*/
static void
-pgfdw_inval_callback(Datum arg, int cacheid, uint32 hashvalue)
+pgfdw_inval_callback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
HASH_SEQ_STATUS scan;
ConnCacheEntry *entry;
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 6066510c7c0..2ccb72c539a 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -698,12 +698,12 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = -c1; -- Op
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = (- "C 1")))
(3 rows)
-EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL); -- DistinctExpr
- QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------
+EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS DISTINCT FROM c3; -- DistinctExpr
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------
Foreign Scan on public.ft1 t1
Output: c1, c2, c3, c4, c5, c6, c7, c8
- Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (((c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL)))
+ Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((c3 IS DISTINCT FROM c3))
(3 rows)
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, c1 + 0]); -- ScalarArrayOpExpr
@@ -6503,20 +6503,31 @@ UPDATE ft2 d SET c2 = CASE WHEN random() >= 0 THEN d.c2 ELSE 0 END
ALTER SERVER loopback OPTIONS (DROP extensions);
INSERT INTO ft2 (c1,c2,c3)
SELECT id, id % 10, to_char(id, 'FM00000') FROM generate_series(2001, 2010) id;
+-- this will do a remote seqscan, causing unstable result order, so sort
EXPLAIN (verbose, costs off)
-UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *; -- can't be pushed down
- QUERY PLAN
-----------------------------------------------------------------------------------------------------------
- Update on public.ft2
- Output: c1, c2, c3, c4, c5, c6, c7, c8
- Remote SQL: UPDATE "S 1"."T 1" SET c3 = $2 WHERE ctid = $1 RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8
- -> Foreign Scan on public.ft2
- Output: 'bar'::text, ctid, ft2.*
- Filter: (postgres_fdw_abs(ft2.c1) > 2000)
- Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8, ctid FROM "S 1"."T 1" FOR UPDATE
-(7 rows)
+WITH cte AS (
+ UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *
+) SELECT * FROM cte ORDER BY c1; -- can't be pushed down
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------------
+ Sort
+ Output: cte.c1, cte.c2, cte.c3, cte.c4, cte.c5, cte.c6, cte.c7, cte.c8
+ Sort Key: cte.c1
+ CTE cte
+ -> Update on public.ft2
+ Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8
+ Remote SQL: UPDATE "S 1"."T 1" SET c3 = $2 WHERE ctid = $1 RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8
+ -> Foreign Scan on public.ft2
+ Output: 'bar'::text, ft2.ctid, ft2.*
+ Filter: (postgres_fdw_abs(ft2.c1) > 2000)
+ Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8, ctid FROM "S 1"."T 1" FOR UPDATE
+ -> CTE Scan on cte
+ Output: cte.c1, cte.c2, cte.c3, cte.c4, cte.c5, cte.c6, cte.c7, cte.c8
+(13 rows)
-UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *;
+WITH cte AS (
+ UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *
+) SELECT * FROM cte ORDER BY c1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+----+-----+----+----+----+------------+----
2001 | 1 | bar | | | | ft2 |
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 3572689e33b..60d90329a65 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -1856,7 +1856,7 @@ postgresPlanForeignModify(PlannerInfo *root,
returningList = (List *) list_nth(plan->returningLists, subplan_index);
/*
- * ON CONFLICT DO UPDATE and DO NOTHING case with inference specification
+ * ON CONFLICT DO NOTHING/SELECT/UPDATE with inference specification
* should have already been rejected in the optimizer, as presently there
* is no way to recognize an arbiter index on a foreign table. Only DO
* NOTHING is supported without an inference specification.
diff --git a/contrib/postgres_fdw/shippable.c b/contrib/postgres_fdw/shippable.c
index d32d3d0e461..250f54fea32 100644
--- a/contrib/postgres_fdw/shippable.c
+++ b/contrib/postgres_fdw/shippable.c
@@ -62,7 +62,8 @@ typedef struct
* made for them, however.
*/
static void
-InvalidateShippableCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
+InvalidateShippableCacheCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
HASH_SEQ_STATUS status;
ShippableCacheEntry *entry;
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 4f7ab2ed0ac..72d2d9c311b 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -340,7 +340,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS NULL; -- Nu
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS NOT NULL; -- NullTest
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE round(abs(c1), 0) = 1; -- FuncExpr
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = -c1; -- OpExpr(l)
-EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL); -- DistinctExpr
+EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS DISTINCT FROM c3; -- DistinctExpr
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, c1 + 0]); -- ScalarArrayOpExpr
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = (ARRAY[c1,c2,3])[1]; -- SubscriptingRef
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c6 = E'foo''s\\bar'; -- check special chars
@@ -1613,9 +1613,16 @@ UPDATE ft2 d SET c2 = CASE WHEN random() >= 0 THEN d.c2 ELSE 0 END
ALTER SERVER loopback OPTIONS (DROP extensions);
INSERT INTO ft2 (c1,c2,c3)
SELECT id, id % 10, to_char(id, 'FM00000') FROM generate_series(2001, 2010) id;
+
+-- this will do a remote seqscan, causing unstable result order, so sort
EXPLAIN (verbose, costs off)
-UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *; -- can't be pushed down
-UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *;
+WITH cte AS (
+ UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *
+) SELECT * FROM cte ORDER BY c1; -- can't be pushed down
+WITH cte AS (
+ UPDATE ft2 SET c3 = 'bar' WHERE postgres_fdw_abs(c1) > 2000 RETURNING *
+) SELECT * FROM cte ORDER BY c1;
+
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c3 = 'baz'
FROM ft4 INNER JOIN ft5 ON (ft4.c1 = ft5.c1)
diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
index e25c8a5aa26..69b173e4498 100644
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@@ -156,7 +156,7 @@ initTrie(const char *filename)
state = 0;
for (ptr = line; *ptr; ptr += ptrlen)
{
- ptrlen = pg_mblen(ptr);
+ ptrlen = pg_mblen_cstr(ptr);
/* ignore whitespace, but end src or trg */
if (isspace((unsigned char) *ptr))
{
@@ -382,6 +382,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
char *srcchar = (char *) PG_GETARG_POINTER(1);
int32 len = PG_GETARG_INT32(2);
char *srcstart = srcchar;
+ const char *srcend = srcstart + len;
TSLexeme *res;
StringInfoData buf;
@@ -409,7 +410,7 @@ unaccent_lexize(PG_FUNCTION_ARGS)
}
else
{
- matchlen = pg_mblen(srcchar);
+ matchlen = pg_mblen_range(srcchar, srcend);
if (buf.data != NULL)
appendBinaryStringInfo(&buf, srcchar, matchlen);
}
diff --git a/doc/src/sgml/bgworker.sgml b/doc/src/sgml/bgworker.sgml
index 4699ef6345f..2affba74382 100644
--- a/doc/src/sgml/bgworker.sgml
+++ b/doc/src/sgml/bgworker.sgml
@@ -232,6 +232,8 @@ typedef struct BackgroundWorker
+ A well-behaved background worker must react promptly to standard signals
+ that the postmaster uses to control its child processes.
Signals are initially blocked when control reaches the
background worker's main function, and must be unblocked by it; this is to
allow the process to customize its signal handlers, if necessary.
@@ -240,6 +242,14 @@ typedef struct BackgroundWorker
BackgroundWorkerBlockSignals.
+
+ The default signal handlers merely set interrupt flags
+ that are processed later by CHECK_FOR_INTERRUPTS().
+ CHECK_FOR_INTERRUPTS() should be called in any
+ long-running loop to ensure that the background worker doesn't prevent the
+ system from shutting down in a timely fashion.
+
+
If bgw_restart_time for a background worker is
configured as BGW_NEVER_RESTART, or if it exits with an exit
diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml
index 53a982bf60d..087a6827b00 100644
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -271,6 +271,21 @@
+
+
+ There is a special case for values of the
+ pg_proc.proargdefaults
+ field, which is of type pg_node_tree. The real
+ contents of that type are too complex for hand-written entries,
+ but what we need for proargdefaults is
+ typically just a list of Const nodes. Therefore, the bootstrap
+ backend will interpret a value given for that field according to
+ text array syntax, and then feed the array element values to the
+ datatype input routines for the corresponding input parameters' data
+ types, and finally build Const nodes from the datums.
+
+
+
Since hashes are unordered data structures, field order and line
@@ -817,11 +832,11 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat
The following column types are supported directly by
bootstrap.c: bool,
bytea, char (1 byte),
- name, int2,
- int4, regproc, regclass,
- regtype, text,
- oid, tid, xid,
- cid, int2vector, oidvector,
+ int2, int4, int8,
+ float4, float8,
+ name, regproc, text,
+ jsonb, oid, pg_node_tree,
+ int2vector, oidvector,
_int4 (array), _text (array),
_oid (array), _char (array),
_aclitem (array). Although it is possible to create
@@ -884,7 +899,7 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat
- insert(oid_value value1 value2 ... )
+ insert(value1 value2 ... )
@@ -902,6 +917,13 @@ $ perl rewrite_dat_with_prokind.pl pg_proc.dat
(To include a single quote in a value, write it twice.
Escape-string-style backslash escapes are allowed in the string, too.)
+
+
+ In most cases a value
+ string is simply fed to the datatype input routine for the column's
+ data type, after de-quoting if needed. However there are exceptions
+ for certain fields, as detailed previously.
+
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 332193565e2..e7067c84ece 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -8216,6 +8216,16 @@ SCRAM-SHA-256$<iteration count>:&l
+
+
+ subwalrcvtimeout text
+
+
+ The wal_receiver_timeout
+ setting for the subscription's workers to use
+
+
+
 subpublications text[]
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 5560b95ee60..20dbcaeb3ee 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1157,6 +1157,28 @@ include_dir 'conf.d'
+
+ password_expiration_warning_threshold (integer)
+
+ password_expiration_warning_threshold configuration parameter
+
+
+
+
+ When this parameter is greater than zero, the server will emit a
+ WARNING upon successful password authentication if
+ less than this amount of time remains until the authenticated role's
+ password expires. Note that a role's password only expires if a date
+ was specified in a VALID UNTIL clause for
+ CREATE ROLE or ALTER ROLE. If
+ this value is specified without units, it is taken as seconds. The
+ default is 7 days. This parameter can only be set in the
+ postgresql.conf file or on the server command
+ line.
+
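+
+ For example (the role name is hypothetical):
+
+
+ALTER ROLE alice VALID UNTIL '2026-12-31';
+-- warn during the final 14 days before the password expires:
+ALTER SYSTEM SET password_expiration_warning_threshold = '14d';
+SELECT pg_reload_conf();
+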
+
+
+
md5_password_warnings (boolean)
@@ -1541,6 +1563,15 @@ include_dir 'conf.d'
The default is X25519:prime256v1.
+
+
+ X25519 is not allowed when
+ OpenSSL is configured for FIPS mode, and
+ must be removed from the server configuration when FIPS mode is
+ enabled.
+
+
+
OpenSSL names for the most common curves
are:
@@ -2412,6 +2443,43 @@ include_dir 'conf.d'
+
+ file_extend_method (enum)
+
+ file_extend_method configuration parameter
+
+
+
+
+ Specifies the method used to extend data files during bulk operations
+ such as COPY. The first available option is used as
+ the default, depending on the operating system:
+
+
+
+ posix_fallocate (Unix) uses the standard POSIX
+ interface for allocating disk space, but is missing on some systems.
+ If it is present but the underlying file system doesn't support it,
+ this option silently falls back to write_zeros.
+ Current versions of BTRFS are known to disable compression when
+ this option is used.
+ This is the default on systems that have the function.
+
+
+
+
+ write_zeros extends files by writing out blocks
+ of zero bytes. This is the default on systems that don't have the
+ function posix_fallocate.
+
+
+
+ The write_zeros method is always used when data
+ files are extended by 8 blocks or fewer.
+
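+
+ For example, to select a method explicitly (a sketch using
+ ALTER SYSTEM; editing
+ postgresql.conf directly works as well):
+
+
+ALTER SYSTEM SET file_extend_method = 'write_zeros';
+SELECT pg_reload_conf();
+SHOW file_extend_method;
+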
+
+
+
max_notify_queue_pages (integer)
@@ -4722,45 +4790,6 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
-
- synchronized_standby_slots (string)
-
- synchronized_standby_slots configuration parameter
-
-
-
-
- A comma-separated list of streaming replication standby server slot names
- that logical WAL sender processes will wait for. Logical WAL sender processes
- will send decoded changes to plugins only after the specified replication
- slots confirm receiving WAL. This guarantees that logical replication
- failover slots do not consume changes until those changes are received
- and flushed to corresponding physical standbys. If a
- logical replication connection is meant to switch to a physical standby
- after the standby is promoted, the physical replication slot for the
- standby should be listed here. Note that logical replication will not
- proceed if the slots specified in the
- synchronized_standby_slots do not exist or are invalidated.
- Additionally, the replication management functions
-
- pg_replication_slot_advance,
-
- pg_logical_slot_get_changes, and
-
- pg_logical_slot_peek_changes,
- when used with logical failover slots, will block until all
- physical slots specified in synchronized_standby_slots have
- confirmed WAL receipt.
-
-
- The standbys corresponding to the physical replication slots in
- synchronized_standby_slots must configure
- sync_replication_slots = true so they can receive
- logical failover slot changes from the primary.
-
-
-
-
@@ -4909,6 +4938,45 @@ ANY num_sync (
+ synchronized_standby_slots (string)
+
+ synchronized_standby_slots configuration parameter
+
+
+
+
+ A comma-separated list of streaming replication standby server slot names
+ that logical WAL sender processes will wait for. Logical WAL sender processes
+ will send decoded changes to plugins only after the specified replication
+ slots confirm receiving WAL. This guarantees that logical replication
+ failover slots do not consume changes until those changes are received
+ and flushed to corresponding physical standbys. If a
+ logical replication connection is meant to switch to a physical standby
+ after the standby is promoted, the physical replication slot for the
+ standby should be listed here. Note that logical replication will not
+ proceed if the slots specified in the
+ synchronized_standby_slots do not exist or are invalidated.
+ Additionally, the replication management functions
+
+ pg_replication_slot_advance,
+
+ pg_logical_slot_get_changes, and
+
+ pg_logical_slot_peek_changes,
+ when used with logical failover slots, will block until all
+ physical slots specified in synchronized_standby_slots have
+ confirmed WAL receipt.
+
+
+ The standbys corresponding to the physical replication slots in
+ synchronized_standby_slots must configure
+ sync_replication_slots = true so they can receive
+ logical failover slot changes from the primary.
+
+
@@ -5181,9 +5249,6 @@ ANY num_sync (
- log_min_messages (enum)
+ log_min_messages (string)
log_min_messages configuration parameter
- Controls which message
- levels are written to the server log.
- Valid values are DEBUG5, DEBUG4,
- DEBUG3, DEBUG2, DEBUG1,
- INFO, NOTICE, WARNING,
- ERROR, LOG, FATAL, and
- PANIC. Each level includes all the levels that
- follow it. The later the level, the fewer messages are sent
- to the log. The default is WARNING. Note that
- LOG has a different rank here than in
+ Controls which
+ message levels
+ are written to the server log. The value is a comma-separated
+ list of zero or more
+ process type:level
+ entries and exactly one mandatory
+ level entry,
+ which becomes the default for process types not listed.
+ Valid process types are listed in the table below.
+
+ archiver
+ autovacuum
+ backend
+ bgworker
+ bgwriter
+ checkpointer
+ ioworker
+ postmaster
+ slotsyncworker
+ startup
+ syslogger
+ walreceiver
+ walsender
+ walsummarizer
+ walwriter
+
+ Valid level values are DEBUG5,
+ DEBUG4, DEBUG3, DEBUG2,
+ DEBUG1, INFO, NOTICE,
+ WARNING, ERROR, LOG,
+ FATAL, and PANIC. Each level includes
+ all the levels that follow it. The later the level, the fewer messages are sent
+ to the log. The default is WARNING, which
+ applies that level to all process types.
+ Note that LOG has a different rank here than in
.
Only superusers and users with the appropriate SET
privilege can change this setting.
+
+ Example: To log walsender and autovacuum
+ at level DEBUG1 and everything else at ERROR,
+ set log_min_messages to error, walsender:debug1, autovacuum:debug1.
+
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index 24b706b29ad..bdd4865f53f 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -156,6 +156,7 @@ CREATE EXTENSION extension_name;
&pgfreespacemap;
&pglogicalinspect;
&pgoverexplain;
+ &pgplanadvice;
&pgprewarm;
&pgrowlocks;
&pgstatstatements;
diff --git a/doc/src/sgml/dml.sgml b/doc/src/sgml/dml.sgml
index 61c64cf6c49..cd348d5773a 100644
--- a/doc/src/sgml/dml.sgml
+++ b/doc/src/sgml/dml.sgml
@@ -385,7 +385,7 @@ UPDATE products SET price = price * 1.10
for a DELETE. However, there are situations where it
can still be useful for those commands. For example, in an
INSERT with an
- ON CONFLICT DO UPDATE
+ ON CONFLICT DO SELECT/UPDATE
clause, the old values will be non-NULL for conflicting
rows. Similarly, if a DELETE is turned into an
UPDATE by a rewrite rule,
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index c6d66414b8e..9826e09f983 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -2045,7 +2045,7 @@ GetForeignServerByName(const char *name, bool missing_ok);
INSERT with an ON CONFLICT clause does not
support specifying the conflict target, as unique constraints or
exclusion constraints on remote tables are not locally known. This
- in turn implies that ON CONFLICT DO UPDATE is not supported,
+ in turn implies that ON CONFLICT DO SELECT/UPDATE is not supported,
since the specification is mandatory there.
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index ac66fcbdb57..d90b4338d2a 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -149,6 +149,7 @@
+
diff --git a/doc/src/sgml/images/README b/doc/src/sgml/images/README
index 07c45802553..93b75485c44 100644
--- a/doc/src/sgml/images/README
+++ b/doc/src/sgml/images/README
@@ -13,14 +13,14 @@ involve diffable source files.
These tools are acceptable:
- Graphviz (https://graphviz.org/)
-- Ditaa (http://ditaa.sourceforge.net/)
+- Ditaa v0.11.0 or later (https://github.com/stathissideris/ditaa)
We use SVG as the format for integrating the image into the ultimate
output formats of the documentation, that is, HTML, PDF, and others.
Therefore, any tool used needs to be able to produce SVG.
-This directory contains makefile rules to build SVG from common input
-formats, using some common styling.
+This directory contains makefile and meson rules to build SVG from common
+input formats, using some common styling.
fixup-svg.xsl applies some postprocessing to the SVG files produced by
those external tools to address assorted issues. See comments in
diff --git a/doc/src/sgml/images/meson.build b/doc/src/sgml/images/meson.build
new file mode 100644
index 00000000000..8e601e877a5
--- /dev/null
+++ b/doc/src/sgml/images/meson.build
@@ -0,0 +1,61 @@
+# doc/src/sgml/images/meson.build
+#
+# see README in this directory about image handling
+
+if not xsltproc_bin.found() or not dot.found() or not ditaa.found()
+ subdir_done()
+endif
+
+image_targets = []
+
+fixup_svg_xsl = files('fixup-svg.xsl')
+
+all_files = [
+ 'genetic-algorithm.gv',
+ 'gin.gv',
+ 'pagelayout.txt',
+ 'temporal-entities.txt',
+ 'temporal-references.txt',
+]
+
+foreach file : all_files
+
+ str_split = file.split('.')
+ actual_file_name = str_split[0]
+ extension = str_split[1]
+ cur_file = files(file)
+ tmp_name = '@0@.svg.tmp'.format(file)
+ output_name = '@0@.svg'.format(actual_file_name)
+
+ command = []
+ if extension == 'gv'
+ command = [dot, '-T', 'svg', '-o', '@OUTPUT@', '@INPUT@']
+ elif extension == 'txt'
+ command = [ditaa, '-E', '-S', '--svg', '@INPUT@', '@OUTPUT@']
+ else
+ error('Unknown extension: ".@0@" while generating images'.format(extension))
+ endif
+
+ svg_tmp = custom_target(tmp_name,
+ input: cur_file,
+ output: tmp_name,
+ command: command,
+ )
+
+ current_svg = custom_target(output_name,
+ input: svg_tmp,
+ output: output_name,
+ command: [xsltproc_bin,
+ '--nonet',
+ # Use --novalid to avoid loading SVG DTD if a file specifies it, since
+ # it might not be available locally, and we don't need it.
+ '--novalid',
+ '-o', '@OUTPUT@',
+ fixup_svg_xsl,
+ '@INPUT@']
+ )
+
+ image_targets += current_svg
+endforeach
+
+alias_target('images', image_targets)
diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml
index 206eadb8f7b..8a2aad5935e 100644
--- a/doc/src/sgml/json.sgml
+++ b/doc/src/sgml/json.sgml
@@ -882,9 +882,10 @@ UPDATE table_name SET jsonb_field[1]['a'] = '1';
$varname
A named variable. Its value can be set by the parameter
- vars of several JSON processing functions;
- see for details.
-
+ vars of several JSON processing functions
+ (see ), or by
+ using the SQL/JSON PASSING clause as described
+ in .
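+
+ For example, the jsonpath variable min can be
+ supplied either way (a minimal sketch):
+
+
+-- via the vars parameter:
+SELECT jsonb_path_query('{"a": 5}', '$.a ? (@ > $min)', '{"min": 1}');
+-- via the SQL/JSON PASSING clause:
+SELECT JSON_VALUE('{"a": 5}', '$.a ? (@ > $min)' PASSING 1 AS min);
+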
diff --git a/doc/src/sgml/meson.build b/doc/src/sgml/meson.build
index d8f40a0b16d..a1ae5c54ed6 100644
--- a/doc/src/sgml/meson.build
+++ b/doc/src/sgml/meson.build
@@ -1,5 +1,7 @@
# Copyright (c) 2022-2026, PostgreSQL Global Development Group
+subdir('images')
+
docs = []
installdocs = []
alldocs = []
diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml
index 049ee75a4ba..e775260936a 100644
--- a/doc/src/sgml/mvcc.sgml
+++ b/doc/src/sgml/mvcc.sgml
@@ -366,6 +366,18 @@
conventionally visible to the command.
+
+ INSERT with an ON CONFLICT DO
+ SELECT clause behaves similarly to ON CONFLICT DO
+ UPDATE. In Read Committed mode, each row proposed for insertion
+ is guaranteed to either insert or return the conflicting row (unless there are
+ unrelated errors). If a conflict originates in another transaction whose
+ effects are not yet visible to the INSERT, the command
+ will wait for that transaction to commit or roll back, then return the
+ conflicting row if it was committed (even though that row was not visible
+ when the command started).
+
+
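+
+ For example, assuming a table tab with a unique
+ column k (a sketch; see the INSERT
+ documentation for the full syntax):
+
+
+INSERT INTO tab (k, v) VALUES (1, 'new')
+ ON CONFLICT (k) DO SELECT
+ RETURNING k, v;
+
+
+ In Read Committed mode, this either inserts the row or returns the
+ existing conflicting row.
+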
INSERT with an ON CONFLICT DO
NOTHING clause may have insertion not proceed for a row due to
diff --git a/doc/src/sgml/oid2name.sgml b/doc/src/sgml/oid2name.sgml
index 54cc9be2b82..9340d7376aa 100644
--- a/doc/src/sgml/oid2name.sgml
+++ b/doc/src/sgml/oid2name.sgml
@@ -118,7 +118,7 @@
display more information about each object shown: tablespace name,
- schema name, and OID.
+ schema name, OID, and path.
@@ -299,10 +299,10 @@ From database "alvherre":
$ # you can mix the options, and get more details with -x
$ oid2name -d alvherre -t accounts -f 1155291 -x
From database "alvherre":
- Filenode Table Name Oid Schema Tablespace
-------------------------------------------------------
- 155173 accounts 155173 public pg_default
- 1155291 accounts_pkey 1155291 public pg_default
+ Filenode Table Name Oid Schema Tablespace Path
+--------------------------------------------------------------------------
+ 155173 accounts 155173 public pg_default base/17228/155173
+ 1155291 accounts_pkey 1155291 public pg_default base/17228/1155291
$ # show disk space for every db object
$ du [0-9]* |
diff --git a/doc/src/sgml/pgplanadvice.sgml b/doc/src/sgml/pgplanadvice.sgml
new file mode 100644
index 00000000000..efc4287df95
--- /dev/null
+++ b/doc/src/sgml/pgplanadvice.sgml
@@ -0,0 +1,1036 @@
+
+
+
+ pg_plan_advice — help the planner get the right plan
+
+
+ pg_plan_advice
+
+
+
+ The pg_plan_advice module allows key planner decisions to be
+ described, reproduced, and altered using a special-purpose "plan advice"
+ mini-language. It is intended to allow stabilization of plan choices that
+ the user believes to be good, as well as experimentation with plans that
+ the planner believes to be non-optimal.
+
+
+
+ Note that, since the planner often makes good decisions, overriding its
+ judgement can easily backfire. For example, if the distribution of the
+ underlying data changes, the planner normally has the option to adjust the
+ plan in an attempt to preserve good performance. If the plan advice prevents
+ this, a very poor plan may be chosen. It is important to use plan advice
+ only when the risks of constraining the planner's choices are outweighed by
+ the benefits.
+
+
+
+ Getting Started
+
+
+ In order to use this module, pg_plan_advice
+ must be loaded. You can do this on a system-wide basis by adding
+ pg_plan_advice to
+ and restarting the
+ server, or by adding it to
+ and starting a new session,
+ or by loading it into an individual session using the
+ LOAD command. If you
+ wish to use the
+ collector interface,
+ you must also install the pg_plan_advice extension
+ in the database where you wish to use the collector. Use the command
+ CREATE EXTENSION pg_plan_advice to do this. If you do
+ not wish to use the collector interface, this step is not required.
+
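+
+ For example, to try the module in a single session:
+
+
+LOAD 'pg_plan_advice';
+-- optional; required only for the collector functions:
+CREATE EXTENSION pg_plan_advice;
+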
+
+
+ Once the pg_plan_advice module is loaded,
+ EXPLAIN will support
+ a PLAN_ADVICE option. You can use this option to see
+ a plan advice string for the chosen plan. For example:
+
+
+
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+------------------------------------
+ Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Seq Scan on join_fact f
+ -> Hash
+ -> Seq Scan on join_dim d
+ Generated Plan Advice:
+ JOIN_ORDER(f d)
+ HASH_JOIN(d)
+ SEQ_SCAN(f d)
+ NO_GATHER(f d)
+
+
+
+ In this example, the user has not specified any advice; instead, the
+ planner has been permitted to make whatever decisions it thinks best, and
+ those decisions are memorialized in the form of an advice string.
+ JOIN_ORDER(f d) means that f should
+ be the driving table, and the first table to which it should be joined is
+ d. HASH_JOIN(d) means that
+ d should appear on the inner side of a hash join.
+ SEQ_SCAN(f d) means that both f
+ and d should be accessed via a sequential scan.
+ NO_GATHER(f d) means that neither f
+ nor d should appear beneath a Gather
+ or Gather Merge node. For more details on the plan
+ advice mini-language, see the information on
+ advice targets and
+ advice tags, below.
+
+
+
+ If you want to see the advice strings for a large number of queries, or
+ an entire workload, running EXPLAIN (PLAN_ADVICE) for
+ each one may not be convenient. In such situations, it can be more
+ convenient to use an
+ advice collector.
+
+
+
+ Once you have an advice string for a query, you can use it to control how
+ that query is planned. You can do this by setting
+ pg_plan_advice.advice to the advice string you've
+ chosen. This can be an advice string that was generated by the system,
+ or one you've written yourself. One good way of creating your own advice
+ string is to take the string generated by the system and pick out just
+ those elements that you wish to enforce. In the example above,
+ pg_plan_advice emits advice for the join order, the
+ join method, the scan method, and the use of parallelism, but you might
+ only want to control the join order:
+
+
+
+SET pg_plan_advice.advice = 'JOIN_ORDER(f d)';
+EXPLAIN (COSTS OFF)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+------------------------------------
+ Hash Join
+ Hash Cond: (f.dim_id = d.id)
+ -> Seq Scan on join_fact f
+ -> Hash
+ -> Seq Scan on join_dim d
+ Supplied Plan Advice:
+ JOIN_ORDER(f d) /* matched */
+
+
+
+ Since the PLAN_ADVICE option to
+ EXPLAIN was not specified, no advice string is generated
+ for the plan. However, the supplied plan advice is still shown so that
+ anyone looking at the EXPLAIN output knows that the
+ chosen plan was influenced by plan advice. If information about supplied
+ plan advice is not desired, it can be suppressed by configuring
+ pg_plan_advice.always_explain_supplied_advice = false.
+ For each piece of supplied advice, the output shows
+ advice feedback indicating
+ whether or not the advice was successfully applied to the query. In this
+ case, the feedback says /* matched */, which means that
+ f and d were found in the query and
+ that the resulting query plan conforms to the specified advice.
+
+
+
+
+
+ How It Works
+
+
+ Plan advice is written imperatively; that is, it specifies what should be
+ done. However, at an implementation level,
+ pg_plan_advice works by telling the core planner what
+ should not be done. In other words, it operates by constraining the
+ planner's choices, not by replacing it. Therefore, no matter what advice
+ you provide, you will only ever get a plan that the core planner would have
+ considered for the query in question. If you attempt to force what you
+ believe to be the correct plan by supplying an advice string, and the
+ planner still fails to produce the desired plan, this means that either
+ there is a bug in your advice string, or the plan in question was not
+ considered viable by the core planner. This commonly happens for one of two
+ reasons. First, it might be the planner believes that the plan you're trying
+ to force would be semantically incorrect - that is, it would produce the
+ wrong results - and for that reason it wasn't considered. Second, it might
+ be that the planner rejected the plan you were hoping to generate on some
+ grounds other than cost. For example, given a very simple query such as
+ SELECT * FROM some_table, the query planner will
+ decide that the use of an index is worthless here before it performs any
+ costing calculations. You cannot force it to use an index for this query
+ even if you set enable_seqscan = false, and you can't
+ force it to use an index using plan advice, either.
+
+
+
+ Specifying plan advice should never cause planner failure. However, if you
+ specify plan advice that asks for something impossible, you may get a plan
+ where some plan nodes are flagged as Disabled: true in
+ the EXPLAIN output. In some cases, such plans will be
+ basically the same plan you would have gotten with no supplied advice at
+ all, but in other cases, they may be much worse. For example:
+
+
+
+SET pg_plan_advice.advice = 'JOIN_ORDER(x f d)';
+EXPLAIN (COSTS OFF)
+ SELECT * FROM join_fact f JOIN join_dim d ON f.dim_id = d.id;
+ QUERY PLAN
+----------------------------------------------------
+ Nested Loop
+ Disabled: true
+ -> Seq Scan on join_fact f
+ -> Index Scan using join_dim_pkey on join_dim d
+ Index Cond: (id = f.dim_id)
+ Supplied Plan Advice:
+ JOIN_ORDER(x f d) /* partially matched */
+
+
+
+ Because neither f nor d is the
+ first table in the JOIN_ORDER() specification, the
+ planner disables all direct joins between the two of them, thinking that
+ the join to x should happen first. Since planning isn't
+ allowed to fail, a disabled plan between the two relations is eventually
+ selected anyway, but here it's a Nested Loop rather than
+ the Hash Join that was chosen in the above example where
+ no advice was specified. There are several different ways that this kind
+ of thing can happen; when it does, the resulting plan is generally worse
+ than if no advice had been specified at all. Therefore, it is a good idea
+ to validate that the advice you specify applies to the query to which it
+ is applied and that the results are as expected.
+
+
+
+
+
+ Advice Targets
+
+
+ An advice target uniquely identifies a particular
+ instance of a particular relation involved in a particular query. In simple
+ cases, such as the examples shown above, the advice target is simply the
+ relation alias. However, a more complex syntax is required when subqueries
+ are used, when tables are partitioned, or when the same relation alias is
+ mentioned more than once in the same subquery (e.g., (foo JOIN bar
+ ON foo.a = bar.a) x JOIN foo ON x.b = foo.b). Any combination of
+ these three things can occur simultaneously: a relation could be mentioned
+ more than once, be partitioned, and be used inside of a subquery.
+
+
+
+ Because of this, the general syntax for a relation identifier is:
+
+
+
+alias_name#occurrence_number/partition_schema.partition_name@plan_name
+
+
+
+ All components except for the alias_name are optional
+ and are included only when required. When a component is omitted, the
+ preceding punctuation must also be omitted. For the first occurrence of a
+ relation within a given subquery, generated advice will omit the occurrence
+ number, but it is legal to write #1, if desired. The
+ partition schema and partition name are included only for children of
+ partitioned tables. In generated advice, pg_plan_advice
+ always includes both, but it is legal to omit the schema. The plan name is
+ omitted for the top-level plan, and must be included for any subplan.
+
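+
+ For example, the following advice (with hypothetical names) targets the
+ second occurrence of the alias t, a child of a
+ partitioned table, inside the subplan named any_1:
+
+
+SET pg_plan_advice.advice = 'SEQ_SCAN(t#2/public.t_part3@any_1)';
+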
+
+
+ It is not always easy to determine the correct advice target by examining
+ the query. For instance, if the planner pulls up a subquery into the parent
+ query level, everything inside of it becomes part of the parent query level,
+ and uses the parent query's subplan name (or no subplan name, if pulled up
+ to the top level). Furthermore, the correct subquery name is sometimes not
+ obvious. For example, when two queries are joined using an operation such as
+ UNION or INTERSECT, no name for the
+ subqueries is present in the SQL syntax; instead, a system-generated name is
+ assigned to each branch. The easiest way to discover the proper advice
+ targets is to use EXPLAIN (PLAN_ADVICE) and examine the
+ generated advice.
+
+
+
+
+
+ Advice Tags
+
+
+ An advice tag specifies a particular behavior that
+ should be enforced for some portion of the query, such as a particular
+ join order or join method. All advice tags take
+ advice targets as arguments,
+ and many allow lists of advice targets, which in some cases can be nested
+ multiple levels deep. Several different classes of advice tags exist,
+ each controlling a different aspect of query planning.
+
+
+
+ Scan Method Advice
+
+SEQ_SCAN(target [ ... ])
+TID_SCAN(target [ ... ])
+INDEX_SCAN(target index_name [ ... ])
+INDEX_ONLY_SCAN(target index_name [ ... ])
+FOREIGN_SCAN((target [ ... ]) [ ... ])
+BITMAP_HEAP_SCAN(target [ ... ])
+
+
+ SEQ_SCAN specifies that each target should be
+ scanned using a Seq Scan. TID_SCAN
+ specifies that each target should be scanned using a
+ TID Scan or TID Range Scan.
+ BITMAP_HEAP_SCAN specifies that each target
+ should be scanned using a Bitmap Heap Scan.
+
+
+
+ INDEX_SCAN specifies that each target should
+ be scanned using an Index Scan on the given index
+ name. INDEX_ONLY_SCAN is similar, but specifies the
+ use of an Index Only Scan. In either case, the index
+ name can be, but does not have to be, schema-qualified.
+
+
+
+ FOREIGN_SCAN specifies that a join between two or
+ more foreign tables should be pushed down to a remote server so
+ that it can be implemented as a single Foreign Scan.
+ Specifying FOREIGN_SCAN for a single foreign table is
+ neither necessary nor permissible: a Foreign Scan will
+ need to be used regardless. If you want to prevent a join from being
+ pushed down, consider using the JOIN_ORDER tag for
+ that purpose.
+
+
+
+ The planner supports many types of scans other than those listed here;
+ however, in most of those cases, there is no meaningful decision to be
+ made, and hence no need for advice. For example, the output of a
+ set-returning function that appears in the FROM clause
+ can only ever be scanned using a Function Scan, so
+ there is no opportunity for advice to change anything.
+
+
+
+
+
+ Join Order Advice
+
+JOIN_ORDER(join_order_item [ ... ])
+
+where join_order_item is:
+
+advice_target |
+( join_order_item [ ... ] ) |
+{ join_order_item [ ... ] }
+
+
+ When JOIN_ORDER is used without any sublists, it
+ specifies an outer-deep join with the first advice target as the driving
+ table, joined to each subsequent advice target in turn in the order
+ specified. For instance, JOIN_ORDER(a b c) means that
+ a should be the driving table, and that it should be
+ joined first to b and then to c.
+ If there are more relations in the query than a,
+ b, and c, the rest can be joined
+ afterwards in any manner.
+
+
+
+ If a JOIN_ORDER list contains a parenthesized sublist,
+ it specifies a non-outer-deep join. The relations in the sublist must first
+ be joined to each other much as if the sublist were a top-level
+ JOIN_ORDER list, and the resulting join product must
+ then appear on the inner side of a join at the appropriate point in the
+ join order. For example, JOIN_ORDER(a (b c) d) requires
+ a plan of this form:
+
+
+
+Join
+ -> Join
+ -> Scan on a
+ -> Join
+ -> Scan on b
+ -> Scan on c
+ -> Scan on d
+
+
+
+ If a JOIN_ORDER list contains a sublist surrounded by
+ curly braces, this also specifies a non-outer-deep join. However, the join
+ order within the sublist is not constrained. For example, specifying
+ JOIN_ORDER(a {b c} d) would allow the scans of
+ b and c to be swapped in the
+ previous example, which is not allowed when parentheses are used.
+
+
+
+ Parenthesized sublists can be arbitrarily nested, but sublists surrounded
+ by curly braces cannot themselves contain sublists.
+
+
+
+ Multiple instances of JOIN_ORDER() can sometimes be
+ needed in order to fully constrain the join order. This occurs when there
+ are multiple join problems that are optimized separately by the planner.
+ This can happen due to the presence of subqueries, or because there is a
+ partitionwise join. In the latter case, each branch of the partitionwise
+ join can have its own join order, independent of every other branch.
+
+
+
+
+
+ Join Method Advice
+
+join_method_name(join_method_item [ ... ])
+
+where join_method_name is:
+
+{ MERGE_JOIN_MATERIALIZE | MERGE_JOIN_PLAIN | NESTED_LOOP_MATERIALIZE | NESTED_LOOP_PLAIN | HASH_JOIN }
+
+and join_method_item is:
+
+{ advice_target |
+( advice_target [ ... ] ) }
+
+
+ Join method advice specifies the relation, or set of relations, that should
+ appear on the inner side of a join using the named join method. For
+ example, HASH_JOIN(a b) means that each of
+ a and b should appear on the inner
+ side of a hash join; a conforming plan must contain at least two hash
+ joins, one of which has a and nothing else on the
+ inner side, and the other of which has b and nothing
+ else on the inner side. On the other hand,
+ HASH_JOIN((a b)) means that the join product of
+ a and b should appear together
+ on the inner side of a single hash join.
+
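+
+ For example, HASH_JOIN((a b)) is satisfied by a plan
+ of this shape, where c is some third relation and
+ the join method between a and
+ b is left unconstrained:
+
+
+Hash Join
+ -> Scan on c
+ -> Hash
+ -> Join
+ -> Scan on a
+ -> Scan on b
+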
+
+
+ Note that join method advice implies a negative join order constraint.
+ Since the named relation or relations must be on the inner side of a join
+ using the specified method, none of them can be the driving table for the
+ entire join problem. Moreover, no relation inside the set should be joined
+ to any relation outside the set until all relations within the set have
+ been joined to each other. For example, if the advice specifies
+ HASH_JOIN((a b)) and the system begins by joining either
+ of those to some third relation c, the resulting
+ plan could never be compliant with the request to put exactly those two
+ relations on the inner side of a hash join. When using both join order
+ advice and join method advice for the same query, it is a good idea to make
+ sure that they do not mandate incompatible join orders.
+
+
+
+
+
+ Partitionwise Advice
+
+PARTITIONWISE(partitionwise_item [ ... ])
+
+where partitionwise_item is:
+
+{ advice_target |
+( advice_target [ ... ] ) }
+
+
+ When applied to a single target, PARTITIONWISE
+ specifies that the specified table should not be part of any partitionwise
+ join. When applied to a list of targets, PARTITIONWISE
+ specifies that exactly that set of relations should be joined in
+ partitionwise fashion. Note that, regardless of what advice is specified,
+ no partitionwise joins will be possible if
+ enable_partitionwise_join = off.
+
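+
+ For example, with hypothetical partitioned tables a,
+ b, and c, the following asks that
+ a and b be joined partitionwise
+ and that c stay out of any partitionwise join:
+
+
+SET enable_partitionwise_join = on;
+SET pg_plan_advice.advice = 'PARTITIONWISE((a b) c)';
+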
+
+
+
+
+ Semijoin Uniqueness Advice
+
+SEMIJOIN_UNIQUE(sj_unique_item [ ... ])
+SEMIJOIN_NON_UNIQUE(sj_unique_item [ ... ])
+
+where sj_unique_item is:
+
+{ advice_target |
+( advice_target [ ... ] ) }
+
+
+ The planner sometimes has a choice between implementing a semijoin
+ directly and implementing a semijoin by making the nullable side unique
+ and then performing an inner join. SEMIJOIN_UNIQUE
+ specifies the latter strategy, while SEMIJOIN_NON_UNIQUE
+ specifies the former strategy. In either case, the argument is the single
+ relation or list of relations that appear beneath the nullable side of the
+ join.
+
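+
+ For example, for a query of the form (names hypothetical):
+
+
+SELECT * FROM a WHERE a.x IN (SELECT b.y FROM b);
+
+
+ SEMIJOIN_UNIQUE(b) requests that b
+ be made unique and then inner-joined to a, while
+ SEMIJOIN_NON_UNIQUE(b) requests that the semijoin be
+ implemented directly.
+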
+
+
+
+
+ Parallel Query Advice
+
+GATHER(gather_item [ ... ])
+GATHER_MERGE(gather_item [ ... ])
+NO_GATHER(advice_target [ ... ])
+
+where gather_item is:
+
+{ advice_target |
+( advice_target [ ... ] ) }
+
+
+ GATHER or GATHER_MERGE specifies
+ that Gather or Gather Merge,
+ respectively, should be placed on top of the single relation specified as
+ a target, or on top of the join between the list of relations specified as
+ a target. This means that GATHER(a b c) is a request
+ for three different Gather nodes, while
+ GATHER((a b c)) is a request for a single
+ Gather node on top of a 3-way join.
+
+
+
+ NO_GATHER specifies that no Gather or
+ Gather Merge node should appear above any of the
+ targets, but it only constrains the planning of an individual subquery,
+ and outer subquery levels can still use parallel query. For example,
+ NO_GATHER(inner_example@any_1) precludes using a
+ Parallel Seq Scan to access the
+ inner_example table within the any_1
+ subquery, but it does not prevent the planner from placing
+ SubPlan any_1 beneath a Gather
+ or Gather Merge node. The following plan is
+ compatible with NO_GATHER(inner_example@any_1), but
+ not with NO_GATHER(outer_example):
+
+
+
+ Finalize Aggregate
+ -> Gather
+ -> Partial Aggregate
+ -> Parallel Seq Scan on outer_example
+ Filter: (something = (hashed SubPlan any_1).col1)
+ SubPlan any_1
+ -> Seq Scan on inner_example
+ Filter: (something_else > 100)
+
+
+
+ Here is the reverse case, that is, a plan compatible with
+ NO_GATHER(outer_example) but not with
+ NO_GATHER(inner_example@any_1):
+
+
+
+ Aggregate
+ -> Seq Scan on outer_example
+ Filter: (something = (hashed SubPlan any_1).col1)
+ SubPlan any_1
+ -> Gather
+ -> Parallel Seq Scan on inner_example
+ Filter: (something_else > 100)
+
+
+
+
+
+
+ Advice Feedback
+
+
+ EXPLAIN provides feedback on whether supplied advice was
+ successfully applied to the query in the form of a comment on each piece
+ of supplied advice. For example:
+
+
+
+SET pg_plan_advice.advice = 'hash_join(f g) join_order(f g) index_scan(f no_such_index)';
+SET
+rhaas=# EXPLAIN (COSTS OFF) SELECT * FROM jo_fact f
+ LEFT JOIN jo_dim1 d1 ON f.dim1_id = d1.id
+ LEFT JOIN jo_dim2 d2 ON f.dim2_id = d2.id
+ WHERE val1 = 1 AND val2 = 1;
+ QUERY PLAN
+-------------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((d1.id = f.dim1_id) AND (d2.id = f.dim2_id))
+ -> Nested Loop
+ -> Seq Scan on jo_dim2 d2
+ Filter: (val2 = 1)
+ -> Materialize
+ -> Seq Scan on jo_dim1 d1
+ Filter: (val1 = 1)
+ -> Hash
+ -> Seq Scan on jo_fact f
+ Supplied Plan Advice:
+ INDEX_SCAN(f no_such_index) /* matched, inapplicable, failed */
+ HASH_JOIN(f) /* matched */
+ HASH_JOIN(g) /* not matched */
+ JOIN_ORDER(f g) /* partially matched */
+
+
+
+ For this query, f is a valid advice target, but
+ g is not. Therefore, the request to place
+ f on the inner side of a hash join is listed as
+ matched, but the request to place g
+ on the inner side of a hash join is listed as
+ not matched. The JOIN_ORDER advice
+ tag involves one valid target and one invalid target, and so is listed as
+ partially matched. Note that
+ HASH_JOIN(f g) is actually a request for two logically
+ separate behaviors, whereas JOIN_ORDER(f g) is a single
+ request. When providing advice feedback, EXPLAIN shows
+ each logical request separately, together with all the feedback applicable
+ to that request type.
+
+
+
+ Advice feedback can include any of the following:
+
+
+
+
+
+
+ matched means that all of the specified advice targets
+ were observed together during query planning, at a time at which the
+ advice could be enforced.
+
+
+
+
+
+ partially matched means that some but not all of the
+ specified advice targets were observed during query planning, or all
+ of the advice targets were observed but not together. For example, this
+ may happen if all the targets of JOIN_ORDER advice
+ individually match the query, but the proposed join order is not legal.
+
+
+
+
+
+ not matched means that none of the
+ specified advice targets were observed during query planning. This may
+ happen if the advice simply doesn't match the query, or it may
+ occur if the relevant portion of the query was not planned, perhaps
+ because it was gated by a condition that was simplified to constant false.
+
+
+
+
+
+ inapplicable means that the advice tag could not
+ be applied to the advice targets for some reason. For example, this will
+ happen if the use of a nonexistent index is requested, or if an attempt
+ is made to control semijoin uniqueness for a non-semijoin.
+
+
+
+
+
+ conflicting means that two or more pieces of advice
+ request incompatible behaviors. For example, if you advise a sequential
+ scan and an index scan for the same table, both requests will be flagged
+ as conflicting. This also commonly happens if join method advice or
+ semijoin uniqueness advice implies a join order incompatible with the
+ one explicitly specified; see
+ .
+
+
+
+
+
+ failed means that the query plan does not comply with
+ the advice. This only occurs for entries that are also shown as
+ matched. It frequently occurs for entries that are
+ also marked as conflicting or
+ inapplicable. However, it can also occur when the
+ advice is valid insofar as pg_plan_advice is able
+ to determine, but the planner is not able to construct a legal
+ plan that can comply with the advice. It is important to note that the
+ sanity checks performed by pg_plan_advice are fairly
+ superficial and focused mostly on looking for logical inconsistencies in
+ the advice string; only the planner knows what will actually work.
+
+
+
+
+
+
+ All advice should be marked as exactly one of matched,
+ partially matched, or not matched.
+
+
+
+
+
+ Advice Collectors
+
+
+ pg_plan_advice can be configured to automatically
+ generate advice every time a query is planned and store the query and
+ the generated advice string either in local or shared memory.
+
+
+
+ To enable a collector, you must first set a collection limit. When the
+ number of queries for which advice has been stored exceeds the collection
+ limit, the oldest queries and the corresponding advice will be discarded.
+ Then, you must adjust a separate setting to actually enable advice
+ collection. For the local collector, set the collection limit by configuring
+ pg_plan_advice.local_collection_limit to a value
+ greater than zero, and then enable advice collection by setting
+ pg_plan_advice.local_collector = true. For the shared
+ collector, the procedure is the same, except that the names of the settings
+ are pg_plan_advice.shared_collection_limit and
+ pg_plan_advice.shared_collector. Note that the local
+ collector stores query texts and advice strings in backend-local memory,
+ and the shared collector does the same in dynamic shared memory, so
+ configuring large limits may result in considerable memory consumption.
+
+
+
+ Once the collector is enabled, you can run any queries for which you wish
+ to see the generated plan advice. Then, you can examine what has been
+ collected using whichever of
+ SELECT * FROM pg_get_collected_local_advice() or
+ SELECT * FROM pg_get_collected_shared_advice()
+ corresponds to the collector you enabled. To discard the collected advice
+ and release memory, you can call
+ pg_clear_collected_local_advice()
+ or pg_clear_collected_shared_advice().
+
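+
+ A minimal session using the local collector might look like this:
+
+
+SET pg_plan_advice.local_collection_limit = 100;
+SET pg_plan_advice.local_collector = true;
+-- ... run the queries of interest ...
+SELECT query, advice FROM pg_get_collected_local_advice();
+-- discard the collected entries and release the memory:
+SELECT pg_clear_collected_local_advice();
+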
+
+
+ In addition to the query texts and advice strings, the advice collectors
+ will also store the OID of the role that caused the query to be planned,
+ the OID of the database in which the query was planned, the query ID,
+ and the time at which the collection occurred. This module does not
+ automatically enable query ID computation; therefore, if you want the
+ query ID value to be populated in collected advice, be sure to configure
+ compute_query_id = on. Otherwise, the query ID may
+ show as 0.
+
+
+
+
+
+ Functions
+
+
+ Note that these functions are only available if the
+ pg_plan_advice extension has been installed in the
+ current database. Installing the extension is optional, since much of
+ this module's functionality can be used without it.
+
+
+
+
+
+
+ pg_clear_collected_local_advice() returns void
+
+ pg_clear_collected_local_advice
+
+
+
+
+
+ Removes all collected query texts and advice strings from backend-local
+ memory.
+
+
+
+
+
+
+ pg_get_collected_local_advice() returns setof (id bigint,
+ userid oid, dbid oid, queryid bigint, collection_time timestamptz,
+ query text, advice text)
+
+ pg_get_collected_local_advice
+
+
+
+
+
+ Returns all query texts and advice strings stored in the local
+ advice collector.
+
+
+
+
+
+
+ pg_clear_collected_shared_advice() returns void
+
+ pg_clear_collected_shared_advice
+
+
+
+
+
+ Removes all collected query texts and advice strings from shared
+ memory.
+
+
+
+
+
+
+ pg_get_collected_shared_advice() returns setof (id bigint,
+ userid oid, dbid oid, queryid bigint, collection_time timestamptz,
+ query text, advice text)
+
+ pg_get_collected_shared_advice
+
+
+
+
+
+ Returns all query texts and advice strings stored in the shared
+ advice collector.
+
+
+
+
+
+
+
+
+
+ Configuration Parameters
+
+
+
+
+
+ pg_plan_advice.advice (string)
+
+ pg_plan_advice.advice configuration parameter
+
+
+
+
+
+ pg_plan_advice.advice is an advice string to be
+ used during query planning.
+
+
+
+
+
+
+ pg_plan_advice.always_explain_supplied_advice (boolean)
+
+ pg_plan_advice.always_explain_supplied_advice configuration parameter
+
+
+
+
+
+ pg_plan_advice.always_explain_supplied_advice causes
+ EXPLAIN to always show any supplied advice and the
+ associated
+ advice feedback.
+ The default value is true. If set to
+ false, this information will be displayed only when
+ EXPLAIN (PLAN_ADVICE) is used.
+
+
+
+
+
+
+ pg_plan_advice.always_store_advice_details (boolean)
+
+ pg_plan_advice.always_store_advice_details configuration parameter
+
+
+
+
+
+ pg_plan_advice.always_store_advice_details allows
+ EXPLAIN to show details related to plan advice even
+ when prepared queries are used. The default value is
+ false. When planning a prepared query, it is not
+ possible to know whether EXPLAIN will later be used,
+ so by default, to reduce overhead, pg_plan_advice
+ will not generate plan advice or feedback on supplied advice. This means
+ that if EXPLAIN EXECUTE is used on the prepared query,
+ it will not be able to show this information. Changing this setting to
+ true avoids this problem, but adds additional
+ overhead. It is probably a good idea to enable this option only in
+ sessions where it is needed, rather than on a system-wide basis.
+
+
+
+
+
+
+ pg_plan_advice.feedback_warnings (boolean)
+
+ pg_plan_advice.feedback_warnings configuration parameter
+
+
+
+
+
+ When set to true, pg_plan_advice.feedback_warnings
+ emits a warning whenever supplied plan advice is not successfully
+ enforced. The default value is false.
+
+
+
+
+
+
+ pg_plan_advice.local_collector (boolean)
+
+ pg_plan_advice.local_collector configuration parameter
+
+
+
+
+
+ pg_plan_advice.local_collector enables the
+ local advice collector.
+ The default value is false.
+
+
+
+
+
+
+ pg_plan_advice.local_collection_limit (integer)
+
+ pg_plan_advice.local_collection_limit configuration parameter
+
+
+
+
+
+ pg_plan_advice.local_collection_limit sets the
+ maximum number of query texts and advice strings retained by the
+ local advice collector.
+ The default value is 0.
+
+
+
+
+
+
+ pg_plan_advice.shared_collector (boolean)
+
+ pg_plan_advice.shared_collector configuration parameter
+
+
+
+
+
+ pg_plan_advice.shared_collector enables the
+ shared advice collector.
+ The default value is false. Only superusers and users
+ with the appropriate SET privilege can change this
+ setting.
+
+
+
+
+
+
+ pg_plan_advice.shared_collection_limit (integer)
+
+ pg_plan_advice.shared_collection_limit configuration parameter
+
+
+
+
+
+ pg_plan_advice.shared_collection_limit sets the
+ maximum number of query texts and advice strings retained by the
+ shared advice collector.
+ The default value is 0. Only superusers and users
+ with the appropriate SET privilege can change this
+ setting.
+
+
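+
+     A sketch of enabling shared collection, assuming a sufficiently
+     privileged role:
+
+SET pg_plan_advice.shared_collector = on;
+SET pg_plan_advice.shared_collection_limit = 100;
+SELECT query, advice FROM pg_get_collected_shared_advice();
+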
+
+
+
+
+ pg_plan_advice.trace_mask (boolean)
+
+ pg_plan_advice.trace_mask configuration parameter
+
+
+
+
+
+ When pg_plan_advice.trace_mask is
+ true, pg_plan_advice will print
+ messages during query planning each time that
+ pg_plan_advice alters the mask of allowable query
+     plan types in response to supplied plan advice. The default value is
+     false. The messages printed by this setting are not
+     expected to be useful except for purposes of debugging this module.
+
+
+
+
+
+
+
+
+
+ Limitations
+
+
+ It is currently not possible to control any aspect of the planner's behavior
+ with respect to aggregation. This includes both whether aggregates are
+ computed by sorting or hashing, and also whether strategies such as
+ eager aggregation or
+ partitionwise
+ aggregation are used.
+
+
+
+   It is also currently not possible to control any aspect of the planner's
+ behavior with respect to set operations such as UNION
+ or INTERSECT.
+
+
+
+   As discussed above under How
+   It Works, the use of plan advice can only affect which plan
+ the planner chooses from among those it believes to be viable. It can never
+ force the choice of a plan which the planner refused to consider in the
+ first place.
+
+
+
+
+ Author
+
+
+ Robert Haas rhaas@postgresql.org
+
+
+
+
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 9b032fbf675..fcf10e4317e 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -82,7 +82,7 @@
Note that postgres_fdw currently lacks support for
INSERT statements with an ON CONFLICT DO
- UPDATE clause. However, the ON CONFLICT DO NOTHING
+ SELECT/UPDATE clause. However, the ON CONFLICT DO NOTHING
clause is supported, provided a unique index inference specification
is omitted.
Note also that postgres_fdw supports row movement
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index a2b528c481e..89ac680efd5 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -223,10 +223,12 @@
shows the currently supported
protocol versions.
+
+   The table below documents protocol versions that are unsupported or
+   otherwise reserved.
+ Other Protocol Versions
+
+
+
+
+ Version
+ Supported by
+ Description
+
+
+
+
+
+ 3.9999
+ -
+ Reserved for protocol greasing. libpq may use this version, which
+ is higher than any minor version the project ever expects to use, to
+ test that servers and middleware properly implement protocol version
+ negotiation. Servers must not add special-case
+ logic for this version; they should simply compare it to their latest
+ supported version (which will always be smaller) and downgrade via a
+ NegotiateProtocolVersion message.
+
+
+      3.1
+      -
+      Reserved. Version 3.1 has not been used by any PostgreSQL
@@ -257,15 +292,89 @@
- 3.0
- PostgreSQL 7.4 and later
-
-      2.0
-      up to PostgreSQL 13
- See previous releases of
+ Obsolete. See previous releases of
the PostgreSQL documentation for
- details
+ details.
+
+
+
+
+
+
+
+ Protocol Extensions
+
+
+ Servers and clients may additionally negotiate individual extensions to the
+ protocol version in use. These are offered by the client in the startup
+   message, as specially named parameters with a _pq_.
+ prefix. Servers reject any unknown or unsupported extensions by sending a
+ NegotiateProtocolVersion message containing the list of rejected parameter
+ names, at which point the client may choose whether to continue with the
+   connection. The two tables below document the supported
+   and reserved protocol extension parameters, respectively.
+
+
+
+ Supported Protocol Extensions
+
+
+
+
+
+
+ Parameter Name
+ Values
+ Supported by
+ Description
+
+
+
+
+
+
+ (No supported protocol extensions are currently defined.)
+
+
+
+
+
+
+
+ Reserved Protocol Extensions
+
+
+
+
+ Parameter Name
+ Description
+
+
+
+
+
+ _pq_.[name]
+       Any other parameter names beginning with _pq_.
+       that are not defined above are reserved for future protocol expansion.
+ Servers must reject any that are received from a
+ client, by sending a NegotiateProtocolVersion message during the
+ startup flow, and should
+ otherwise continue the connection.
+
+
+
+
+ _pq_.test_protocol_negotiation
+ Reserved for protocol greasing. libpq may send this extension to
+ test that servers and middleware properly implement protocol extension
+ negotiation. Servers must not add special-case
+ logic for this parameter; they should simply send the list of all
+ unsupported options (including this one) via a NegotiateProtocolVersion
+ message.
+
@@ -295,8 +404,8 @@
To begin a session, a frontend opens a connection to the server and sends
a startup message. This message includes the names of the user and of the
database the user wants to connect to; it also identifies the particular
- protocol version to be used. (Optionally, the startup message can include
- additional settings for run-time parameters.)
+ protocol version to be used. (Optionally, the startup message can request
+ protocol extensions and include additional settings for run-time parameters.)
The server then uses this information and
the contents of its configuration files (such as
pg_hba.conf) to determine
@@ -6151,7 +6260,9 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;"
In addition to the above, other parameters may be listed.
Parameter names beginning with _pq_. are
- reserved for use as protocol extensions, while others are
+ reserved for use as
+ protocol extensions,
+ while others are
treated as run-time parameters to be set at backend start
time. Such settings will be applied during backend start
(after parsing the command-line arguments if any) and will
diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml
index 27c06439f4f..5318998e80c 100644
--- a/doc/src/sgml/ref/alter_subscription.sgml
+++ b/doc/src/sgml/ref/alter_subscription.sgml
@@ -280,8 +280,9 @@ ALTER SUBSCRIPTION name RENAME TO <
origin,
failover,
two_phase,
- retain_dead_tuples, and
- max_retention_duration.
+ retain_dead_tuples,
+ max_retention_duration, and
+ wal_receiver_timeout.
Only a superuser can set password_required = false.
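+
+   For example (a sketch with a hypothetical subscription name):
+
+ALTER SUBSCRIPTION mysub SET (wal_receiver_timeout = '30s');
+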
diff --git a/doc/src/sgml/ref/create_policy.sgml b/doc/src/sgml/ref/create_policy.sgml
index 42d43ad7bf4..d8a036739c0 100644
--- a/doc/src/sgml/ref/create_policy.sgml
+++ b/doc/src/sgml/ref/create_policy.sgml
@@ -294,7 +294,9 @@ CREATE POLICY name ON
If an INSERT has an ON CONFLICT DO
- NOTHING/UPDATE clause, SELECT
+ SELECT/UPDATE clause, or an ON CONFLICT DO
+ NOTHING clause with an arbiter index or constraint
+ specification, then SELECT
permissions are required on the relation, and the rows proposed for
insertion are checked using the relation's SELECT
policies. If a row proposed for insertion does not satisfy the
@@ -336,8 +338,8 @@ CREATE POLICY name ON
- Note that an INSERT with an ON CONFLICT
- DO NOTHING/UPDATE clause will check the
+ Note that an INSERT with an
+ ON CONFLICT clause will check the
INSERT policies' WITH CHECK
expressions for all rows proposed for insertion, regardless of
whether or not they end up being inserted.
@@ -350,9 +352,10 @@ CREATE POLICY name ON
Using UPDATE for a policy means that it will apply
- to UPDATE, SELECT FOR UPDATE,
- and SELECT FOR SHARE commands, as well as
- auxiliary ON CONFLICT DO UPDATE clauses of
+ to UPDATE and
+ SELECT FOR UPDATE/SHARE commands, as well as
+ auxiliary ON CONFLICT DO UPDATE and
+ ON CONFLICT DO SELECT FOR UPDATE/SHARE clauses of
INSERT commands, and MERGE
commands containing UPDATE actions.
Since an UPDATE command
@@ -540,7 +543,11 @@ CREATE POLICY name ON INSERT ... ON CONFLICT
- Check new row
+ Check new row
+
+ If an arbiter index or constraint is specified.
+
+
Row proposed for insertion is checked regardless of whether or not a
conflict occurs.
@@ -572,6 +579,22 @@ CREATE POLICY name ON —
+
+ ON CONFLICT DO SELECT
+ Check existing row
+ —
+ —
+ —
+ —
+
+
+ ON CONFLICT DO SELECT FOR UPDATE/SHARE
+ Check existing row
+ —
+ Check existing row
+ —
+ —
+      MERGE
+      Filter source & target rows
diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml
index b7dd361294b..eb0cc645d8f 100644
--- a/doc/src/sgml/ref/create_subscription.sgml
+++ b/doc/src/sgml/ref/create_subscription.sgml
@@ -574,8 +574,21 @@ CREATE SUBSCRIPTION subscription_name
-
+
+ wal_receiver_timeout (text)
+
+
+          The value of this parameter overrides the
+          wal_receiver_timeout setting within this
+          subscription's apply worker processes. The default value is
+ -1, which means it does not override the global setting,
+ i.e., the value from the server configuration, command line, role or
+ database settings will be used instead.
+
+
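+
+        For example (a sketch; the connection string, subscription, and
+        publication names are hypothetical):
+
+CREATE SUBSCRIPTION mysub
+    CONNECTION 'host=primary dbname=postgres'
+    PUBLICATION mypub
+    WITH (wal_receiver_timeout = '1min');
+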
+
+
diff --git a/doc/src/sgml/ref/create_view.sgml b/doc/src/sgml/ref/create_view.sgml
index f8a4740608a..60215eba3b8 100644
--- a/doc/src/sgml/ref/create_view.sgml
+++ b/doc/src/sgml/ref/create_view.sgml
@@ -415,7 +415,7 @@ CREATE VIEW vista AS SELECT text 'Hello World' AS hello;
DELETE, or MERGE statement
on the view into the corresponding statement on the underlying base
relation. INSERT statements that have an ON
- CONFLICT DO UPDATE clause are fully supported.
+ CONFLICT clause are fully supported.
@@ -430,7 +430,7 @@ CREATE VIEW vista AS SELECT text 'Hello World' AS hello;
an INSERT or MERGE command can
potentially insert base-relation rows
that do not satisfy the WHERE condition and thus are not
- visible through the view (ON CONFLICT DO UPDATE may
+ visible through the view (ON CONFLICT DO SELECT/UPDATE may
similarly affect an existing row not visible through the view).
The CHECK OPTION may be used to prevent
INSERT, UPDATE, and
diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml
index 04962e39e12..121a9edcb99 100644
--- a/doc/src/sgml/ref/insert.sgml
+++ b/doc/src/sgml/ref/insert.sgml
@@ -37,6 +37,7 @@ INSERT INTO table_name [ AS and conflict_action is one of:
DO NOTHING
+ DO SELECT [ FOR { UPDATE | NO KEY UPDATE | SHARE | KEY SHARE } ] [ WHERE condition ]
DO UPDATE SET { column_name = { expression | DEFAULT } |
( column_name [, ...] ) = [ ROW ] ( { expression | DEFAULT } [, ...] ) |
( column_name [, ...] ) = ( sub-SELECT )
@@ -89,24 +90,27 @@ INSERT INTO table_name [ AS
The optional RETURNING clause causes INSERT
to compute and return value(s) based on each row actually inserted
- (or updated, if an ON CONFLICT DO UPDATE clause was
- used). This is primarily useful for obtaining values that were
+ (or selected or updated, if an ON CONFLICT DO SELECT/UPDATE
+ clause was used). This is primarily useful for obtaining values that were
supplied by defaults, such as a serial sequence number. However,
any expression using the table's columns is allowed. The syntax of
the RETURNING list is identical to that of the output
list of SELECT. Only rows that were successfully
- inserted or updated will be returned. For example, if a row was
- locked but not updated because an ON CONFLICT DO UPDATE
- ... WHERE clause condition was not satisfied, the
- row will not be returned.
+ inserted, updated, or selected will be returned. For example, if a row was
+ locked but not updated or selected because an ON CONFLICT ...
+ WHERE clause condition
+ was not satisfied, the row will not be returned.
You must have INSERT privilege on a table in
order to insert into it. If ON CONFLICT DO UPDATE is
present, UPDATE privilege on the table is also
- required.
+ required. If ON CONFLICT DO SELECT is present,
+ SELECT privilege on the table is required.
+ If ON CONFLICT DO SELECT FOR UPDATE/SHARE is used,
+ UPDATE privilege is required on at least one
+ column, in addition to SELECT privilege.
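+
+   For example (a sketch; the role name is hypothetical and the
+   distributors table is the one used in the examples
+   below):
+
+GRANT SELECT, INSERT ON distributors TO app_user;
+-- app_user may now use INSERT ... ON CONFLICT (did) DO SELECT;
+-- ON CONFLICT (did) DO SELECT FOR UPDATE would additionally require
+-- UPDATE privilege on at least one column.
+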
@@ -114,10 +118,13 @@ INSERT INTO table_name [ AS INSERT privilege on the listed columns.
Similarly, when ON CONFLICT DO UPDATE is specified, you
only need UPDATE privilege on the column(s) that are
- listed to be updated. However, ON CONFLICT DO UPDATE
- also requires SELECT privilege on any column whose
- values are read in the ON CONFLICT DO UPDATE
- expressions or condition.
+ listed to be updated. However, all forms of ON CONFLICT
+ also require SELECT privilege on any column whose values
+ are read. This includes any column mentioned in
+ conflict_target (including columns referred to
+ by the arbiter constraint), and any column mentioned in an
+   ON CONFLICT DO UPDATE expression
+   or a WHERE clause condition.
@@ -340,8 +347,11 @@ INSERT INTO table_name [ AS
For a simple INSERT, all old values will be
NULL. However, for an INSERT
- with an ON CONFLICT DO UPDATE clause, the old
- values may be non-NULL.
+ with an ON CONFLICT DO SELECT/UPDATE clause, the
+ old values may be non-NULL (when the row proposed
+ for insertion conflicts with an existing row). If the
+ SELECT path is taken, the new values will be
+ identical to the old values, since no modification takes place.
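+
+   For example (a sketch reusing the distributors table
+   from the examples below):
+
+INSERT INTO distributors AS d (did, dname) VALUES (7, 'Redline GmbH')
+    ON CONFLICT (did) DO SELECT
+    RETURNING old.dname, new.dname;   -- identical if the SELECT path is taken
+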
@@ -377,6 +387,9 @@ INSERT INTO table_name [ AS ON CONFLICT DO
UPDATE updates the existing row that conflicts with the
row proposed for insertion as its alternative action.
+ ON CONFLICT DO SELECT returns the existing row
+ that conflicts with the row proposed for insertion, optionally
+ with row-level locking.
@@ -408,6 +421,15 @@ INSERT INTO table_name [ AS .
+
+ ON CONFLICT DO SELECT similarly allows an atomic
+ INSERT or SELECT outcome. This
+ is also known as idempotent insert or
+ get or create. For ON CONFLICT DO
+ SELECT, a RETURNING clause
+ must be provided.
+
+
conflict_target
@@ -421,7 +443,8 @@ INSERT INTO table_name [ AS conflict_target; when
omitted, conflicts with all usable constraints (and unique
indexes) are handled. For ON CONFLICT DO
- UPDATE, a conflict_target
+ UPDATE and ON CONFLICT DO SELECT,
+      a conflict_target must be provided.
@@ -431,19 +454,23 @@ INSERT INTO table_name [ AS conflict_action
- conflict_action specifies an
- alternative ON CONFLICT action. It can be
- either DO NOTHING, or a DO
- UPDATE clause specifying the exact details of the
- UPDATE action to be performed in case of a
- conflict. The SET and
- WHERE clauses in ON CONFLICT DO
- UPDATE have access to the existing row using the
- table's name (or an alias), and to the row proposed for insertion
- using the special excluded table.
- SELECT privilege is required on any column in the
- target table where corresponding excluded
- columns are read.
+ conflict_action specifies an alternative
+ ON CONFLICT action. It can be
+ DO NOTHING, a DO SELECT
+ clause that allows conflicting rows to be returned, or a
+ DO UPDATE clause specifying the exact details
+ of the UPDATE action to be performed in case
+ of a conflict.
+
+
+ The SET clause in DO UPDATE
+ and the WHERE clause in both
+ DO SELECT and DO UPDATE have
+ access to the existing row using the table's name (or an alias),
+ and to the row proposed for insertion using the special
+ excluded table. SELECT
+ privilege is required on any column in the target table where
+ corresponding excluded columns are read.
Note that the effects of all per-row BEFORE
@@ -542,24 +569,41 @@ INSERT INTO table_name [ AS
+
+ FOR UPDATE
+ FOR NO KEY UPDATE
+ FOR SHARE
+ FOR KEY SHARE
+
+
+ When specified in an ON CONFLICT DO SELECT clause,
+ conflicting table rows are locked against concurrent updates.
+ See in the
+ documentation.
+
+
+
+
condition
An expression that returns a value of type
boolean. Only rows for which this expression
- returns true will be updated, although all
- rows will be locked when the ON CONFLICT DO UPDATE
- action is taken. Note that
- condition is evaluated last, after
- a conflict has been identified as a candidate to update.
+ returns true will be updated or selected for
+ return, although all conflicting rows will be locked when
+ ON CONFLICT DO UPDATE or
+ ON CONFLICT DO SELECT FOR UPDATE/SHARE is
+ specified. Note that condition is
+ evaluated last, after a conflict has been identified as a candidate
+ to update or select.
Note that exclusion constraints are not supported as arbiters with
- ON CONFLICT DO UPDATE. In all cases, only
+ ON CONFLICT DO SELECT/UPDATE. In all cases, only
NOT DEFERRABLE constraints and unique indexes
are supported as arbiters.
@@ -607,7 +651,7 @@ INSERT INTO table_name [ AS oidcount
The count is the number of
- rows inserted or updated. oid is always 0 (it
+ rows inserted, updated, or selected for return. oid is always 0 (it
used to be the OID assigned to the inserted row if
count was exactly one and the target table was
declared WITH OIDS and 0 otherwise, but creating a table
@@ -618,8 +662,7 @@ INSERT oidcountINSERT command contains a RETURNING
clause, the result will be similar to that of a SELECT
statement containing the columns and values defined in the
- RETURNING list, computed over the row(s) inserted or
- updated by the command.
+ RETURNING list, computed over the row(s) affected by the command.
@@ -793,6 +836,35 @@ INSERT INTO distributors AS d (did, dname) VALUES (8, 'Anvil Distribution')
-- index to arbitrate taking the DO NOTHING action)
INSERT INTO distributors (did, dname) VALUES (9, 'Antwerp Design')
ON CONFLICT ON CONSTRAINT distributors_pkey DO NOTHING;
+
+
+
+ Insert new distributor if possible, otherwise return the existing
+ distributor row. Example assumes a unique index has been defined
+ that constrains values appearing in the did column.
+ This is useful for get-or-create patterns:
+
+INSERT INTO distributors (did, dname) VALUES (11, 'Global Electronics')
+ ON CONFLICT (did) DO SELECT
+ RETURNING *;
+
+
+
+   Insert a new distributor, or, if a row with the same ID already exists,
+   return the existing row, but only if its name differs from the proposed
+   name:
+
+INSERT INTO distributors AS d (did, dname) VALUES (12, 'Micro Devices Inc')
+ ON CONFLICT (did) DO SELECT WHERE d.dname != EXCLUDED.dname
+ RETURNING *;
+
+
+
+ Insert a new distributor or return and lock the existing row for update.
+ This is useful when you need to ensure exclusive access to the row:
+
+INSERT INTO distributors (did, dname) VALUES (13, 'Advanced Systems')
+ ON CONFLICT (did) DO SELECT FOR UPDATE
+ RETURNING *;
diff --git a/doc/src/sgml/ref/merge.sgml b/doc/src/sgml/ref/merge.sgml
index c2e181066a4..765fe7a7d62 100644
--- a/doc/src/sgml/ref/merge.sgml
+++ b/doc/src/sgml/ref/merge.sgml
@@ -714,7 +714,8 @@ MERGE total_count
on the behavior at each isolation level.
You may also wish to consider using INSERT ... ON CONFLICT
as an alternative statement which offers the ability to run an
- UPDATE if a concurrent INSERT
+ UPDATE or return the existing row (with
+ DO SELECT) if a concurrent INSERT
occurs. There are a variety of differences and restrictions between
the two statement types and they are not interchangeable.
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 688e23c0e90..7f538e90194 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -263,12 +263,10 @@ PostgreSQL documentation
- When is specified,
- pg_dump makes no attempt to dump any other
- database objects that the selected extension(s) might depend upon.
- Therefore, there is no guarantee that the results of a
- specific-extension dump can be successfully restored by themselves
- into a clean database.
+ pg_dump does not dump the extension's
+ underlying installation files (such as shared libraries or control
+ files). These must be available on the destination system for the
+ restore to succeed.
@@ -445,16 +443,6 @@ PostgreSQL documentation
below.
-
-
- When is specified, pg_dump
- makes no attempt to dump any other database objects that the selected
- schema(s) might depend upon. Therefore, there is no guarantee
- that the results of a specific-schema dump can be successfully
- restored by themselves into a clean database.
-
-
-
Non-schema objects such as large objects are not dumped when is
@@ -596,16 +584,6 @@ PostgreSQL documentation
be dumped.
-
-
- When is specified, pg_dump
- makes no attempt to dump any other database objects that the selected
- table(s) might depend upon. Therefore, there is no guarantee
- that the results of a specific-table dump can be successfully
- restored by themselves into a clean database.
-
-
-
@@ -1689,6 +1667,17 @@ CREATE DATABASE foo WITH TEMPLATE template0;
+
+ When options , or
+ are specified, pg_dump makes no attempt to dump
+ any other database objects that the selected object(s) might depend upon.
+ Therefore, there is no guarantee that the results of a dump so generated
+ can be successfully restored by themselves into a clean database.
+ For example, if a table whose definition includes a foreign key is
+   specified to be dumped, the table referenced by the foreign key is
+   not automatically dumped.
+
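+
+   For example (a sketch; the database and table names are hypothetical):
+
+$ pg_dump -t orders mydb > orders.sql
+
+   If orders carries a foreign key referencing
+   customers, the customers table is
+   not included in orders.sql, so restoring it into a
+   clean database will fail unless customers is created
+   first.
+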
+
When a dump without schema is chosen and the option
is used, pg_dump emits commands
diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml
index 2c295bbf8dc..420a308a7c7 100644
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -452,16 +452,6 @@ PostgreSQL documentation
specify table(s) in a particular schema.
-
-
- When is specified, pg_restore
- makes no attempt to restore any other database objects that the
- selected table(s) might depend upon. Therefore, there is no
- guarantee that a specific-table restore into a clean database will
- succeed.
-
-
-
This flag does not behave identically to the
@@ -1089,6 +1079,16 @@ PostgreSQL documentation
Notes
+
+ When options or are specified,
+ pg_restore makes no attempt to restore
+ any other database objects that the selected table(s) or schema(s)
+ might depend upon. Therefore, there is no guarantee that a specific-table
+ restore into a clean database will succeed. For example, if a table
+ whose definition includes a foreign key is specified to be restored, the
+ table referenced by the foreign key is not automatically restored.
+
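+
+   For example (a sketch; the archive, database, and table names are
+   hypothetical):
+
+$ pg_restore -t orders -d newdb mydb.dump
+
+   If orders references customers by
+   foreign key, customers must already exist in
+   newdb for the restore to succeed.
+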
+
If your installation has any local additions to the
template1 database, be careful to load the output of
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index e464e3b13de..8b1d948ba05 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -5075,6 +5075,23 @@ testdb=> INSERT INTO my_table VALUES (:'content');
+
+ %i
+
+
+ Indicates whether the connected server is running in hot standby mode.
+ The value is shown as standby, if the server is
+          The value is shown as standby if the server is
+          currently in hot standby and reports
+          in_hot_standby as on,
+ connecting to multiple servers to quickly determine the role of
+ each connection. A value of ? is shown
+ when connected to a server running
+ PostgreSQL 13 or older.
+
+
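+
+       For example, a sketch of a prompt setting that shows the server's
+       role in each prompt:
+
+\set PROMPT1 '%i %n@%/%R%# '
+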
+
+
%x
diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml
index 02ddfda834a..6b6377503bf 100644
--- a/doc/src/sgml/storage.sgml
+++ b/doc/src/sgml/storage.sgml
@@ -1068,7 +1068,7 @@ data. Empty in ordinary tables.
fixed width field, then all the bytes are simply placed. If it's a
variable length field (attlen = -1) then it's a bit more complicated.
All variable-length data types share the common header structure
- struct varlena, which includes the total length of the stored
+ varlena, which includes the total length of the stored
value and some flag bits. Depending on the flags, the data can be either
inline or in a TOAST table;
it might be compressed, too (see ).
diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml
index 8b4abef8c68..e5fe423fc61 100644
--- a/doc/src/sgml/system-views.sgml
+++ b/doc/src/sgml/system-views.sgml
@@ -5045,6 +5045,45 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
non-null elements. (Null for scalar types.)
+
+
+
+ range_length_histogramanyarray
+
+
+ A histogram of the lengths of non-empty and non-null range values of an
+ expression. (Null for non-range types.)
+
+
+ This histogram is calculated using the subtype_diff
+ range function regardless of whether range bounds are inclusive.
+
+
+
+
+
+ range_empty_fracfloat4
+
+
+ Fraction of expression entries whose values are empty ranges.
+ (Null for non-range types.)
+
+
+
+
+
+ range_bounds_histogramanyarray
+
+
+ A histogram of lower and upper bounds of non-empty and non-null range
+ values. (Null for non-range types.)
+
+
+ These two histograms are represented as a single array of ranges, whose
+ lower bounds represent the histogram of lower bounds, and upper bounds
+ represent the histogram of upper bounds.
+
+
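+
+     For example (a sketch; the table and range column names are
+     hypothetical):
+
+SELECT range_empty_frac, range_length_histogram, range_bounds_histogram
+  FROM pg_stats
+ WHERE tablename = 'reservations' AND attname = 'during';
+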
diff --git a/meson.build b/meson.build
index df907b62da3..055e96315d0 100644
--- a/meson.build
+++ b/meson.build
@@ -355,6 +355,8 @@ cp = find_program('cp', required: false, native: true)
xmllint_bin = find_program(get_option('XMLLINT'), native: true, required: false)
xsltproc_bin = find_program(get_option('XSLTPROC'), native: true, required: false)
nm = find_program('nm', required: false, native: true)
+ditaa = find_program('ditaa', native: true, required: false)
+dot = find_program('dot', native: true, required: false)
bison_flags = []
if bison.found()
@@ -2152,7 +2154,7 @@ common_warning_flags = [
'-Werror=unguarded-availability-new',
'-Wendif-labels',
'-Wmissing-format-attribute',
- '-Wimplicit-fallthrough=3',
+ '-Wimplicit-fallthrough=5',
'-Wcast-function-type',
'-Wshadow=compatible-local',
# This was included in -Wall/-Wformat in older GCC versions
@@ -2911,7 +2913,7 @@ gnugetopt_dep = cc.find_library('gnugetopt', required: false)
# (i.e., allow '-' as a flag character), so use our version on those platforms
# - We want to use system's getopt_long() only if the system provides struct
# option
-always_replace_getopt = host_system in ['windows', 'cygwin', 'openbsd', 'solaris']
+always_replace_getopt = host_system in ['windows', 'cygwin', 'openbsd', 'sunos']
always_replace_getopt_long = host_system in ['windows', 'cygwin'] or not cdata.has('HAVE_STRUCT_OPTION')
# Required on BSDs
diff --git a/src/backend/Makefile b/src/backend/Makefile
index baa9b05d021..05642dc02e3 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -136,9 +136,6 @@ parser/gram.h: parser/gram.y
storage/lmgr/lwlocknames.h: storage/lmgr/generate-lwlocknames.pl ../include/storage/lwlocklist.h utils/activity/wait_event_names.txt
$(MAKE) -C storage/lmgr lwlocknames.h
-utils/activity/wait_event_types.h: utils/activity/generate-wait_event_types.pl utils/activity/wait_event_names.txt
- $(MAKE) -C utils/activity wait_event_types.h pgstat_wait_event.c wait_event_funcs_data.c
-
# run this unconditionally to avoid needing to know its dependencies here:
submake-catalog-headers:
$(MAKE) -C ../include/catalog generated-headers
@@ -163,18 +160,13 @@ submake-utils-headers:
.PHONY: generated-headers
-generated-headers: $(top_builddir)/src/include/storage/lwlocknames.h $(top_builddir)/src/include/utils/wait_event_types.h submake-catalog-headers submake-nodes-headers submake-utils-headers parser/gram.h
+generated-headers: $(top_builddir)/src/include/storage/lwlocknames.h submake-catalog-headers submake-nodes-headers submake-utils-headers parser/gram.h
$(top_builddir)/src/include/storage/lwlocknames.h: storage/lmgr/lwlocknames.h
prereqdir=`cd '$(dir $<)' >/dev/null && pwd` && \
cd '$(dir $@)' && rm -f $(notdir $@) && \
$(LN_S) "$$prereqdir/$(notdir $<)" .
-$(top_builddir)/src/include/utils/wait_event_types.h: utils/activity/wait_event_types.h
- prereqdir=`cd '$(dir $<)' >/dev/null && pwd` && \
- cd '$(dir $@)' && rm -f $(notdir $@) && \
- $(LN_S) "$$prereqdir/$(notdir $<)" .
-
utils/probes.o: utils/probes.d $(SUBDIROBJS)
$(DTRACE) $(DTRACEFLAGS) -C -G -s $(call expand_subsys,$^) -o $@
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index 706387e36d6..69c233c62eb 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -206,7 +206,7 @@ brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
*/
if (VARATT_IS_EXTERNAL(DatumGetPointer(value)))
{
- value = PointerGetDatum(detoast_external_attr((struct varlena *)
+ value = PointerGetDatum(detoast_external_attr((varlena *)
DatumGetPointer(value)));
free_value = true;
}
diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c
index 7bef01bb5f3..a6c1f3a734b 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -22,12 +22,12 @@
#include "utils/expandeddatum.h"
#include "utils/rel.h"
-static struct varlena *toast_fetch_datum(struct varlena *attr);
-static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
- int32 sliceoffset,
- int32 slicelength);
-static struct varlena *toast_decompress_datum(struct varlena *attr);
-static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
+static varlena *toast_fetch_datum(varlena *attr);
+static varlena *toast_fetch_datum_slice(varlena *attr,
+ int32 sliceoffset,
+ int32 slicelength);
+static varlena *toast_decompress_datum(varlena *attr);
+static varlena *toast_decompress_datum_slice(varlena *attr, int32 slicelength);
/* ----------
* detoast_external_attr -
@@ -41,10 +41,10 @@ static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32
* EXTERNAL datum, the result will be a pfree'able chunk.
* ----------
*/
-struct varlena *
-detoast_external_attr(struct varlena *attr)
+varlena *
+detoast_external_attr(varlena *attr)
{
- struct varlena *result;
+ varlena *result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
@@ -58,10 +58,10 @@ detoast_external_attr(struct varlena *attr)
/*
* This is an indirect pointer --- dereference it
*/
- struct varatt_indirect redirect;
+ varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
- attr = (struct varlena *) redirect.pointer;
+ attr = (varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
@@ -74,7 +74,7 @@ detoast_external_attr(struct varlena *attr)
* Copy into the caller's memory context, in case caller tries to
* pfree the result.
*/
- result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+ result = (varlena *) palloc(VARSIZE_ANY(attr));
memcpy(result, attr, VARSIZE_ANY(attr));
}
else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
@@ -87,7 +87,7 @@ detoast_external_attr(struct varlena *attr)
eoh = DatumGetEOHP(PointerGetDatum(attr));
resultsize = EOH_get_flat_size(eoh);
- result = (struct varlena *) palloc(resultsize);
+ result = (varlena *) palloc(resultsize);
EOH_flatten_into(eoh, result, resultsize);
}
else
@@ -112,8 +112,8 @@ detoast_external_attr(struct varlena *attr)
* datum, the result will be a pfree'able chunk.
* ----------
*/
-struct varlena *
-detoast_attr(struct varlena *attr)
+varlena *
+detoast_attr(varlena *attr)
{
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
@@ -124,7 +124,7 @@ detoast_attr(struct varlena *attr)
/* If it's compressed, decompress it */
if (VARATT_IS_COMPRESSED(attr))
{
- struct varlena *tmp = attr;
+ varlena *tmp = attr;
attr = toast_decompress_datum(tmp);
pfree(tmp);
@@ -135,10 +135,10 @@ detoast_attr(struct varlena *attr)
/*
* This is an indirect pointer --- dereference it
*/
- struct varatt_indirect redirect;
+ varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
- attr = (struct varlena *) redirect.pointer;
+ attr = (varlena *) redirect.pointer;
/* nested indirect Datums aren't allowed */
Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
@@ -147,11 +147,11 @@ detoast_attr(struct varlena *attr)
attr = detoast_attr(attr);
/* if it isn't, we'd better copy it */
- if (attr == (struct varlena *) redirect.pointer)
+ if (attr == (varlena *) redirect.pointer)
{
- struct varlena *result;
+ varlena *result;
- result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+ result = (varlena *) palloc(VARSIZE_ANY(attr));
memcpy(result, attr, VARSIZE_ANY(attr));
attr = result;
}
@@ -179,9 +179,9 @@ detoast_attr(struct varlena *attr)
*/
Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
Size new_size = data_size + VARHDRSZ;
- struct varlena *new_attr;
+ varlena *new_attr;
- new_attr = (struct varlena *) palloc(new_size);
+ new_attr = (varlena *) palloc(new_size);
SET_VARSIZE(new_attr, new_size);
memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
attr = new_attr;
@@ -201,12 +201,12 @@ detoast_attr(struct varlena *attr)
* If slicelength < 0, return everything beyond sliceoffset
* ----------
*/
-struct varlena *
-detoast_attr_slice(struct varlena *attr,
+varlena *
+detoast_attr_slice(varlena *attr,
int32 sliceoffset, int32 slicelength)
{
- struct varlena *preslice;
- struct varlena *result;
+ varlena *preslice;
+ varlena *result;
char *attrdata;
int32 slicelimit;
int32 attrsize;
@@ -225,7 +225,7 @@ detoast_attr_slice(struct varlena *attr,
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
- struct varatt_external toast_pointer;
+ varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
@@ -266,7 +266,7 @@ detoast_attr_slice(struct varlena *attr,
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
- struct varatt_indirect redirect;
+ varatt_indirect redirect;
VARATT_EXTERNAL_GET_POINTER(redirect, attr);
@@ -288,7 +288,7 @@ detoast_attr_slice(struct varlena *attr,
if (VARATT_IS_COMPRESSED(preslice))
{
- struct varlena *tmp = preslice;
+ varlena *tmp = preslice;
/* Decompress enough to encompass the slice and the offset */
if (slicelimit >= 0)
@@ -321,7 +321,7 @@ detoast_attr_slice(struct varlena *attr,
else if (slicelength < 0 || slicelimit > attrsize)
slicelength = attrsize - sliceoffset;
- result = (struct varlena *) palloc(slicelength + VARHDRSZ);
+ result = (varlena *) palloc(slicelength + VARHDRSZ);
SET_VARSIZE(result, slicelength + VARHDRSZ);
memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
@@ -339,12 +339,12 @@ detoast_attr_slice(struct varlena *attr,
* in the toast relation
* ----------
*/
-static struct varlena *
-toast_fetch_datum(struct varlena *attr)
+static varlena *
+toast_fetch_datum(varlena *attr)
{
Relation toastrel;
- struct varlena *result;
- struct varatt_external toast_pointer;
+ varlena *result;
+ varatt_external toast_pointer;
int32 attrsize;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
@@ -355,7 +355,7 @@ toast_fetch_datum(struct varlena *attr)
attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
- result = (struct varlena *) palloc(attrsize + VARHDRSZ);
+ result = (varlena *) palloc(attrsize + VARHDRSZ);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
@@ -392,13 +392,13 @@ toast_fetch_datum(struct varlena *attr)
* has to be a prefix, i.e. sliceoffset has to be 0).
* ----------
*/
-static struct varlena *
-toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
+static varlena *
+toast_fetch_datum_slice(varlena *attr, int32 sliceoffset,
int32 slicelength)
{
Relation toastrel;
- struct varlena *result;
- struct varatt_external toast_pointer;
+ varlena *result;
+ varatt_external toast_pointer;
int32 attrsize;
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
@@ -438,7 +438,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
slicelength = attrsize - sliceoffset;
- result = (struct varlena *) palloc(slicelength + VARHDRSZ);
+ result = (varlena *) palloc(slicelength + VARHDRSZ);
if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
@@ -467,8 +467,8 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
*
* Decompress a compressed version of a varlena datum
*/
-static struct varlena *
-toast_decompress_datum(struct varlena *attr)
+static varlena *
+toast_decompress_datum(varlena *attr)
{
ToastCompressionId cmid;
@@ -499,8 +499,8 @@ toast_decompress_datum(struct varlena *attr)
* offset handling happens in detoast_attr_slice.
* Here we just decompress a slice from the front.
*/
-static struct varlena *
-toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
+static varlena *
+toast_decompress_datum_slice(varlena *attr, int32 slicelength)
{
ToastCompressionId cmid;
@@ -544,20 +544,20 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
Size
toast_raw_datum_size(Datum value)
{
- struct varlena *attr = (struct varlena *) DatumGetPointer(value);
+ varlena *attr = (varlena *) DatumGetPointer(value);
Size result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
/* va_rawsize is the size of the original datum -- including header */
- struct varatt_external toast_pointer;
+ varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = toast_pointer.va_rawsize;
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
- struct varatt_indirect toast_pointer;
+ varatt_indirect toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
@@ -600,7 +600,7 @@ toast_raw_datum_size(Datum value)
Size
toast_datum_size(Datum value)
{
- struct varlena *attr = (struct varlena *) DatumGetPointer(value);
+ varlena *attr = (varlena *) DatumGetPointer(value);
Size result;
if (VARATT_IS_EXTERNAL_ONDISK(attr))
@@ -610,14 +610,14 @@ toast_datum_size(Datum value)
* compressed or not. We do not count the size of the toast pointer
* ... should we?
*/
- struct varatt_external toast_pointer;
+ varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
result = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
}
else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
{
- struct varatt_indirect toast_pointer;
+ varatt_indirect toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c
index d7c8c53fd8d..d6350201e01 100644
--- a/src/backend/access/common/indextuple.c
+++ b/src/backend/access/common/indextuple.c
@@ -108,7 +108,7 @@ index_form_tuple_context(TupleDesc tupleDescriptor,
if (VARATT_IS_EXTERNAL(DatumGetPointer(values[i])))
{
untoasted_values[i] =
- PointerGetDatum(detoast_external_attr((struct varlena *)
+ PointerGetDatum(detoast_external_attr((varlena *)
DatumGetPointer(values[i])));
untoasted_free[i] = true;
}
diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c
index d449613b21f..4d00537049e 100644
--- a/src/backend/access/common/toast_compression.c
+++ b/src/backend/access/common/toast_compression.c
@@ -36,12 +36,12 @@ int default_toast_compression = TOAST_PGLZ_COMPRESSION;
*
* Returns the compressed varlena, or NULL if compression fails.
*/
-struct varlena *
-pglz_compress_datum(const struct varlena *value)
+varlena *
+pglz_compress_datum(const varlena *value)
{
int32 valsize,
len;
- struct varlena *tmp = NULL;
+ varlena *tmp = NULL;
valsize = VARSIZE_ANY_EXHDR(value);
@@ -57,8 +57,8 @@ pglz_compress_datum(const struct varlena *value)
* Figure out the maximum possible size of the pglz output, add the bytes
* that will be needed for varlena overhead, and allocate that amount.
*/
- tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
- VARHDRSZ_COMPRESSED);
+ tmp = (varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
+ VARHDRSZ_COMPRESSED);
len = pglz_compress(VARDATA_ANY(value),
valsize,
@@ -78,14 +78,14 @@ pglz_compress_datum(const struct varlena *value)
/*
* Decompress a varlena that was compressed using PGLZ.
*/
-struct varlena *
-pglz_decompress_datum(const struct varlena *value)
+varlena *
+pglz_decompress_datum(const varlena *value)
{
- struct varlena *result;
+ varlena *result;
int32 rawsize;
/* allocate memory for the uncompressed data */
- result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ);
+ result = (varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ);
/* decompress the data */
rawsize = pglz_decompress((const char *) value + VARHDRSZ_COMPRESSED,
@@ -105,15 +105,15 @@ pglz_decompress_datum(const struct varlena *value)
/*
* Decompress part of a varlena that was compressed using PGLZ.
*/
-struct varlena *
-pglz_decompress_datum_slice(const struct varlena *value,
+varlena *
+pglz_decompress_datum_slice(const varlena *value,
int32 slicelength)
{
- struct varlena *result;
+ varlena *result;
int32 rawsize;
/* allocate memory for the uncompressed data */
- result = (struct varlena *) palloc(slicelength + VARHDRSZ);
+ result = (varlena *) palloc(slicelength + VARHDRSZ);
/* decompress the data */
rawsize = pglz_decompress((const char *) value + VARHDRSZ_COMPRESSED,
@@ -135,8 +135,8 @@ pglz_decompress_datum_slice(const struct varlena *value,
*
* Returns the compressed varlena, or NULL if compression fails.
*/
-struct varlena *
-lz4_compress_datum(const struct varlena *value)
+varlena *
+lz4_compress_datum(const varlena *value)
{
#ifndef USE_LZ4
NO_COMPRESSION_SUPPORT("lz4");
@@ -145,7 +145,7 @@ lz4_compress_datum(const struct varlena *value)
int32 valsize;
int32 len;
int32 max_size;
- struct varlena *tmp = NULL;
+ varlena *tmp = NULL;
valsize = VARSIZE_ANY_EXHDR(value);
@@ -154,7 +154,7 @@ lz4_compress_datum(const struct varlena *value)
* that will be needed for varlena overhead, and allocate that amount.
*/
max_size = LZ4_compressBound(valsize);
- tmp = (struct varlena *) palloc(max_size + VARHDRSZ_COMPRESSED);
+ tmp = (varlena *) palloc(max_size + VARHDRSZ_COMPRESSED);
len = LZ4_compress_default(VARDATA_ANY(value),
(char *) tmp + VARHDRSZ_COMPRESSED,
@@ -178,18 +178,18 @@ lz4_compress_datum(const struct varlena *value)
/*
* Decompress a varlena that was compressed using LZ4.
*/
-struct varlena *
-lz4_decompress_datum(const struct varlena *value)
+varlena *
+lz4_decompress_datum(const varlena *value)
{
#ifndef USE_LZ4
NO_COMPRESSION_SUPPORT("lz4");
return NULL; /* keep compiler quiet */
#else
int32 rawsize;
- struct varlena *result;
+ varlena *result;
/* allocate memory for the uncompressed data */
- result = (struct varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ);
+ result = (varlena *) palloc(VARDATA_COMPRESSED_GET_EXTSIZE(value) + VARHDRSZ);
/* decompress the data */
rawsize = LZ4_decompress_safe((const char *) value + VARHDRSZ_COMPRESSED,
@@ -211,22 +211,22 @@ lz4_decompress_datum(const struct varlena *value)
/*
* Decompress part of a varlena that was compressed using LZ4.
*/
-struct varlena *
-lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
+varlena *
+lz4_decompress_datum_slice(const varlena *value, int32 slicelength)
{
#ifndef USE_LZ4
NO_COMPRESSION_SUPPORT("lz4");
return NULL; /* keep compiler quiet */
#else
int32 rawsize;
- struct varlena *result;
+ varlena *result;
/* slice decompression not supported prior to 1.8.3 */
if (LZ4_versionNumber() < 10803)
return lz4_decompress_datum(value);
/* allocate memory for the uncompressed data */
- result = (struct varlena *) palloc(slicelength + VARHDRSZ);
+ result = (varlena *) palloc(slicelength + VARHDRSZ);
/* decompress the data */
rawsize = LZ4_decompress_safe_partial((const char *) value + VARHDRSZ_COMPRESSED,
@@ -251,7 +251,7 @@ lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
* Returns TOAST_INVALID_COMPRESSION_ID if the varlena is not compressed.
*/
ToastCompressionId
-toast_get_compression_id(struct varlena *attr)
+toast_get_compression_id(varlena *attr)
{
ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
@@ -262,7 +262,7 @@ toast_get_compression_id(struct varlena *attr)
*/
if (VARATT_IS_EXTERNAL_ONDISK(attr))
{
- struct varatt_external toast_pointer;
+ varatt_external toast_pointer;
VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
index 6836786fd05..4d0da07135e 100644
--- a/src/backend/access/common/toast_internals.c
+++ b/src/backend/access/common/toast_internals.c
@@ -45,7 +45,7 @@ static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
Datum
toast_compress_datum(Datum value, char cmethod)
{
- struct varlena *tmp = NULL;
+ varlena *tmp = NULL;
int32 valsize;
ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
@@ -64,11 +64,11 @@ toast_compress_datum(Datum value, char cmethod)
switch (cmethod)
{
case TOAST_PGLZ_COMPRESSION:
- tmp = pglz_compress_datum((const struct varlena *) DatumGetPointer(value));
+ tmp = pglz_compress_datum((const varlena *) DatumGetPointer(value));
cmid = TOAST_PGLZ_COMPRESSION_ID;
break;
case TOAST_LZ4_COMPRESSION:
- tmp = lz4_compress_datum((const struct varlena *) DatumGetPointer(value));
+ tmp = lz4_compress_datum((const varlena *) DatumGetPointer(value));
cmid = TOAST_LZ4_COMPRESSION_ID;
break;
default:
@@ -117,14 +117,14 @@ toast_compress_datum(Datum value, char cmethod)
*/
Datum
toast_save_datum(Relation rel, Datum value,
- struct varlena *oldexternal, int options)
+ varlena *oldexternal, int options)
{
Relation toastrel;
Relation *toastidxs;
TupleDesc toasttupDesc;
CommandId mycid = GetCurrentCommandId(true);
- struct varlena *result;
- struct varatt_external toast_pointer;
+ varlena *result;
+ varatt_external toast_pointer;
int32 chunk_seq = 0;
char *data_p;
int32 data_todo;
@@ -225,7 +225,7 @@ toast_save_datum(Relation rel, Datum value,
toast_pointer.va_valueid = InvalidOid;
if (oldexternal != NULL)
{
- struct varatt_external old_toast_pointer;
+ varatt_external old_toast_pointer;
Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
/* Must copy to access aligned fields */
@@ -287,7 +287,7 @@ toast_save_datum(Relation rel, Datum value,
bool t_isnull[3] = {0};
union
{
- alignas(int32) struct varlena hdr;
+ alignas(int32) varlena hdr;
/* this is to make the union big enough for a chunk: */
char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
} chunk_data;
@@ -359,7 +359,7 @@ toast_save_datum(Relation rel, Datum value,
/*
* Create the TOAST pointer value that we'll return
*/
- result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
+ result = (varlena *) palloc(TOAST_POINTER_SIZE);
SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
@@ -375,8 +375,8 @@ toast_save_datum(Relation rel, Datum value,
void
toast_delete_datum(Relation rel, Datum value, bool is_speculative)
{
- struct varlena *attr = (struct varlena *) DatumGetPointer(value);
- struct varatt_external toast_pointer;
+ varlena *attr = (varlena *) DatumGetPointer(value);
+ varatt_external toast_pointer;
Relation toastrel;
Relation *toastidxs;
ScanKeyData toastkey;
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 94b4f1f9975..b69d10f0a45 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -86,25 +86,8 @@ populate_compact_attribute_internal(Form_pg_attribute src,
IsCatalogRelationOid(src->attrelid) ? ATTNULLABLE_VALID :
ATTNULLABLE_UNKNOWN;
- switch (src->attalign)
- {
- case TYPALIGN_INT:
- dst->attalignby = ALIGNOF_INT;
- break;
- case TYPALIGN_CHAR:
- dst->attalignby = sizeof(char);
- break;
- case TYPALIGN_DOUBLE:
- dst->attalignby = ALIGNOF_DOUBLE;
- break;
- case TYPALIGN_SHORT:
- dst->attalignby = ALIGNOF_SHORT;
- break;
- default:
- dst->attalignby = 0;
- elog(ERROR, "invalid attalign value: %c", src->attalign);
- break;
- }
+ /* Compute numeric alignment requirement, too */
+ dst->attalignby = typalign_to_alignby(src->attalign);
}
/*
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 436e54f2066..c5d7db28077 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -1854,10 +1854,10 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
PageSetLSN(page, recptr);
}
- UnlockReleaseBuffer(buffer);
-
END_CRIT_SECTION();
+ UnlockReleaseBuffer(buffer);
+
/* During index build, count the newly-added data page */
if (buildStats)
buildStats->nDataPages++;
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
index 7a6b177977b..f50848eb65a 100644
--- a/src/backend/access/gin/ginfast.c
+++ b/src/backend/access/gin/ginfast.c
@@ -134,10 +134,10 @@ writeListPage(Relation index, Buffer buffer,
/* get free space before releasing buffer */
freesize = PageGetExactFreeSpace(page);
- UnlockReleaseBuffer(buffer);
-
END_CRIT_SECTION();
+ UnlockReleaseBuffer(buffer);
+
return freesize;
}
@@ -459,10 +459,10 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
if (metadata->nPendingPages * GIN_PAGE_FREESIZE > cleanupSize * (Size) 1024)
needCleanup = true;
- UnlockReleaseBuffer(metabuffer);
-
END_CRIT_SECTION();
+ UnlockReleaseBuffer(metabuffer);
+
/*
* Since it could contend with concurrent cleanup process we cleanup
* pending list not forcibly.
@@ -659,11 +659,11 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
}
}
+ END_CRIT_SECTION();
+
for (i = 0; i < data.ndeleted; i++)
UnlockReleaseBuffer(buffers[i]);
- END_CRIT_SECTION();
-
for (i = 0; fill_fsm && i < data.ndeleted; i++)
RecordFreeIndexPage(index, freespace[i]);
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index d205093e21d..ff927279cc3 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -663,9 +663,9 @@ ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
PageSetLSN(metapage, recptr);
}
- UnlockReleaseBuffer(metabuffer);
-
END_CRIT_SECTION();
+
+ UnlockReleaseBuffer(metabuffer);
}
/*
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 11a6674a10b..c9f143f6c31 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -224,12 +224,12 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
PageSetLSN(BufferGetPage(lBuffer), recptr);
}
+ END_CRIT_SECTION();
+
ReleaseBuffer(pBuffer);
ReleaseBuffer(lBuffer);
ReleaseBuffer(dBuffer);
- END_CRIT_SECTION();
-
gvs->result->pages_newly_deleted++;
gvs->result->pages_deleted++;
}
@@ -654,8 +654,8 @@ ginbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
PageRestoreTempPage(resPage, page);
MarkBufferDirty(buffer);
xlogVacuumPage(gvs.index, buffer);
- UnlockReleaseBuffer(buffer);
END_CRIT_SECTION();
+ UnlockReleaseBuffer(buffer);
}
else
{
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index d5944205db2..dfffce3e396 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -291,7 +291,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
SplitPageLayout *dist = NULL,
*ptr;
BlockNumber oldrlink = InvalidBlockNumber;
- GistNSN oldnsn = 0;
+ GistNSN oldnsn = InvalidXLogRecPtr;
SplitPageLayout rootpg;
bool is_rootsplit;
int npage;
@@ -654,7 +654,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace,
/* Start from the root */
firststack.blkno = GIST_ROOT_BLKNO;
- firststack.lsn = 0;
+ firststack.lsn = InvalidXLogRecPtr;
firststack.retry_from_parent = false;
firststack.parent = NULL;
firststack.downlinkoffnum = InvalidOffsetNumber;
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index 83bda209c42..575342a21b6 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -27,6 +27,7 @@
#include "postgres.h"
#include "common/hashfn.h"
+#include "utils/builtins.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/pg_locale.h"
@@ -233,6 +234,7 @@ hashoidvector(PG_FUNCTION_ARGS)
{
oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
+ check_valid_oidvector(key);
return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
}
@@ -241,6 +243,7 @@ hashoidvectorextended(PG_FUNCTION_ARGS)
{
oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
+ check_valid_oidvector(key);
return hash_any_extended((unsigned char *) key->values,
key->dim1 * sizeof(Oid),
PG_GETARG_INT64(1));
@@ -385,7 +388,7 @@ hashtextextended(PG_FUNCTION_ARGS)
Datum
hashvarlena(PG_FUNCTION_ARGS)
{
- struct varlena *key = PG_GETARG_VARLENA_PP(0);
+ varlena *key = PG_GETARG_VARLENA_PP(0);
Datum result;
result = hash_any((unsigned char *) VARDATA_ANY(key),
@@ -400,7 +403,7 @@ hashvarlena(PG_FUNCTION_ARGS)
Datum
hashvarlenaextended(PG_FUNCTION_ARGS)
{
- struct varlena *key = PG_GETARG_VARLENA_PP(0);
+ varlena *key = PG_GETARG_VARLENA_PP(0);
Datum result;
result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index f30a56ecf55..98d53caeea8 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -111,11 +111,11 @@ static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool ke
/*
- * Each tuple lock mode has a corresponding heavyweight lock, and one or two
- * corresponding MultiXactStatuses (one to merely lock tuples, another one to
- * update them). This table (and the macros below) helps us determine the
- * heavyweight lock mode and MultiXactStatus values to use for any particular
- * tuple lock strength.
+ * This table lists the heavyweight lock mode that corresponds to each tuple
+ * lock mode, as well as one or two corresponding MultiXactStatus values:
+ * .lockstatus to merely lock tuples, and .updstatus to update them. The
+ * latter is set to -1 if the corresponding tuple lock mode does not allow
+ * updating tuples -- see get_mxact_status_for_lock().
*
* These interact with InplaceUpdateTupleLock, an alias for ExclusiveLock.
*
@@ -127,29 +127,30 @@ static const struct
LOCKMODE hwlock;
int lockstatus;
int updstatus;
-}
+} tupleLockExtraInfo[] =
- tupleLockExtraInfo[MaxLockTupleMode + 1] =
{
- { /* LockTupleKeyShare */
- AccessShareLock,
- MultiXactStatusForKeyShare,
- -1 /* KeyShare does not allow updating tuples */
+ [LockTupleKeyShare] = {
+ .hwlock = AccessShareLock,
+ .lockstatus = MultiXactStatusForKeyShare,
+ /* KeyShare does not allow updating tuples */
+ .updstatus = -1
},
- { /* LockTupleShare */
- RowShareLock,
- MultiXactStatusForShare,
- -1 /* Share does not allow updating tuples */
+ [LockTupleShare] = {
+ .hwlock = RowShareLock,
+ .lockstatus = MultiXactStatusForShare,
+ /* Share does not allow updating tuples */
+ .updstatus = -1
},
- { /* LockTupleNoKeyExclusive */
- ExclusiveLock,
- MultiXactStatusForNoKeyUpdate,
- MultiXactStatusNoKeyUpdate
+ [LockTupleNoKeyExclusive] = {
+ .hwlock = ExclusiveLock,
+ .lockstatus = MultiXactStatusForNoKeyUpdate,
+ .updstatus = MultiXactStatusNoKeyUpdate
},
- { /* LockTupleExclusive */
- AccessExclusiveLock,
- MultiXactStatusForUpdate,
- MultiXactStatusUpdate
+ [LockTupleExclusive] = {
+ .hwlock = AccessExclusiveLock,
+ .lockstatus = MultiXactStatusForUpdate,
+ .updstatus = MultiXactStatusUpdate
}
};
@@ -1421,16 +1422,6 @@ heap_getnext(TableScanDesc sscan, ScanDirection direction)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg_internal("only heap AM is supported")));
- /*
- * We don't expect direct calls to heap_getnext with valid CheckXidAlive
- * for catalog or regular tables. See detailed comments in xact.c where
- * these variables are declared. Normally we have such a check at tableam
- * level API but this is called from many places so we need to ensure it
- * here.
- */
- if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
- elog(ERROR, "unexpected heap_getnext call during logical decoding");
-
/* Note: no locking manipulations needed */
if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
@@ -4544,7 +4535,7 @@ HeapDetermineColumnsInfo(Relation relation,
* Check if the old tuple's attribute is stored externally and is a
* member of external_cols.
*/
- if (VARATT_IS_EXTERNAL((struct varlena *) DatumGetPointer(value1)) &&
+ if (VARATT_IS_EXTERNAL((varlena *) DatumGetPointer(value1)) &&
bms_is_member(attidx, external_cols))
*has_external = true;
}
@@ -4698,10 +4689,10 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
if (result == TM_Invisible)
{
/*
- * This is possible, but only when locking a tuple for ON CONFLICT
- * UPDATE. We return this value here rather than throwing an error in
- * order to give that case the opportunity to throw a more specific
- * error.
+ * This is possible, but only when locking a tuple for ON CONFLICT DO
+ * SELECT/UPDATE. We return this value here rather than throwing an
+ * error in order to give that case the opportunity to throw a more
+ * specific error.
*/
result = TM_Invisible;
goto out_locked;
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index cbef73e5d4b..b83e2013d50 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -861,7 +861,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
break;
case HEAPTUPLE_RECENTLY_DEAD:
*tups_recently_dead += 1;
- /* fall through */
+ pg_fallthrough;
case HEAPTUPLE_LIVE:
/* Live or recently dead, must copy it */
isdead = false;
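
Annotation: this is the first of many hunks replacing /* fall through */ comments with pg_fallthrough. It matches the configure change to -Wimplicit-fallthrough=5, the strictest level, which ignores comments and accepts only a fallthrough attribute. The macro's definition is not shown in this excerpt; the sketch below uses an assumed GCC-style stand-in to illustrate the mechanism:

    #include <stdio.h>

    /* Assumed stand-in; the patch presumably defines pg_fallthrough centrally. */
    #if defined(__GNUC__) && __GNUC__ >= 7
    #define pg_fallthrough __attribute__((fallthrough))
    #else
    #define pg_fallthrough ((void) 0)
    #endif

    static int
    classify(int n)
    {
        switch (n)
        {
            case 0:
                printf("zero is also...\n");
                pg_fallthrough;  /* intentional: satisfies -Wimplicit-fallthrough=5 */
            case 1:
                return 1;        /* "small" */
            default:
                return 2;        /* "big" */
        }
    }

    int
    main(void)
    {
        printf("%d\n", classify(0));    /* prints the message, then 1 */
        return 0;
    }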
diff --git a/src/backend/access/heap/heaptoast.c b/src/backend/access/heap/heaptoast.c
index e28fe47a449..ba541bd60c9 100644
--- a/src/backend/access/heap/heaptoast.c
+++ b/src/backend/access/heap/heaptoast.c
@@ -371,9 +371,9 @@ toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
*/
if (!toast_isnull[i] && TupleDescCompactAttr(tupleDesc, i)->attlen == -1)
{
- struct varlena *new_value;
+ varlena *new_value;
- new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
+ new_value = (varlena *) DatumGetPointer(toast_values[i]);
if (VARATT_IS_EXTERNAL(new_value))
{
new_value = detoast_external_attr(new_value);
@@ -485,9 +485,9 @@ toast_flatten_tuple_to_datum(HeapTupleHeader tup,
has_nulls = true;
else if (TupleDescCompactAttr(tupleDesc, i)->attlen == -1)
{
- struct varlena *new_value;
+ varlena *new_value;
- new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
+ new_value = (varlena *) DatumGetPointer(toast_values[i]);
if (VARATT_IS_EXTERNAL(new_value) ||
VARATT_IS_COMPRESSED(new_value))
{
@@ -586,9 +586,9 @@ toast_build_flattened_tuple(TupleDesc tupleDesc,
*/
if (!isnull[i] && TupleDescCompactAttr(tupleDesc, i)->attlen == -1)
{
- struct varlena *new_value;
+ varlena *new_value;
- new_value = (struct varlena *) DatumGetPointer(new_values[i]);
+ new_value = (varlena *) DatumGetPointer(new_values[i]);
if (VARATT_IS_EXTERNAL(new_value))
{
new_value = detoast_external_attr(new_value);
@@ -625,7 +625,7 @@ toast_build_flattened_tuple(TupleDesc tupleDesc,
void
heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize,
int32 sliceoffset, int32 slicelength,
- struct varlena *result)
+ varlena *result)
{
Relation *toastidxs;
ScanKeyData toastkey[3];
@@ -768,7 +768,7 @@ heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize,
chcpyend = (sliceoffset + slicelength - 1) % TOAST_MAX_CHUNK_SIZE;
memcpy(VARDATA(result) +
- (curchunk * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
+ curchunk * TOAST_MAX_CHUNK_SIZE - sliceoffset + chcpystrt,
chunkdata + chcpystrt,
(chcpyend - chcpystrt) + 1);
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index a29be6f467b..5e89b86a62c 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -420,6 +420,14 @@ systable_beginscan(Relation heapRelation,
sysscan->snapshot = NULL;
}
+ /*
+ * If CheckXidAlive is set then set a flag to indicate that system table
+ * scan is in-progress. See detailed comments in xact.c where these
+ * variables are declared.
+ */
+ if (TransactionIdIsValid(CheckXidAlive))
+ bsysscan = true;
+
if (irel)
{
int i;
@@ -468,14 +476,6 @@ systable_beginscan(Relation heapRelation,
sysscan->iscan = NULL;
}
- /*
- * If CheckXidAlive is set then set a flag to indicate that system table
- * scan is in-progress. See detailed comments in xact.c where these
- * variables are declared.
- */
- if (TransactionIdIsValid(CheckXidAlive))
- bsysscan = true;
-
return sysscan;
}
@@ -707,13 +707,6 @@ systable_beginscan_ordered(Relation heapRelation,
elog(ERROR, "column is not in index");
}
- sysscan->iscan = index_beginscan(heapRelation, indexRelation,
- snapshot, NULL, nkeys, 0);
- index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
- sysscan->scan = NULL;
-
- pfree(idxkey);
-
/*
* If CheckXidAlive is set then set a flag to indicate that system table
* scan is in-progress. See detailed comments in xact.c where these
@@ -722,6 +715,13 @@ systable_beginscan_ordered(Relation heapRelation,
if (TransactionIdIsValid(CheckXidAlive))
bsysscan = true;
+ sysscan->iscan = index_beginscan(heapRelation, indexRelation,
+ snapshot, NULL, nkeys, 0);
+ index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
+ sysscan->scan = NULL;
+
+ pfree(idxkey);
+
return sysscan;
}
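
Annotation: the two genam.c hunks are pure reorderings. The bsysscan flag is now raised before index_beginscan()/index_rescan() run rather than after, so an error thrown while starting the scan already sees the flag set. A hedged sketch of the general pattern, with invented names:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    static bool scan_in_progress = false;

    static void
    begin_scan(bool fail)
    {
        /* Set state first, as the patch does, so an error path below
         * cannot observe a half-initialized picture. */
        scan_in_progress = true;

        if (fail)
        {
            fprintf(stderr, "scan setup failed (flag already set: %d)\n",
                    scan_in_progress);
            exit(1);
        }
    }

    int
    main(void)
    {
        begin_scan(false);
        printf("scan running: %d\n", scan_in_progress);     /* prints 1 */
        return 0;
    }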
diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c
index 8425805a292..1d343377e98 100644
--- a/src/backend/access/nbtree/nbtcompare.c
+++ b/src/backend/access/nbtree/nbtcompare.c
@@ -57,6 +57,7 @@
#include <limits.h>
+#include "utils/builtins.h"
#include "utils/fmgrprotos.h"
#include "utils/skipsupport.h"
#include "utils/sortsupport.h"
@@ -587,6 +588,9 @@ btoidvectorcmp(PG_FUNCTION_ARGS)
oidvector *b = (oidvector *) PG_GETARG_POINTER(1);
int i;
+ check_valid_oidvector(a);
+ check_valid_oidvector(b);
+
/* We arbitrarily choose to sort first by vector length */
if (a->dim1 != b->dim1)
PG_RETURN_INT32(a->dim1 - b->dim1);
diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c
index b028b0c3e88..39c0a5d610f 100644
--- a/src/backend/access/nbtree/nbtpreprocesskeys.c
+++ b/src/backend/access/nbtree/nbtpreprocesskeys.c
@@ -1198,7 +1198,7 @@ _bt_saoparray_shrink(IndexScanDesc scan, ScanKey arraysk, ScanKey skey,
{
case BTLessStrategyNumber:
cmpexact = 1; /* exclude exact match, if any */
- /* FALL THRU */
+ pg_fallthrough;
case BTLessEqualStrategyNumber:
if (cmpresult >= cmpexact)
matchelem++;
@@ -1220,7 +1220,7 @@ _bt_saoparray_shrink(IndexScanDesc scan, ScanKey arraysk, ScanKey skey,
break;
case BTGreaterEqualStrategyNumber:
cmpexact = 1; /* include exact match, if any */
- /* FALL THRU */
+ pg_fallthrough;
case BTGreaterStrategyNumber:
if (cmpresult >= cmpexact)
matchelem++;
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 90ab4e91b56..3a45508f62e 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -69,8 +69,8 @@
/*
* DISABLE_LEADER_PARTICIPATION disables the leader's participation in
* parallel index builds. This may be useful as a debugging aid.
-#undef DISABLE_LEADER_PARTICIPATION
*/
+/* #define DISABLE_LEADER_PARTICIPATION */
/*
* Status record for spooling/sorting phase. (Note we may have two of
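
Annotation: the nbtsort.c hunk moves the debug toggle out of the comment body (the old #undef sat inside the /* ... */ block) and into a conventional commented-out #define. A tiny sketch of how such a toggle gates code:

    #include <stdio.h>

    /* Uncomment to disable leader participation (debugging aid). */
    /* #define DISABLE_LEADER_PARTICIPATION */

    int
    main(void)
    {
    #ifdef DISABLE_LEADER_PARTICIPATION
        printf("leader participation disabled\n");
    #else
        printf("leader participates\n");    /* default build prints this */
    #endif
        return 0;
    }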
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index 87491796523..dfda1af412e 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -117,8 +117,8 @@ table_beginscan_catalog(Relation relation, int nkeys, ScanKeyData *key)
Oid relid = RelationGetRelid(relation);
Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
- return relation->rd_tableam->scan_begin(relation, snapshot, nkeys, key,
- NULL, flags);
+ return table_beginscan_common(relation, snapshot, nkeys, key,
+ NULL, flags);
}
@@ -184,8 +184,8 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
snapshot = SnapshotAny;
}
- return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL,
- pscan, flags);
+ return table_beginscan_common(relation, snapshot, 0, NULL,
+ pscan, flags);
}
TableScanDesc
@@ -214,8 +214,8 @@ table_beginscan_parallel_tidrange(Relation relation,
snapshot = SnapshotAny;
}
- sscan = relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL,
- pscan, flags);
+ sscan = table_beginscan_common(relation, snapshot, 0, NULL,
+ pscan, flags);
return sscan;
}
@@ -269,14 +269,6 @@ table_tuple_get_latest_tid(TableScanDesc scan, ItemPointer tid)
Relation rel = scan->rs_rd;
const TableAmRoutine *tableam = rel->rd_tableam;
- /*
- * We don't expect direct calls to table_tuple_get_latest_tid with valid
- * CheckXidAlive for catalog or regular tables. See detailed comments in
- * xact.c where these variables are declared.
- */
- if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
- elog(ERROR, "unexpected table_tuple_get_latest_tid call during logical decoding");
-
/*
* Since this can be called with user-supplied TID, don't trust the input
* too much.
diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c
index d8a604a0b3e..0d792a60ca0 100644
--- a/src/backend/access/table/toast_helper.c
+++ b/src/backend/access/table/toast_helper.c
@@ -49,8 +49,8 @@ toast_tuple_init(ToastTupleContext *ttc)
for (i = 0; i < numAttrs; i++)
{
Form_pg_attribute att = TupleDescAttr(tupleDesc, i);
- struct varlena *old_value;
- struct varlena *new_value;
+ varlena *old_value;
+ varlena *new_value;
ttc->ttc_attr[i].tai_colflags = 0;
ttc->ttc_attr[i].tai_oldexternal = NULL;
@@ -62,9 +62,9 @@ toast_tuple_init(ToastTupleContext *ttc)
* For UPDATE get the old and new values of this attribute
*/
old_value =
- (struct varlena *) DatumGetPointer(ttc->ttc_oldvalues[i]);
+ (varlena *) DatumGetPointer(ttc->ttc_oldvalues[i]);
new_value =
- (struct varlena *) DatumGetPointer(ttc->ttc_values[i]);
+ (varlena *) DatumGetPointer(ttc->ttc_values[i]);
/*
* If the old value is stored on disk, check if it has changed so
@@ -102,7 +102,7 @@ toast_tuple_init(ToastTupleContext *ttc)
/*
* For INSERT simply get the new value
*/
- new_value = (struct varlena *) DatumGetPointer(ttc->ttc_values[i]);
+ new_value = (varlena *) DatumGetPointer(ttc->ttc_values[i]);
}
/*
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 01a89104ef0..44786dc131f 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -357,7 +357,7 @@ InitializeParallelDSM(ParallelContext *pcxt)
fps->stmt_ts = GetCurrentStatementStartTimestamp();
fps->serializable_xact_handle = ShareSerializableXact();
SpinLockInit(&fps->mutex);
- fps->last_xlog_end = 0;
+ fps->last_xlog_end = InvalidXLogRecPtr;
shm_toc_insert(pcxt->toc, PARALLEL_KEY_FIXED, fps);
/* We can skip the rest of this if we're not budgeting for any workers. */
@@ -530,7 +530,7 @@ ReinitializeParallelDSM(ParallelContext *pcxt)
/* Reset a few bits of fixed parallel state to a clean state. */
fps = shm_toc_lookup(pcxt->toc, PARALLEL_KEY_FIXED, false);
- fps->last_xlog_end = 0;
+ fps->last_xlog_end = InvalidXLogRecPtr;
/* Recreate error queues (if they exist). */
if (pcxt->nworkers > 0)
@@ -1327,7 +1327,6 @@ ParallelWorkerMain(Datum main_arg)
InitializingParallelWorker = true;
/* Establish signal handlers. */
- pqsignal(SIGTERM, die);
BackgroundWorkerUnblockSignals();
/* Determine and set our parallel worker number. */
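
Annotation: several hunks in this patch replace bare 0 with InvalidXLogRecPtr when clearing LSN fields. The constant is plain 0 underneath, so there is no behavior change; the win is readability and greppability. A compilable illustration mirroring the xlogdefs.h definitions:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t XLogRecPtr;            /* mirrors xlogdefs.h */
    #define InvalidXLogRecPtr ((XLogRecPtr) 0)
    #define XLogRecPtrIsInvalid(r) ((r) == InvalidXLogRecPtr)

    int
    main(void)
    {
        XLogRecPtr last_xlog_end = InvalidXLogRecPtr;   /* was: = 0 */

        printf("%d\n", XLogRecPtrIsInvalid(last_xlog_end));   /* prints 1 */
        return 0;
    }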
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 601ce3faa64..eabc4d48208 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -470,7 +470,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid,
proc->databaseId = databaseid;
proc->roleId = owner;
proc->tempNamespaceId = InvalidOid;
- proc->isRegularBackend = false;
+ proc->backendType = B_INVALID;
proc->lwWaiting = LW_WS_NOT_WAITING;
proc->lwWaitMode = 0;
proc->waitLock = NULL;
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 16614e152dd..13cce9b49f1 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2060,7 +2060,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
/* Have to write it ourselves */
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
WriteRqst.Write = OldPageRqstPtr;
- WriteRqst.Flush = 0;
+ WriteRqst.Flush = InvalidXLogRecPtr;
XLogWrite(WriteRqst, tli, false);
LWLockRelease(WALWriteLock);
pgWalUsage.wal_buffers_full++;
@@ -3077,7 +3077,7 @@ XLogBackgroundFlush(void)
else
{
/* no flushing, this time round */
- WriteRqst.Flush = 0;
+ WriteRqst.Flush = InvalidXLogRecPtr;
}
#ifdef WAL_DEBUG
@@ -5207,7 +5207,7 @@ BootStrapXLOG(uint32 data_checksum_version)
/* Insert the initial checkpoint record */
recptr = ((char *) page + SizeOfXLogLongPHD);
record = (XLogRecord *) recptr;
- record->xl_prev = 0;
+ record->xl_prev = InvalidXLogRecPtr;
record->xl_xid = InvalidTransactionId;
record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
@@ -6768,6 +6768,28 @@ ShutdownXLOG(int code, Datum arg)
}
}
+/*
+ * Format checkpoint request flags as a space-separated string for
+ * log messages.
+ */
+static const char *
+CheckpointFlagsString(int flags)
+{
+ static char buf[128];
+
+ snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s",
+ (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
+ (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
+ (flags & CHECKPOINT_FAST) ? " fast" : "",
+ (flags & CHECKPOINT_FORCE) ? " force" : "",
+ (flags & CHECKPOINT_WAIT) ? " wait" : "",
+ (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
+ (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
+ (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "");
+
+ return buf;
+}
+
/*
* Log start of a checkpoint.
*/
@@ -6776,35 +6798,21 @@ LogCheckpointStart(int flags, bool restartpoint)
{
if (restartpoint)
ereport(LOG,
- /* translator: the placeholders show checkpoint options */
- (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
- (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
- (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
- (flags & CHECKPOINT_FAST) ? " fast" : "",
- (flags & CHECKPOINT_FORCE) ? " force" : "",
- (flags & CHECKPOINT_WAIT) ? " wait" : "",
- (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
- (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
- (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
+ /* translator: the placeholder shows checkpoint options */
+ (errmsg("restartpoint starting:%s",
+ CheckpointFlagsString(flags))));
else
ereport(LOG,
- /* translator: the placeholders show checkpoint options */
- (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
- (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
- (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
- (flags & CHECKPOINT_FAST) ? " fast" : "",
- (flags & CHECKPOINT_FORCE) ? " force" : "",
- (flags & CHECKPOINT_WAIT) ? " wait" : "",
- (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
- (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
- (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
+ /* translator: the placeholder shows checkpoint options */
+ (errmsg("checkpoint starting:%s",
+ CheckpointFlagsString(flags))));
}
/*
* Log end of a checkpoint.
*/
static void
-LogCheckpointEnd(bool restartpoint)
+LogCheckpointEnd(bool restartpoint, int flags)
{
long write_msecs,
sync_msecs,
@@ -6854,12 +6862,13 @@ LogCheckpointEnd(bool restartpoint)
*/
if (restartpoint)
ereport(LOG,
- (errmsg("restartpoint complete: wrote %d buffers (%.1f%%), "
+ (errmsg("restartpoint complete:%s: wrote %d buffers (%.1f%%), "
"wrote %d SLRU buffers; %d WAL file(s) added, "
"%d removed, %d recycled; write=%ld.%03d s, "
"sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
"longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
"estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
+ CheckpointFlagsString(flags),
CheckpointStats.ckpt_bufs_written,
(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
CheckpointStats.ckpt_slru_written,
@@ -6878,12 +6887,13 @@ LogCheckpointEnd(bool restartpoint)
LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo))));
else
ereport(LOG,
- (errmsg("checkpoint complete: wrote %d buffers (%.1f%%), "
+ (errmsg("checkpoint complete:%s: wrote %d buffers (%.1f%%), "
"wrote %d SLRU buffers; %d WAL file(s) added, "
"%d removed, %d recycled; write=%ld.%03d s, "
"sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
"longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
"estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
+ CheckpointFlagsString(flags),
CheckpointStats.ckpt_bufs_written,
(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
CheckpointStats.ckpt_slru_written,
@@ -7480,7 +7490,7 @@ CreateCheckPoint(int flags)
TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
/* Real work is done; log and update stats. */
- LogCheckpointEnd(false);
+ LogCheckpointEnd(false, flags);
/* Reset the process title */
update_checkpoint_display(flags, false, true);
@@ -7951,7 +7961,7 @@ CreateRestartPoint(int flags)
TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning());
/* Real work is done; log and update stats. */
- LogCheckpointEnd(true);
+ LogCheckpointEnd(true, flags);
/* Reset the process title */
update_checkpoint_display(flags, true, true);
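
Annotation: CheckpointFlagsString() deduplicates the eight-way ternary chain that previously appeared verbatim in two errmsg() calls, and lets LogCheckpointEnd() report the flags as well. Note it returns a pointer into one static buffer, which is acceptable in a single-threaded backend but not reentrant. A self-contained sketch of the same pattern:

    #include <stdio.h>

    #define DEMO_SHUTDOWN 0x01
    #define DEMO_FORCE    0x02

    static const char *
    demo_flags_string(int flags)
    {
        static char buf[64];    /* not reentrant, like the original */

        snprintf(buf, sizeof(buf), "%s%s",
                 (flags & DEMO_SHUTDOWN) ? " shutdown" : "",
                 (flags & DEMO_FORCE) ? " force" : "");
        return buf;
    }

    int
    main(void)
    {
        /* prints "checkpoint starting: shutdown force" */
        printf("checkpoint starting:%s\n",
               demo_flags_string(DEMO_SHUTDOWN | DEMO_FORCE));
        return 0;
    }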
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index d3acaa636c3..a9a1678acc9 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -1355,11 +1355,12 @@ log_newpage_range(Relation rel, ForkNumber forknum,
recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
for (i = 0; i < nbufs; i++)
- {
PageSetLSN(BufferGetPage(bufpack[i]), recptr);
- UnlockReleaseBuffer(bufpack[i]);
- }
+
END_CRIT_SECTION();
+
+ for (i = 0; i < nbufs; i++)
+ UnlockReleaseBuffer(bufpack[i]);
}
}
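
Annotation: the log_newpage_range() hunk shrinks the critical section. Buffers still get their LSN set inside it, but UnlockReleaseBuffer() now runs after END_CRIT_SECTION(), so a failure during release cannot be promoted to a PANIC. The shape of the change, in a generic sketch where printfs stand in for the real calls and macros:

    #include <stdio.h>

    #define NBUFS 3

    int
    main(void)
    {
        /* START_CRIT_SECTION(): only the must-not-fail work inside */
        for (int i = 0; i < NBUFS; i++)
            printf("PageSetLSN on buffer %d\n", i);
        /* END_CRIT_SECTION() */

        /* Resource release moved outside the critical section */
        for (int i = 0; i < NBUFS; i++)
            printf("UnlockReleaseBuffer(%d)\n", i);
        return 0;
    }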
diff --git a/src/backend/access/transam/xlogprefetcher.c b/src/backend/access/transam/xlogprefetcher.c
index 3c3f067aafb..24cfa96d737 100644
--- a/src/backend/access/transam/xlogprefetcher.c
+++ b/src/backend/access/transam/xlogprefetcher.c
@@ -967,7 +967,7 @@ XLogPrefetcherBeginRead(XLogPrefetcher *prefetcher, XLogRecPtr recPtr)
/* Book-keeping to avoid readahead on first read. */
prefetcher->begin_ptr = recPtr;
- prefetcher->no_readahead_until = 0;
+ prefetcher->no_readahead_until = InvalidXLogRecPtr;
/* This will forget about any queued up records in the decoder. */
XLogBeginRead(prefetcher->reader, recPtr);
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index a81dcbb5d79..c0c2744d45b 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -261,7 +261,7 @@ static TimestampTz XLogReceiptTime = 0;
static XLogSource XLogReceiptSource = XLOG_FROM_ANY;
/* Local copy of WalRcv->flushedUpto */
-static XLogRecPtr flushedUpto = 0;
+static XLogRecPtr flushedUpto = InvalidXLogRecPtr;
static TimeLineID receiveTLI = 0;
/*
@@ -1068,9 +1068,6 @@ readRecoverySignalFile(void)
* Check for recovery signal files and if found, fsync them since they
* represent server state information. We don't sweat too much about the
* possibility of fsync failure, however.
- *
- * If present, standby signal file takes precedence. If neither is present
- * then we won't enter archive recovery.
*/
if (stat(STANDBY_SIGNAL_FILE, &stat_buf) == 0)
{
@@ -1085,7 +1082,8 @@ readRecoverySignalFile(void)
}
standby_signal_file_found = true;
}
- else if (stat(RECOVERY_SIGNAL_FILE, &stat_buf) == 0)
+
+ if (stat(RECOVERY_SIGNAL_FILE, &stat_buf) == 0)
{
int fd;
@@ -1099,6 +1097,10 @@ readRecoverySignalFile(void)
recovery_signal_file_found = true;
}
+ /*
+ * If both signal files are present, standby signal file takes precedence.
+ * If neither is present then we won't enter archive recovery.
+ */
StandbyModeRequested = false;
ArchiveRecoveryRequested = false;
if (standby_signal_file_found)
@@ -3918,7 +3920,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
PrimarySlotName,
wal_receiver_create_temp_slot);
- flushedUpto = 0;
+ flushedUpto = InvalidXLogRecPtr;
}
/*
@@ -4096,7 +4098,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
static int
emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
{
- static XLogRecPtr lastComplaint = 0;
+ static XLogRecPtr lastComplaint = InvalidXLogRecPtr;
if (readSource == XLOG_FROM_PG_WAL && emode == LOG)
{
diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index 463c0756b5e..2d74c648335 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -78,6 +78,11 @@ typedef struct
pg_checksum_type manifest_checksum_type;
} basebackup_options;
+#define TAR_NUM_TERMINATION_BLOCKS 2
+
+StaticAssertDecl(TAR_NUM_TERMINATION_BLOCKS * TAR_BLOCK_SIZE <= BLCKSZ,
+ "BLCKSZ too small for " CppAsString2(TAR_NUM_TERMINATION_BLOCKS) " tar termination blocks");
+
static int64 sendTablespace(bbsink *sink, char *path, Oid spcoid, bool sizeonly,
struct backup_manifest_info *manifest,
IncrementalBackupInfo *ib);
@@ -382,10 +387,8 @@ perform_base_backup(basebackup_options *opt, bbsink *sink,
else
{
/* Properly terminate the tarfile. */
- StaticAssertDecl(2 * TAR_BLOCK_SIZE <= BLCKSZ,
- "BLCKSZ too small for 2 tar blocks");
- memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
- bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
+ memset(sink->bbs_buffer, 0, TAR_NUM_TERMINATION_BLOCKS * TAR_BLOCK_SIZE);
+ bbsink_archive_contents(sink, TAR_NUM_TERMINATION_BLOCKS * TAR_BLOCK_SIZE);
/* OK, that's the end of the archive. */
bbsink_end_archive(sink);
@@ -635,10 +638,8 @@ perform_base_backup(basebackup_options *opt, bbsink *sink,
}
/* Properly terminate the tar file. */
- StaticAssertStmt(2 * TAR_BLOCK_SIZE <= BLCKSZ,
- "BLCKSZ too small for 2 tar blocks");
- memset(sink->bbs_buffer, 0, 2 * TAR_BLOCK_SIZE);
- bbsink_archive_contents(sink, 2 * TAR_BLOCK_SIZE);
+ memset(sink->bbs_buffer, 0, TAR_NUM_TERMINATION_BLOCKS * TAR_BLOCK_SIZE);
+ bbsink_archive_contents(sink, TAR_NUM_TERMINATION_BLOCKS * TAR_BLOCK_SIZE);
/* OK, that's the end of the archive. */
bbsink_end_archive(sink);
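
Annotation: the basebackup.c hunks name the magic number 2 as TAR_NUM_TERMINATION_BLOCKS and hoist the duplicated compile-time check to file scope. The same guard in portable C11 (the buffer size is invented for this sketch; it stands in for BLCKSZ):

    #include <string.h>

    #define TAR_BLOCK_SIZE 512
    #define TAR_NUM_TERMINATION_BLOCKS 2
    #define DEMO_BUFSZ 8192     /* stands in for BLCKSZ */

    _Static_assert(TAR_NUM_TERMINATION_BLOCKS * TAR_BLOCK_SIZE <= DEMO_BUFSZ,
                   "buffer too small for tar termination blocks");

    int
    main(void)
    {
        static char buf[DEMO_BUFSZ];

        /* A tar archive ends with two zero-filled 512-byte blocks. */
        memset(buf, 0, TAR_NUM_TERMINATION_BLOCKS * TAR_BLOCK_SIZE);
        return buf[0];  /* 0 */
    }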
diff --git a/src/backend/backup/walsummary.c b/src/backend/backup/walsummary.c
index 21164faac7e..4cd1824fbc6 100644
--- a/src/backend/backup/walsummary.c
+++ b/src/backend/backup/walsummary.c
@@ -214,7 +214,7 @@ OpenWalSummaryFile(WalSummaryFile *ws, bool missing_ok)
LSN_FORMAT_ARGS(ws->end_lsn));
file = PathNameOpenFile(path, O_RDONLY);
- if (file < 0 && (errno != EEXIST || !missing_ok))
+ if (file < 0 && (errno != ENOENT || !missing_ok))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", path)));
@@ -251,7 +251,7 @@ RemoveWalSummaryIfOlderThan(WalSummaryFile *ws, time_t cutoff_time)
if (unlink(path) != 0)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not stat file \"%s\": %m", path)));
+ errmsg("could not remove file \"%s\": %m", path)));
ereport(DEBUG2,
(errmsg_internal("removing file \"%s\"", path)));
}
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index dd57624b4f9..e7699be55aa 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -26,6 +26,7 @@
#include "bootstrap/bootstrap.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
+#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "common/link-canary.h"
#include "miscadmin.h"
@@ -46,6 +47,7 @@
static void CheckerModeMain(void);
static void bootstrap_signals(void);
static Form_pg_attribute AllocateAttribute(void);
+static void InsertOneProargdefaultsValue(char *value);
static void populate_typ_list(void);
static Oid gettype(char *type);
static void cleanup(void);
@@ -91,38 +93,28 @@ static const struct typinfo TypInfo[] = {
F_BYTEAIN, F_BYTEAOUT},
{"char", CHAROID, 0, 1, true, TYPALIGN_CHAR, TYPSTORAGE_PLAIN, InvalidOid,
F_CHARIN, F_CHAROUT},
+ {"cstring", CSTRINGOID, 0, -2, false, TYPALIGN_CHAR, TYPSTORAGE_PLAIN, InvalidOid,
+ F_CSTRING_IN, F_CSTRING_OUT},
{"int2", INT2OID, 0, 2, true, TYPALIGN_SHORT, TYPSTORAGE_PLAIN, InvalidOid,
F_INT2IN, F_INT2OUT},
{"int4", INT4OID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
F_INT4IN, F_INT4OUT},
+ {"int8", INT8OID, 0, 8, true, TYPALIGN_DOUBLE, TYPSTORAGE_PLAIN, InvalidOid,
+ F_INT8IN, F_INT8OUT},
{"float4", FLOAT4OID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
F_FLOAT4IN, F_FLOAT4OUT},
+ {"float8", FLOAT8OID, 0, 8, true, TYPALIGN_DOUBLE, TYPSTORAGE_PLAIN, InvalidOid,
+ F_FLOAT8IN, F_FLOAT8OUT},
{"name", NAMEOID, CHAROID, NAMEDATALEN, false, TYPALIGN_CHAR, TYPSTORAGE_PLAIN, C_COLLATION_OID,
F_NAMEIN, F_NAMEOUT},
- {"regclass", REGCLASSOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
- F_REGCLASSIN, F_REGCLASSOUT},
{"regproc", REGPROCOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
F_REGPROCIN, F_REGPROCOUT},
- {"regtype", REGTYPEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
- F_REGTYPEIN, F_REGTYPEOUT},
- {"regrole", REGROLEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
- F_REGROLEIN, F_REGROLEOUT},
- {"regnamespace", REGNAMESPACEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
- F_REGNAMESPACEIN, F_REGNAMESPACEOUT},
- {"regdatabase", REGDATABASEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
- F_REGDATABASEIN, F_REGDATABASEOUT},
{"text", TEXTOID, 0, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID,
F_TEXTIN, F_TEXTOUT},
+ {"jsonb", JSONBOID, 0, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, InvalidOid,
+ F_JSONB_IN, F_JSONB_OUT},
{"oid", OIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
F_OIDIN, F_OIDOUT},
- {"oid8", OID8OID, 0, 8, true, TYPALIGN_DOUBLE, TYPSTORAGE_PLAIN, InvalidOid,
- F_OID8IN, F_OID8OUT},
- {"tid", TIDOID, 0, 6, false, TYPALIGN_SHORT, TYPSTORAGE_PLAIN, InvalidOid,
- F_TIDIN, F_TIDOUT},
- {"xid", XIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
- F_XIDIN, F_XIDOUT},
- {"cid", CIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
- F_CIDIN, F_CIDOUT},
{"pg_node_tree", PG_NODE_TREEOID, 0, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID,
F_PG_NODE_TREE_IN, F_PG_NODE_TREE_OUT},
{"int2vector", INT2VECTOROID, INT2OID, -1, false, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
@@ -131,13 +123,13 @@ static const struct typinfo TypInfo[] = {
F_OIDVECTORIN, F_OIDVECTOROUT},
{"_int4", INT4ARRAYOID, INT4OID, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, InvalidOid,
F_ARRAY_IN, F_ARRAY_OUT},
- {"_text", 1009, TEXTOID, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID,
+ {"_text", TEXTARRAYOID, TEXTOID, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID,
F_ARRAY_IN, F_ARRAY_OUT},
- {"_oid", 1028, OIDOID, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, InvalidOid,
+ {"_oid", OIDARRAYOID, OIDOID, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, InvalidOid,
F_ARRAY_IN, F_ARRAY_OUT},
- {"_char", 1002, CHAROID, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, InvalidOid,
+ {"_char", CHARARRAYOID, CHAROID, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, InvalidOid,
F_ARRAY_IN, F_ARRAY_OUT},
- {"_aclitem", 1034, ACLITEMOID, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, InvalidOid,
+ {"_aclitem", ACLITEMARRAYOID, ACLITEMOID, -1, false, TYPALIGN_DOUBLE, TYPSTORAGE_EXTENDED, InvalidOid,
F_ARRAY_IN, F_ARRAY_OUT}
};
@@ -242,7 +234,7 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("--%s must be first argument", optarg)));
- /* FALLTHROUGH */
+ pg_fallthrough;
case 'c':
{
char *name,
@@ -660,6 +652,7 @@ InsertOneTuple(void)
void
InsertOneValue(char *value, int i)
{
+ Form_pg_attribute attr;
Oid typoid;
int16 typlen;
bool typbyval;
@@ -668,19 +661,42 @@ InsertOneValue(char *value, int i)
Oid typioparam;
Oid typinput;
Oid typoutput;
+ Oid typcollation;
Assert(i >= 0 && i < MAXATTR);
elog(DEBUG4, "inserting column %d value \"%s\"", i, value);
- typoid = TupleDescAttr(boot_reldesc->rd_att, i)->atttypid;
+ attr = TupleDescAttr(RelationGetDescr(boot_reldesc), i);
+ typoid = attr->atttypid;
boot_get_type_io_data(typoid,
&typlen, &typbyval, &typalign,
&typdelim, &typioparam,
- &typinput, &typoutput);
+ &typinput, &typoutput,
+ &typcollation);
- values[i] = OidInputFunctionCall(typinput, value, typioparam, -1);
+ /*
+ * pg_node_tree values can't be inserted normally (pg_node_tree_in would
+ * just error out), so provide special cases for such columns that we
+ * would like to fill during bootstrap.
+ */
+ if (typoid == PG_NODE_TREEOID)
+ {
+ /* pg_proc.proargdefaults */
+ if (RelationGetRelid(boot_reldesc) == ProcedureRelationId &&
+ i == Anum_pg_proc_proargdefaults - 1)
+ InsertOneProargdefaultsValue(value);
+ else /* maybe other cases later */
+ elog(ERROR, "can't handle pg_node_tree input for %s.%s",
+ RelationGetRelationName(boot_reldesc),
+ NameStr(attr->attname));
+ }
+ else
+ {
+ /* Normal case */
+ values[i] = OidInputFunctionCall(typinput, value, typioparam, -1);
+ }
/*
* We use ereport not elog here so that parameters aren't evaluated unless
@@ -691,6 +707,111 @@ InsertOneValue(char *value, int i)
OidOutputFunctionCall(typoutput, values[i]))));
}
+/* ----------------
+ * InsertOneProargdefaultsValue
+ *
+ * In general, proargdefaults can be a list of any expressions, but
+ * for bootstrap we only support a list of Const nodes. The input
+ * has the form of a text array, and we feed non-null elements to the
+ * typinput functions for the appropriate parameters.
+ * ----------------
+ */
+static void
+InsertOneProargdefaultsValue(char *value)
+{
+ int pronargs;
+ oidvector *proargtypes;
+ Datum arrayval;
+ Datum *array_datums;
+ bool *array_nulls;
+ int array_count;
+ List *proargdefaults;
+ char *nodestring;
+
+ /* The pg_proc columns we need to use must have been filled already */
+ StaticAssertDecl(Anum_pg_proc_pronargs < Anum_pg_proc_proargdefaults,
+ "pronargs must come before proargdefaults");
+ StaticAssertDecl(Anum_pg_proc_pronargdefaults < Anum_pg_proc_proargdefaults,
+ "pronargdefaults must come before proargdefaults");
+ StaticAssertDecl(Anum_pg_proc_proargtypes < Anum_pg_proc_proargdefaults,
+ "proargtypes must come before proargdefaults");
+ if (Nulls[Anum_pg_proc_pronargs - 1])
+ elog(ERROR, "pronargs must not be null");
+ if (Nulls[Anum_pg_proc_proargtypes - 1])
+ elog(ERROR, "proargtypes must not be null");
+ pronargs = DatumGetInt16(values[Anum_pg_proc_pronargs - 1]);
+ proargtypes = (oidvector *) DatumGetPointer(values[Anum_pg_proc_proargtypes - 1]);

+ Assert(pronargs == proargtypes->dim1);
+
+ /* Parse the input string as an array value, then deconstruct to Datums */
+ arrayval = OidFunctionCall3(F_ARRAY_IN,
+ CStringGetDatum(value),
+ ObjectIdGetDatum(CSTRINGOID),
+ Int32GetDatum(-1));
+ deconstruct_array_builtin(DatumGetArrayTypeP(arrayval), CSTRINGOID,
+ &array_datums, &array_nulls, &array_count);
+
+ /* The values should correspond to the last N argtypes */
+ if (array_count > pronargs)
+ elog(ERROR, "too many proargdefaults entries");
+
+ /* Build the List of Const nodes */
+ proargdefaults = NIL;
+ for (int i = 0; i < array_count; i++)
+ {
+ Oid argtype = proargtypes->values[pronargs - array_count + i];
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+ char typdelim;
+ Oid typioparam;
+ Oid typinput;
+ Oid typoutput;
+ Oid typcollation;
+ Datum defval;
+ bool defnull;
+ Const *defConst;
+
+ boot_get_type_io_data(argtype,
+ &typlen, &typbyval, &typalign,
+ &typdelim, &typioparam,
+ &typinput, &typoutput,
+ &typcollation);
+
+ defnull = array_nulls[i];
+ if (defnull)
+ defval = (Datum) 0;
+ else
+ defval = OidInputFunctionCall(typinput,
+ DatumGetCString(array_datums[i]),
+ typioparam, -1);
+
+ defConst = makeConst(argtype,
+ -1, /* never any typmod */
+ typcollation,
+ typlen,
+ defval,
+ defnull,
+ typbyval);
+ proargdefaults = lappend(proargdefaults, defConst);
+ }
+
+ /*
+ * Flatten the List to a node-tree string, then convert to a text datum,
+ * which is the storage representation of pg_node_tree.
+ */
+ nodestring = nodeToString(proargdefaults);
+ values[Anum_pg_proc_proargdefaults - 1] = CStringGetTextDatum(nodestring);
+ Nulls[Anum_pg_proc_proargdefaults - 1] = false;
+
+ /*
+ * Hack: fill in pronargdefaults with the right value. This is surely
+ * ugly, but it beats making the programmer do it.
+ */
+ values[Anum_pg_proc_pronargdefaults - 1] = Int16GetDatum(array_count);
+ Nulls[Anum_pg_proc_pronargdefaults - 1] = false;
+}
+
/* ----------------
* InsertOneNull
* ----------------
@@ -831,10 +952,11 @@ gettype(char *type)
* boot_get_type_io_data
*
* Obtain type I/O information at bootstrap time. This intentionally has
- * almost the same API as lsyscache.c's get_type_io_data, except that
+ * an API very close to that of lsyscache.c's get_type_io_data, except that
* we only support obtaining the typinput and typoutput routines, not
- * the binary I/O routines. It is exported so that array_in and array_out
- * can be made to work during early bootstrap.
+ * the binary I/O routines, and we also return the type's collation.
+ * This is exported so that array_in and array_out can be made to work
+ * during early bootstrap.
* ----------------
*/
void
@@ -845,7 +967,8 @@ boot_get_type_io_data(Oid typid,
char *typdelim,
Oid *typioparam,
Oid *typinput,
- Oid *typoutput)
+ Oid *typoutput,
+ Oid *typcollation)
{
if (Typ != NIL)
{
@@ -876,6 +999,8 @@ boot_get_type_io_data(Oid typid,
*typinput = ap->am_typ.typinput;
*typoutput = ap->am_typ.typoutput;
+
+ *typcollation = ap->am_typ.typcollation;
}
else
{
@@ -904,6 +1029,8 @@ boot_get_type_io_data(Oid typid,
*typinput = TypInfo[typeindex].inproc;
*typoutput = TypInfo[typeindex].outproc;
+
+ *typcollation = TypInfo[typeindex].collation;
}
}
diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c
index a431fc0926f..aef855abccc 100644
--- a/src/backend/catalog/aclchk.c
+++ b/src/backend/catalog/aclchk.c
@@ -2115,7 +2115,7 @@ static void
ExecGrant_common(InternalGrant *istmt, Oid classid, AclMode default_privs,
void (*object_check) (InternalGrant *istmt, HeapTuple tuple))
{
- int cacheid;
+ SysCacheIdentifier cacheid;
Relation relation;
ListCell *cell;
@@ -3058,7 +3058,7 @@ object_aclmask_ext(Oid classid, Oid objectid, Oid roleid,
AclMode mask, AclMaskHow how,
bool *is_missing)
{
- int cacheid;
+ SysCacheIdentifier cacheid;
AclMode result;
HeapTuple tuple;
Datum aclDatum;
@@ -4089,7 +4089,7 @@ pg_largeobject_aclcheck_snapshot(Oid lobj_oid, Oid roleid, AclMode mode,
bool
object_ownercheck(Oid classid, Oid objectid, Oid roleid)
{
- int cacheid;
+ SysCacheIdentifier cacheid;
Oid ownerId;
/* Superusers bypass all permission checking. */
@@ -4101,7 +4101,7 @@ object_ownercheck(Oid classid, Oid objectid, Oid roleid)
classid = LargeObjectMetadataRelationId;
cacheid = get_object_catcache_oid(classid);
- if (cacheid != -1)
+ if (cacheid != SYSCACHEID_INVALID)
{
/* we can get the object's tuple from the syscache */
HeapTuple tuple;
@@ -4486,7 +4486,7 @@ recordExtObjInitPriv(Oid objoid, Oid classoid)
/* This will error on unsupported classoid. */
else if (get_object_attnum_acl(classoid) != InvalidAttrNumber)
{
- int cacheid;
+ SysCacheIdentifier cacheid;
Datum aclDatum;
bool isNull;
HeapTuple tuple;
@@ -4870,7 +4870,7 @@ RemoveRoleFromInitPriv(Oid roleid, Oid classid, Oid objid, int32 objsubid)
ScanKeyData key[3];
SysScanDesc scan;
HeapTuple oldtuple;
- int cacheid;
+ SysCacheIdentifier cacheid;
HeapTuple objtuple;
Oid ownerId;
Datum oldAclDatum;
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index f89267f0342..570c434ede8 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -641,7 +641,7 @@ findDependentObjects(const ObjectAddress *object,
break;
/* Otherwise, treat this like an internal dependency */
- /* FALL THRU */
+ pg_fallthrough;
case DEPENDENCY_INTERNAL:
@@ -1238,7 +1238,7 @@ reportDependentObjects(const ObjectAddresses *targetObjects,
static void
DropObjectById(const ObjectAddress *object)
{
- int cacheId;
+ SysCacheIdentifier cacheId;
Relation rel;
HeapTuple tup;
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index b2c1b1c5733..48c6805f752 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -795,9 +795,9 @@
# Now generate syscache info
print_boilerplate($syscache_ids_fh, "syscache_ids.h", "SysCache identifiers");
-print $syscache_ids_fh "enum SysCacheIdentifier
+print $syscache_ids_fh "typedef enum SysCacheIdentifier
{
-";
+\tSYSCACHEID_INVALID = -1,\n";
print_boilerplate($syscache_info_fh, "syscache_info.h",
"SysCache definitions");
@@ -812,7 +812,14 @@
my $last_syscache;
foreach my $syscache (sort keys %syscaches)
{
- print $syscache_ids_fh "\t$syscache,\n";
+ if (not defined $last_syscache)
+ {
+ print $syscache_ids_fh "\t$syscache = 0,\n";
+ }
+ else
+ {
+ print $syscache_ids_fh "\t$syscache,\n";
+ }
$last_syscache = $syscache;
print $syscache_info_fh "\t[$syscache] = {\n";
@@ -825,7 +832,7 @@
print $syscache_info_fh "\t},\n";
}
-print $syscache_ids_fh "};\n";
+print $syscache_ids_fh "} SysCacheIdentifier;\n";
print $syscache_ids_fh "#define SysCacheSize ($last_syscache + 1)\n";
print $syscache_info_fh "};\n";
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 606434823cf..a6ed9849e77 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -2635,6 +2635,7 @@ AddRelationNewConstraints(Relation rel,
* requested validity.
*/
if (AdjustNotNullInheritance(RelationGetRelid(rel), colnum,
+ cdef->conname,
is_local, cdef->is_no_inherit,
cdef->skip_validation))
continue;
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index c3b79a2ba48..4b0f4ba115d 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -229,7 +229,8 @@ static void AccessTempTableNamespace(bool force);
static void InitTempTableNamespace(void);
static void RemoveTempRelations(Oid tempNamespaceId);
static void RemoveTempRelationsCallback(int code, Datum arg);
-static void InvalidationCallback(Datum arg, int cacheid, uint32 hashvalue);
+static void InvalidationCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames,
bool include_out_arguments, int pronargs,
int **argnumbers, int *fgc_flags);
@@ -4863,7 +4864,7 @@ InitializeSearchPath(void)
* Syscache inval callback function
*/
static void
-InvalidationCallback(Datum arg, int cacheid, uint32 hashvalue)
+InvalidationCallback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
/*
* Force search path to be recomputed on next use, also invalidating the
diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c
index 02af64b82c6..d32aaff2821 100644
--- a/src/backend/catalog/objectaddress.c
+++ b/src/backend/catalog/objectaddress.c
@@ -99,10 +99,11 @@ typedef struct
* error messages */
Oid class_oid; /* oid of catalog */
Oid oid_index_oid; /* oid of index on system oid column */
- int oid_catcache_id; /* id of catcache on system oid column */
- int name_catcache_id; /* id of catcache on (name,namespace), or
- * (name) if the object does not live in a
- * namespace */
+ SysCacheIdentifier oid_catcache_id; /* id of catcache on system oid column */
+ SysCacheIdentifier name_catcache_id; /* id of catcache on
+ * (name,namespace), or (name) if
+ * the object does not live in a
+ * namespace */
AttrNumber attnum_oid; /* attribute number of oid column */
AttrNumber attnum_name; /* attnum of name field */
AttrNumber attnum_namespace; /* attnum of namespace field */
@@ -135,8 +136,8 @@ static const ObjectPropertyType ObjectProperty[] =
"access method operator",
AccessMethodOperatorRelationId,
AccessMethodOperatorOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_amop_oid,
InvalidAttrNumber,
InvalidAttrNumber,
@@ -149,8 +150,8 @@ static const ObjectPropertyType ObjectProperty[] =
"access method procedure",
AccessMethodProcedureRelationId,
AccessMethodProcedureOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_amproc_oid,
InvalidAttrNumber,
InvalidAttrNumber,
@@ -163,8 +164,8 @@ static const ObjectPropertyType ObjectProperty[] =
"cast",
CastRelationId,
CastOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_cast_oid,
InvalidAttrNumber,
InvalidAttrNumber,
@@ -178,7 +179,7 @@ static const ObjectPropertyType ObjectProperty[] =
CollationRelationId,
CollationOidIndexId,
COLLOID,
- -1, /* COLLNAMEENCNSP also takes encoding */
+ SYSCACHEID_INVALID, /* COLLNAMEENCNSP also takes encoding */
Anum_pg_collation_oid,
Anum_pg_collation_collname,
Anum_pg_collation_collnamespace,
@@ -192,7 +193,7 @@ static const ObjectPropertyType ObjectProperty[] =
ConstraintRelationId,
ConstraintOidIndexId,
CONSTROID,
- -1,
+ SYSCACHEID_INVALID,
Anum_pg_constraint_oid,
Anum_pg_constraint_conname,
Anum_pg_constraint_connamespace,
@@ -220,7 +221,7 @@ static const ObjectPropertyType ObjectProperty[] =
DatabaseRelationId,
DatabaseOidIndexId,
DATABASEOID,
- -1,
+ SYSCACHEID_INVALID,
Anum_pg_database_oid,
Anum_pg_database_datname,
InvalidAttrNumber,
@@ -233,8 +234,8 @@ static const ObjectPropertyType ObjectProperty[] =
"default ACL",
DefaultAclRelationId,
DefaultAclOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_default_acl_oid,
InvalidAttrNumber,
InvalidAttrNumber,
@@ -247,8 +248,8 @@ static const ObjectPropertyType ObjectProperty[] =
"extension",
ExtensionRelationId,
ExtensionOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_extension_oid,
Anum_pg_extension_extname,
InvalidAttrNumber, /* extension doesn't belong to extnamespace */
@@ -290,7 +291,7 @@ static const ObjectPropertyType ObjectProperty[] =
ProcedureRelationId,
ProcedureOidIndexId,
PROCOID,
- -1, /* PROCNAMEARGSNSP also takes argument types */
+ SYSCACHEID_INVALID, /* PROCNAMEARGSNSP also takes argument types */
Anum_pg_proc_oid,
Anum_pg_proc_proname,
Anum_pg_proc_pronamespace,
@@ -317,8 +318,8 @@ static const ObjectPropertyType ObjectProperty[] =
"large object metadata",
LargeObjectMetadataRelationId,
LargeObjectMetadataOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_largeobject_metadata_oid,
InvalidAttrNumber,
InvalidAttrNumber,
@@ -332,7 +333,7 @@ static const ObjectPropertyType ObjectProperty[] =
OperatorClassRelationId,
OpclassOidIndexId,
CLAOID,
- -1, /* CLAAMNAMENSP also takes opcmethod */
+ SYSCACHEID_INVALID, /* CLAAMNAMENSP also takes opcmethod */
Anum_pg_opclass_oid,
Anum_pg_opclass_opcname,
Anum_pg_opclass_opcnamespace,
@@ -346,7 +347,7 @@ static const ObjectPropertyType ObjectProperty[] =
OperatorRelationId,
OperatorOidIndexId,
OPEROID,
- -1, /* OPERNAMENSP also takes left and right type */
+ SYSCACHEID_INVALID, /* OPERNAMENSP also takes left and right type */
Anum_pg_operator_oid,
Anum_pg_operator_oprname,
Anum_pg_operator_oprnamespace,
@@ -360,7 +361,7 @@ static const ObjectPropertyType ObjectProperty[] =
OperatorFamilyRelationId,
OpfamilyOidIndexId,
OPFAMILYOID,
- -1, /* OPFAMILYAMNAMENSP also takes opfmethod */
+ SYSCACHEID_INVALID, /* OPFAMILYAMNAMENSP also takes opfmethod */
Anum_pg_opfamily_oid,
Anum_pg_opfamily_opfname,
Anum_pg_opfamily_opfnamespace,
@@ -387,8 +388,8 @@ static const ObjectPropertyType ObjectProperty[] =
"role membership",
AuthMemRelationId,
AuthMemOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_auth_members_oid,
InvalidAttrNumber,
InvalidAttrNumber,
@@ -401,8 +402,8 @@ static const ObjectPropertyType ObjectProperty[] =
"rule",
RewriteRelationId,
RewriteOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_rewrite_oid,
Anum_pg_rewrite_rulename,
InvalidAttrNumber,
@@ -444,7 +445,7 @@ static const ObjectPropertyType ObjectProperty[] =
TableSpaceRelationId,
TablespaceOidIndexId,
TABLESPACEOID,
- -1,
+ SYSCACHEID_INVALID,
Anum_pg_tablespace_oid,
Anum_pg_tablespace_spcname,
InvalidAttrNumber,
@@ -458,7 +459,7 @@ static const ObjectPropertyType ObjectProperty[] =
TransformRelationId,
TransformOidIndexId,
TRFOID,
- -1,
+ SYSCACHEID_INVALID,
Anum_pg_transform_oid,
InvalidAttrNumber,
InvalidAttrNumber,
@@ -471,8 +472,8 @@ static const ObjectPropertyType ObjectProperty[] =
"trigger",
TriggerRelationId,
TriggerOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_trigger_oid,
Anum_pg_trigger_tgname,
InvalidAttrNumber,
@@ -485,8 +486,8 @@ static const ObjectPropertyType ObjectProperty[] =
"policy",
PolicyRelationId,
PolicyOidIndexId,
- -1,
- -1,
+ SYSCACHEID_INVALID,
+ SYSCACHEID_INVALID,
Anum_pg_policy_oid,
Anum_pg_policy_polname,
InvalidAttrNumber,
@@ -626,7 +627,7 @@ static const ObjectPropertyType ObjectProperty[] =
UserMappingRelationId,
UserMappingOidIndexId,
USERMAPPINGOID,
- -1,
+ SYSCACHEID_INVALID,
Anum_pg_user_mapping_oid,
InvalidAttrNumber,
InvalidAttrNumber,
@@ -2231,7 +2232,7 @@ pg_get_object_address(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("name list length must be exactly %d", 1)));
/* fall through to check args length */
- /* FALLTHROUGH */
+ pg_fallthrough;
case OBJECT_DOMCONSTRAINT:
case OBJECT_CAST:
case OBJECT_PUBLICATION_REL:
@@ -2256,7 +2257,7 @@ pg_get_object_address(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("name list length must be at least %d", 3)));
/* fall through to check args length */
- /* FALLTHROUGH */
+ pg_fallthrough;
case OBJECT_OPERATOR:
if (list_length(args) != 2)
ereport(ERROR,
@@ -2571,7 +2572,7 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address,
Oid
get_object_namespace(const ObjectAddress *address)
{
- int cache;
+ SysCacheIdentifier cache;
HeapTuple tuple;
Oid oid;
const ObjectPropertyType *property;
@@ -2583,7 +2584,7 @@ get_object_namespace(const ObjectAddress *address)
/* Currently, we can only handle object types with system caches. */
cache = property->oid_catcache_id;
- Assert(cache != -1);
+ Assert(cache != SYSCACHEID_INVALID);
/* Fetch tuple from syscache and extract namespace attribute. */
tuple = SearchSysCache1(cache, ObjectIdGetDatum(address->objectId));
@@ -2640,7 +2641,7 @@ get_object_oid_index(Oid class_id)
return prop->oid_index_oid;
}
-int
+SysCacheIdentifier
get_object_catcache_oid(Oid class_id)
{
const ObjectPropertyType *prop = get_object_property_data(class_id);
@@ -2648,7 +2649,7 @@ get_object_catcache_oid(Oid class_id)
return prop->oid_catcache_id;
}
-int
+SysCacheIdentifier
get_object_catcache_name(Oid class_id)
{
const ObjectPropertyType *prop = get_object_property_data(class_id);
@@ -2806,9 +2807,9 @@ get_catalog_object_by_oid_extended(Relation catalog,
{
HeapTuple tuple;
Oid classId = RelationGetRelid(catalog);
- int oidCacheId = get_object_catcache_oid(classId);
+ SysCacheIdentifier oidCacheId = get_object_catcache_oid(classId);
- if (oidCacheId > 0)
+ if (oidCacheId >= 0)
{
if (locktup)
tuple = SearchSysCacheLockedCopy1(oidCacheId,
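
Annotation: note the subtle fix in get_catalog_object_by_oid_extended(): with the enum now starting at 0, the old test oidCacheId > 0 would have wrongly rejected the first syscache; >= 0 (equivalently != SYSCACHEID_INVALID) is the correct guard. Demonstrated:

    #include <stdio.h>

    typedef enum { DEMO_INVALID = -1, DEMO_FIRST = 0, DEMO_SECOND } DemoId;

    int
    main(void)
    {
        DemoId id = DEMO_FIRST;

        printf("buggy:   %d\n", id > 0);    /* 0 -- first cache rejected */
        printf("correct: %d\n", id >= 0);   /* 1 */
        return 0;
    }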
diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c
index cbbcf166e45..b12765ae691 100644
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -731,14 +731,15 @@ extractNotNullColumn(HeapTuple constrTup)
* If a constraint exists but the connoinherit flag is not what the caller
* wants, throw an error about the incompatibility. If the desired
* constraint is valid but the existing constraint is not valid, also
- * throw an error about that (the opposite case is acceptable).
+ * throw an error about that (the opposite case is acceptable). If
+ * the proposed constraint has a different name, also throw an error.
*
* If everything checks out, we adjust conislocal/coninhcount and return
* true. If is_local is true we flip conislocal true, or do nothing if
* it's already true; otherwise we increment coninhcount by 1.
*/
bool
-AdjustNotNullInheritance(Oid relid, AttrNumber attnum,
+AdjustNotNullInheritance(Oid relid, AttrNumber attnum, const char *new_conname,
bool is_local, bool is_no_inherit, bool is_notvalid)
{
HeapTuple tup;
@@ -777,6 +778,22 @@ AdjustNotNullInheritance(Oid relid, AttrNumber attnum,
errhint("You might need to validate it using %s.",
"ALTER TABLE ... VALIDATE CONSTRAINT"));
+ /*
+ * If, for a new constraint that is being defined locally (i.e., not
+ * being passed down via inheritance), a name was specified, then
+ * verify that the existing constraint has the same name. Otherwise
+ * throw an error. Names of inherited constraints are ignored because
+ * they are not directly user-specified, so matching is not important.
+ */
+ if (is_local && new_conname &&
+ strcmp(new_conname, NameStr(conform->conname)) != 0)
+ ereport(ERROR,
+ errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot create not-null constraint \"%s\" on column \"%s\" of table \"%s\"",
+ new_conname, get_attname(relid, attnum, false), get_rel_name(relid)),
+ errdetail("A not-null constraint named \"%s\" already exists for this column.",
+ NameStr(conform->conname)));
+
if (!is_local)
{
if (pg_add_s16_overflow(conform->coninhcount, 1,
diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c
index 55309d16f15..07c2d41c189 100644
--- a/src/backend/catalog/pg_depend.c
+++ b/src/backend/catalog/pg_depend.c
@@ -23,12 +23,14 @@
#include "catalog/pg_constraint.h"
#include "catalog/pg_depend.h"
#include "catalog/pg_extension.h"
+#include "catalog/pg_type.h"
#include "catalog/partition.h"
#include "commands/extension.h"
#include "miscadmin.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/syscache.h"
static bool isObjectPinned(const ObjectAddress *object);
@@ -813,6 +815,77 @@ getAutoExtensionsOfObject(Oid classId, Oid objectId)
return result;
}
+/*
+ * Look up a type belonging to an extension.
+ *
+ * Returns the type's OID, or InvalidOid if not found.
+ *
+ * Notice that the type is specified by name only, without a schema.
+ * That's because this will typically be used by relocatable extensions
+ * which can't make a-priori assumptions about which schema their objects
+ * are in. As long as the extension only defines one type of this name,
+ * the answer is unique anyway.
+ *
+ * We might later add the ability to look up functions, operators, etc.
+ */
+Oid
+getExtensionType(Oid extensionOid, const char *typname)
+{
+ Oid result = InvalidOid;
+ Relation depRel;
+ ScanKeyData key[3];
+ SysScanDesc scan;
+ HeapTuple tup;
+
+ depRel = table_open(DependRelationId, AccessShareLock);
+
+ ScanKeyInit(&key[0],
+ Anum_pg_depend_refclassid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(ExtensionRelationId));
+ ScanKeyInit(&key[1],
+ Anum_pg_depend_refobjid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(extensionOid));
+ ScanKeyInit(&key[2],
+ Anum_pg_depend_refobjsubid,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum(0));
+
+ scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+ NULL, 3, key);
+
+ while (HeapTupleIsValid(tup = systable_getnext(scan)))
+ {
+ Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup);
+
+ if (depform->classid == TypeRelationId &&
+ depform->deptype == DEPENDENCY_EXTENSION)
+ {
+ Oid typoid = depform->objid;
+ HeapTuple typtup;
+
+ typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typoid));
+ if (!HeapTupleIsValid(typtup))
+ continue; /* should we throw an error? */
+ if (strcmp(NameStr(((Form_pg_type) GETSTRUCT(typtup))->typname),
+ typname) == 0)
+ {
+ result = typoid;
+ ReleaseSysCache(typtup);
+ break; /* no need to keep searching */
+ }
+ ReleaseSysCache(typtup);
+ }
+ }
+
+ systable_endscan(scan);
+
+ table_close(depRel, AccessShareLock);
+
+ return result;
+}
+
/*
* Detect whether a sequence is marked as "owned" by a column
*
diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c
index acff7a0096d..5df4b3f7a91 100644
--- a/src/backend/catalog/pg_proc.c
+++ b/src/backend/catalog/pg_proc.c
@@ -1206,7 +1206,7 @@ match_prosrc_to_literal(const char *prosrc, const char *literal,
if (cursorpos > 0)
newcp++;
}
- chlen = pg_mblen(prosrc);
+ chlen = pg_mblen_cstr(prosrc);
if (strncmp(prosrc, literal, chlen) != 0)
goto fail;
prosrc += chlen;
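
Annotation: the pg_proc.c hunk swaps pg_mblen() for pg_mblen_cstr() when stepping through prosrc character by character; the loop shape is the classic advance-by-character-length walk over a multibyte string. A standalone UTF-8 sketch of that walk (the length helper is a simplification for illustration, not the PostgreSQL function):

    #include <stdio.h>

    /* Simplified UTF-8 sequence length; pg_mblen_cstr() is the real thing. */
    static int
    utf8_len(const char *s)
    {
        unsigned char c = (unsigned char) *s;

        if (c < 0x80)
            return 1;
        if (c < 0xE0)
            return 2;
        if (c < 0xF0)
            return 3;
        return 4;
    }

    int
    main(void)
    {
        const char *s = "d\xC3\xA9j\xC3\xA0";   /* "déjà" in UTF-8 */
        int nchars = 0;

        for (const char *p = s; *p != '\0'; p += utf8_len(p))
            nchars++;
        printf("%d\n", nchars);     /* prints 4 (6 bytes, 4 characters) */
        return 0;
    }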
diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c
index 3db41ecd74f..c9998531b2f 100644
--- a/src/backend/catalog/pg_shdepend.c
+++ b/src/backend/catalog/pg_shdepend.c
@@ -1458,7 +1458,7 @@ shdepDropOwned(List *roleids, DropBehavior behavior)
sdepForm->objid);
break;
}
- /* FALLTHROUGH */
+ pg_fallthrough;
case SHARED_DEPENDENCY_OWNER:
diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c
index 2b103245290..acf42b853ed 100644
--- a/src/backend/catalog/pg_subscription.c
+++ b/src/backend/catalog/pg_subscription.c
@@ -129,6 +129,12 @@ GetSubscription(Oid subid, bool missing_ok)
Anum_pg_subscription_subsynccommit);
sub->synccommit = TextDatumGetCString(datum);
+ /* Get walrcvtimeout */
+ datum = SysCacheGetAttrNotNull(SUBSCRIPTIONOID,
+ tup,
+ Anum_pg_subscription_subwalrcvtimeout);
+ sub->walrcvtimeout = TextDatumGetCString(datum);
+
/* Get publications */
datum = SysCacheGetAttrNotNull(SUBSCRIPTIONOID,
tup,
diff --git a/src/backend/catalog/system_functions.sql b/src/backend/catalog/system_functions.sql
index eb9e31ae1bf..69699f8830a 100644
--- a/src/backend/catalog/system_functions.sql
+++ b/src/backend/catalog/system_functions.sql
@@ -7,7 +7,8 @@
*
* This file redefines certain built-in functions that are impractical
* to fully define in pg_proc.dat. In most cases that's because they use
- * SQL-standard function bodies and/or default expressions. The node
+ * SQL-standard function bodies and/or default expressions. (But defaults
+ * that are just constants can be entered in pg_proc.dat.) The node
* tree representations of those are too unreadable, platform-dependent,
* and changeable to want to deal with them manually. Hence, we put stub
* definitions of such functions into pg_proc.dat and then replace them
@@ -66,13 +67,6 @@ CREATE OR REPLACE FUNCTION bit_length(text)
IMMUTABLE PARALLEL SAFE STRICT COST 1
RETURN octet_length($1) * 8;
-CREATE OR REPLACE FUNCTION
- random_normal(mean float8 DEFAULT 0, stddev float8 DEFAULT 1)
- RETURNS float8
- LANGUAGE internal
- VOLATILE PARALLEL RESTRICTED STRICT COST 1
-AS 'drandom_normal';
-
CREATE OR REPLACE FUNCTION log(numeric)
RETURNS numeric
LANGUAGE sql
@@ -383,279 +377,6 @@ BEGIN ATOMIC
SELECT * FROM ts_debug(get_current_ts_config(), $1);
END;
-CREATE OR REPLACE FUNCTION
- pg_backup_start(label text, fast boolean DEFAULT false)
- RETURNS pg_lsn STRICT VOLATILE LANGUAGE internal AS 'pg_backup_start'
- PARALLEL RESTRICTED;
-
-CREATE OR REPLACE FUNCTION pg_backup_stop (
- wait_for_archive boolean DEFAULT true, OUT lsn pg_lsn,
- OUT labelfile text, OUT spcmapfile text)
- RETURNS record STRICT VOLATILE LANGUAGE internal as 'pg_backup_stop'
- PARALLEL RESTRICTED;
-
-CREATE OR REPLACE FUNCTION
- pg_promote(wait boolean DEFAULT true, wait_seconds integer DEFAULT 60)
- RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote'
- PARALLEL SAFE;
-
-CREATE OR REPLACE FUNCTION
- pg_terminate_backend(pid integer, timeout int8 DEFAULT 0)
- RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_terminate_backend'
- PARALLEL SAFE;
-
--- legacy definition for compatibility with 9.3
-CREATE OR REPLACE FUNCTION
- json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
- RETURNS anyelement LANGUAGE internal STABLE AS 'json_populate_record' PARALLEL SAFE;
-
--- legacy definition for compatibility with 9.3
-CREATE OR REPLACE FUNCTION
- json_populate_recordset(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
- RETURNS SETOF anyelement LANGUAGE internal STABLE ROWS 100 AS 'json_populate_recordset' PARALLEL SAFE;
-
-CREATE OR REPLACE FUNCTION pg_logical_slot_get_changes(
- IN slot_name name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
- OUT lsn pg_lsn, OUT xid xid, OUT data text)
-RETURNS SETOF RECORD
-LANGUAGE INTERNAL
-VOLATILE ROWS 1000 COST 1000
-AS 'pg_logical_slot_get_changes';
-
-CREATE OR REPLACE FUNCTION pg_logical_slot_peek_changes(
- IN slot_name name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
- OUT lsn pg_lsn, OUT xid xid, OUT data text)
-RETURNS SETOF RECORD
-LANGUAGE INTERNAL
-VOLATILE ROWS 1000 COST 1000
-AS 'pg_logical_slot_peek_changes';
-
-CREATE OR REPLACE FUNCTION pg_logical_slot_get_binary_changes(
- IN slot_name name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
- OUT lsn pg_lsn, OUT xid xid, OUT data bytea)
-RETURNS SETOF RECORD
-LANGUAGE INTERNAL
-VOLATILE ROWS 1000 COST 1000
-AS 'pg_logical_slot_get_binary_changes';
-
-CREATE OR REPLACE FUNCTION pg_logical_slot_peek_binary_changes(
- IN slot_name name, IN upto_lsn pg_lsn, IN upto_nchanges int, VARIADIC options text[] DEFAULT '{}',
- OUT lsn pg_lsn, OUT xid xid, OUT data bytea)
-RETURNS SETOF RECORD
-LANGUAGE INTERNAL
-VOLATILE ROWS 1000 COST 1000
-AS 'pg_logical_slot_peek_binary_changes';
-
-CREATE OR REPLACE FUNCTION pg_logical_emit_message(
- transactional boolean,
- prefix text,
- message text,
- flush boolean DEFAULT false)
-RETURNS pg_lsn
-LANGUAGE INTERNAL
-STRICT VOLATILE
-AS 'pg_logical_emit_message_text';
-
-CREATE OR REPLACE FUNCTION pg_logical_emit_message(
- transactional boolean,
- prefix text,
- message bytea,
- flush boolean DEFAULT false)
-RETURNS pg_lsn
-LANGUAGE INTERNAL
-STRICT VOLATILE
-AS 'pg_logical_emit_message_bytea';
-
-CREATE OR REPLACE FUNCTION pg_create_physical_replication_slot(
- IN slot_name name, IN immediately_reserve boolean DEFAULT false,
- IN temporary boolean DEFAULT false,
- OUT slot_name name, OUT lsn pg_lsn)
-RETURNS RECORD
-LANGUAGE INTERNAL
-STRICT VOLATILE
-AS 'pg_create_physical_replication_slot';
-
-CREATE OR REPLACE FUNCTION pg_create_logical_replication_slot(
- IN slot_name name, IN plugin name,
- IN temporary boolean DEFAULT false,
- IN twophase boolean DEFAULT false,
- IN failover boolean DEFAULT false,
- OUT slot_name name, OUT lsn pg_lsn)
-RETURNS RECORD
-LANGUAGE INTERNAL
-STRICT VOLATILE
-AS 'pg_create_logical_replication_slot';
-
-CREATE OR REPLACE FUNCTION
- make_interval(years int4 DEFAULT 0, months int4 DEFAULT 0, weeks int4 DEFAULT 0,
- days int4 DEFAULT 0, hours int4 DEFAULT 0, mins int4 DEFAULT 0,
- secs double precision DEFAULT 0.0)
-RETURNS interval
-LANGUAGE INTERNAL
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'make_interval';
-
-CREATE OR REPLACE FUNCTION
- jsonb_set(jsonb_in jsonb, path text[] , replacement jsonb,
- create_if_missing boolean DEFAULT true)
-RETURNS jsonb
-LANGUAGE INTERNAL
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'jsonb_set';
-
-CREATE OR REPLACE FUNCTION
- jsonb_set_lax(jsonb_in jsonb, path text[] , replacement jsonb,
- create_if_missing boolean DEFAULT true,
- null_value_treatment text DEFAULT 'use_json_null')
-RETURNS jsonb
-LANGUAGE INTERNAL
-CALLED ON NULL INPUT IMMUTABLE PARALLEL SAFE
-AS 'jsonb_set_lax';
-
-CREATE OR REPLACE FUNCTION
- parse_ident(str text, strict boolean DEFAULT true)
-RETURNS text[]
-LANGUAGE INTERNAL
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'parse_ident';
-
-CREATE OR REPLACE FUNCTION
- jsonb_insert(jsonb_in jsonb, path text[] , replacement jsonb,
- insert_after boolean DEFAULT false)
-RETURNS jsonb
-LANGUAGE INTERNAL
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'jsonb_insert';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_exists(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS boolean
-LANGUAGE INTERNAL
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'jsonb_path_exists';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_match(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS boolean
-LANGUAGE INTERNAL
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'jsonb_path_match';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_query(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS SETOF jsonb
-LANGUAGE INTERNAL
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'jsonb_path_query';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_query_array(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS jsonb
-LANGUAGE INTERNAL
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'jsonb_path_query_array';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_query_first(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS jsonb
-LANGUAGE INTERNAL
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'jsonb_path_query_first';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_exists_tz(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS boolean
-LANGUAGE INTERNAL
-STRICT STABLE PARALLEL SAFE
-AS 'jsonb_path_exists_tz';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_match_tz(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS boolean
-LANGUAGE INTERNAL
-STRICT STABLE PARALLEL SAFE
-AS 'jsonb_path_match_tz';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_query_tz(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS SETOF jsonb
-LANGUAGE INTERNAL
-STRICT STABLE PARALLEL SAFE
-AS 'jsonb_path_query_tz';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_query_array_tz(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS jsonb
-LANGUAGE INTERNAL
-STRICT STABLE PARALLEL SAFE
-AS 'jsonb_path_query_array_tz';
-
-CREATE OR REPLACE FUNCTION
- jsonb_path_query_first_tz(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
- silent boolean DEFAULT false)
-RETURNS jsonb
-LANGUAGE INTERNAL
-STRICT STABLE PARALLEL SAFE
-AS 'jsonb_path_query_first_tz';
-
-CREATE OR REPLACE FUNCTION
- jsonb_strip_nulls(target jsonb, strip_in_arrays boolean DEFAULT false)
-RETURNS jsonb
-LANGUAGE INTERNAL
-STRICT STABLE PARALLEL SAFE
-AS 'jsonb_strip_nulls';
-
-CREATE OR REPLACE FUNCTION
- json_strip_nulls(target json, strip_in_arrays boolean DEFAULT false)
-RETURNS json
-LANGUAGE INTERNAL
-STRICT STABLE PARALLEL SAFE
-AS 'json_strip_nulls';
-
--- default normalization form is NFC, per SQL standard
-CREATE OR REPLACE FUNCTION
- "normalize"(text, text DEFAULT 'NFC')
-RETURNS text
-LANGUAGE internal
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'unicode_normalize_func';
-
-CREATE OR REPLACE FUNCTION
- is_normalized(text, text DEFAULT 'NFC')
-RETURNS boolean
-LANGUAGE internal
-STRICT IMMUTABLE PARALLEL SAFE
-AS 'unicode_is_normalized';
-
-CREATE OR REPLACE FUNCTION
- pg_stat_reset_shared(target text DEFAULT NULL)
-RETURNS void
-LANGUAGE INTERNAL
-CALLED ON NULL INPUT VOLATILE PARALLEL SAFE
-AS 'pg_stat_reset_shared';
-
-CREATE OR REPLACE FUNCTION
- pg_stat_reset_slru(target text DEFAULT NULL)
-RETURNS void
-LANGUAGE INTERNAL
-CALLED ON NULL INPUT VOLATILE PARALLEL SAFE
-AS 'pg_stat_reset_slru';
-
-CREATE OR REPLACE FUNCTION
- pg_replication_origin_session_setup(node_name text, pid integer DEFAULT 0)
-RETURNS void
-LANGUAGE INTERNAL
-STRICT VOLATILE PARALLEL UNSAFE
-AS 'pg_replication_origin_session_setup';
--
-- The default permissions for functions mean that anyone can execute them.
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 7553f31fef0..1ea8f1faa9e 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -363,7 +363,28 @@ CREATE VIEW pg_stats_ext_exprs WITH (security_barrier) AS
WHEN (stat.a).stakind3 = 5 THEN (stat.a).stanumbers3
WHEN (stat.a).stakind4 = 5 THEN (stat.a).stanumbers4
WHEN (stat.a).stakind5 = 5 THEN (stat.a).stanumbers5
- END) AS elem_count_histogram
+ END) AS elem_count_histogram,
+ (CASE
+ WHEN (stat.a).stakind1 = 6 THEN (stat.a).stavalues1
+ WHEN (stat.a).stakind2 = 6 THEN (stat.a).stavalues2
+ WHEN (stat.a).stakind3 = 6 THEN (stat.a).stavalues3
+ WHEN (stat.a).stakind4 = 6 THEN (stat.a).stavalues4
+ WHEN (stat.a).stakind5 = 6 THEN (stat.a).stavalues5
+ END) AS range_length_histogram,
+ (CASE
+ WHEN (stat.a).stakind1 = 6 THEN (stat.a).stanumbers1[1]
+ WHEN (stat.a).stakind2 = 6 THEN (stat.a).stanumbers2[1]
+ WHEN (stat.a).stakind3 = 6 THEN (stat.a).stanumbers3[1]
+ WHEN (stat.a).stakind4 = 6 THEN (stat.a).stanumbers4[1]
+ WHEN (stat.a).stakind5 = 6 THEN (stat.a).stanumbers5[1]
+ END) AS range_empty_frac,
+ (CASE
+ WHEN (stat.a).stakind1 = 7 THEN (stat.a).stavalues1
+ WHEN (stat.a).stakind2 = 7 THEN (stat.a).stavalues2
+ WHEN (stat.a).stakind3 = 7 THEN (stat.a).stavalues3
+ WHEN (stat.a).stakind4 = 7 THEN (stat.a).stavalues4
+ WHEN (stat.a).stakind5 = 7 THEN (stat.a).stavalues5
+ END) AS range_bounds_histogram
FROM pg_statistic_ext s JOIN pg_class c ON (c.oid = s.stxrelid)
LEFT JOIN pg_statistic_ext_data sd ON (s.oid = sd.stxoid)
LEFT JOIN pg_namespace cn ON (cn.oid = c.relnamespace)
diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c
index 08957104c70..c6f58d47be6 100644
--- a/src/backend/commands/alter.c
+++ b/src/backend/commands/alter.c
@@ -159,8 +159,8 @@ static void
AlterObjectRename_internal(Relation rel, Oid objectId, const char *new_name)
{
Oid classId = RelationGetRelid(rel);
- int oidCacheId = get_object_catcache_oid(classId);
- int nameCacheId = get_object_catcache_name(classId);
+ SysCacheIdentifier oidCacheId = get_object_catcache_oid(classId);
+ SysCacheIdentifier nameCacheId = get_object_catcache_name(classId);
AttrNumber Anum_name = get_object_attnum_name(classId);
AttrNumber Anum_namespace = get_object_attnum_namespace(classId);
AttrNumber Anum_owner = get_object_attnum_owner(classId);
@@ -686,8 +686,8 @@ static Oid
AlterObjectNamespace_internal(Relation rel, Oid objid, Oid nspOid)
{
Oid classId = RelationGetRelid(rel);
- int oidCacheId = get_object_catcache_oid(classId);
- int nameCacheId = get_object_catcache_name(classId);
+ SysCacheIdentifier oidCacheId = get_object_catcache_oid(classId);
+ SysCacheIdentifier nameCacheId = get_object_catcache_name(classId);
AttrNumber Anum_name = get_object_attnum_name(classId);
AttrNumber Anum_namespace = get_object_attnum_namespace(classId);
AttrNumber Anum_owner = get_object_attnum_owner(classId);
diff --git a/src/backend/commands/comment.c b/src/backend/commands/comment.c
index caacb17e5d7..771aba2a69f 100644
--- a/src/backend/commands/comment.c
+++ b/src/backend/commands/comment.c
@@ -41,6 +41,7 @@ CommentObject(CommentStmt *stmt)
{
Relation relation;
ObjectAddress address = InvalidObjectAddress;
+ bool missing_ok;
/*
* When loading a dump, we may see a COMMENT ON DATABASE for the old name
@@ -63,6 +64,14 @@ CommentObject(CommentStmt *stmt)
}
}
+ /*
+ * During binary upgrade, allow nonexistent large objects so that we don't
+ * have to create them during schema restoration. pg_upgrade will
+ * transfer the contents of pg_largeobject_metadata via COPY or by
+ * copying/linking its files from the old cluster later on.
+ */
+ missing_ok = IsBinaryUpgrade && stmt->objtype == OBJECT_LARGEOBJECT;
+
/*
* Translate the parser representation that identifies this object into an
* ObjectAddress. get_object_address() will throw an error if the object
@@ -70,7 +79,8 @@ CommentObject(CommentStmt *stmt)
* against concurrent DROP operations.
*/
address = get_object_address(stmt->objtype, stmt->object,
- &relation, ShareUpdateExclusiveLock, false);
+ &relation, ShareUpdateExclusiveLock,
+ missing_ok);
/* Require ownership of the target object. */
check_object_ownership(GetUserId(), stmt->objtype, address,
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 25ee20b23db..2b7556b287c 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -572,8 +572,8 @@ CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo,
cstate->cur_lineno = buffer->linenos[i];
recheckIndexes =
ExecInsertIndexTuples(resultRelInfo,
- buffer->slots[i], estate, false,
- false, NULL, NIL, false);
+ estate, 0, buffer->slots[i],
+ NIL, NULL);
ExecARInsertTriggers(estate, resultRelInfo,
slots[i], recheckIndexes,
cstate->transition_capture);
@@ -1429,13 +1429,9 @@ CopyFrom(CopyFromState cstate)
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- myslot,
- estate,
- false,
- false,
- NULL,
- NIL,
- false);
+ estate, 0,
+ myslot, NIL,
+ NULL);
}
/* AFTER ROW INSERT Triggers */
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 5868a7fa11f..94d6f415a06 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -249,7 +249,9 @@ CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
switch (cstate->copy_src)
{
case COPY_FILE:
+ pgstat_report_wait_start(WAIT_EVENT_COPY_FROM_READ);
bytesread = fread(databuf, 1, maxread, cstate->copy_file);
+ pgstat_report_wait_end();
if (ferror(cstate->copy_file))
ereport(ERROR,
(errcode_for_file_access(),
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 4ab4a3893d5..9ceeff6d99e 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -454,6 +454,7 @@ CopySendEndOfRow(CopyToState cstate)
switch (cstate->copy_dest)
{
case COPY_FILE:
+ pgstat_report_wait_start(WAIT_EVENT_COPY_TO_WRITE);
if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
cstate->copy_file) != 1 ||
ferror(cstate->copy_file))
@@ -486,6 +487,7 @@ CopySendEndOfRow(CopyToState cstate)
(errcode_for_file_access(),
errmsg("could not write to COPY file: %m")));
}
+ pgstat_report_wait_end();
break;
case COPY_FRONTEND:
/* Dump the accumulated row as one CopyData message */
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 87949054f26..33311760df7 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -60,6 +60,7 @@
#include "storage/lmgr.h"
#include "storage/md.h"
#include "storage/procarray.h"
+#include "storage/procsignal.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index b7bb111688c..93918a223b8 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -2012,7 +2012,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
show_tablesample(((SampleScan *) plan)->tablesample,
planstate, ancestors, es);
/* fall through to print additional fields the same as SeqScan */
- /* FALLTHROUGH */
+ pg_fallthrough;
case T_SeqScan:
case T_ValuesScan:
case T_CteScan:
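Note: the hunks above and below replace fall-through comments with a pg_fallthrough pseudo-statement. At GCC's strictest -Wimplicit-fallthrough level (5), comments no longer suppress the warning; only the fallthrough attribute does. The hunk defining pg_fallthrough (presumably in c.h) is not part of this excerpt; a plausible definition, offered purely as a sketch, would be:

    /* Hypothetical definition; the patch's actual c.h hunk is not shown. */
    #ifdef __has_attribute
    #if __has_attribute(fallthrough)
    #define pg_fallthrough __attribute__((fallthrough))
    #endif
    #endif
    #ifndef pg_fallthrough
    #define pg_fallthrough ((void) 0)	/* no-op fallback */
    #endif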
@@ -4672,10 +4672,36 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
if (node->onConflictAction != ONCONFLICT_NONE)
{
- ExplainPropertyText("Conflict Resolution",
- node->onConflictAction == ONCONFLICT_NOTHING ?
- "NOTHING" : "UPDATE",
- es);
+ const char *resolution = NULL;
+
+ if (node->onConflictAction == ONCONFLICT_NOTHING)
+ resolution = "NOTHING";
+ else if (node->onConflictAction == ONCONFLICT_UPDATE)
+ resolution = "UPDATE";
+ else
+ {
+ Assert(node->onConflictAction == ONCONFLICT_SELECT);
+ switch (node->onConflictLockStrength)
+ {
+ case LCS_NONE:
+ resolution = "SELECT";
+ break;
+ case LCS_FORKEYSHARE:
+ resolution = "SELECT FOR KEY SHARE";
+ break;
+ case LCS_FORSHARE:
+ resolution = "SELECT FOR SHARE";
+ break;
+ case LCS_FORNOKEYUPDATE:
+ resolution = "SELECT FOR NO KEY UPDATE";
+ break;
+ case LCS_FORUPDATE:
+ resolution = "SELECT FOR UPDATE";
+ break;
+ }
+ }
+
+ ExplainPropertyText("Conflict Resolution", resolution, es);
/*
* Don't display arbiter indexes at all when DO NOTHING variant
@@ -4684,7 +4710,7 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
if (idxNames)
ExplainPropertyList("Conflict Arbiter Indexes", idxNames, es);
- /* ON CONFLICT DO UPDATE WHERE qual is specially displayed */
+ /* ON CONFLICT DO SELECT/UPDATE WHERE qual is specially displayed */
if (node->onConflictWhere)
{
show_upper_qual((List *) node->onConflictWhere, "Conflict Filter",
diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
index 596105ee078..574858bfeca 100644
--- a/src/backend/commands/extension.c
+++ b/src/backend/commands/extension.c
@@ -45,6 +45,7 @@
#include "catalog/pg_depend.h"
#include "catalog/pg_extension.h"
#include "catalog/pg_namespace.h"
+#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "commands/alter.h"
#include "commands/comment.h"
@@ -62,6 +63,7 @@
#include "utils/builtins.h"
#include "utils/conffiles.h"
#include "utils/fmgroids.h"
+#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@@ -141,7 +143,27 @@ typedef struct
char *loc;
} ExtensionLocation;
+/*
+ * Cache structure for get_function_sibling_type (and maybe later,
+ * allied lookup functions).
+ */
+typedef struct ExtensionSiblingCache
+{
+ struct ExtensionSiblingCache *next; /* list link */
+ /* lookup key: requesting function's OID and type name */
+ Oid reqfuncoid;
+ const char *typname;
+ bool valid; /* is entry currently valid? */
+ uint32 exthash; /* cache hash of owning extension's OID */
+ Oid typeoid; /* OID associated with typname */
+} ExtensionSiblingCache;
+
+/* Head of linked list of ExtensionSiblingCache structs */
+static ExtensionSiblingCache *ext_sibling_list = NULL;
+
/* Local functions */
+static void ext_sibling_callback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
static List *find_update_path(List *evi_list,
ExtensionVersionInfo *evi_start,
ExtensionVersionInfo *evi_target,
@@ -263,6 +285,114 @@ get_extension_schema(Oid ext_oid)
return result;
}
+/*
+ * get_function_sibling_type - find a type belonging to same extension as func
+ *
+ * Returns the type's OID, or InvalidOid if not found.
+ *
+ * This is useful in extensions, which won't have fixed object OIDs.
+ * We work from the calling function's own OID, which it can get from its
+ * FunctionCallInfo parameter, and look up the owning extension and thence
+ * a type belonging to the same extension.
+ *
+ * Notice that the type is specified by name only, without a schema.
+ * That's because this will typically be used by relocatable extensions
+ * which can't make a priori assumptions about which schema their objects
+ * are in. As long as the extension only defines one type of this name,
+ * the answer is unique anyway.
+ *
+ * We might later add the ability to look up functions, operators, etc.
+ *
+ * This code is simply a frontend for some pg_depend lookups. Those lookups
+ * are fairly expensive, so we provide a simple cache facility. We assume
+ * that the passed typname is actually a C constant, or at least permanently
+ * allocated, so that we need not copy that string.
+ */
+Oid
+get_function_sibling_type(Oid funcoid, const char *typname)
+{
+ ExtensionSiblingCache *cache_entry;
+ Oid extoid;
+ Oid typeoid;
+
+ /*
+ * See if we have the answer cached. Someday there may be enough callers
+ * to justify a hash table, but for now, a simple linked list is fine.
+ */
+ for (cache_entry = ext_sibling_list; cache_entry != NULL;
+ cache_entry = cache_entry->next)
+ {
+ if (funcoid == cache_entry->reqfuncoid &&
+ strcmp(typname, cache_entry->typname) == 0)
+ break;
+ }
+ if (cache_entry && cache_entry->valid)
+ return cache_entry->typeoid;
+
+ /*
+ * Nope, so do the expensive lookups. We do not expect failures, so we do
+ * not cache negative results.
+ */
+ extoid = getExtensionOfObject(ProcedureRelationId, funcoid);
+ if (!OidIsValid(extoid))
+ return InvalidOid;
+ typeoid = getExtensionType(extoid, typname);
+ if (!OidIsValid(typeoid))
+ return InvalidOid;
+
+ /*
+ * Build, or revalidate, cache entry.
+ */
+ if (cache_entry == NULL)
+ {
+ /* Register invalidation hook if this is first entry */
+ if (ext_sibling_list == NULL)
+ CacheRegisterSyscacheCallback(EXTENSIONOID,
+ ext_sibling_callback,
+ (Datum) 0);
+
+ /* Zero the space up front so the valid flag starts out false */
+ cache_entry = (ExtensionSiblingCache *)
+ MemoryContextAllocZero(CacheMemoryContext,
+ sizeof(ExtensionSiblingCache));
+ cache_entry->next = ext_sibling_list;
+ ext_sibling_list = cache_entry;
+ }
+
+ cache_entry->reqfuncoid = funcoid;
+ cache_entry->typname = typname;
+ cache_entry->exthash = GetSysCacheHashValue1(EXTENSIONOID,
+ ObjectIdGetDatum(extoid));
+ cache_entry->typeoid = typeoid;
+ /* Mark it valid only once it's fully populated */
+ cache_entry->valid = true;
+
+ return typeoid;
+}
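For callers, the intended pattern is to pass the invoking function's own OID out of its FunctionCallInfo. A minimal caller sketch, assuming a hypothetical extension type named "widget" (the name is invented for illustration and is not from this patch):

    static Oid
    widget_type_oid(FunctionCallInfo fcinfo)
    {
        Oid         typeoid;

        /* fn_oid is the OID of the C function currently being executed */
        typeoid = get_function_sibling_type(fcinfo->flinfo->fn_oid, "widget");
        if (!OidIsValid(typeoid))
            elog(ERROR, "type \"widget\" not found in owning extension");
        return typeoid;
    }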
+
+/*
+ * ext_sibling_callback
+ * Syscache inval callback function for EXTENSIONOID cache
+ *
+ * It seems sufficient to invalidate ExtensionSiblingCache entries when
+ * the owning extension's pg_extension entry is modified or deleted.
+ * Neither a requesting function's OID, nor the OID of the object it's
+ * looking for, could change without an extension update or drop/recreate.
+ */
+static void
+ext_sibling_callback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
+{
+ ExtensionSiblingCache *cache_entry;
+
+ for (cache_entry = ext_sibling_list; cache_entry != NULL;
+ cache_entry = cache_entry->next)
+ {
+ if (hashvalue == 0 ||
+ cache_entry->exthash == hashvalue)
+ cache_entry->valid = false;
+ }
+}
+
/*
* Utility functions to check validity of extension and version names
*/
@@ -1191,7 +1321,7 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control,
(void) set_config_option("client_min_messages", "warning",
PGC_USERSET, PGC_S_SESSION,
GUC_ACTION_SAVE, true, 0, false);
- if (log_min_messages < WARNING)
+ if (log_min_messages[MyBackendType] < WARNING)
(void) set_config_option_ext("log_min_messages", "warning",
PGC_SUSET, PGC_S_SESSION,
BOOTSTRAP_SUPERUSERID,
@@ -2557,9 +2687,9 @@ extension_file_exists(const char *extensionName)
locations = get_extension_control_directories();
- foreach_ptr(char, location, locations)
+ foreach_ptr(ExtensionLocation, location, locations)
{
- dir = AllocateDir(location);
+ dir = AllocateDir(location->loc);
/*
* If the control directory doesn't exist, we want to silently return
@@ -2571,7 +2701,7 @@ extension_file_exists(const char *extensionName)
}
else
{
- while ((de = ReadDir(dir, location)) != NULL)
+ while ((de = ReadDir(dir, location->loc)) != NULL)
{
char *extname;
diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c
index 9f7e0ed17ce..3e7b09b3494 100644
--- a/src/backend/commands/operatorcmds.c
+++ b/src/backend/commands/operatorcmds.c
@@ -276,7 +276,6 @@ ValidateRestrictionEstimator(List *restrictionName)
{
Oid typeId[4];
Oid restrictionOid;
- AclResult aclresult;
typeId[0] = INTERNALOID; /* PlannerInfo */
typeId[1] = OIDOID; /* operator OID */
@@ -292,11 +291,33 @@ ValidateRestrictionEstimator(List *restrictionName)
errmsg("restriction estimator function %s must return type %s",
NameListToString(restrictionName), "float8")));
- /* Require EXECUTE rights for the estimator */
- aclresult = object_aclcheck(ProcedureRelationId, restrictionOid, GetUserId(), ACL_EXECUTE);
- if (aclresult != ACLCHECK_OK)
- aclcheck_error(aclresult, OBJECT_FUNCTION,
- NameListToString(restrictionName));
+ /*
+ * If the estimator is not a built-in function, require superuser
+ * privilege to install it. This protects against using something that is
+ * not a restriction estimator or has hard-wired assumptions about what
+ * data types it is working with. (Built-in estimators are required to
+ * defend themselves adequately against unexpected data type choices, but
+ * it seems impractical to expect that of extensions' estimators.)
+ *
+ * If it is built-in, only require EXECUTE rights.
+ */
+ if (restrictionOid >= FirstGenbkiObjectId)
+ {
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to specify a non-built-in restriction estimator function")));
+ }
+ else
+ {
+ AclResult aclresult;
+
+ aclresult = object_aclcheck(ProcedureRelationId, restrictionOid,
+ GetUserId(), ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FUNCTION,
+ NameListToString(restrictionName));
+ }
return restrictionOid;
}
@@ -312,7 +333,6 @@ ValidateJoinEstimator(List *joinName)
Oid typeId[5];
Oid joinOid;
Oid joinOid2;
- AclResult aclresult;
typeId[0] = INTERNALOID; /* PlannerInfo */
typeId[1] = OIDOID; /* operator OID */
@@ -350,11 +370,24 @@ ValidateJoinEstimator(List *joinName)
errmsg("join estimator function %s must return type %s",
NameListToString(joinName), "float8")));
- /* Require EXECUTE rights for the estimator */
- aclresult = object_aclcheck(ProcedureRelationId, joinOid, GetUserId(), ACL_EXECUTE);
- if (aclresult != ACLCHECK_OK)
- aclcheck_error(aclresult, OBJECT_FUNCTION,
- NameListToString(joinName));
+ /* privilege checks are the same as in ValidateRestrictionEstimator */
+ if (joinOid >= FirstGenbkiObjectId)
+ {
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to specify a non-built-in join estimator function")));
+ }
+ else
+ {
+ AclResult aclresult;
+
+ aclresult = object_aclcheck(ProcedureRelationId, joinOid,
+ GetUserId(), ACL_EXECUTE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_FUNCTION,
+ NameListToString(joinName));
+ }
return joinOid;
}
diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c
index 4160f5b6855..5b80396723c 100644
--- a/src/backend/commands/seclabel.c
+++ b/src/backend/commands/seclabel.c
@@ -118,6 +118,7 @@ ExecSecLabelStmt(SecLabelStmt *stmt)
ObjectAddress address;
Relation relation;
ListCell *lc;
+ bool missing_ok;
/*
* Find the named label provider, or if none specified, check whether
@@ -159,6 +160,14 @@ ExecSecLabelStmt(SecLabelStmt *stmt)
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("security labels are not supported for this type of object")));
+ /*
+ * During binary upgrade, allow nonexistent large objects so that we don't
+ * have to create them during schema restoration. pg_upgrade will
+ * transfer the contents of pg_largeobject_metadata via COPY or by
+ * copying/linking its files from the old cluster later on.
+ */
+ missing_ok = IsBinaryUpgrade && stmt->objtype == OBJECT_LARGEOBJECT;
+
/*
* Translate the parser representation which identifies this object into
* an ObjectAddress. get_object_address() will throw an error if the
@@ -166,7 +175,8 @@ ExecSecLabelStmt(SecLabelStmt *stmt)
* guard against concurrent modifications.
*/
address = get_object_address(stmt->objtype, stmt->object,
- &relation, ShareUpdateExclusiveLock, false);
+ &relation, ShareUpdateExclusiveLock,
+ missing_ok);
/* Require ownership of the target object. */
check_object_ownership(GetUserId(), stmt->objtype, address,
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 0b3c8499b49..5e3c0964d38 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -73,8 +73,9 @@
#define SUBOPT_FAILOVER 0x00002000
#define SUBOPT_RETAIN_DEAD_TUPLES 0x00004000
#define SUBOPT_MAX_RETENTION_DURATION 0x00008000
-#define SUBOPT_LSN 0x00010000
-#define SUBOPT_ORIGIN 0x00020000
+#define SUBOPT_WAL_RECEIVER_TIMEOUT 0x00010000
+#define SUBOPT_LSN 0x00020000
+#define SUBOPT_ORIGIN 0x00040000
/* check if the 'val' has 'bits' set */
#define IsSet(val, bits) (((val) & (bits)) == (bits))
@@ -104,6 +105,7 @@ typedef struct SubOpts
int32 maxretention;
char *origin;
XLogRecPtr lsn;
+ char *wal_receiver_timeout;
} SubOpts;
/*
@@ -402,6 +404,30 @@ parse_subscription_options(ParseState *pstate, List *stmt_options,
opts->specified_opts |= SUBOPT_LSN;
opts->lsn = lsn;
}
+ else if (IsSet(supported_opts, SUBOPT_WAL_RECEIVER_TIMEOUT) &&
+ strcmp(defel->defname, "wal_receiver_timeout") == 0)
+ {
+ bool parsed;
+ int val;
+
+ if (IsSet(opts->specified_opts, SUBOPT_WAL_RECEIVER_TIMEOUT))
+ errorConflictingDefElem(defel, pstate);
+
+ opts->specified_opts |= SUBOPT_WAL_RECEIVER_TIMEOUT;
+ opts->wal_receiver_timeout = defGetString(defel);
+
+ /*
+ * Test if the given value is valid for the wal_receiver_timeout GUC.
+ * Skip this test if the value is -1, since -1 is allowed for the
+ * wal_receiver_timeout subscription option, but not for the GUC
+ * itself.
+ */
+ parsed = parse_int(opts->wal_receiver_timeout, &val, 0, NULL);
+ if (!parsed || val != -1)
+ (void) set_config_option("wal_receiver_timeout", opts->wal_receiver_timeout,
+ PGC_BACKEND, PGC_S_TEST, GUC_ACTION_SET,
+ false, 0, false);
+ }
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -612,7 +638,8 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
SUBOPT_DISABLE_ON_ERR | SUBOPT_PASSWORD_REQUIRED |
SUBOPT_RUN_AS_OWNER | SUBOPT_FAILOVER |
SUBOPT_RETAIN_DEAD_TUPLES |
- SUBOPT_MAX_RETENTION_DURATION | SUBOPT_ORIGIN);
+ SUBOPT_MAX_RETENTION_DURATION |
+ SUBOPT_WAL_RECEIVER_TIMEOUT | SUBOPT_ORIGIN);
parse_subscription_options(pstate, stmt->options, supported_opts, &opts);
/*
@@ -695,6 +722,14 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
if (opts.synchronous_commit == NULL)
opts.synchronous_commit = "off";
+ /*
+ * The default for a subscription's wal_receiver_timeout is -1, which
+ * means the value is inherited from the server configuration, command
+ * line, or role/database settings.
+ */
+ if (opts.wal_receiver_timeout == NULL)
+ opts.wal_receiver_timeout = "-1";
+
conninfo = stmt->conninfo;
publications = stmt->publication;
@@ -742,6 +777,8 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt,
nulls[Anum_pg_subscription_subslotname - 1] = true;
values[Anum_pg_subscription_subsynccommit - 1] =
CStringGetTextDatum(opts.synchronous_commit);
+ values[Anum_pg_subscription_subwalrcvtimeout - 1] =
+ CStringGetTextDatum(opts.wal_receiver_timeout);
values[Anum_pg_subscription_subpublications - 1] =
publicationListToArray(publications);
values[Anum_pg_subscription_suborigin - 1] =
@@ -1410,6 +1447,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
SUBOPT_RUN_AS_OWNER | SUBOPT_FAILOVER |
SUBOPT_RETAIN_DEAD_TUPLES |
SUBOPT_MAX_RETENTION_DURATION |
+ SUBOPT_WAL_RECEIVER_TIMEOUT |
SUBOPT_ORIGIN);
parse_subscription_options(pstate, stmt->options,
@@ -1665,6 +1703,13 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
origin = opts.origin;
}
+ if (IsSet(opts.specified_opts, SUBOPT_WAL_RECEIVER_TIMEOUT))
+ {
+ values[Anum_pg_subscription_subwalrcvtimeout - 1] =
+ CStringGetTextDatum(opts.wal_receiver_timeout);
+ replaces[Anum_pg_subscription_subwalrcvtimeout - 1] = true;
+ }
+
update_tuple = true;
break;
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index f976c0e5c7e..2f5b7007ff9 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -16192,7 +16192,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock
case RELKIND_TOASTVALUE:
if (recursing)
break;
- /* FALL THRU */
+ pg_fallthrough;
default:
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index 0b064891932..3511a4ec0fd 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -70,6 +70,7 @@
#include "miscadmin.h"
#include "postmaster/bgwriter.h"
#include "storage/fd.h"
+#include "storage/procsignal.h"
#include "storage/standby.h"
#include "utils/acl.h"
#include "utils/builtins.h"
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 8df915f63fb..98d402c0a3b 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -4392,7 +4392,7 @@ AfterTriggerExecute(EState *estate,
trig_tuple_slot2))
elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
}
- /* fall through */
+ pg_fallthrough;
case AFTER_TRIGGER_FDW_REUSE:
/*
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index a7a5ac1e83b..61ff5ddc74c 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -4032,6 +4032,7 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
int16 typlen;
bool typbyval;
char typalign;
+ uint8 typalignby;
char *s;
bits8 *bitmap;
int bitmask;
@@ -4086,6 +4087,7 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
typlen = op->d.scalararrayop.typlen;
typbyval = op->d.scalararrayop.typbyval;
typalign = op->d.scalararrayop.typalign;
+ typalignby = typalign_to_alignby(typalign);
/* Initialize result appropriately depending on useOr */
result = BoolGetDatum(!useOr);
@@ -4111,7 +4113,7 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op)
{
elt = fetch_att(s, typbyval, typlen);
s = att_addlength_pointer(s, typlen, s);
- s = (char *) att_align_nominal(s, typalign);
+ s = (char *) att_nominal_alignby(s, typalignby);
fcinfo->args[1].value = elt;
fcinfo->args[1].isnull = false;
}
@@ -4255,6 +4257,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
int16 typlen;
bool typbyval;
char typalign;
+ uint8 typalignby;
int nitems;
bool has_nulls = false;
char *s;
@@ -4272,6 +4275,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
&typlen,
&typbyval,
&typalign);
+ typalignby = typalign_to_alignby(typalign);
oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory);
@@ -4318,7 +4322,7 @@ ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *eco
element = fetch_att(s, typbyval, typlen);
s = att_addlength_pointer(s, typlen, s);
- s = (char *) att_align_nominal(s, typalign);
+ s = (char *) att_nominal_alignby(s, typalignby);
saophash_insert(elements_tab->hashtab, element, &hashfound);
}
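The defining hunk for typalign_to_alignby is not included in this excerpt. From its use above (converting the char typalign code once, before the per-element loop, so att_nominal_alignby can work with a byte count), a plausible shape, assuming the usual TYPALIGN_* codes and ALIGNOF_* constants, is:

    static inline uint8
    typalign_to_alignby(char typalign)
    {
        switch (typalign)
        {
            case TYPALIGN_CHAR:
                return 1;
            case TYPALIGN_SHORT:
                return ALIGNOF_SHORT;
            case TYPALIGN_INT:
                return ALIGNOF_INT;
            case TYPALIGN_DOUBLE:
                return ALIGNOF_DOUBLE;
            default:
                elog(ERROR, "unrecognized typalign: %c", typalign);
                return 0;       /* keep compiler quiet */
        }
    }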
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index 6ae0f959592..9d071e495c6 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -54,9 +54,9 @@
* ---------------------
*
* Speculative insertion is a two-phase mechanism used to implement
- * INSERT ... ON CONFLICT DO UPDATE/NOTHING. The tuple is first inserted
- * to the heap and update the indexes as usual, but if a constraint is
- * violated, we can still back out the insertion without aborting the whole
+ * INSERT ... ON CONFLICT. The tuple is first inserted into the heap
+ * and the indexes are updated as usual, but if a constraint is violated,
+ * we can still back out of the insertion without aborting the whole
* transaction. In an INSERT ... ON CONFLICT statement, if a conflict is
* detected, the inserted tuple is backed out and the ON CONFLICT action is
* executed instead.
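Condensed into one place, the two-phase flow this paragraph describes looks roughly like the following (the full version, with its retry loop and error handling, is in ExecInsert in nodeModifyTable.c, touched later in this patch; declarations are abbreviated):

    uint32      specToken;
    bool        specConflict = false;
    List       *recheckIndexes;

    /* Phase one: advertise the speculative insertion, then insert the tuple */
    specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
    table_tuple_insert_speculative(rel, slot, estate->es_output_cid,
                                   0, NULL, specToken);

    /* Phase two: insert index entries; conflicts are reported, not thrown */
    recheckIndexes = ExecInsertIndexTuples(resultRelInfo, estate,
                                           EIIT_NO_DUPE_ERROR, slot,
                                           arbiterIndexes, &specConflict);

    /* Confirm the insertion, or back it out if a conflict was detected */
    table_tuple_complete_speculative(rel, slot, specToken, !specConflict);
    SpeculativeInsertionLockRelease(GetCurrentTransactionId());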
@@ -276,18 +276,18 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* into all the relations indexing the result relation
* when a heap tuple is inserted into the result relation.
*
- * When 'update' is true and 'onlySummarizing' is false,
+ * When EIIT_IS_UPDATE is set and EIIT_ONLY_SUMMARIZING isn't,
* executor is performing an UPDATE that could not use an
* optimization like heapam's HOT (in more general terms a
* call to table_tuple_update() took place and set
* 'update_indexes' to TU_All). Receiving this hint makes
* us consider if we should pass down the 'indexUnchanged'
* hint in turn. That's something that we figure out for
- * each index_insert() call iff 'update' is true.
- * (When 'update' is false we already know not to pass the
+ * each index_insert() call iff EIIT_IS_UPDATE is set.
+ * (When that flag is not set we already know not to pass the
* hint to any index.)
*
- * If onlySummarizing is set, an equivalent optimization to
+ * If EIIT_ONLY_SUMMARIZING is set, an equivalent optimization to
* HOT has been applied and any updated columns are indexed
* only by summarizing indexes (or in more general terms a
* call to table_tuple_update() took place and set
@@ -298,23 +298,21 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* Unique and exclusion constraints are enforced at the same
* time. This returns a list of index OIDs for any unique or
* exclusion constraints that are deferred and that had
- * potential (unconfirmed) conflicts. (if noDupErr == true,
+ * potential (unconfirmed) conflicts. (if EIIT_NO_DUPE_ERROR is set,
* the same is done for non-deferred constraints, but report
* if conflict was speculative or deferred conflict to caller)
*
- * If 'arbiterIndexes' is nonempty, noDupErr applies only to
- * those indexes. NIL means noDupErr applies to all indexes.
+ * If 'arbiterIndexes' is nonempty, EIIT_NO_DUPE_ERROR applies only to
+ * those indexes. NIL means EIIT_NO_DUPE_ERROR applies to all indexes.
* ----------------------------------------------------------------
*/
List *
ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
- TupleTableSlot *slot,
EState *estate,
- bool update,
- bool noDupErr,
- bool *specConflict,
+ bits32 flags,
+ TupleTableSlot *slot,
List *arbiterIndexes,
- bool onlySummarizing)
+ bool *specConflict)
{
ItemPointer tupleid = &slot->tts_tid;
List *result = NIL;
@@ -374,7 +372,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
* Skip processing of non-summarizing indexes if we only update
* summarizing indexes
*/
- if (onlySummarizing && !indexInfo->ii_Summarizing)
+ if ((flags & EIIT_ONLY_SUMMARIZING) && !indexInfo->ii_Summarizing)
continue;
/* Check for partial index */
@@ -409,7 +407,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
isnull);
/* Check whether to apply noDupErr to this index */
- applyNoDupErr = noDupErr &&
+ applyNoDupErr = (flags & EIIT_NO_DUPE_ERROR) &&
(arbiterIndexes == NIL ||
list_member_oid(arbiterIndexes,
indexRelation->rd_index->indexrelid));
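The EIIT_* bits used above replace the former 'update', 'noDupErr', and 'onlySummarizing' boolean parameters. Their defining hunk (presumably in executor.h) is not part of this excerpt; the sketch below shows plausible definitions, with bit values assumed for illustration only:

    #define EIIT_IS_UPDATE          0x0001  /* caller is performing an UPDATE */
    #define EIIT_NO_DUPE_ERROR      0x0002  /* report conflicts, don't error */
    #define EIIT_ONLY_SUMMARIZING   0x0004  /* touch only summarizing indexes */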
@@ -441,10 +439,11 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
* index. If we're being called as part of an UPDATE statement,
* consider if the 'indexUnchanged' = true hint should be passed.
*/
- indexUnchanged = update && index_unchanged_by_update(resultRelInfo,
- estate,
- indexInfo,
- indexRelation);
+ indexUnchanged = ((flags & EIIT_IS_UPDATE) &&
+ index_unchanged_by_update(resultRelInfo,
+ estate,
+ indexInfo,
+ indexRelation));
satisfiesConstraint =
index_insert(indexRelation, /* index relation */
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 772e81f3154..f87978c137e 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -106,7 +106,7 @@ struct SharedExecutorInstrumentation
/* array of num_plan_nodes * num_workers Instrumentation objects follows */
};
#define GetInstrumentationArray(sei) \
- (AssertVariableIsOfTypeMacro(sei, SharedExecutorInstrumentation *), \
+ (StaticAssertVariableIsOfTypeMacro(sei, SharedExecutorInstrumentation *), \
(Instrumentation *) (((char *) sei) + sei->instrument_offset))
/* Context object for ExecParallelEstimate. */
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index d13e786cf13..bab294f5e91 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -883,20 +883,27 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
/*
- * In the DO UPDATE case, we have some more state to initialize.
+ * In the DO UPDATE and DO SELECT cases, we have some more state to
+ * initialize.
*/
- if (node->onConflictAction == ONCONFLICT_UPDATE)
+ if (node->onConflictAction == ONCONFLICT_UPDATE ||
+ node->onConflictAction == ONCONFLICT_SELECT)
{
- OnConflictSetState *onconfl = makeNode(OnConflictSetState);
+ OnConflictActionState *onconfl = makeNode(OnConflictActionState);
TupleConversionMap *map;
map = ExecGetRootToChildMap(leaf_part_rri, estate);
- Assert(node->onConflictSet != NIL);
+ Assert(node->onConflictSet != NIL ||
+ node->onConflictAction == ONCONFLICT_SELECT);
Assert(rootResultRelInfo->ri_onConflict != NULL);
leaf_part_rri->ri_onConflict = onconfl;
+ /* Lock strength for DO SELECT [FOR UPDATE/SHARE] */
+ onconfl->oc_LockStrength =
+ rootResultRelInfo->ri_onConflict->oc_LockStrength;
+
/*
* Need a separate existing slot for each partition, as the
* partition could be of a different AM, even if the tuple
@@ -909,7 +916,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
/*
* If the partition's tuple descriptor matches exactly the root
* parent (the common case), we can re-use most of the parent's ON
- * CONFLICT SET state, skipping a bunch of work. Otherwise, we
+ * CONFLICT action state, skipping a bunch of work. Otherwise, we
* need to create state specific to this partition.
*/
if (map == NULL)
@@ -917,7 +924,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
/*
* It's safe to reuse these from the partition root, as we
* only process one tuple at a time (therefore we won't
- * overwrite needed data in slots), and the results of
+ * overwrite needed data in slots), and the results of any
* projections are independent of the underlying storage.
* Projections and where clauses themselves don't store state
* / are independent of the underlying storage.
@@ -931,66 +938,81 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
}
else
{
- List *onconflset;
- List *onconflcols;
-
/*
- * Translate expressions in onConflictSet to account for
- * different attribute numbers. For that, map partition
- * varattnos twice: first to catch the EXCLUDED
- * pseudo-relation (INNER_VAR), and second to handle the main
- * target relation (firstVarno).
+ * For ON CONFLICT DO UPDATE, translate expressions in
+ * onConflictSet to account for different attribute numbers.
+ * For that, map partition varattnos twice: first to catch the
+ * EXCLUDED pseudo-relation (INNER_VAR), and second to handle
+ * the main target relation (firstVarno).
*/
- onconflset = copyObject(node->onConflictSet);
- if (part_attmap == NULL)
- part_attmap =
- build_attrmap_by_name(RelationGetDescr(partrel),
- RelationGetDescr(firstResultRel),
- false);
- onconflset = (List *)
- map_variable_attnos((Node *) onconflset,
- INNER_VAR, 0,
- part_attmap,
- RelationGetForm(partrel)->reltype,
- &found_whole_row);
- /* We ignore the value of found_whole_row. */
- onconflset = (List *)
- map_variable_attnos((Node *) onconflset,
- firstVarno, 0,
- part_attmap,
- RelationGetForm(partrel)->reltype,
- &found_whole_row);
- /* We ignore the value of found_whole_row. */
-
- /* Finally, adjust the target colnos to match the partition. */
- onconflcols = adjust_partition_colnos(node->onConflictCols,
- leaf_part_rri);
-
- /* create the tuple slot for the UPDATE SET projection */
- onconfl->oc_ProjSlot =
- table_slot_create(partrel,
- &mtstate->ps.state->es_tupleTable);
+ if (node->onConflictAction == ONCONFLICT_UPDATE)
+ {
+ List *onconflset;
+ List *onconflcols;
+
+ onconflset = copyObject(node->onConflictSet);
+ if (part_attmap == NULL)
+ part_attmap =
+ build_attrmap_by_name(RelationGetDescr(partrel),
+ RelationGetDescr(firstResultRel),
+ false);
+ onconflset = (List *)
+ map_variable_attnos((Node *) onconflset,
+ INNER_VAR, 0,
+ part_attmap,
+ RelationGetForm(partrel)->reltype,
+ &found_whole_row);
+ /* We ignore the value of found_whole_row. */
+ onconflset = (List *)
+ map_variable_attnos((Node *) onconflset,
+ firstVarno, 0,
+ part_attmap,
+ RelationGetForm(partrel)->reltype,
+ &found_whole_row);
+ /* We ignore the value of found_whole_row. */
- /* build UPDATE SET projection state */
- onconfl->oc_ProjInfo =
- ExecBuildUpdateProjection(onconflset,
- true,
- onconflcols,
- partrelDesc,
- econtext,
- onconfl->oc_ProjSlot,
- &mtstate->ps);
+ /*
+ * Finally, adjust the target colnos to match the
+ * partition.
+ */
+ onconflcols = adjust_partition_colnos(node->onConflictCols,
+ leaf_part_rri);
+
+ /* create the tuple slot for the UPDATE SET projection */
+ onconfl->oc_ProjSlot =
+ table_slot_create(partrel,
+ &mtstate->ps.state->es_tupleTable);
+
+ /* build UPDATE SET projection state */
+ onconfl->oc_ProjInfo =
+ ExecBuildUpdateProjection(onconflset,
+ true,
+ onconflcols,
+ partrelDesc,
+ econtext,
+ onconfl->oc_ProjSlot,
+ &mtstate->ps);
+ }
/*
- * If there is a WHERE clause, initialize state where it will
- * be evaluated, mapping the attribute numbers appropriately.
- * As with onConflictSet, we need to map partition varattnos
- * to the partition's tupdesc.
+ * For both ON CONFLICT DO UPDATE and ON CONFLICT DO SELECT,
+ * there may be a WHERE clause. If so, initialize state where
+ * it will be evaluated, mapping the attribute numbers
+ * appropriately. As with onConflictSet, we need to map
+ * partition varattnos twice, to catch both the EXCLUDED
+ * pseudo-relation (INNER_VAR), and the main target relation
+ * (firstVarno).
*/
if (node->onConflictWhere)
{
List *clause;
+ if (part_attmap == NULL)
+ part_attmap =
+ build_attrmap_by_name(RelationGetDescr(partrel),
+ RelationGetDescr(firstResultRel),
+ false);
+
clause = copyObject((List *) node->onConflictWhere);
clause = (List *)
map_variable_attnos((Node *) clause,
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 743b1ee2b28..2497ee7edc5 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -846,11 +846,18 @@ ExecSimpleRelationInsert(ResultRelInfo *resultRelInfo,
conflictindexes = resultRelInfo->ri_onConflictArbiterIndexes;
if (resultRelInfo->ri_NumIndices > 0)
+ {
+ bits32 flags;
+
+ if (conflictindexes != NIL)
+ flags = EIIT_NO_DUPE_ERROR;
+ else
+ flags = 0;
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, estate, false,
- conflictindexes ? true : false,
- &conflict,
- conflictindexes, false);
+ estate, flags,
+ slot, conflictindexes,
+ &conflict);
+ }
/*
* Checks the conflict indexes to fetch the conflicting local row and
@@ -943,11 +950,18 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
conflictindexes = resultRelInfo->ri_onConflictArbiterIndexes;
if (resultRelInfo->ri_NumIndices > 0 && (update_indexes != TU_None))
+ {
+ bits32 flags = EIIT_IS_UPDATE;
+
+ if (conflictindexes != NIL)
+ flags |= EIIT_NO_DUPE_ERROR;
+ if (update_indexes == TU_Summarizing)
+ flags |= EIIT_ONLY_SUMMARIZING;
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, estate, true,
- conflictindexes ? true : false,
- &conflict, conflictindexes,
- (update_indexes == TU_Summarizing));
+ estate, flags,
+ slot, conflictindexes,
+ &conflict);
+ }
/*
* Refer to the comments above the call to CheckAndReportConflict() in
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index baa76596ac2..7d487a165fa 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -2257,7 +2257,7 @@ ExecAgg(PlanState *pstate)
case AGG_HASHED:
if (!node->table_filled)
agg_fill_hash_table(node);
- /* FALLTHROUGH */
+ pg_fallthrough;
case AGG_MIXED:
result = agg_retrieve_hash_table(node);
break;
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index f5d3edb90e2..c0eb5a1f0c5 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -258,7 +258,7 @@ MultiExecParallelHash(HashState *node)
* way, wait for everyone to arrive here so we can proceed.
*/
BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_ALLOCATE);
- /* Fall through. */
+ pg_fallthrough;
case PHJ_BUILD_HASH_INNER:
@@ -1330,13 +1330,13 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
/* All other participants just flush their tuples to disk. */
ExecParallelHashCloseBatchAccessors(hashtable);
}
- /* Fall through. */
+ pg_fallthrough;
case PHJ_GROW_BATCHES_REALLOCATE:
/* Wait for the above to be finished. */
BarrierArriveAndWait(&pstate->grow_batches_barrier,
WAIT_EVENT_HASH_GROW_BATCHES_REALLOCATE);
- /* Fall through. */
+ pg_fallthrough;
case PHJ_GROW_BATCHES_REPARTITION:
/* Make sure that we have the current dimensions and buckets. */
@@ -1349,7 +1349,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
/* Wait for the above to be finished. */
BarrierArriveAndWait(&pstate->grow_batches_barrier,
WAIT_EVENT_HASH_GROW_BATCHES_REPARTITION);
- /* Fall through. */
+ pg_fallthrough;
case PHJ_GROW_BATCHES_DECIDE:
@@ -1411,7 +1411,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
dsa_free(hashtable->area, pstate->old_batches);
pstate->old_batches = InvalidDsaPointer;
}
- /* Fall through. */
+ pg_fallthrough;
case PHJ_GROW_BATCHES_FINISH:
/* Wait for the above to complete. */
@@ -1689,13 +1689,13 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable)
/* Clear the flag. */
pstate->growth = PHJ_GROWTH_OK;
}
- /* Fall through. */
+ pg_fallthrough;
case PHJ_GROW_BUCKETS_REALLOCATE:
/* Wait for the above to complete. */
BarrierArriveAndWait(&pstate->grow_buckets_barrier,
WAIT_EVENT_HASH_GROW_BUCKETS_REALLOCATE);
- /* Fall through. */
+ pg_fallthrough;
case PHJ_GROW_BUCKETS_REINSERT:
/* Reinsert all tuples into the hash table. */
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 114620a8137..5aa8a09b265 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -416,7 +416,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
else
node->hj_JoinState = HJ_NEED_NEW_OUTER;
- /* FALL THRU */
+ pg_fallthrough;
case HJ_NEED_NEW_OUTER:
@@ -505,7 +505,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
/* OK, let's scan the bucket for matches */
node->hj_JoinState = HJ_SCAN_BUCKET;
- /* FALL THRU */
+ pg_fallthrough;
case HJ_SCAN_BUCKET:
@@ -1313,13 +1313,13 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
if (BarrierArriveAndWait(batch_barrier,
WAIT_EVENT_HASH_BATCH_ELECT))
ExecParallelHashTableAlloc(hashtable, batchno);
- /* Fall through. */
+ pg_fallthrough;
case PHJ_BATCH_ALLOCATE:
/* Wait for allocation to complete. */
BarrierArriveAndWait(batch_barrier,
WAIT_EVENT_HASH_BATCH_ALLOCATE);
- /* Fall through. */
+ pg_fallthrough;
case PHJ_BATCH_LOAD:
/* Start (or join in) loading tuples. */
@@ -1339,7 +1339,7 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
sts_end_parallel_scan(inner_tuples);
BarrierArriveAndWait(batch_barrier,
WAIT_EVENT_HASH_BATCH_LOAD);
- /* Fall through. */
+ pg_fallthrough;
case PHJ_BATCH_PROBE:
diff --git a/src/backend/executor/nodeLimit.c b/src/backend/executor/nodeLimit.c
index c40a73dcf17..8f75cbbead2 100644
--- a/src/backend/executor/nodeLimit.c
+++ b/src/backend/executor/nodeLimit.c
@@ -68,7 +68,7 @@ ExecLimit(PlanState *pstate)
*/
recompute_limits(node);
- /* FALL THRU */
+ pg_fallthrough;
case LIMIT_RESCAN:
@@ -215,7 +215,7 @@ ExecLimit(PlanState *pstate)
}
Assert(node->lstate == LIMIT_WINDOWEND_TIES);
- /* FALL THRU */
+ pg_fallthrough;
case LIMIT_WINDOWEND_TIES:
if (ScanDirectionIsForward(direction))
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index f5e9d369940..793c76d4f82 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -147,12 +147,24 @@ static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context,
ItemPointer tupleid,
TupleTableSlot *oldslot,
TupleTableSlot *newslot);
+static bool ExecOnConflictLockRow(ModifyTableContext *context,
+ TupleTableSlot *existing,
+ ItemPointer conflictTid,
+ Relation relation,
+ LockTupleMode lockmode,
+ bool isUpdate);
static bool ExecOnConflictUpdate(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
ItemPointer conflictTid,
TupleTableSlot *excludedSlot,
bool canSetTag,
TupleTableSlot **returning);
+static bool ExecOnConflictSelect(ModifyTableContext *context,
+ ResultRelInfo *resultRelInfo,
+ ItemPointer conflictTid,
+ TupleTableSlot *excludedSlot,
+ bool canSetTag,
+ TupleTableSlot **returning);
static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
EState *estate,
PartitionTupleRouting *proute,
@@ -274,7 +286,7 @@ ExecCheckPlanOutput(Relation resultRel, List *targetList)
*
* context: context for the ModifyTable operation
* resultRelInfo: current result rel
- * cmdType: operation/merge action performed (INSERT, UPDATE, or DELETE)
+ * isDelete: true if the operation/merge action is a DELETE
* oldSlot: slot holding old tuple deleted or updated
* newSlot: slot holding new tuple inserted or updated
* planSlot: slot holding tuple returned by top subplan node
@@ -283,12 +295,15 @@ ExecCheckPlanOutput(Relation resultRel, List *targetList)
* econtext's scan tuple and its old & new tuples are not needed (FDW direct-
* modify is disabled if the RETURNING list refers to any OLD/NEW values).
*
+ * Note: For the SELECT path of INSERT ... ON CONFLICT DO SELECT, oldSlot and
+ * newSlot are both the existing tuple, since it's not changed.
+ *
* Returns a slot holding the result tuple
*/
static TupleTableSlot *
ExecProcessReturning(ModifyTableContext *context,
ResultRelInfo *resultRelInfo,
- CmdType cmdType,
+ bool isDelete,
TupleTableSlot *oldSlot,
TupleTableSlot *newSlot,
TupleTableSlot *planSlot)
@@ -298,23 +313,17 @@ ExecProcessReturning(ModifyTableContext *context,
ExprContext *econtext = projectReturning->pi_exprContext;
/* Make tuple and any needed join variables available to ExecProject */
- switch (cmdType)
+ if (isDelete)
{
- case CMD_INSERT:
- case CMD_UPDATE:
- /* return new tuple by default */
- if (newSlot)
- econtext->ecxt_scantuple = newSlot;
- break;
-
- case CMD_DELETE:
- /* return old tuple by default */
- if (oldSlot)
- econtext->ecxt_scantuple = oldSlot;
- break;
-
- default:
- elog(ERROR, "unrecognized commandType: %d", (int) cmdType);
+ /* return old tuple by default */
+ if (oldSlot)
+ econtext->ecxt_scantuple = oldSlot;
+ }
+ else
+ {
+ /* return new tuple by default */
+ if (newSlot)
+ econtext->ecxt_scantuple = newSlot;
}
econtext->ecxt_outertuple = planSlot;
@@ -1158,6 +1167,26 @@ ExecInsert(ModifyTableContext *context,
else
goto vlock;
}
+ else if (onconflict == ONCONFLICT_SELECT)
+ {
+ /*
+ * In case of ON CONFLICT DO SELECT, optionally lock the
+ * conflicting tuple, fetch it and project RETURNING on
+ * it. Be prepared to retry if locking fails because of a
+ * concurrent UPDATE/DELETE of the conflicting tuple.
+ */
+ TupleTableSlot *returning = NULL;
+
+ if (ExecOnConflictSelect(context, resultRelInfo,
+ &conflictTid, slot, canSetTag,
+ &returning))
+ {
+ InstrCountTuples2(&mtstate->ps, 1);
+ return returning;
+ }
+ else
+ goto vlock;
+ }
else
{
/*
@@ -1197,10 +1226,9 @@ ExecInsert(ModifyTableContext *context,
/* insert index entries for tuple */
recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, estate, false, true,
- &specConflict,
- arbiterIndexes,
- false);
+ estate, EIIT_NO_DUPE_ERROR,
+ slot, arbiterIndexes,
+ &specConflict);
/* adjust the tuple's state accordingly */
table_tuple_complete_speculative(resultRelationDesc, slot,
@@ -1237,10 +1265,9 @@ ExecInsert(ModifyTableContext *context,
/* insert index entries for tuple */
if (resultRelInfo->ri_NumIndices > 0)
- recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, estate, false,
- false, NULL, NIL,
- false);
+ recheckIndexes = ExecInsertIndexTuples(resultRelInfo, estate,
+ 0, slot, NIL,
+ NULL);
}
}
@@ -1329,7 +1356,7 @@ ExecInsert(ModifyTableContext *context,
}
}
- result = ExecProcessReturning(context, resultRelInfo, CMD_INSERT,
+ result = ExecProcessReturning(context, resultRelInfo, false,
oldSlot, slot, planSlot);
/*
@@ -1890,7 +1917,7 @@ ExecDelete(ModifyTableContext *context,
return NULL;
}
- rslot = ExecProcessReturning(context, resultRelInfo, CMD_DELETE,
+ rslot = ExecProcessReturning(context, resultRelInfo, true,
slot, NULL, context->planSlot);
/*
@@ -2327,11 +2354,15 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt,
/* insert index entries for tuple if necessary */
if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None))
- recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
- slot, context->estate,
- true, false,
- NULL, NIL,
- (updateCxt->updateIndexes == TU_Summarizing));
+ {
+ bits32 flags = EIIT_IS_UPDATE;
+
+ if (updateCxt->updateIndexes == TU_Summarizing)
+ flags |= EIIT_ONLY_SUMMARIZING;
+ recheckIndexes = ExecInsertIndexTuples(resultRelInfo, context->estate,
+ flags, slot, NIL,
+ NULL);
+ }
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(context->estate, resultRelInfo,
@@ -2692,56 +2723,37 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
- return ExecProcessReturning(context, resultRelInfo, CMD_UPDATE,
+ return ExecProcessReturning(context, resultRelInfo, false,
oldSlot, slot, context->planSlot);
return NULL;
}
/*
- * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
+ * ExecOnConflictLockRow --- lock the row for ON CONFLICT DO SELECT/UPDATE
*
- * Try to lock tuple for update as part of speculative insertion. If
- * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
- * (but still lock row, even though it may not satisfy estate's
- * snapshot).
+ * Try to lock the conflicting tuple as part of speculative insertion for ON
+ * CONFLICT DO UPDATE or ON CONFLICT DO SELECT FOR UPDATE/SHARE.
*
- * Returns true if we're done (with or without an update), or false if
- * the caller must retry the INSERT from scratch.
+ * Returns true if the row is successfully locked, or false if the caller must
+ * retry the INSERT from scratch.
*/
static bool
-ExecOnConflictUpdate(ModifyTableContext *context,
- ResultRelInfo *resultRelInfo,
- ItemPointer conflictTid,
- TupleTableSlot *excludedSlot,
- bool canSetTag,
- TupleTableSlot **returning)
+ExecOnConflictLockRow(ModifyTableContext *context,
+ TupleTableSlot *existing,
+ ItemPointer conflictTid,
+ Relation relation,
+ LockTupleMode lockmode,
+ bool isUpdate)
{
- ModifyTableState *mtstate = context->mtstate;
- ExprContext *econtext = mtstate->ps.ps_ExprContext;
- Relation relation = resultRelInfo->ri_RelationDesc;
- ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
- TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
TM_FailureData tmfd;
- LockTupleMode lockmode;
TM_Result test;
Datum xminDatum;
TransactionId xmin;
bool isnull;
/*
- * Parse analysis should have blocked ON CONFLICT for all system
- * relations, which includes these. There's no fundamental obstacle to
- * supporting this; we'd just need to handle LOCKTAG_TUPLE like the other
- * ExecUpdate() caller.
- */
- Assert(!resultRelInfo->ri_needLockTagTuple);
-
- /* Determine lock mode to use */
- lockmode = ExecUpdateLockMode(context->estate, resultRelInfo);
-
- /*
- * Lock tuple for update. Don't follow updates when tuple cannot be
+ * Lock the tuple with the given lockmode. Don't follow updates when it cannot be
* locked without doing so. A row locking conflict here means our
* previous conclusion that the tuple is conclusively committed is not
* true anymore.
@@ -2786,7 +2798,7 @@ ExecOnConflictUpdate(ModifyTableContext *context,
(errcode(ERRCODE_CARDINALITY_VIOLATION),
/* translator: %s is a SQL command name */
errmsg("%s command cannot affect row a second time",
- "ON CONFLICT DO UPDATE"),
+ isUpdate ? "ON CONFLICT DO UPDATE" : "ON CONFLICT DO SELECT"),
errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
/* This shouldn't happen */
@@ -2834,6 +2846,50 @@ ExecOnConflictUpdate(ModifyTableContext *context,
}
/* Success, the tuple is locked. */
+ return true;
+}
+
+/*
+ * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
+ *
+ * Try to lock tuple for update as part of speculative insertion. If
+ * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
+ * (but still lock row, even though it may not satisfy estate's
+ * snapshot).
+ *
+ * Returns true if we're done (with or without an update), or false if
+ * the caller must retry the INSERT from scratch.
+ */
+static bool
+ExecOnConflictUpdate(ModifyTableContext *context,
+ ResultRelInfo *resultRelInfo,
+ ItemPointer conflictTid,
+ TupleTableSlot *excludedSlot,
+ bool canSetTag,
+ TupleTableSlot **returning)
+{
+ ModifyTableState *mtstate = context->mtstate;
+ ExprContext *econtext = mtstate->ps.ps_ExprContext;
+ Relation relation = resultRelInfo->ri_RelationDesc;
+ ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
+ TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
+ LockTupleMode lockmode;
+
+ /*
+ * Parse analysis should have blocked ON CONFLICT for all system
+ * relations, which includes these. There's no fundamental obstacle to
+ * supporting this; we'd just need to handle LOCKTAG_TUPLE like the other
+ * ExecUpdate() caller.
+ */
+ Assert(!resultRelInfo->ri_needLockTagTuple);
+
+ /* Determine lock mode to use */
+ lockmode = ExecUpdateLockMode(context->estate, resultRelInfo);
+
+ /* Lock tuple for update */
+ if (!ExecOnConflictLockRow(context, existing, conflictTid,
+ resultRelInfo->ri_RelationDesc, lockmode, true))
+ return false;
/*
* Verify that the tuple is visible to our MVCC snapshot if the current
@@ -2875,11 +2931,13 @@ ExecOnConflictUpdate(ModifyTableContext *context,
* security barrier quals (if any), enforced here as RLS checks/WCOs.
*
* The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
- * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
- * but that's almost the extent of its special handling for ON
- * CONFLICT DO UPDATE.
+ * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK.
+ * Since SELECT permission on the target table is always required for
+ * INSERT ... ON CONFLICT DO UPDATE, the rewriter also adds SELECT RLS
+ * checks/WCOs for SELECT security quals, using WCOs of the same kind,
+ * and this check enforces them too.
*
- * The rewriter will also have associated UPDATE applicable straight
+ * The rewriter will also have associated UPDATE-applicable straight
* RLS checks/WCOs for the benefit of the ExecUpdate() call that
* follows. INSERTs and UPDATEs naturally have mutually exclusive WCO
* kinds, so there is no danger of spurious over-enforcement in the
@@ -2924,6 +2982,141 @@ ExecOnConflictUpdate(ModifyTableContext *context,
return true;
}
+/*
+ * ExecOnConflictSelect --- execute SELECT of INSERT ON CONFLICT DO SELECT
+ *
+ * If SELECT FOR UPDATE/SHARE is specified, try to lock tuple as part of
+ * speculative insertion. If a qual originating from ON CONFLICT DO SELECT is
+ * satisfied, select (but still lock row, even though it may not satisfy
+ * estate's snapshot).
+ *
+ * Returns true if we're done (with or without a select), or false if the
+ * caller must retry the INSERT from scratch.
+ */
+static bool
+ExecOnConflictSelect(ModifyTableContext *context,
+ ResultRelInfo *resultRelInfo,
+ ItemPointer conflictTid,
+ TupleTableSlot *excludedSlot,
+ bool canSetTag,
+ TupleTableSlot **returning)
+{
+ ModifyTableState *mtstate = context->mtstate;
+ ExprContext *econtext = mtstate->ps.ps_ExprContext;
+ Relation relation = resultRelInfo->ri_RelationDesc;
+ ExprState *onConflictSelectWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
+ TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
+ LockClauseStrength lockStrength = resultRelInfo->ri_onConflict->oc_LockStrength;
+
+ /*
+ * Parse analysis should have blocked ON CONFLICT for all system
+ * relations, which includes these. There's no fundamental obstacle to
+ * supporting this; we'd just need to handle LOCKTAG_TUPLE appropriately.
+ */
+ Assert(!resultRelInfo->ri_needLockTagTuple);
+
+ /* Fetch/lock existing tuple, according to the requested lock strength */
+ if (lockStrength == LCS_NONE)
+ {
+ if (!table_tuple_fetch_row_version(relation,
+ conflictTid,
+ SnapshotAny,
+ existing))
+ elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
+ }
+ else
+ {
+ LockTupleMode lockmode;
+
+ switch (lockStrength)
+ {
+ case LCS_FORKEYSHARE:
+ lockmode = LockTupleKeyShare;
+ break;
+ case LCS_FORSHARE:
+ lockmode = LockTupleShare;
+ break;
+ case LCS_FORNOKEYUPDATE:
+ lockmode = LockTupleNoKeyExclusive;
+ break;
+ case LCS_FORUPDATE:
+ lockmode = LockTupleExclusive;
+ break;
+ default:
+ elog(ERROR, "Unexpected lock strength %d", (int) lockStrength);
+ }
+
+ if (!ExecOnConflictLockRow(context, existing, conflictTid,
+ resultRelInfo->ri_RelationDesc, lockmode, false))
+ return false;
+ }
+
+ /*
+ * Verify that the tuple is visible to our MVCC snapshot if the current
+ * isolation level mandates that. See comments in ExecOnConflictUpdate().
+ */
+ ExecCheckTupleVisible(context->estate, relation, existing);
+
+ /*
+ * Make tuple and any needed join variables available to ExecQual. The
+ * EXCLUDED tuple is installed in ecxt_innertuple, while the target's
+ * existing tuple is installed in the scantuple. EXCLUDED has been made
+ * to reference INNER_VAR in setrefs.c, but there is no other redirection.
+ */
+ econtext->ecxt_scantuple = existing;
+ econtext->ecxt_innertuple = excludedSlot;
+ econtext->ecxt_outertuple = NULL;
+
+ if (!ExecQual(onConflictSelectWhere, econtext))
+ {
+ ExecClearTuple(existing); /* see return below */
+ InstrCountFiltered1(&mtstate->ps, 1);
+ return true; /* done with the tuple */
+ }
+
+ if (resultRelInfo->ri_WithCheckOptions != NIL)
+ {
+ /*
+ * Check target's existing tuple against SELECT-applicable USING
+ * security barrier quals (if any), enforced here as RLS checks/WCOs.
+ *
+ * The rewriter creates WCOs from the USING quals of SELECT policies,
+ * and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK. If FOR
+ * UPDATE/SHARE was specified, UPDATE permissions are required on the
+ * target table, and the rewriter also adds WCOs built from the USING
+ * quals of UPDATE policies, using WCOs of the same kind, and this
+ * check enforces them too.
+ */
+ ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
+ existing,
+ mtstate->ps.state);
+ }
+
+ /* RETURNING is required for DO SELECT */
+ Assert(resultRelInfo->ri_projectReturning);
+
+ *returning = ExecProcessReturning(context, resultRelInfo, false,
+ existing, existing, context->planSlot);
+
+ if (canSetTag)
+ context->estate->es_processed++;
+
+ /*
+ * Before releasing the existing tuple, make sure that the returning slot
+ * has a local copy of any pass-by-reference values.
+ */
+ ExecMaterializeSlot(*returning);
+
+ /*
+ * Clear out existing tuple, as there might not be another conflict among
+ * the next input rows. Don't want to hold resources till the end of the
+ * query.
+ */
+ ExecClearTuple(existing);
+
+ return true;
+}
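
For orientation, a hypothetical sketch (not part of the patch) of how the
speculative-insertion path might dispatch between the two ON CONFLICT actions
once a conflicting TID is found; the variable names are assumptions borrowed
from the signatures above:

if (onConflictAction == ONCONFLICT_UPDATE)
{
	if (!ExecOnConflictUpdate(context, resultRelInfo, &conflictTid,
							  excludedSlot, canSetTag, &returning))
		goto retry;				/* row moved under us: redo the INSERT */
}
else
{
	Assert(onConflictAction == ONCONFLICT_SELECT);
	if (!ExecOnConflictSelect(context, resultRelInfo, &conflictTid,
							  excludedSlot, canSetTag, &returning))
		goto retry;
}
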
+
/*
* Perform MERGE.
*/
@@ -3549,7 +3742,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
case CMD_UPDATE:
rslot = ExecProcessReturning(context,
resultRelInfo,
- CMD_UPDATE,
+ false,
resultRelInfo->ri_oldTupleSlot,
newslot,
context->planSlot);
@@ -3558,7 +3751,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
case CMD_DELETE:
rslot = ExecProcessReturning(context,
resultRelInfo,
- CMD_DELETE,
+ true,
resultRelInfo->ri_oldTupleSlot,
NULL,
context->planSlot);
@@ -4329,7 +4522,8 @@ ExecModifyTable(PlanState *pstate)
Assert((resultRelInfo->ri_projectReturning->pi_state.flags & EEO_FLAG_HAS_OLD) == 0 &&
(resultRelInfo->ri_projectReturning->pi_state.flags & EEO_FLAG_HAS_NEW) == 0);
- slot = ExecProcessReturning(&context, resultRelInfo, operation,
+ slot = ExecProcessReturning(&context, resultRelInfo,
+ operation == CMD_DELETE,
NULL, NULL, context.planSlot);
return slot;
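
All the ExecProcessReturning() call sites in these hunks replace the old
CmdType argument with a bool that is passed as true only for DELETE. The
prototype change is not shown; a sketch of the implied new signature, with the
parameter name being an assumption:

static TupleTableSlot *
ExecProcessReturning(ModifyTableContext *context,
					 ResultRelInfo *resultRelInfo,
					 bool is_delete,	/* assumed name; true only for DELETE */
					 TupleTableSlot *oldSlot,
					 TupleTableSlot *newSlot,
					 TupleTableSlot *planSlot);
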
@@ -5031,49 +5225,60 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
}
/*
- * If needed, Initialize target list, projection and qual for ON CONFLICT
- * DO UPDATE.
+ * For ON CONFLICT DO SELECT/UPDATE, initialize the ON CONFLICT action
+ * state.
*/
- if (node->onConflictAction == ONCONFLICT_UPDATE)
+ if (node->onConflictAction == ONCONFLICT_UPDATE ||
+ node->onConflictAction == ONCONFLICT_SELECT)
{
- OnConflictSetState *onconfl = makeNode(OnConflictSetState);
- ExprContext *econtext;
- TupleDesc relationDesc;
+ OnConflictActionState *onconfl = makeNode(OnConflictActionState);
/* already exists if created by RETURNING processing above */
if (mtstate->ps.ps_ExprContext == NULL)
ExecAssignExprContext(estate, &mtstate->ps);
- econtext = mtstate->ps.ps_ExprContext;
- relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
-
- /* create state for DO UPDATE SET operation */
+ /* action state for DO SELECT/UPDATE */
resultRelInfo->ri_onConflict = onconfl;
+ /* lock strength for DO SELECT [FOR UPDATE/SHARE] */
+ onconfl->oc_LockStrength = node->onConflictLockStrength;
+
/* initialize slot for the existing tuple */
onconfl->oc_Existing =
table_slot_create(resultRelInfo->ri_RelationDesc,
&mtstate->ps.state->es_tupleTable);
/*
- * Create the tuple slot for the UPDATE SET projection. We want a slot
- * of the table's type here, because the slot will be used to insert
- * into the table, and for RETURNING processing - which may access
- * system attributes.
+ * For ON CONFLICT DO UPDATE, initialize target list and projection.
*/
- onconfl->oc_ProjSlot =
- table_slot_create(resultRelInfo->ri_RelationDesc,
- &mtstate->ps.state->es_tupleTable);
+ if (node->onConflictAction == ONCONFLICT_UPDATE)
+ {
+ ExprContext *econtext;
+ TupleDesc relationDesc;
+
+ econtext = mtstate->ps.ps_ExprContext;
+ relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
- /* build UPDATE SET projection state */
- onconfl->oc_ProjInfo =
- ExecBuildUpdateProjection(node->onConflictSet,
- true,
- node->onConflictCols,
- relationDesc,
- econtext,
- onconfl->oc_ProjSlot,
- &mtstate->ps);
+ /*
+ * Create the tuple slot for the UPDATE SET projection. We want a
+ * slot of the table's type here, because the slot will be used to
+ * insert into the table, and for RETURNING processing - which may
+ * access system attributes.
+ */
+ onconfl->oc_ProjSlot =
+ table_slot_create(resultRelInfo->ri_RelationDesc,
+ &mtstate->ps.state->es_tupleTable);
+
+ /* build UPDATE SET projection state */
+ onconfl->oc_ProjInfo =
+ ExecBuildUpdateProjection(node->onConflictSet,
+ true,
+ node->onConflictCols,
+ relationDesc,
+ econtext,
+ onconfl->oc_ProjSlot,
+ &mtstate->ps);
+ }
/* initialize state to evaluate the WHERE clause, if any */
if (node->onConflictWhere)
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index 4aa28918e90..503817da65b 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -79,13 +79,13 @@ MakeTidOpExpr(OpExpr *expr, TidRangeScanState *tidstate)
{
case TIDLessEqOperator:
tidopexpr->inclusive = true;
- /* fall through */
+ pg_fallthrough;
case TIDLessOperator:
tidopexpr->exprtype = invert ? TIDEXPR_LOWER_BOUND : TIDEXPR_UPPER_BOUND;
break;
case TIDGreaterEqOperator:
tidopexpr->inclusive = true;
- /* fall through */
+ pg_fallthrough;
case TIDGreaterOperator:
tidopexpr->exprtype = invert ? TIDEXPR_UPPER_BOUND : TIDEXPR_LOWER_BOUND;
break;
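
These hunks replace /* fall through */ comments with a pg_fallthrough macro.
Its definition is outside this section; a plausible shape, assuming it maps to
the compiler's fallthrough attribute (which, unlike a comment, still satisfies
-Wimplicit-fallthrough=5):

/* assumed definition, not shown in this diff */
#if defined(__has_attribute) && __has_attribute(fallthrough)
#define pg_fallthrough __attribute__((fallthrough))
#else
#define pg_fallthrough ((void) 0)
#endif
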
diff --git a/src/backend/executor/tstoreReceiver.c b/src/backend/executor/tstoreReceiver.c
index 2ce96b69402..8531d4ca432 100644
--- a/src/backend/executor/tstoreReceiver.c
+++ b/src/backend/executor/tstoreReceiver.c
@@ -161,7 +161,7 @@ tstoreReceiveSlot_detoast(TupleTableSlot *slot, DestReceiver *self)
{
if (VARATT_IS_EXTERNAL(DatumGetPointer(val)))
{
- val = PointerGetDatum(detoast_external_attr((struct varlena *)
+ val = PointerGetDatum(detoast_external_attr((varlena *)
DatumGetPointer(val)));
myState->tofree[nfree++] = val;
}
diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c
index 4636b90cd0f..c8a1f841293 100644
--- a/src/backend/jit/llvm/llvmjit_types.c
+++ b/src/backend/jit/llvm/llvmjit_types.c
@@ -81,7 +81,7 @@ extern Datum AttributeTemplate(PG_FUNCTION_ARGS);
Datum
AttributeTemplate(PG_FUNCTION_ARGS)
{
- AssertVariableIsOfType(&AttributeTemplate, PGFunction);
+ StaticAssertVariableIsOfType(&AttributeTemplate, PGFunction);
PG_RETURN_NULL();
}
@@ -99,8 +99,8 @@ ExecEvalSubroutineTemplate(ExprState *state,
struct ExprEvalStep *op,
ExprContext *econtext)
{
- AssertVariableIsOfType(&ExecEvalSubroutineTemplate,
- ExecEvalSubroutine);
+ StaticAssertVariableIsOfType(&ExecEvalSubroutineTemplate,
+ ExecEvalSubroutine);
}
extern bool ExecEvalBoolSubroutineTemplate(ExprState *state,
@@ -111,8 +111,8 @@ ExecEvalBoolSubroutineTemplate(ExprState *state,
struct ExprEvalStep *op,
ExprContext *econtext)
{
- AssertVariableIsOfType(&ExecEvalBoolSubroutineTemplate,
- ExecEvalBoolSubroutine);
+ StaticAssertVariableIsOfType(&ExecEvalBoolSubroutineTemplate,
+ ExecEvalBoolSubroutine);
return false;
}
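
The AssertVariableIsOfType to StaticAssertVariableIsOfType renames are
mechanical. For reference, the existing macro is a compile-time type check
built on StaticAssertStmt; the renamed version presumably keeps the same body,
roughly (the fallback for compilers lacking the builtin is omitted here):

#define StaticAssertVariableIsOfType(varname, typename) \
	StaticAssertStmt(__builtin_types_compatible_p(__typeof__(varname), typename), \
					 CppAsString(varname) " does not have type " CppAsString(typename))
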
diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c
index 795bfed8d19..e04aa2e68ed 100644
--- a/src/backend/libpq/auth.c
+++ b/src/backend/libpq/auth.c
@@ -2002,7 +2002,7 @@ pam_passwd_conv_proc(int num_msg, PG_PAM_CONST struct pam_message **msg,
ereport(LOG,
(errmsg("error from underlying PAM layer: %s",
msg[i]->msg)));
- /* FALL THROUGH */
+ pg_fallthrough;
case PAM_TEXT_INFO:
/* we don't bother to log TEXT_INFO messages */
if ((reply[i].resp = strdup("")) == NULL)
diff --git a/src/backend/libpq/crypt.c b/src/backend/libpq/crypt.c
index 52722060451..dbdd0e40f41 100644
--- a/src/backend/libpq/crypt.c
+++ b/src/backend/libpq/crypt.c
@@ -20,10 +20,15 @@
#include "common/scram-common.h"
#include "libpq/crypt.h"
#include "libpq/scram.h"
+#include "miscadmin.h"
#include "utils/builtins.h"
+#include "utils/memutils.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
+/* Threshold for password expiration warnings, in seconds (604800 = 7 days). */
+int password_expiration_warning_threshold = 604800;
+
/* Enables deprecation warnings for MD5 passwords. */
bool md5_password_warnings = true;
@@ -71,13 +76,71 @@ get_role_password(const char *role, const char **logdetail)
ReleaseSysCache(roleTup);
/*
- * Password OK, but check to be sure we are not past rolvaliduntil
+ * Password OK, but check to be sure we are not past rolvaliduntil or
+ * password_expiration_warning_threshold.
*/
- if (!isnull && vuntil < GetCurrentTimestamp())
+ if (!isnull)
{
- *logdetail = psprintf(_("User \"%s\" has an expired password."),
- role);
- return NULL;
+ TimestampTz now = GetCurrentTimestamp();
+ uint64 expire_time = TimestampDifferenceMicroseconds(now, vuntil);
+
+ /*
+ * If we're past rolvaliduntil, the connection attempt should fail, so
+ * update logdetail and return NULL.
+ */
+ if (vuntil < now)
+ {
+ *logdetail = psprintf(_("User \"%s\" has an expired password."),
+ role);
+ return NULL;
+ }
+
+ /*
+ * If we're past the warning threshold, the connection attempt should
+ * succeed, but we still want to emit a warning. To do so, we queue
+ * the warning message using StoreConnectionWarning() so that it will
+ * be emitted at the end of InitPostgres(), and we return normally.
+ */
+ if (expire_time / USECS_PER_SEC < password_expiration_warning_threshold)
+ {
+ MemoryContext oldcontext;
+ int days;
+ int hours;
+ int minutes;
+ char *warning;
+ char *detail;
+
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+ days = expire_time / USECS_PER_DAY;
+ hours = (expire_time % USECS_PER_DAY) / USECS_PER_HOUR;
+ minutes = (expire_time % USECS_PER_HOUR) / USECS_PER_MINUTE;
+
+ warning = pstrdup(_("role password will expire soon"));
+
+ if (days > 0)
+ detail = psprintf(ngettext("The password for role \"%s\" will expire in %d day.",
+ "The password for role \"%s\" will expire in %d days.",
+ days),
+ role, days);
+ else if (hours > 0)
+ detail = psprintf(ngettext("The password for role \"%s\" will expire in %d hour.",
+ "The password for role \"%s\" will expire in %d hours.",
+ hours),
+ role, hours);
+ else if (minutes > 0)
+ detail = psprintf(ngettext("The password for role \"%s\" will expire in %d minute.",
+ "The password for role \"%s\" will expire in %d minutes.",
+ minutes),
+ role, minutes);
+ else
+ detail = psprintf(_("The password for role \"%s\" will expire in less than 1 minute."),
+ role);
+
+ StoreConnectionWarning(warning, detail);
+
+ MemoryContextSwitchTo(oldcontext);
+ }
}
return shadow_pass;
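
A worked example of the arithmetic above, using the standard microsecond
constants (USECS_PER_DAY = 86400 * USECS_PER_SEC, and so on): a password
expiring in 25 hours is under the default 7-day (604800-second) threshold, and
the warning detail reports "1 day":

uint64		expire_time = (uint64) 25 * 3600 * USECS_PER_SEC;	/* 25 hours left */

Assert(expire_time / USECS_PER_SEC < password_expiration_warning_threshold);

days = expire_time / USECS_PER_DAY;							/* 1 */
hours = (expire_time % USECS_PER_DAY) / USECS_PER_HOUR;		/* 1 */
minutes = (expire_time % USECS_PER_HOUR) / USECS_PER_MINUTE;	/* 0; days > 0 wins */
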
diff --git a/src/backend/libpq/pg_hba.conf.sample b/src/backend/libpq/pg_hba.conf.sample
index b64c8dea97c..475100f8865 100644
--- a/src/backend/libpq/pg_hba.conf.sample
+++ b/src/backend/libpq/pg_hba.conf.sample
@@ -109,14 +109,14 @@
# TYPE DATABASE USER ADDRESS METHOD
-@remove-line-for-nolocal@# "local" is for Unix domain socket connections only
-@remove-line-for-nolocal@local all all @authmethodlocal@
+# "local" is for Unix domain socket connections only
+local all all @authmethodlocal@
# IPv4 local connections:
host all all 127.0.0.1/32 @authmethodhost@
# IPv6 local connections:
host all all ::1/128 @authmethodhost@
# Allow replication connections from localhost, by a user with the
# replication privilege.
-@remove-line-for-nolocal@local replication all @authmethodlocal@
+local replication all @authmethodlocal@
host replication all 127.0.0.1/32 @authmethodhost@
host replication all ::1/128 @authmethodhost@
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index b4581e54d93..90275e25872 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -128,8 +128,10 @@ static Path *get_cheapest_parameterized_child_path(PlannerInfo *root,
Relids required_outer);
static void accumulate_append_subpath(Path *path,
List **subpaths,
- List **special_subpaths);
-static Path *get_singleton_append_subpath(Path *path);
+ List **special_subpaths,
+ List **child_append_relid_sets);
+static Path *get_singleton_append_subpath(Path *path,
+ List **child_append_relid_sets);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
@@ -1404,22 +1406,21 @@ void
add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
List *live_childrels)
{
- List *subpaths = NIL;
- bool subpaths_valid = true;
- List *startup_subpaths = NIL;
- bool startup_subpaths_valid = true;
- List *partial_subpaths = NIL;
- List *pa_partial_subpaths = NIL;
- List *pa_nonpartial_subpaths = NIL;
- bool partial_subpaths_valid = true;
- bool pa_subpaths_valid;
+ AppendPathInput unparameterized = {0};
+ AppendPathInput startup = {0};
+ AppendPathInput partial_only = {0};
+ AppendPathInput parallel_append = {0};
+ bool unparameterized_valid = true;
+ bool startup_valid = true;
+ bool partial_only_valid = true;
+ bool parallel_append_valid = true;
List *all_child_pathkeys = NIL;
List *all_child_outers = NIL;
ListCell *l;
double partial_rows = -1;
/* If appropriate, consider parallel append */
- pa_subpaths_valid = enable_parallel_append && rel->consider_parallel;
+ parallel_append_valid = enable_parallel_append && rel->consider_parallel;
/*
* For every non-dummy child, remember the cheapest path. Also, identify
@@ -1443,9 +1444,9 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
if (childrel->pathlist != NIL &&
childrel->cheapest_total_path->param_info == NULL)
accumulate_append_subpath(childrel->cheapest_total_path,
- &subpaths, NULL);
+ &unparameterized.subpaths, NULL, &unparameterized.child_append_relid_sets);
else
- subpaths_valid = false;
+ unparameterized_valid = false;
/*
* When the planner is considering cheap startup plans, we'll also
@@ -1471,11 +1472,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
/* cheapest_startup_path must not be a parameterized path. */
Assert(cheapest_path->param_info == NULL);
accumulate_append_subpath(cheapest_path,
- &startup_subpaths,
- NULL);
+ &startup.subpaths,
+ NULL,
+ &startup.child_append_relid_sets);
}
else
- startup_subpaths_valid = false;
+ startup_valid = false;
/* Same idea, but for a partial plan. */
@@ -1483,16 +1485,17 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
{
cheapest_partial_path = linitial(childrel->partial_pathlist);
accumulate_append_subpath(cheapest_partial_path,
- &partial_subpaths, NULL);
+ &partial_only.partial_subpaths, NULL,
+ &partial_only.child_append_relid_sets);
}
else
- partial_subpaths_valid = false;
+ partial_only_valid = false;
/*
* Same idea, but for a parallel append mixing partial and non-partial
* paths.
*/
- if (pa_subpaths_valid)
+ if (parallel_append_valid)
{
Path *nppath = NULL;
@@ -1502,7 +1505,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
if (cheapest_partial_path == NULL && nppath == NULL)
{
/* Neither a partial nor a parallel-safe path? Forget it. */
- pa_subpaths_valid = false;
+ parallel_append_valid = false;
}
else if (nppath == NULL ||
(cheapest_partial_path != NULL &&
@@ -1511,8 +1514,9 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
/* Partial path is cheaper or the only option. */
Assert(cheapest_partial_path != NULL);
accumulate_append_subpath(cheapest_partial_path,
- &pa_partial_subpaths,
- &pa_nonpartial_subpaths);
+ &parallel_append.partial_subpaths,
+ &parallel_append.subpaths,
+ &parallel_append.child_append_relid_sets);
}
else
{
@@ -1530,8 +1534,9 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
* figure that out.
*/
accumulate_append_subpath(nppath,
- &pa_nonpartial_subpaths,
- NULL);
+ &parallel_append.subpaths,
+ NULL,
+ &parallel_append.child_append_relid_sets);
}
}
@@ -1605,28 +1610,28 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
* unparameterized Append path for the rel. (Note: this is correct even
* if we have zero or one live subpath due to constraint exclusion.)
*/
- if (subpaths_valid)
- add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL,
+ if (unparameterized_valid)
+ add_path(rel, (Path *) create_append_path(root, rel, unparameterized,
NIL, NULL, 0, false,
-1));
/* build an AppendPath for the cheap startup paths, if valid */
- if (startup_subpaths_valid)
- add_path(rel, (Path *) create_append_path(root, rel, startup_subpaths,
- NIL, NIL, NULL, 0, false, -1));
+ if (startup_valid)
+ add_path(rel, (Path *) create_append_path(root, rel, startup,
+ NIL, NULL, 0, false, -1));
/*
* Consider an append of unordered, unparameterized partial paths. Make
* it parallel-aware if possible.
*/
- if (partial_subpaths_valid && partial_subpaths != NIL)
+ if (partial_only_valid && partial_only.partial_subpaths != NIL)
{
AppendPath *appendpath;
ListCell *lc;
int parallel_workers = 0;
/* Find the highest number of workers requested for any subpath. */
- foreach(lc, partial_subpaths)
+ foreach(lc, partial_only.partial_subpaths)
{
Path *path = lfirst(lc);
@@ -1653,7 +1658,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
Assert(parallel_workers > 0);
/* Generate a partial append path. */
- appendpath = create_append_path(root, rel, NIL, partial_subpaths,
+ appendpath = create_append_path(root, rel, partial_only,
NIL, NULL, parallel_workers,
enable_parallel_append,
-1);
@@ -1674,7 +1679,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
* a non-partial path that is substantially cheaper than any partial path;
* otherwise, we should use the append path added in the previous step.)
*/
- if (pa_subpaths_valid && pa_nonpartial_subpaths != NIL)
+ if (parallel_append_valid && parallel_append.subpaths != NIL)
{
AppendPath *appendpath;
ListCell *lc;
@@ -1684,7 +1689,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
* Find the highest number of workers requested for any partial
* subpath.
*/
- foreach(lc, pa_partial_subpaths)
+ foreach(lc, parallel_append.partial_subpaths)
{
Path *path = lfirst(lc);
@@ -1702,8 +1707,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
max_parallel_workers_per_gather);
Assert(parallel_workers > 0);
- appendpath = create_append_path(root, rel, pa_nonpartial_subpaths,
- pa_partial_subpaths,
+ appendpath = create_append_path(root, rel, parallel_append,
NIL, NULL, parallel_workers, true,
partial_rows);
add_partial_path(rel, (Path *) appendpath);
@@ -1713,7 +1717,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
* Also build unparameterized ordered append paths based on the collected
* list of child pathkeys.
*/
- if (subpaths_valid)
+ if (unparameterized_valid)
generate_orderedappend_paths(root, rel, live_childrels,
all_child_pathkeys);
@@ -1734,10 +1738,10 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
{
Relids required_outer = (Relids) lfirst(l);
ListCell *lcr;
+ AppendPathInput parameterized = {0};
+ bool parameterized_valid = true;
/* Select the child paths for an Append with this parameterization */
- subpaths = NIL;
- subpaths_valid = true;
foreach(lcr, live_childrels)
{
RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
@@ -1746,7 +1750,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
if (childrel->pathlist == NIL)
{
/* failed to make a suitable path for this child */
- subpaths_valid = false;
+ parameterized_valid = false;
break;
}
@@ -1756,15 +1760,16 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
if (subpath == NULL)
{
/* failed to make a suitable path for this child */
- subpaths_valid = false;
+ parameterized_valid = false;
break;
}
- accumulate_append_subpath(subpath, &subpaths, NULL);
+ accumulate_append_subpath(subpath, &parameterized.subpaths, NULL,
+ &parameterized.child_append_relid_sets);
}
- if (subpaths_valid)
+ if (parameterized_valid)
add_path(rel, (Path *)
- create_append_path(root, rel, subpaths, NIL,
+ create_append_path(root, rel, parameterized,
NIL, required_outer, 0, false,
-1));
}
@@ -1785,13 +1790,14 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
{
Path *path = (Path *) lfirst(l);
AppendPath *appendpath;
+ AppendPathInput append = {0};
/* skip paths with no pathkeys. */
if (path->pathkeys == NIL)
continue;
- appendpath = create_append_path(root, rel, NIL, list_make1(path),
- NIL, NULL,
+ append.partial_subpaths = list_make1(path);
+ appendpath = create_append_path(root, rel, append, NIL, NULL,
path->parallel_workers, true,
partial_rows);
add_partial_path(rel, (Path *) appendpath);
@@ -1873,9 +1879,9 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
foreach(lcp, all_child_pathkeys)
{
List *pathkeys = (List *) lfirst(lcp);
- List *startup_subpaths = NIL;
- List *total_subpaths = NIL;
- List *fractional_subpaths = NIL;
+ AppendPathInput startup = {0};
+ AppendPathInput total = {0};
+ AppendPathInput fractional = {0};
bool startup_neq_total = false;
bool fraction_neq_total = false;
bool match_partition_order;
@@ -2038,16 +2044,23 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
* just a single subpath (and hence aren't doing anything
* useful).
*/
- cheapest_startup = get_singleton_append_subpath(cheapest_startup);
- cheapest_total = get_singleton_append_subpath(cheapest_total);
+ cheapest_startup =
+ get_singleton_append_subpath(cheapest_startup,
+ &startup.child_append_relid_sets);
+ cheapest_total =
+ get_singleton_append_subpath(cheapest_total,
+ &total.child_append_relid_sets);
- startup_subpaths = lappend(startup_subpaths, cheapest_startup);
- total_subpaths = lappend(total_subpaths, cheapest_total);
+ startup.subpaths = lappend(startup.subpaths, cheapest_startup);
+ total.subpaths = lappend(total.subpaths, cheapest_total);
if (cheapest_fractional)
{
- cheapest_fractional = get_singleton_append_subpath(cheapest_fractional);
- fractional_subpaths = lappend(fractional_subpaths, cheapest_fractional);
+ cheapest_fractional =
+ get_singleton_append_subpath(cheapest_fractional,
+ &fractional.child_append_relid_sets);
+ fractional.subpaths =
+ lappend(fractional.subpaths, cheapest_fractional);
}
}
else
@@ -2057,13 +2070,16 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
* child paths for the MergeAppend.
*/
accumulate_append_subpath(cheapest_startup,
- &startup_subpaths, NULL);
+ &startup.subpaths, NULL,
+ &startup.child_append_relid_sets);
accumulate_append_subpath(cheapest_total,
- &total_subpaths, NULL);
+ &total.subpaths, NULL,
+ &total.child_append_relid_sets);
if (cheapest_fractional)
accumulate_append_subpath(cheapest_fractional,
- &fractional_subpaths, NULL);
+ &fractional.subpaths, NULL,
+ &fractional.child_append_relid_sets);
}
}
@@ -2073,8 +2089,7 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
/* We only need Append */
add_path(rel, (Path *) create_append_path(root,
rel,
- startup_subpaths,
- NIL,
+ startup,
pathkeys,
NULL,
0,
@@ -2083,19 +2098,17 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
if (startup_neq_total)
add_path(rel, (Path *) create_append_path(root,
rel,
- total_subpaths,
- NIL,
+ total,
pathkeys,
NULL,
0,
false,
-1));
- if (fractional_subpaths && fraction_neq_total)
+ if (fractional.subpaths && fraction_neq_total)
add_path(rel, (Path *) create_append_path(root,
rel,
- fractional_subpaths,
- NIL,
+ fractional,
pathkeys,
NULL,
0,
@@ -2107,20 +2120,23 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
/* We need MergeAppend */
add_path(rel, (Path *) create_merge_append_path(root,
rel,
- startup_subpaths,
+ startup.subpaths,
+ startup.child_append_relid_sets,
pathkeys,
NULL));
if (startup_neq_total)
add_path(rel, (Path *) create_merge_append_path(root,
rel,
- total_subpaths,
+ total.subpaths,
+ total.child_append_relid_sets,
pathkeys,
NULL));
- if (fractional_subpaths && fraction_neq_total)
+ if (fractional.subpaths && fraction_neq_total)
add_path(rel, (Path *) create_merge_append_path(root,
rel,
- fractional_subpaths,
+ fractional.subpaths,
+ fractional.child_append_relid_sets,
pathkeys,
NULL));
}
@@ -2223,7 +2239,8 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel,
* paths).
*/
static void
-accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
+accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths,
+ List **child_append_relid_sets)
{
if (IsA(path, AppendPath))
{
@@ -2232,6 +2249,11 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
if (!apath->path.parallel_aware || apath->first_partial_path == 0)
{
*subpaths = list_concat(*subpaths, apath->subpaths);
+ *child_append_relid_sets =
+ lappend(*child_append_relid_sets, path->parent->relids);
+ *child_append_relid_sets =
+ list_concat(*child_append_relid_sets,
+ apath->child_append_relid_sets);
return;
}
else if (special_subpaths != NULL)
@@ -2246,6 +2268,11 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
apath->first_partial_path);
*special_subpaths = list_concat(*special_subpaths,
new_special_subpaths);
+ *child_append_relid_sets =
+ lappend(*child_append_relid_sets, path->parent->relids);
+ *child_append_relid_sets =
+ list_concat(*child_append_relid_sets,
+ apath->child_append_relid_sets);
return;
}
}
@@ -2254,6 +2281,11 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
MergeAppendPath *mpath = (MergeAppendPath *) path;
*subpaths = list_concat(*subpaths, mpath->subpaths);
+ *child_append_relid_sets =
+ lappend(*child_append_relid_sets, path->parent->relids);
+ *child_append_relid_sets =
+ list_concat(*child_append_relid_sets,
+ mpath->child_append_relid_sets);
return;
}
@@ -2265,10 +2297,15 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths)
* Returns the single subpath of an Append/MergeAppend, or just
* return 'path' if it's not a single sub-path Append/MergeAppend.
*
+ * As a side effect, whenever we return a single subpath rather than the
+ * original path, add the relid sets for the original path to
+ * child_append_relid_sets, so that those relids don't entirely disappear
+ * from the final plan.
+ *
* Note: 'path' must not be a parallel-aware path.
*/
static Path *
-get_singleton_append_subpath(Path *path)
+get_singleton_append_subpath(Path *path, List **child_append_relid_sets)
{
Assert(!path->parallel_aware);
@@ -2277,14 +2314,28 @@ get_singleton_append_subpath(Path *path)
AppendPath *apath = (AppendPath *) path;
if (list_length(apath->subpaths) == 1)
+ {
+ *child_append_relid_sets =
+ lappend(*child_append_relid_sets, path->parent->relids);
+ *child_append_relid_sets =
+ list_concat(*child_append_relid_sets,
+ apath->child_append_relid_sets);
return (Path *) linitial(apath->subpaths);
+ }
}
else if (IsA(path, MergeAppendPath))
{
MergeAppendPath *mpath = (MergeAppendPath *) path;
if (list_length(mpath->subpaths) == 1)
+ {
+ *child_append_relid_sets =
+ lappend(*child_append_relid_sets, path->parent->relids);
+ *child_append_relid_sets =
+ list_concat(*child_append_relid_sets,
+ mpath->child_append_relid_sets);
return (Path *) linitial(mpath->subpaths);
+ }
}
return path;
@@ -2304,6 +2355,8 @@ get_singleton_append_subpath(Path *path)
static void
set_dummy_rel_pathlist(RelOptInfo *rel)
{
+ AppendPathInput in = {0};
+
/* Set dummy size estimates --- we leave attr_widths[] as zeroes */
rel->rows = 0;
rel->reltarget->width = 0;
@@ -2313,7 +2366,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel)
rel->partial_pathlist = NIL;
/* Set up the dummy path */
- add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
+ add_path(rel, (Path *) create_append_path(NULL, rel, in,
NIL, rel->lateral_relids,
0, false, -1));
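
Throughout these hunks the separate subpath lists formerly passed to
create_append_path() are bundled into an AppendPathInput struct, zero-initialized
at each declaration. Its definition is not in this diff; inferred from the
field accesses, it presumably looks something like:

/* assumed definition, inferred from usage above */
typedef struct AppendPathInput
{
	List	   *subpaths;			/* non-partial subpaths */
	List	   *partial_subpaths;	/* partial subpaths */
	List	   *child_append_relid_sets;	/* relids of flattened child
											 * Append/MergeAppend nodes */
} AppendPathInput;
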
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 4da0b17f137..89ca4e08bf1 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1461,7 +1461,6 @@ cost_tidrangescan(Path *path, PlannerInfo *root,
enable_mask |= PGS_CONSIDER_NONPARTIAL;
path->disabled_nodes =
(baserel->pgs_mask & enable_mask) != enable_mask ? 1 : 0;
- path->disabled_nodes = 0;
path->startup_cost = startup_cost;
path->total_cost = startup_cost + cpu_run_cost + disk_run_cost;
}
@@ -2590,11 +2589,6 @@ cost_material(Path *path,
double nbytes = relation_byte_size(tuples, width);
double work_mem_bytes = work_mem * (Size) 1024;
- if (path->parallel_workers == 0 &&
- path->parent != NULL &&
- (path->parent->pgs_mask & PGS_CONSIDER_NONPARTIAL) == 0)
- enabled = false;
-
path->rows = tuples;
/*
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 1e4246b49d5..044560da7bf 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -1048,6 +1048,7 @@ try_partial_nestloop_path(PlannerInfo *root,
initial_cost_nestloop(root, &workspace, jointype, nestloop_subtype,
outer_path, inner_path, extra);
if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
+ workspace.startup_cost,
workspace.total_cost, pathkeys))
return;
@@ -1237,6 +1238,7 @@ try_partial_mergejoin_path(PlannerInfo *root,
extra);
if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
+ workspace.startup_cost,
workspace.total_cost, pathkeys))
return;
@@ -1369,6 +1371,7 @@ try_partial_hashjoin_path(PlannerInfo *root,
initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
outer_path, inner_path, extra, parallel_hash);
if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
+ workspace.startup_cost,
workspace.total_cost, NIL))
return;
@@ -1895,8 +1898,17 @@ match_unsorted_outer(PlannerInfo *root,
/*
* Consider materializing the cheapest inner path, unless that is
* disabled or the path in question materializes its output anyway.
+ *
+ * At present, we only consider materialization for non-partial outer
+ * paths, so it's correct to test PGS_CONSIDER_NONPARTIAL here. If we
+ * ever want to consider materialization for partial paths, we'll need
+ * to create matpath whenever PGS_NESTLOOP_MATERIALIZE is set, use it
+ * for partial paths either way, and use it for non-partial paths only
+ * when PGS_CONSIDER_NONPARTIAL is also set.
*/
- if ((extra->pgs_mask & PGS_NESTLOOP_MATERIALIZE) != 0 &&
+ if ((extra->pgs_mask &
+ (PGS_NESTLOOP_MATERIALIZE | PGS_CONSIDER_NONPARTIAL)) ==
+ (PGS_NESTLOOP_MATERIALIZE | PGS_CONSIDER_NONPARTIAL) &&
inner_cheapest_total != NULL &&
!ExecMaterializesOutput(inner_cheapest_total->pathtype))
matpath = (Path *)
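
The rewritten test requires both flags at once: (mask & (A | B)) == (A | B)
holds only when A and B are both set, whereas a plain nonzero test would also
pass with just one of them. In miniature:

bits32		need = PGS_NESTLOOP_MATERIALIZE | PGS_CONSIDER_NONPARTIAL;

if ((extra->pgs_mask & need) == need)
	;	/* both set: worth building matpath */
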
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 2615651c073..443e2dca7c0 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -1513,6 +1513,7 @@ void
mark_dummy_rel(RelOptInfo *rel)
{
MemoryContext oldcontext;
+ AppendPathInput in = {0};
/* Already marked? */
if (is_dummy_rel(rel))
@@ -1529,7 +1530,7 @@ mark_dummy_rel(RelOptInfo *rel)
rel->partial_pathlist = NIL;
/* Set up the dummy path */
- add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
+ add_path(rel, (Path *) create_append_path(NULL, rel, in,
NIL, rel->lateral_relids,
0, false, -1));
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c26e841f537..21f1988cf22 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1263,6 +1263,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
plan->plan.lefttree = NULL;
plan->plan.righttree = NULL;
plan->apprelids = rel->relids;
+ plan->child_append_relid_sets = best_path->child_append_relid_sets;
if (pathkeys != NIL)
{
@@ -1475,6 +1476,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
plan->lefttree = NULL;
plan->righttree = NULL;
node->apprelids = rel->relids;
+ node->child_append_relid_sets = best_path->child_append_relid_sets;
/*
* Compute sort column info, and adjust MergeAppend's tlist as needed.
@@ -6527,7 +6529,6 @@ materialize_finished_plan(Plan *subplan)
subplan->total_cost -= initplan_cost;
/* Set cost data */
- matpath.parent = NULL;
cost_material(&matpath,
enable_material,
subplan->disabled_nodes,
@@ -7042,6 +7043,7 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
if (!onconflict)
{
node->onConflictAction = ONCONFLICT_NONE;
+ node->onConflictLockStrength = LCS_NONE;
node->onConflictSet = NIL;
node->onConflictCols = NIL;
node->onConflictWhere = NULL;
@@ -7053,6 +7055,9 @@ make_modifytable(PlannerInfo *root, Plan *subplan,
{
node->onConflictAction = onconflict->action;
+ /* Lock strength for ON CONFLICT DO SELECT [FOR UPDATE/SHARE] */
+ node->onConflictLockStrength = onconflict->lockStrength;
+
/*
* Here we convert the ON CONFLICT UPDATE tlist, if any, to the
* executor's convention of having consecutive resno's. The actual
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 757bdc7b1de..42604a0f75c 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -511,7 +511,8 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
/* Allow plugins to take control after we've initialized "glob" */
if (planner_setup_hook)
- (*planner_setup_hook) (glob, parse, query_string, &tuple_fraction, es);
+ (*planner_setup_hook) (glob, parse, query_string, cursorOptions,
+ &tuple_fraction, es);
/* primary planning entry point (may recurse for subqueries) */
root = subquery_planner(glob, parse, NULL, NULL, false, tuple_fraction,
@@ -654,6 +655,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
result->unprunableRelids = bms_difference(glob->allRelids,
glob->prunableRelids);
result->permInfos = glob->finalrteperminfos;
+ result->subrtinfos = glob->subrtinfos;
result->resultRelations = glob->resultRelations;
result->appendRelations = glob->appendRelations;
result->subplans = glob->subplans;
@@ -664,6 +666,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
result->paramExecTypes = glob->paramExecTypes;
/* utilityStmt should be null, but we might as well copy it */
result->utilityStmt = parse->utilityStmt;
+ result->elidedNodes = glob->elidedNodes;
result->stmt_location = parse->stmt_location;
result->stmt_len = parse->stmt_len;
@@ -3461,11 +3464,11 @@ adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
case PATHKEYS_BETTER2:
/* 'pathkeys' are stronger, use these ones instead */
currpathkeys = pathkeys;
- /* FALLTHROUGH */
+ pg_fallthrough;
case PATHKEYS_BETTER1:
/* 'pathkeys' are less strict */
- /* FALLTHROUGH */
+ pg_fallthrough;
case PATHKEYS_EQUAL:
/* mark this aggregate as covered by 'currpathkeys' */
@@ -4060,7 +4063,7 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
* might get between 0 and N output rows. Offhand I think that's
* desired.)
*/
- List *paths = NIL;
+ AppendPathInput append = {0};
while (--nrows >= 0)
{
@@ -4068,13 +4071,12 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
create_group_result_path(root, grouped_rel,
grouped_rel->reltarget,
(List *) parse->havingQual);
- paths = lappend(paths, path);
+ append.subpaths = lappend(append.subpaths, path);
}
path = (Path *)
create_append_path(root,
grouped_rel,
- paths,
- NIL,
+ append,
NIL,
NULL,
0,
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 16d200cfb46..1b5b9b5ed9c 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -211,6 +211,9 @@ static List *set_windowagg_runcondition_references(PlannerInfo *root,
List *runcondition,
Plan *plan);
+static void record_elided_node(PlannerGlobal *glob, int plan_node_id,
+ NodeTag elided_type, Bitmapset *relids);
+
/*****************************************************************************
*
@@ -399,6 +402,26 @@ add_rtes_to_flat_rtable(PlannerInfo *root, bool recursing)
Index rti;
ListCell *lc;
+ /*
+ * Record enough information to make it possible for code that looks at
+ * the final range table to understand how it was constructed. (If
+ * finalrtable is still NIL, then this is the very topmost PlannerInfo,
+ * which will always have plan_name == NULL and rtoffset == 0; we omit the
+ * degenerate list entry.)
+ */
+ if (root->glob->finalrtable != NIL)
+ {
+ SubPlanRTInfo *rtinfo = makeNode(SubPlanRTInfo);
+
+ rtinfo->plan_name = root->plan_name;
+ rtinfo->rtoffset = list_length(root->glob->finalrtable);
+
+ /* When recursing = true, it's an unplanned or dummy subquery. */
+ rtinfo->dummy = recursing;
+
+ root->glob->subrtinfos = lappend(root->glob->subrtinfos, rtinfo);
+ }
+
/*
* Add the query's own RTEs to the flattened rangetable.
*
@@ -1140,7 +1163,8 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
* those are already used by RETURNING and it seems better to
* be non-conflicting.
*/
- if (splan->onConflictSet)
+ if (splan->onConflictAction == ONCONFLICT_UPDATE ||
+ splan->onConflictAction == ONCONFLICT_SELECT)
{
indexed_tlist *itlist;
@@ -1440,10 +1464,17 @@ set_subqueryscan_references(PlannerInfo *root,
if (trivial_subqueryscan(plan))
{
+ Index scanrelid;
+
/*
* We can omit the SubqueryScan node and just pull up the subplan.
*/
result = clean_up_removed_plan_level((Plan *) plan, plan->subplan);
+
+ /* Remember that we removed a SubqueryScan */
+ scanrelid = plan->scan.scanrelid + rtoffset;
+ record_elided_node(root->glob, plan->subplan->plan_node_id,
+ T_SubqueryScan, bms_make_singleton(scanrelid));
}
else
{
@@ -1871,7 +1902,17 @@ set_append_references(PlannerInfo *root,
Plan *p = (Plan *) linitial(aplan->appendplans);
if (p->parallel_aware == aplan->plan.parallel_aware)
- return clean_up_removed_plan_level((Plan *) aplan, p);
+ {
+ Plan *result;
+
+ result = clean_up_removed_plan_level((Plan *) aplan, p);
+
+ /* Remember that we removed an Append */
+ record_elided_node(root->glob, p->plan_node_id, T_Append,
+ offset_relid_set(aplan->apprelids, rtoffset));
+
+ return result;
+ }
}
/*
@@ -1939,7 +1980,17 @@ set_mergeappend_references(PlannerInfo *root,
Plan *p = (Plan *) linitial(mplan->mergeplans);
if (p->parallel_aware == mplan->plan.parallel_aware)
- return clean_up_removed_plan_level((Plan *) mplan, p);
+ {
+ Plan *result;
+
+ result = clean_up_removed_plan_level((Plan *) mplan, p);
+
+ /* Remember that we removed a MergeAppend */
+ record_elided_node(root->glob, p->plan_node_id, T_MergeAppend,
+ offset_relid_set(mplan->apprelids, rtoffset));
+
+ return result;
+ }
}
/*
@@ -3096,7 +3147,7 @@ search_indexed_tlist_for_sortgroupref(Expr *node,
* other-relation Vars by OUTER_VAR references, while leaving target Vars
* alone. Thus inner_itlist = NULL and acceptable_rel = the ID of the
* target relation should be passed.
- * 3) ON CONFLICT UPDATE SET/WHERE clauses. Here references to EXCLUDED are
+ * 3) ON CONFLICT SET and WHERE clauses. Here references to EXCLUDED are
* to be replaced with INNER_VAR references, while leaving target Vars (the
* to-be-updated relation) alone. Correspondingly inner_itlist is to be
* EXCLUDED elements, outer_itlist = NULL and acceptable_rel the target
@@ -3754,3 +3805,21 @@ extract_query_dependencies_walker(Node *node, PlannerInfo *context)
return expression_tree_walker(node, extract_query_dependencies_walker,
context);
}
+
+/*
+ * Record some details about a node removed from the plan during setrefs
+ * processing, for the benefit of code trying to reconstruct planner decisions
+ * from examination of the final plan tree.
+ */
+static void
+record_elided_node(PlannerGlobal *glob, int plan_node_id,
+ NodeTag elided_type, Bitmapset *relids)
+{
+ ElidedNode *n = makeNode(ElidedNode);
+
+ n->plan_node_id = plan_node_id;
+ n->elided_type = elided_type;
+ n->relids = relids;
+
+ glob->elidedNodes = lappend(glob->elidedNodes, n);
+}
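
ElidedNode entries accumulate in glob->elidedNodes and are copied into the
PlannedStmt (see the planner.c hunk above). A hypothetical consumer, not part
of the patch, showing how plan-examination code might look up the record for a
given node:

static ElidedNode *
find_elided_node(PlannedStmt *stmt, int plan_node_id)
{
	ListCell   *lc;

	foreach(lc, stmt->elidedNodes)
	{
		ElidedNode *n = (ElidedNode *) lfirst(lc);

		if (n->plan_node_id == plan_node_id)
			return n;
	}
	return NULL;
}
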
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index c80bfc88d82..c90f4b32733 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -88,6 +88,8 @@ typedef struct reduce_outer_joins_pass1_state
{
Relids relids; /* base relids within this subtree */
bool contains_outer; /* does subtree contain outer join(s)? */
+ Relids nullable_rels; /* base relids that are nullable within this
+ * subtree */
List *sub_states; /* List of states for subtree components */
} reduce_outer_joins_pass1_state;
@@ -161,6 +163,8 @@ static void reduce_outer_joins_pass2(Node *jtnode,
List *forced_null_vars);
static void report_reduced_full_join(reduce_outer_joins_pass2_state *state2,
int rtindex, Relids relids);
+static bool has_notnull_forced_var(PlannerInfo *root, List *forced_null_vars,
+ reduce_outer_joins_pass1_state *right_state);
static Node *remove_useless_results_recurse(PlannerInfo *root, Node *jtnode,
Node **parent_quals,
Relids *dropped_outer_joins);
@@ -3144,13 +3148,16 @@ flatten_simple_union_all(PlannerInfo *root)
* to each side separately.)
*
* Another transformation we apply here is to recognize cases like
- * SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.y IS NULL;
- * If the join clause is strict for b.y, then only null-extended rows could
- * pass the upper WHERE, and we can conclude that what the query is really
- * specifying is an anti-semijoin. We change the join type from JOIN_LEFT
- * to JOIN_ANTI. The IS NULL clause then becomes redundant, and must be
- * removed to prevent bogus selectivity calculations, but we leave it to
- * distribute_qual_to_rels to get rid of such clauses.
+ * SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.z IS NULL;
+ * If we can prove that b.z must be non-null for any matching row, either
+ * because the join clause is strict for b.z and b.z happens to be the join
+ * key b.y, or because b.z is defined NOT NULL by table constraints and is
+ * not nullable due to lower-level outer joins, then only null-extended rows
+ * could pass the upper WHERE, and we can conclude that what the query is
+ * really specifying is an anti-semijoin. We change the join type from
+ * JOIN_LEFT to JOIN_ANTI. The IS NULL clause then becomes redundant, and
+ * must be removed to prevent bogus selectivity calculations, but we leave
+ * it to distribute_qual_to_rels to get rid of such clauses.
*
* Also, we get rid of JOIN_RIGHT cases by flipping them around to become
* JOIN_LEFT. This saves some code here and in some later planner routines;
@@ -3174,8 +3181,9 @@ reduce_outer_joins(PlannerInfo *root)
* to stop descending the jointree as soon as there are no outer joins
* below our current point. This consideration forces a two-pass process.
* The first pass gathers information about which base rels appear below
- * each side of each join clause, and about whether there are outer
- * join(s) below each side of each join clause. The second pass examines
+ * each side of each join clause, about whether there are outer join(s)
+ * below each side of each join clause, and about which base rels are from
+ * the nullable side of those outer join(s). The second pass examines
* qual clauses and changes join types as it descends the tree.
*/
state1 = reduce_outer_joins_pass1((Node *) root->parse->jointree);
@@ -3243,6 +3251,7 @@ reduce_outer_joins_pass1(Node *jtnode)
result = palloc_object(reduce_outer_joins_pass1_state);
result->relids = NULL;
result->contains_outer = false;
+ result->nullable_rels = NULL;
result->sub_states = NIL;
if (jtnode == NULL)
@@ -3266,29 +3275,62 @@ reduce_outer_joins_pass1(Node *jtnode)
result->relids = bms_add_members(result->relids,
sub_state->relids);
result->contains_outer |= sub_state->contains_outer;
+ result->nullable_rels = bms_add_members(result->nullable_rels,
+ sub_state->nullable_rels);
result->sub_states = lappend(result->sub_states, sub_state);
}
}
else if (IsA(jtnode, JoinExpr))
{
JoinExpr *j = (JoinExpr *) jtnode;
- reduce_outer_joins_pass1_state *sub_state;
+ reduce_outer_joins_pass1_state *left_state;
+ reduce_outer_joins_pass1_state *right_state;
+
+ /* Recurse to children */
+ left_state = reduce_outer_joins_pass1(j->larg);
+ right_state = reduce_outer_joins_pass1(j->rarg);
/* join's own RT index is not wanted in result->relids */
- if (IS_OUTER_JOIN(j->jointype))
- result->contains_outer = true;
-
- sub_state = reduce_outer_joins_pass1(j->larg);
- result->relids = bms_add_members(result->relids,
- sub_state->relids);
- result->contains_outer |= sub_state->contains_outer;
- result->sub_states = lappend(result->sub_states, sub_state);
-
- sub_state = reduce_outer_joins_pass1(j->rarg);
- result->relids = bms_add_members(result->relids,
- sub_state->relids);
- result->contains_outer |= sub_state->contains_outer;
- result->sub_states = lappend(result->sub_states, sub_state);
+ result->relids = bms_union(left_state->relids, right_state->relids);
+
+ /* Store children's states for pass 2 */
+ result->sub_states = list_make2(left_state, right_state);
+
+ /* Collect outer join information */
+ switch (j->jointype)
+ {
+ case JOIN_INNER:
+ case JOIN_SEMI:
+ /* No new nullability; propagate state from children */
+ result->contains_outer = left_state->contains_outer ||
+ right_state->contains_outer;
+ result->nullable_rels = bms_union(left_state->nullable_rels,
+ right_state->nullable_rels);
+ break;
+ case JOIN_LEFT:
+ case JOIN_ANTI:
+ /* RHS is nullable; LHS keeps existing status */
+ result->contains_outer = true;
+ result->nullable_rels = bms_union(left_state->nullable_rels,
+ right_state->relids);
+ break;
+ case JOIN_RIGHT:
+ /* LHS is nullable; RHS keeps existing status */
+ result->contains_outer = true;
+ result->nullable_rels = bms_union(left_state->relids,
+ right_state->nullable_rels);
+ break;
+ case JOIN_FULL:
+ /* Both sides are nullable */
+ result->contains_outer = true;
+ result->nullable_rels = bms_union(left_state->relids,
+ right_state->relids);
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) j->jointype);
+ break;
+ }
}
else
elog(ERROR, "unrecognized node type: %d",
@@ -3440,15 +3482,16 @@ reduce_outer_joins_pass2(Node *jtnode,
/*
* See if we can reduce JOIN_LEFT to JOIN_ANTI. This is the case if
- * the join's own quals are strict for any var that was forced null by
- * higher qual levels. NOTE: there are other ways that we could
- * detect an anti-join, in particular if we were to check whether Vars
- * coming from the RHS must be non-null because of table constraints.
- * That seems complicated and expensive though (in particular, one
- * would have to be wary of lower outer joins). For the moment this
- * seems sufficient.
+ * any var from the RHS was forced null by higher qual levels, but is
+ * known to be non-nullable. We detect this either by seeing if the
+ * join's own quals are strict for the var, or by checking if the var
+ * is defined NOT NULL by table constraints (being careful to exclude
+ * vars that are nullable due to lower-level outer joins). In either
+ * case, the only way the higher qual clause's requirement for NULL
+ * can be met is if the join fails to match, producing a null-extended
+ * row. Thus, we can treat this as an anti-join.
*/
- if (jointype == JOIN_LEFT)
+ if (jointype == JOIN_LEFT && forced_null_vars != NIL)
{
List *nonnullable_vars;
Bitmapset *overlap;
@@ -3460,9 +3503,13 @@ reduce_outer_joins_pass2(Node *jtnode,
* It's not sufficient to check whether nonnullable_vars and
* forced_null_vars overlap: we need to know if the overlap
* includes any RHS variables.
+ *
+ * Also check if any forced-null var is defined NOT NULL by table
+ * constraints.
*/
overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars);
- if (bms_overlap(overlap, right_state->relids))
+ if (bms_overlap(overlap, right_state->relids) ||
+ has_notnull_forced_var(root, forced_null_vars, right_state))
jointype = JOIN_ANTI;
}
@@ -3598,6 +3645,97 @@ report_reduced_full_join(reduce_outer_joins_pass2_state *state2,
state2->partial_reduced = lappend(state2->partial_reduced, statep);
}
+/*
+ * has_notnull_forced_var
+ * Check if "forced_null_vars" contains any Vars belonging to the subtree
+ * indicated by "right_state" that are known to be non-nullable due to
+ * table constraints.
+ *
+ * Note that we must also consider the situation where a NOT NULL Var can be
+ * nulled by lower-level outer joins.
+ *
+ * Helper for reduce_outer_joins_pass2.
+ */
+static bool
+has_notnull_forced_var(PlannerInfo *root, List *forced_null_vars,
+ reduce_outer_joins_pass1_state *right_state)
+{
+ int varno = -1;
+
+ foreach_node(Bitmapset, attrs, forced_null_vars)
+ {
+ RangeTblEntry *rte;
+ Bitmapset *notnullattnums;
+ Bitmapset *forcednullattnums = NULL;
+ int attno;
+
+ varno++;
+
+ /* Skip empty bitmaps */
+ if (bms_is_empty(attrs))
+ continue;
+
+ /* Skip Vars that do not belong to the target relations */
+ if (!bms_is_member(varno, right_state->relids))
+ continue;
+
+ /*
+ * Skip Vars that can be nulled by lower-level outer joins within the
+ * given subtree. These Vars might be NULL even if the schema defines
+ * them as NOT NULL.
+ */
+ if (bms_is_member(varno, right_state->nullable_rels))
+ continue;
+
+ /*
+ * Iterate over attributes and adjust the bitmap indexes by
+ * FirstLowInvalidHeapAttributeNumber to get the actual attribute
+ * numbers.
+ */
+ attno = -1;
+ while ((attno = bms_next_member(attrs, attno)) >= 0)
+ {
+ AttrNumber real_attno = attno + FirstLowInvalidHeapAttributeNumber;
+
+ /* system columns cannot be NULL */
+ if (real_attno < 0)
+ return true;
+
+ forcednullattnums = bms_add_member(forcednullattnums, real_attno);
+ }
+
+ rte = rt_fetch(varno, root->parse->rtable);
+
+ /*
+ * We must skip inheritance parent tables, as some child tables may
+ * have a NOT NULL constraint for a column while others may not. This
+ * cannot happen with partitioned tables, though.
+ */
+ if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
+ {
+ bms_free(forcednullattnums);
+ continue;
+ }
+
+ /* Get the column not-null constraint information for this relation */
+ notnullattnums = find_relation_notnullatts(root, rte->relid);
+
+ /*
+ * Check if any forced-null attributes are defined as NOT NULL by
+ * table constraints.
+ */
+ if (bms_overlap(notnullattnums, forcednullattnums))
+ {
+ bms_free(forcednullattnums);
+ return true;
+ }
+
+ bms_free(forcednullattnums);
+ }
+
+ return false;
+}
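
The attno handling above follows the usual convention that per-relation
attribute bitmapsets are offset by -FirstLowInvalidHeapAttributeNumber so that
system columns (negative attnos) can be represented. A small illustration,
assuming FirstLowInvalidHeapAttributeNumber is -7 as in recent sources:

/* storing: bitmap index = attno - FirstLowInvalidHeapAttributeNumber */
int			idx = 1 - FirstLowInvalidHeapAttributeNumber;	/* attno 1 -> index 8 */

/* recovering, as in has_notnull_forced_var() */
AttrNumber	real_attno = idx + FirstLowInvalidHeapAttributeNumber;	/* == 1 */

/* ctid (attno -1) stores at index 6, so a recovered attno < 0 marks a system
 * column, which can never be NULL */
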
+
/*
* remove_useless_result_rtes
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 78c95c36dd5..f50c296e3d9 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -696,9 +696,9 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
ListCell *lc;
ListCell *lc2;
ListCell *lc3;
- List *cheapest_pathlist = NIL;
- List *ordered_pathlist = NIL;
- List *partial_pathlist = NIL;
+ AppendPathInput cheapest = {0};
+ AppendPathInput ordered = {0};
+ AppendPathInput partial = {0};
bool partial_paths_valid = true;
bool consider_parallel = true;
List *rellist;
@@ -783,7 +783,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
if (is_dummy_rel(rel))
continue;
- cheapest_pathlist = lappend(cheapest_pathlist,
+ cheapest.subpaths = lappend(cheapest.subpaths,
rel->cheapest_total_path);
if (try_sorted)
@@ -795,7 +795,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
false);
if (ordered_path != NULL)
- ordered_pathlist = lappend(ordered_pathlist, ordered_path);
+ ordered.subpaths = lappend(ordered.subpaths, ordered_path);
else
{
/*
@@ -818,20 +818,20 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
else if (rel->partial_pathlist == NIL)
partial_paths_valid = false;
else
- partial_pathlist = lappend(partial_pathlist,
- linitial(rel->partial_pathlist));
+ partial.partial_subpaths = lappend(partial.partial_subpaths,
+ linitial(rel->partial_pathlist));
}
}
/* Build result relation. */
result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
result_rel->reltarget = create_setop_pathtarget(root, tlist,
- cheapest_pathlist);
+ cheapest.subpaths);
result_rel->consider_parallel = consider_parallel;
result_rel->consider_startup = (root->tuple_fraction > 0);
/* If all UNION children were dummy rels, make the resulting rel dummy */
- if (cheapest_pathlist == NIL)
+ if (cheapest.subpaths == NIL)
{
mark_dummy_rel(result_rel);
@@ -842,8 +842,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
* Append the child results together using the cheapest paths from each
* union child.
*/
- apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
- NIL, NIL, NULL, 0, false, -1);
+ apath = (Path *) create_append_path(root, result_rel, cheapest,
+ NIL, NULL, 0, false, -1);
/*
* Estimate number of groups. For now we just assume the output is unique
@@ -862,7 +862,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
int parallel_workers = 0;
/* Find the highest number of workers requested for any subpath. */
- foreach(lc, partial_pathlist)
+ foreach(lc, partial.partial_subpaths)
{
Path *subpath = lfirst(lc);
@@ -881,14 +881,14 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
if (enable_parallel_append)
{
parallel_workers = Max(parallel_workers,
- pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
+ pg_leftmost_one_pos32(list_length(partial.partial_subpaths)) + 1);
parallel_workers = Min(parallel_workers,
max_parallel_workers_per_gather);
}
Assert(parallel_workers > 0);
papath = (Path *)
- create_append_path(root, result_rel, NIL, partial_pathlist,
+ create_append_path(root, result_rel, partial,
NIL, NULL, parallel_workers,
enable_parallel_append, -1);
gpath = (Path *)
@@ -901,7 +901,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
double dNumGroups;
bool can_sort = grouping_is_sortable(groupList);
bool can_hash = grouping_is_hashable(groupList);
- Path *first_path = linitial(cheapest_pathlist);
+ Path *first_path = linitial(cheapest.subpaths);
/*
* Estimate the number of UNION output rows. In the case when only a
@@ -911,7 +911,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
* contain Vars with varno==0, which estimate_num_groups() wouldn't
* like.
*/
- if (list_length(cheapest_pathlist) == 1 &&
+ if (list_length(cheapest.subpaths) == 1 &&
first_path->parent->reloptkind != RELOPT_UPPER_REL)
{
dNumGroups = estimate_num_groups(root,
@@ -1017,7 +1017,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
path = (Path *) create_merge_append_path(root,
result_rel,
- ordered_pathlist,
+ ordered.subpaths,
+ NIL,
union_pathkeys,
NULL);
@@ -1216,6 +1217,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
if (op->all)
{
Path *apath;
+ AppendPathInput append = {0};
+
+ append.subpaths = list_make1(lpath);
/*
* EXCEPT ALL: If the right-hand input is dummy then we can
@@ -1224,8 +1228,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
* between the set op targetlist and the targetlist of the
* left input. The Append will be removed in setrefs.c.
*/
- apath = (Path *) create_append_path(root, result_rel, list_make1(lpath),
- NIL, NIL, NULL, 0, false, -1);
+ apath = (Path *) create_append_path(root, result_rel,
+ append, NIL, NULL, 0,
+ false, -1);
add_path(result_rel, apath);
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 32204776c45..a41d81734cf 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -1547,7 +1547,7 @@ find_nonnullable_rels_walker(Node *node, bool top_level)
* the intersection of the sets of nonnullable rels, just as
* for OR. Fall through to share code.
*/
- /* FALL THRU */
+ pg_fallthrough;
case OR_EXPR:
/*
@@ -1805,7 +1805,7 @@ find_nonnullable_vars_walker(Node *node, bool top_level)
* the intersection of the sets of nonnullable vars, just as
* for OR. Fall through to share code.
*/
- /* FALL THRU */
+ pg_fallthrough;
case OR_EXPR:
/*
@@ -2705,6 +2705,7 @@ eval_const_expressions_mutator(Node *node,
bool has_null_input = false;
bool all_null_input = true;
bool has_nonconst_input = false;
+ bool has_nullable_nonconst = false;
Expr *simple;
DistinctExpr *newexpr;
@@ -2721,7 +2722,8 @@ eval_const_expressions_mutator(Node *node,
/*
* We must do our own check for NULLs because DistinctExpr has
* different results for NULL input than the underlying
- * operator does.
+ * operator does. We also check if any non-constant input is
+ * potentially nullable.
*/
foreach(arg, args)
{
@@ -2731,12 +2733,24 @@ eval_const_expressions_mutator(Node *node,
all_null_input &= ((Const *) lfirst(arg))->constisnull;
}
else
+ {
has_nonconst_input = true;
+ all_null_input = false;
+
+ if (!has_nullable_nonconst &&
+ !expr_is_nonnullable(context->root,
+ (Expr *) lfirst(arg), false))
+ has_nullable_nonconst = true;
+ }
}
- /* all constants? then can optimize this out */
if (!has_nonconst_input)
{
+ /*
+ * All inputs are constants. We can optimize this out
+ * completely.
+ */
+
/* all nulls? then not distinct */
if (all_null_input)
return makeBoolConst(false, false);
@@ -2781,6 +2795,72 @@ eval_const_expressions_mutator(Node *node,
return (Node *) csimple;
}
}
+ else if (!has_nullable_nonconst)
+ {
+ /*
+ * There are non-constant inputs, but since all of them
+ * are proven non-nullable, "IS DISTINCT FROM" semantics
+ * are much simpler.
+ */
+
+ OpExpr *eqexpr;
+
+ /*
+ * If one input is an explicit NULL constant, and the
+ * other is a non-nullable expression, the result is
+ * always TRUE.
+ */
+ if (has_null_input)
+ return makeBoolConst(true, false);
+
+ /*
+ * Otherwise, both inputs are known non-nullable. In this
+ * case, "IS DISTINCT FROM" reduces to negating the
+ * underlying "=" operator, i.e. to the standard inequality
+ * operator (usually "<>"). So we build the "=" OpExpr and
+ * negate it, which is a more efficient representation for
+ * the planner: it can enable the use of partial indexes
+ * and constraint exclusion. Furthermore, if the clause
+ * is itself negated (ie, "IS NOT DISTINCT FROM"), the
+ * resulting "=" operator can allow the planner to use index
+ * scans, merge joins, hash joins, and EC-based qual
+ * deductions.
+ */
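+
+ /*
+ * For instance (illustrative), with both columns declared NOT
+ * NULL, "a.x IS DISTINCT FROM b.y" becomes "a.x <> b.y" and
+ * "a.x IS NOT DISTINCT FROM b.y" becomes "a.x = b.y".
+ */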
+ eqexpr = makeNode(OpExpr);
+ eqexpr->opno = expr->opno;
+ eqexpr->opfuncid = expr->opfuncid;
+ eqexpr->opresulttype = BOOLOID;
+ eqexpr->opretset = expr->opretset;
+ eqexpr->opcollid = expr->opcollid;
+ eqexpr->inputcollid = expr->inputcollid;
+ eqexpr->args = args;
+ eqexpr->location = expr->location;
+
+ return eval_const_expressions_mutator(negate_clause((Node *) eqexpr),
+ context);
+ }
+ else if (has_null_input)
+ {
+ /*
+ * One input is a nullable non-constant expression, and
+ * the other is an explicit NULL constant. We can
+ * transform this to a NullTest with !argisrow, which is
+ * much more amenable to optimization.
+ */
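+ /* e.g., "x IS DISTINCT FROM NULL" simplifies to "x IS NOT NULL" */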
+
+ NullTest *nt = makeNode(NullTest);
+
+ nt->arg = (Expr *) (IsA(linitial(args), Const) ?
+ lsecond(args) : linitial(args));
+ nt->nulltesttype = IS_NOT_NULL;
+
+ /*
+ * argisrow = false is correct whether or not arg is
+ * composite
+ */
+ nt->argisrow = false;
+ nt->location = expr->location;
+
+ return eval_const_expressions_mutator((Node *) nt, context);
+ }
/*
* The expression cannot be simplified any further, so build
@@ -3630,6 +3710,9 @@ eval_const_expressions_mutator(Node *node,
context);
if (arg && IsA(arg, Const))
{
+ /*
+ * If arg is Const, simplify to constant.
+ */
Const *carg = (Const *) arg;
bool result;
@@ -3666,6 +3749,34 @@ eval_const_expressions_mutator(Node *node,
return makeBoolConst(result, false);
}
+ if (arg && expr_is_nonnullable(context->root, (Expr *) arg, false))
+ {
+ /*
+ * If arg is proven non-nullable, simplify to boolean
+ * expression or constant.
+ */
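+ /*
+ * For example, if boolean column b is declared NOT NULL, "b IS
+ * TRUE" becomes just "b" and "b IS UNKNOWN" becomes constant
+ * false.
+ */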
+ switch (btest->booltesttype)
+ {
+ case IS_TRUE:
+ case IS_NOT_FALSE:
+ return arg;
+
+ case IS_FALSE:
+ case IS_NOT_TRUE:
+ return (Node *) make_notclause((Expr *) arg);
+
+ case IS_UNKNOWN:
+ return makeBoolConst(false, false);
+
+ case IS_NOT_UNKNOWN:
+ return makeBoolConst(true, false);
+
+ default:
+ elog(ERROR, "unrecognized booltesttype: %d",
+ (int) btest->booltesttype);
+ break;
+ }
+ }
newbtest = makeNode(BooleanTest);
newbtest->arg = (Expr *) arg;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 7295438ad20..d61f328707f 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -777,10 +777,9 @@ add_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
*
* Because we don't consider parameterized paths here, we also don't
* need to consider the row counts as a measure of quality: every path will
- * produce the same number of rows. Neither do we need to consider startup
- * costs: parallelism is only used for plans that will be run to completion.
- * Therefore, this routine is much simpler than add_path: it needs to
- * consider only disabled nodes, pathkeys and total cost.
+ * produce the same number of rows. However, we do need to consider
+ * startup costs: this partial path could end up beneath a Limit node,
+ * where a fast-start plan can be the better choice.
*
* As with add_path, we pfree paths that are found to be dominated by
* another partial path; this requires that there be no other references to
@@ -818,52 +817,36 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
/* Compare pathkeys. */
keyscmp = compare_pathkeys(new_path->pathkeys, old_path->pathkeys);
- /* Unless pathkeys are incompatible, keep just one of the two paths. */
+ /*
+ * Unless pathkeys are incompatible, see if one of the paths dominates
+ * the other (both in startup and total cost). It may happen that one
+ * path has lower startup cost, the other has lower total cost.
+ */
if (keyscmp != PATHKEYS_DIFFERENT)
{
- if (unlikely(new_path->disabled_nodes != old_path->disabled_nodes))
+ PathCostComparison costcmp;
+
+ /*
+ * Do a fuzzy cost comparison with standard fuzziness limit.
+ */
+ costcmp = compare_path_costs_fuzzily(new_path, old_path,
+ STD_FUZZ_FACTOR);
+ if (costcmp == COSTS_BETTER1)
{
- if (new_path->disabled_nodes > old_path->disabled_nodes)
- accept_new = false;
- else
+ /* New path dominates unless the old path's pathkeys are better. */
+ if (keyscmp != PATHKEYS_BETTER2)
remove_old = true;
}
- else if (new_path->total_cost > old_path->total_cost
- * STD_FUZZ_FACTOR)
+ else if (costcmp == COSTS_BETTER2)
{
- /* New path costs more; keep it only if pathkeys are better. */
+ /* Old path dominates unless the new path's pathkeys are better. */
if (keyscmp != PATHKEYS_BETTER1)
accept_new = false;
}
- else if (old_path->total_cost > new_path->total_cost
- * STD_FUZZ_FACTOR)
+ else if (costcmp == COSTS_EQUAL)
{
- /* Old path costs more; keep it only if pathkeys are better. */
- if (keyscmp != PATHKEYS_BETTER2)
+ if (keyscmp == PATHKEYS_BETTER1)
remove_old = true;
- }
- else if (keyscmp == PATHKEYS_BETTER1)
- {
- /* Costs are about the same, new path has better pathkeys. */
- remove_old = true;
- }
- else if (keyscmp == PATHKEYS_BETTER2)
- {
- /* Costs are about the same, old path has better pathkeys. */
- accept_new = false;
- }
- else if (old_path->total_cost > new_path->total_cost * 1.0000000001)
- {
- /* Pathkeys are the same, and the old path costs more. */
- remove_old = true;
- }
- else
- {
- /*
- * Pathkeys are the same, and new path isn't materially
- * cheaper.
- */
- accept_new = false;
+ else if (keyscmp == PATHKEYS_BETTER2)
+ accept_new = false;
}
}
@@ -878,8 +861,13 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
}
else
{
- /* new belongs after this old path if it has cost >= old's */
- if (new_path->total_cost >= old_path->total_cost)
+ /*
+ * new belongs after this old path if it has more disabled nodes
+ * or if it has the same number of nodes but a greater total cost
+ */
+ if (new_path->disabled_nodes > old_path->disabled_nodes ||
+ (new_path->disabled_nodes == old_path->disabled_nodes &&
+ new_path->total_cost >= old_path->total_cost))
insert_at = foreach_current_index(p1) + 1;
}
@@ -909,16 +897,16 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
* add_partial_path_precheck
* Check whether a proposed new partial path could possibly get accepted.
*
- * Unlike add_path_precheck, we can ignore startup cost and parameterization,
- * since they don't matter for partial paths (see add_partial_path). But
- * we do want to make sure we don't add a partial path if there's already
- * a complete path that dominates it, since in that case the proposed path
- * is surely a loser.
+ * Unlike add_path_precheck, we can ignore parameterization, since it doesn't
+ * matter for partial paths (see add_partial_path). But we do want to make
+ * sure we don't add a partial path if there's already a complete path that
+ * dominates it, since in that case the proposed path is surely a loser.
*/
bool
add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
- Cost total_cost, List *pathkeys)
+ Cost startup_cost, Cost total_cost, List *pathkeys)
{
+ bool consider_startup = parent_rel->consider_startup;
ListCell *p1;
/*
@@ -928,25 +916,80 @@ add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
* is clearly superior to some existing partial path -- at least, modulo
* final cost computations. If so, we definitely want to consider it.
*
- * Unlike add_path(), we always compare pathkeys here. This is because we
- * expect partial_pathlist to be very short, and getting a definitive
+ * Unlike add_path(), we scan the whole list rather than breaking out as
+ * soon as the new path is rejected. This is because we expect
+ * partial_pathlist to be very short, and getting a definitive
* answer at this stage avoids the need to call add_path_precheck.
*/
foreach(p1, parent_rel->partial_pathlist)
{
Path *old_path = (Path *) lfirst(p1);
+ PathCostComparison costcmp;
PathKeysComparison keyscmp;
- keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
- if (keyscmp != PATHKEYS_DIFFERENT)
+ /*
+ * First, compare costs and disabled nodes. This logic should be
+ * identical to compare_path_costs_fuzzily, except that one of the
+ * paths hasn't been created yet, and the fuzz factor is always
+ * STD_FUZZ_FACTOR.
+ */
+ if (unlikely(old_path->disabled_nodes != disabled_nodes))
{
- if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR &&
- keyscmp != PATHKEYS_BETTER1)
- return false;
- if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR &&
- keyscmp != PATHKEYS_BETTER2)
- return true;
+ if (disabled_nodes < old_path->disabled_nodes)
+ costcmp = COSTS_BETTER1;
+ else
+ costcmp = COSTS_BETTER2;
+ }
+ else if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
+ {
+ if (consider_startup &&
+ old_path->startup_cost > startup_cost * STD_FUZZ_FACTOR)
+ costcmp = COSTS_DIFFERENT;
+ else
+ costcmp = COSTS_BETTER2;
}
+ else if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR)
+ {
+ if (consider_startup &&
+ startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR)
+ costcmp = COSTS_DIFFERENT;
+ else
+ costcmp = COSTS_BETTER1;
+ }
+ else if (startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR)
+ costcmp = COSTS_BETTER2;
+ else if (old_path->startup_cost > startup_cost * STD_FUZZ_FACTOR)
+ costcmp = COSTS_BETTER1;
+ else
+ costcmp = COSTS_EQUAL;
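+
+ /*
+ * Worked example (illustrative): if the new path has startup_cost 1
+ * and total_cost 100 while the old path has startup_cost 50 and
+ * total_cost 90, the old path is fuzzily better on total cost but
+ * fuzzily worse on startup cost, so with consider_startup enabled we
+ * get COSTS_DIFFERENT and neither path dominates.
+ */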
+
+ /*
+ * If one path wins on startup cost and the other on total cost, we
+ * can't say for sure which is better.
+ */
+ if (costcmp == COSTS_DIFFERENT)
+ continue;
+
+ /*
+ * If the two paths have different pathkeys, we can't say for sure
+ * which is better.
+ */
+ keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
+ if (keyscmp == PATHKEYS_DIFFERENT)
+ continue;
+
+ /*
+ * If the existing path is cheaper and the pathkeys are equal or worse,
+ * the new path is not interesting.
+ */
+ if (costcmp == COSTS_BETTER2 && keyscmp != PATHKEYS_BETTER1)
+ return false;
+
+ /*
+ * If the new path is cheaper and the pathkeys are equal or better,
+ * it is definitely interesting.
+ */
+ if (costcmp == COSTS_BETTER1 && keyscmp != PATHKEYS_BETTER2)
+ return true;
}
/*
@@ -954,14 +997,9 @@ add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
* clearly good enough that it might replace one. Compare it to
* non-parallel plans. If it loses even before accounting for the cost of
* the Gather node, we should definitely reject it.
- *
- * Note that we pass the total_cost to add_path_precheck twice. This is
- * because it's never advantageous to consider the startup cost of a
- * partial path; the resulting plans, if run in parallel, will be run to
- * completion.
*/
- if (!add_path_precheck(parent_rel, disabled_nodes, total_cost, total_cost,
- pathkeys, NULL))
+ if (!add_path_precheck(parent_rel, disabled_nodes, startup_cost,
+ total_cost, pathkeys, NULL))
return false;
return true;
@@ -1077,6 +1115,14 @@ create_index_path(PlannerInfo *root,
cost_index(pathnode, root, loop_count, partial_path);
+ /*
+ * cost_index will set disabled_nodes to 1 if this rel is not allowed to
+ * use index scans in general, but it doesn't have the IndexOptInfo to
+ * know whether this specific index has been disabled.
+ */
+ if (index->disabled)
+ pathnode->path.disabled_nodes = 1;
+
return pathnode;
}
@@ -1298,7 +1344,7 @@ create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
AppendPath *
create_append_path(PlannerInfo *root,
RelOptInfo *rel,
- List *subpaths, List *partial_subpaths,
+ AppendPathInput input,
List *pathkeys, Relids required_outer,
int parallel_workers, bool parallel_aware,
double rows)
@@ -1308,6 +1354,7 @@ create_append_path(PlannerInfo *root,
Assert(!parallel_aware || parallel_workers > 0);
+ pathnode->child_append_relid_sets = input.child_append_relid_sets;
pathnode->path.pathtype = T_Append;
pathnode->path.parent = rel;
pathnode->path.pathtarget = rel->reltarget;
@@ -1323,7 +1370,7 @@ create_append_path(PlannerInfo *root,
* on the simpler get_appendrel_parampathinfo. There's no point in doing
* the more expensive thing for a dummy path, either.
*/
- if (rel->reloptkind == RELOPT_BASEREL && root && subpaths != NIL)
+ if (rel->reloptkind == RELOPT_BASEREL && root && input.subpaths != NIL)
pathnode->path.param_info = get_baserel_parampathinfo(root,
rel,
required_outer);
@@ -1354,11 +1401,11 @@ create_append_path(PlannerInfo *root,
*/
Assert(pathkeys == NIL);
- list_sort(subpaths, append_total_cost_compare);
- list_sort(partial_subpaths, append_startup_cost_compare);
+ list_sort(input.subpaths, append_total_cost_compare);
+ list_sort(input.partial_subpaths, append_startup_cost_compare);
}
- pathnode->first_partial_path = list_length(subpaths);
- pathnode->subpaths = list_concat(subpaths, partial_subpaths);
+ pathnode->first_partial_path = list_length(input.subpaths);
+ pathnode->subpaths = list_concat(input.subpaths, input.partial_subpaths);
/*
* Apply query-wide LIMIT if known and path is for sole base relation.
@@ -1470,6 +1517,7 @@ MergeAppendPath *
create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
+ List *child_append_relid_sets,
List *pathkeys,
Relids required_outer)
{
@@ -1485,6 +1533,7 @@ create_merge_append_path(PlannerInfo *root,
*/
Assert(bms_is_empty(rel->lateral_relids) && bms_is_empty(required_outer));
+ pathnode->child_append_relid_sets = child_append_relid_sets;
pathnode->path.pathtype = T_MergeAppend;
pathnode->path.parent = rel;
pathnode->path.pathtarget = rel->reltarget;
@@ -3932,11 +3981,12 @@ reparameterize_path(PlannerInfo *root, Path *path,
case T_Append:
{
AppendPath *apath = (AppendPath *) path;
- List *childpaths = NIL;
- List *partialpaths = NIL;
+ AppendPathInput new_append = {0};
int i;
ListCell *lc;
+ new_append.child_append_relid_sets = apath->child_append_relid_sets;
+
/* Reparameterize the children */
i = 0;
foreach(lc, apath->subpaths)
@@ -3950,13 +4000,13 @@ reparameterize_path(PlannerInfo *root, Path *path,
return NULL;
/* We have to re-split the regular and partial paths */
if (i < apath->first_partial_path)
- childpaths = lappend(childpaths, spath);
+ new_append.subpaths = lappend(new_append.subpaths, spath);
else
- partialpaths = lappend(partialpaths, spath);
+ new_append.partial_subpaths = lappend(new_append.partial_subpaths, spath);
i++;
}
return (Path *)
- create_append_path(root, rel, childpaths, partialpaths,
+ create_append_path(root, rel, new_append,
apath->path.pathkeys, required_outer,
apath->path.parallel_workers,
apath->path.parallel_aware,
@@ -3971,10 +4021,10 @@ reparameterize_path(PlannerInfo *root, Path *path,
spath = reparameterize_path(root, spath,
required_outer,
loop_count);
- enabled =
- (mpath->path.disabled_nodes <= spath->disabled_nodes);
if (spath == NULL)
return NULL;
+ enabled =
+ (mpath->path.disabled_nodes <= spath->disabled_nodes);
return (Path *) create_material_path(rel, spath, enabled);
}
case T_Memoize:
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 1b20bc805e6..b2fbd6a082b 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -57,9 +57,6 @@
/* GUC parameter */
int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION;
-/* Hook for plugins to get control in get_relation_info() */
-get_relation_info_hook_type get_relation_info_hook = NULL;
-
typedef struct NotnullHashEntry
{
Oid relid; /* OID of the relation */
@@ -571,17 +568,6 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
set_relation_partition_info(root, rel, relation);
table_close(relation, NoLock);
-
- /*
- * Allow a plugin to editorialize on the info we obtained from the
- * catalogs. Actions might include altering the assumed relation size,
- * removing an index, or adding a hypothetical index to the indexlist.
- *
- * An extension can also modify rel->pgs_mask here to control path
- * generation.
- */
- if (get_relation_info_hook)
- (*get_relation_info_hook) (root, relationObjectId, inhparent, rel);
}
/*
@@ -841,9 +827,9 @@ infer_arbiter_indexes(PlannerInfo *root)
/*
* Quickly return NIL for ON CONFLICT DO NOTHING without an inference
- * specification or named constraint. ON CONFLICT DO UPDATE statements
- * must always provide one or the other (but parser ought to have caught
- * that already).
+ * specification or named constraint. ON CONFLICT DO SELECT/UPDATE
+ * statements must always provide one or the other (but parser ought to
+ * have caught that already).
*/
if (onconflict->arbiterElems == NIL &&
onconflict->constraint == InvalidOid)
@@ -1024,10 +1010,17 @@ infer_arbiter_indexes(PlannerInfo *root)
*/
if (indexOidFromConstraint == idxForm->indexrelid)
{
- if (idxForm->indisexclusion && onconflict->action == ONCONFLICT_UPDATE)
+ /*
+ * ON CONFLICT DO UPDATE and ON CONFLICT DO SELECT are not
+ * supported with exclusion constraints.
+ */
+ if (idxForm->indisexclusion &&
+ (onconflict->action == ONCONFLICT_UPDATE ||
+ onconflict->action == ONCONFLICT_SELECT))
ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("ON CONFLICT DO UPDATE not supported with exclusion constraints")));
+ errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("ON CONFLICT DO %s not supported with exclusion constraints",
+ onconflict->action == ONCONFLICT_UPDATE ? "UPDATE" : "SELECT"));
/* Consider this one a match already */
results = lappend_oid(results, idxForm->indexrelid);
@@ -1037,10 +1030,12 @@ infer_arbiter_indexes(PlannerInfo *root)
else if (indexOidFromConstraint != InvalidOid)
{
/*
- * In the case of "ON constraint_name DO UPDATE" we need to skip
- * non-unique candidates.
+ * In the case of "ON constraint_name DO SELECT/UPDATE" we need to
+ * skip non-unique candidates.
*/
- if (!idxForm->indisunique && onconflict->action == ONCONFLICT_UPDATE)
+ if (!idxForm->indisunique &&
+ (onconflict->action == ONCONFLICT_UPDATE ||
+ onconflict->action == ONCONFLICT_SELECT))
continue;
}
else
diff --git a/src/backend/optimizer/util/predtest.c b/src/backend/optimizer/util/predtest.c
index 26858d1d2b0..fe15881af4e 100644
--- a/src/backend/optimizer/util/predtest.c
+++ b/src/backend/optimizer/util/predtest.c
@@ -109,7 +109,8 @@ static bool operator_same_subexprs_proof(Oid pred_op, Oid clause_op,
static bool operator_same_subexprs_lookup(Oid pred_op, Oid clause_op,
bool refute_it);
static Oid get_btree_test_op(Oid pred_op, Oid clause_op, bool refute_it);
-static void InvalidateOprProofCacheCallBack(Datum arg, int cacheid, uint32 hashvalue);
+static void InvalidateOprProofCacheCallBack(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
/*
@@ -2343,7 +2344,8 @@ get_btree_test_op(Oid pred_op, Oid clause_op, bool refute_it)
* Callback for pg_amop inval events
*/
static void
-InvalidateOprProofCacheCallBack(Datum arg, int cacheid, uint32 hashvalue)
+InvalidateOprProofCacheCallBack(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
HASH_SEQ_STATUS status;
OprProofCacheEntry *hentry;
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index a714c83f1ba..4a89eda0142 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -47,6 +47,9 @@ typedef struct JoinHashEntry
RelOptInfo *join_rel;
} JoinHashEntry;
+/* Hook for plugins to get control in build_simple_rel() */
+build_simple_rel_hook_type build_simple_rel_hook = NULL;
+
/* Hook for plugins to get control during joinrel setup */
joinrel_setup_hook_type joinrel_setup_hook = NULL;
@@ -394,6 +397,18 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
break;
}
+ /*
+ * Allow a plugin to editorialize on the new RelOptInfo. This could
+ * involve adjusting the information which get_relation_info
+ * obtained from the catalogs, such as altering the assumed relation size,
+ * removing an index, or adding a hypothetical index to the indexlist.
+ *
+ * An extension can also modify rel->pgs_mask here to control path
+ * generation.
+ */
+ if (build_simple_rel_hook)
+ (*build_simple_rel_hook) (root, rel, rte);
+
/*
* We must apply the partially filled in RelOptInfo before calling
* apply_child_basequals due to some transformations within that function
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 029ca3b68c3..539c16c4f79 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -650,14 +650,13 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
ListCell *icols;
ListCell *attnos;
ListCell *lc;
- bool isOnConflictUpdate;
+ bool requiresUpdatePerm;
AclMode targetPerms;
/* There can't be any outer WITH to worry about */
Assert(pstate->p_ctenamespace == NIL);
qry->commandType = CMD_INSERT;
- pstate->p_is_insert = true;
/* process the WITH clause independently of all else */
if (stmt->withClause)
@@ -669,8 +668,14 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
qry->override = stmt->override;
- isOnConflictUpdate = (stmt->onConflictClause &&
- stmt->onConflictClause->action == ONCONFLICT_UPDATE);
+ /*
+ * ON CONFLICT DO UPDATE and ON CONFLICT DO SELECT FOR UPDATE/SHARE
+ * require UPDATE permission on the target relation.
+ */
+ requiresUpdatePerm = (stmt->onConflictClause &&
+ (stmt->onConflictClause->action == ONCONFLICT_UPDATE ||
+ (stmt->onConflictClause->action == ONCONFLICT_SELECT &&
+ stmt->onConflictClause->lockStrength != LCS_NONE)));
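+
+ /*
+ * For example (illustrative): "INSERT ... ON CONFLICT (id) DO SELECT
+ * FOR UPDATE RETURNING *" locks the conflicting row, so it needs the
+ * same UPDATE privilege as DO UPDATE, whereas a plain DO SELECT does
+ * not.
+ */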
/*
* We have three cases to deal with: DEFAULT VALUES (selectStmt == NULL),
@@ -720,7 +725,7 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
* to the joinlist or namespace.
*/
targetPerms = ACL_INSERT;
- if (isOnConflictUpdate)
+ if (requiresUpdatePerm)
targetPerms |= ACL_UPDATE;
qry->resultRelation = setTargetTable(pstate, stmt->relation,
false, false, targetPerms);
@@ -1027,6 +1032,15 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
false, true, true);
}
+ /* ON CONFLICT DO SELECT requires a RETURNING clause */
+ if (stmt->onConflictClause &&
+ stmt->onConflictClause->action == ONCONFLICT_SELECT &&
+ !stmt->returningClause)
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("ON CONFLICT DO SELECT requires a RETURNING clause"),
+ parser_errposition(pstate, stmt->onConflictClause->location));
+
/* Process ON CONFLICT, if any. */
if (stmt->onConflictClause)
qry->onConflict = transformOnConflictClause(pstate,
@@ -1185,12 +1199,13 @@ transformOnConflictClause(ParseState *pstate,
OnConflictExpr *result;
/*
- * If this is ON CONFLICT ... UPDATE, first create the range table entry
- * for the EXCLUDED pseudo relation, so that that will be present while
- * processing arbiter expressions. (You can't actually reference it from
- * there, but this provides a useful error message if you try.)
+ * If this is ON CONFLICT DO SELECT/UPDATE, first create the range table
+ * entry for the EXCLUDED pseudo relation, so that that will be present
+ * while processing arbiter expressions. (You can't actually reference it
+ * from there, but this provides a useful error message if you try.)
*/
- if (onConflictClause->action == ONCONFLICT_UPDATE)
+ if (onConflictClause->action == ONCONFLICT_UPDATE ||
+ onConflictClause->action == ONCONFLICT_SELECT)
{
Relation targetrel = pstate->p_target_relation;
RangeTblEntry *exclRte;
@@ -1219,28 +1234,22 @@ transformOnConflictClause(ParseState *pstate,
transformOnConflictArbiter(pstate, onConflictClause, &arbiterElems,
&arbiterWhere, &arbiterConstraint);
- /* Process DO UPDATE */
- if (onConflictClause->action == ONCONFLICT_UPDATE)
+ /* Process DO SELECT/UPDATE */
+ if (onConflictClause->action == ONCONFLICT_UPDATE ||
+ onConflictClause->action == ONCONFLICT_SELECT)
{
- /*
- * Expressions in the UPDATE targetlist need to be handled like UPDATE
- * not INSERT. We don't need to save/restore this because all INSERT
- * expressions have been parsed already.
- */
- pstate->p_is_insert = false;
-
/*
* Add the EXCLUDED pseudo relation to the query namespace, making it
- * available in the UPDATE subexpressions.
+ * available in SET and WHERE subexpressions.
*/
addNSItemToQuery(pstate, exclNSItem, false, true, true);
- /*
- * Now transform the UPDATE subexpressions.
- */
- onConflictSet =
- transformUpdateTargetList(pstate, onConflictClause->targetList);
+ /* Process the UPDATE SET clause */
+ if (onConflictClause->action == ONCONFLICT_UPDATE)
+ onConflictSet =
+ transformUpdateTargetList(pstate, onConflictClause->targetList);
+ /* Process the SELECT/UPDATE WHERE clause */
onConflictWhere = transformWhereClause(pstate,
onConflictClause->whereClause,
EXPR_KIND_WHERE, "WHERE");
@@ -1254,13 +1263,14 @@ transformOnConflictClause(ParseState *pstate,
pstate->p_namespace = list_delete_last(pstate->p_namespace);
}
- /* Finally, build ON CONFLICT DO [NOTHING | UPDATE] expression */
+ /* Finally, build ON CONFLICT DO [NOTHING | SELECT | UPDATE] expression */
result = makeNode(OnConflictExpr);
result->action = onConflictClause->action;
result->arbiterElems = arbiterElems;
result->arbiterWhere = arbiterWhere;
result->constraint = arbiterConstraint;
+ result->lockStrength = onConflictClause->lockStrength;
result->onConflictSet = onConflictSet;
result->onConflictWhere = onConflictWhere;
result->exclRelIndex = exclRelIndex;
@@ -2495,7 +2505,6 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
Node *qual;
qry->commandType = CMD_UPDATE;
- pstate->p_is_insert = false;
/* process the WITH clause independently of all else */
if (stmt->withClause)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 713ee5c10a2..c567252acc4 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -481,7 +481,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <ival> OptNoLog
%type <oncommit> OnCommitOption
-%type <ival> for_locking_strength
+%type <ival> for_locking_strength opt_for_locking_strength
%type <node> for_locking_item
%type <list> for_locking_clause opt_for_locking_clause for_locking_items
%type <list> locked_rels_list
@@ -12496,12 +12496,24 @@ insert_column_item:
;
opt_on_conflict:
+ ON CONFLICT opt_conf_expr DO SELECT opt_for_locking_strength where_clause
+ {
+ $$ = makeNode(OnConflictClause);
+ $$->action = ONCONFLICT_SELECT;
+ $$->infer = $3;
+ $$->targetList = NIL;
+ $$->lockStrength = $6;
+ $$->whereClause = $7;
+ $$->location = @1;
+ }
+ |
ON CONFLICT opt_conf_expr DO UPDATE SET set_clause_list where_clause
{
$$ = makeNode(OnConflictClause);
$$->action = ONCONFLICT_UPDATE;
$$->infer = $3;
$$->targetList = $7;
+ $$->lockStrength = LCS_NONE;
$$->whereClause = $8;
$$->location = @1;
}
@@ -12512,6 +12524,7 @@ opt_on_conflict:
$$->action = ONCONFLICT_NOTHING;
$$->infer = $3;
$$->targetList = NIL;
+ $$->lockStrength = LCS_NONE;
$$->whereClause = NULL;
$$->location = @1;
}
@@ -13741,6 +13754,11 @@ for_locking_strength:
| FOR KEY SHARE { $$ = LCS_FORKEYSHARE; }
;
+opt_for_locking_strength:
+ for_locking_strength { $$ = $1; }
+ | /* EMPTY */ { $$ = LCS_NONE; }
+ ;
+
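+/*
+ * Illustrative example of the new syntax (not part of the grammar):
+ *
+ *   INSERT INTO t VALUES (1, 'x')
+ *     ON CONFLICT (id) DO SELECT FOR UPDATE RETURNING *;
+ *
+ * With the locking strength omitted, lockStrength is LCS_NONE and the
+ * conflicting row is returned without being locked.
+ */
+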
locked_rels_list:
OF qualified_name_list { $$ = $2; }
| /* EMPTY */ { $$ = NIL; }
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index e35fd25c9bb..06b65d4a605 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -3373,13 +3373,15 @@ transformOnConflictArbiter(ParseState *pstate,
*arbiterWhere = NULL;
*constraint = InvalidOid;
- if (onConflictClause->action == ONCONFLICT_UPDATE && !infer)
+ if ((onConflictClause->action == ONCONFLICT_UPDATE ||
+ onConflictClause->action == ONCONFLICT_SELECT) && !infer)
ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("ON CONFLICT DO UPDATE requires inference specification or constraint name"),
- errhint("For example, ON CONFLICT (column_name)."),
- parser_errposition(pstate,
- exprLocation((Node *) onConflictClause))));
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("ON CONFLICT DO %s requires inference specification or constraint name",
+ onConflictClause->action == ONCONFLICT_UPDATE ? "UPDATE" : "SELECT"),
+ errhint("For example, ON CONFLICT (column_name)."),
+ parser_errposition(pstate,
+ exprLocation((Node *) onConflictClause)));
/*
* To simplify certain aspects of its design, speculative insertion into
diff --git a/src/backend/parser/parse_jsontable.c b/src/backend/parser/parse_jsontable.c
index c28ae99dee8..32a1e8629b2 100644
--- a/src/backend/parser/parse_jsontable.c
+++ b/src/backend/parser/parse_jsontable.c
@@ -312,7 +312,7 @@ transformJsonTableColumns(JsonTableParseContext *cxt, List *columns,
rawc->wrapper != JSW_UNSPEC)
rawc->coltype = JTC_FORMATTED;
- /* FALLTHROUGH */
+ pg_fallthrough;
case JTC_FORMATTED:
case JTC_EXISTS:
{
diff --git a/src/backend/parser/parse_merge.c b/src/backend/parser/parse_merge.c
index e08dc18dd75..0a70d48fd4c 100644
--- a/src/backend/parser/parse_merge.c
+++ b/src/backend/parser/parse_merge.c
@@ -307,8 +307,6 @@ transformMergeStmt(ParseState *pstate, MergeStmt *stmt)
List *icolumns;
List *attrnos;
- pstate->p_is_insert = true;
-
icolumns = checkInsertTargets(pstate,
mergeWhenClause->targetList,
&attrnos);
@@ -381,12 +379,9 @@ transformMergeStmt(ParseState *pstate, MergeStmt *stmt)
}
break;
case CMD_UPDATE:
- {
- pstate->p_is_insert = false;
- action->targetList =
- transformUpdateTargetList(pstate,
- mergeWhenClause->targetList);
- }
+ action->targetList =
+ transformUpdateTargetList(pstate,
+ mergeWhenClause->targetList);
break;
case CMD_DELETE:
break;
diff --git a/src/backend/parser/parse_oper.c b/src/backend/parser/parse_oper.c
index 768e4cff9c5..a6b402f2d7b 100644
--- a/src/backend/parser/parse_oper.c
+++ b/src/backend/parser/parse_oper.c
@@ -79,7 +79,8 @@ static bool make_oper_cache_key(ParseState *pstate, OprCacheKey *key,
int location);
static Oid find_oper_cache_entry(OprCacheKey *key);
static void make_oper_cache_entry(OprCacheKey *key, Oid opr_oid);
-static void InvalidateOprCacheCallBack(Datum arg, int cacheid, uint32 hashvalue);
+static void InvalidateOprCacheCallBack(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
/*
@@ -1076,7 +1077,8 @@ make_oper_cache_entry(OprCacheKey *key, Oid opr_oid)
* Callback for pg_operator and pg_cast inval events
*/
static void
-InvalidateOprCacheCallBack(Datum arg, int cacheid, uint32 hashvalue)
+InvalidateOprCacheCallBack(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
HASH_SEQ_STATUS status;
OprCacheEntry *hentry;
diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c
index b5a2f915b67..dbf5b2b5c01 100644
--- a/src/backend/parser/parse_target.c
+++ b/src/backend/parser/parse_target.c
@@ -438,6 +438,7 @@ markTargetListOrigin(ParseState *pstate, TargetEntry *tle,
* pstate parse state
* expr expression to be modified
* exprKind indicates which type of statement we're dealing with
+ * (EXPR_KIND_INSERT_TARGET or EXPR_KIND_UPDATE_TARGET)
* colname target column name (ie, name of attribute to be assigned to)
* attrno target attribute number
* indirection subscripts/field names for target column, if any
@@ -471,7 +472,8 @@ transformAssignedExpr(ParseState *pstate,
* set p_expr_kind here because we can parse subscripts without going
* through transformExpr().
*/
- Assert(exprKind != EXPR_KIND_NONE);
+ Assert(exprKind == EXPR_KIND_INSERT_TARGET ||
+ exprKind == EXPR_KIND_UPDATE_TARGET);
sv_expr_kind = pstate->p_expr_kind;
pstate->p_expr_kind = exprKind;
@@ -530,7 +532,7 @@ transformAssignedExpr(ParseState *pstate,
{
Node *colVar;
- if (pstate->p_is_insert)
+ if (exprKind == EXPR_KIND_INSERT_TARGET)
{
/*
* The command is INSERT INTO table (col.something) ... so there
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index b5f4c72459d..cc244c49e9e 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -918,7 +918,7 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column)
errmsg("primary key constraints are not supported on foreign tables"),
parser_errposition(cxt->pstate,
constraint->location)));
- /* FALL THRU */
+ pg_fallthrough;
case CONSTR_UNIQUE:
if (cxt->isforeign)
diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c
index a4bbb10a3b7..6d979a08fd3 100644
--- a/src/backend/partitioning/partprune.c
+++ b/src/backend/partitioning/partprune.c
@@ -2880,7 +2880,7 @@ get_matching_list_bounds(PartitionPruneContext *context,
case BTGreaterEqualStrategyNumber:
inclusive = true;
- /* fall through */
+ pg_fallthrough;
case BTGreaterStrategyNumber:
off = partition_list_bsearch(partsupfunc,
partcollation,
@@ -2915,7 +2915,7 @@ get_matching_list_bounds(PartitionPruneContext *context,
case BTLessEqualStrategyNumber:
inclusive = true;
- /* fall through */
+ pg_fallthrough;
case BTLessStrategyNumber:
off = partition_list_bsearch(partsupfunc,
partcollation,
@@ -3162,7 +3162,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
case BTGreaterEqualStrategyNumber:
inclusive = true;
- /* fall through */
+ pg_fallthrough;
case BTGreaterStrategyNumber:
/*
@@ -3243,7 +3243,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
case BTLessEqualStrategyNumber:
inclusive = true;
- /* fall through */
+ pg_fallthrough;
case BTLessStrategyNumber:
/*
@@ -3726,19 +3726,19 @@ match_boolean_partition_clause(Oid partopfamily, Expr *clause, Expr *partkey,
{
case IS_NOT_TRUE:
*notclause = true;
- /* fall through */
+ pg_fallthrough;
case IS_TRUE:
*outconst = (Expr *) makeBoolConst(true, false);
return PARTCLAUSE_MATCH_CLAUSE;
case IS_NOT_FALSE:
*notclause = true;
- /* fall through */
+ pg_fallthrough;
case IS_FALSE:
*outconst = (Expr *) makeBoolConst(false, false);
return PARTCLAUSE_MATCH_CLAUSE;
case IS_NOT_UNKNOWN:
*notclause = true;
- /* fall through */
+ pg_fallthrough;
case IS_UNKNOWN:
return PARTCLAUSE_MATCH_NULLNESS;
default:
diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
index 3cd3544fa2b..2e3886cf9fe 100644
--- a/src/backend/port/sysv_shmem.c
+++ b/src/backend/port/sysv_shmem.c
@@ -855,7 +855,7 @@ PGSharedMemoryCreate(Size size,
* Initialize space allocation status for segment.
*/
hdr->totalsize = size;
- hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
+ hdr->content_offset = MAXALIGN(sizeof(PGShmemHeader));
*shim = hdr;
/* Save info for possible future use */
diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c
index 7cb8b4c9b60..794e4fcb2ad 100644
--- a/src/backend/port/win32_shmem.c
+++ b/src/backend/port/win32_shmem.c
@@ -389,7 +389,7 @@ PGSharedMemoryCreate(Size size,
* Initialize space allocation status for segment.
*/
hdr->totalsize = size;
- hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
+ hdr->content_offset = MAXALIGN(sizeof(PGShmemHeader));
hdr->dsm_control = 0;
/* Save info for possible future use */
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 22379de1e31..6fde740465f 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -385,7 +385,6 @@ AutoVacLauncherMain(const void *startup_data, size_t startup_data_len)
PostmasterContext = NULL;
}
- MyBackendType = B_AUTOVAC_LAUNCHER;
init_ps_display(NULL);
ereport(DEBUG1,
@@ -1398,7 +1397,6 @@ AutoVacWorkerMain(const void *startup_data, size_t startup_data_len)
PostmasterContext = NULL;
}
- MyBackendType = B_AUTOVAC_WORKER;
init_ps_display(NULL);
Assert(GetProcessingMode() == InitProcessing);
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 65deabe91a7..8678ea4e139 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -120,22 +120,28 @@ static const struct
{
{
- "ParallelWorkerMain", ParallelWorkerMain
+ .fn_name = "ParallelWorkerMain",
+ .fn_addr = ParallelWorkerMain
},
{
- "ApplyLauncherMain", ApplyLauncherMain
+ .fn_name = "ApplyLauncherMain",
+ .fn_addr = ApplyLauncherMain
},
{
- "ApplyWorkerMain", ApplyWorkerMain
+ .fn_name = "ApplyWorkerMain",
+ .fn_addr = ApplyWorkerMain
},
{
- "ParallelApplyWorkerMain", ParallelApplyWorkerMain
+ .fn_name = "ParallelApplyWorkerMain",
+ .fn_addr = ParallelApplyWorkerMain
},
{
- "TableSyncWorkerMain", TableSyncWorkerMain
+ .fn_name = "TableSyncWorkerMain",
+ .fn_addr = TableSyncWorkerMain
},
{
- "SequenceSyncWorkerMain", SequenceSyncWorkerMain
+ .fn_name = "SequenceSyncWorkerMain",
+ .fn_addr = SequenceSyncWorkerMain
}
};
@@ -712,20 +718,6 @@ SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel)
return true;
}
-/*
- * Standard SIGTERM handler for background workers
- */
-static void
-bgworker_die(SIGNAL_ARGS)
-{
- sigprocmask(SIG_SETMASK, &BlockSig, NULL);
-
- ereport(FATAL,
- (errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating background worker \"%s\" due to administrator command",
- MyBgworkerEntry->bgw_type)));
-}
-
/*
* Main entry point for background worker processes.
*/
@@ -753,7 +745,6 @@ BackgroundWorkerMain(const void *startup_data, size_t startup_data_len)
}
MyBgworkerEntry = worker;
- MyBackendType = B_BG_WORKER;
init_ps_display(worker->bgw_name);
Assert(GetProcessingMode() == InitProcessing);
@@ -782,7 +773,7 @@ BackgroundWorkerMain(const void *startup_data, size_t startup_data_len)
pqsignal(SIGUSR1, SIG_IGN);
pqsignal(SIGFPE, SIG_IGN);
}
- pqsignal(SIGTERM, bgworker_die);
+ pqsignal(SIGTERM, die);
/* SIGQUIT handler was already set up by InitPostmasterChild */
pqsignal(SIGHUP, SIG_IGN);
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 80e3088fc7e..0956bd39a85 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -94,7 +94,6 @@ BackgroundWriterMain(const void *startup_data, size_t startup_data_len)
Assert(startup_data_len == 0);
- MyBackendType = B_BG_WRITER;
AuxiliaryProcessMainCommon();
/*
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 6482c21b8f9..e03c19123bc 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -199,7 +199,6 @@ CheckpointerMain(const void *startup_data, size_t startup_data_len)
Assert(startup_data_len == 0);
- MyBackendType = B_CHECKPOINTER;
AuxiliaryProcessMainCommon();
CheckpointerShmem->checkpointer_pid = MyProcPid;
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index cea229ad6a4..e9134b9751b 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -96,7 +96,6 @@ typedef struct
HANDLE UsedShmemSegID;
#endif
void *UsedShmemSegAddr;
- slock_t *ShmemLock;
#ifdef USE_INJECTION_POINTS
struct InjectionPointsCtl *ActiveInjectionPoints;
#endif
@@ -105,7 +104,6 @@ typedef struct
char **LWLockTrancheNames;
int *LWLockCounter;
LWLockPadded *MainLWLockArray;
- slock_t *ProcStructLock;
PROC_HDR *ProcGlobal;
PGPROC *AuxiliaryProcs;
PGPROC *PreparedXactProcs;
@@ -179,7 +177,7 @@ typedef struct
} child_process_kind;
static child_process_kind child_process_kinds[] = {
-#define PG_PROCTYPE(bktype, description, main_func, shmem_attach) \
+#define PG_PROCTYPE(bktype, bkcategory, description, main_func, shmem_attach) \
[bktype] = {description, main_func, shmem_attach},
#include "postmaster/proctypelist.h"
#undef PG_PROCTYPE
@@ -224,6 +222,8 @@ postmaster_child_launch(BackendType child_type, int child_slot,
pid = fork_process();
if (pid == 0) /* child */
{
+ MyBackendType = child_type;
+
/* Capture and transfer timings that may be needed for logging */
if (IsExternalConnectionBackend(child_type))
{
@@ -608,6 +608,7 @@ SubPostmasterMain(int argc, char *argv[])
child_type = (BackendType) atoi(child_kind);
if (child_type <= B_INVALID || child_type > BACKEND_NUM_TYPES - 1)
elog(ERROR, "unknown child kind %s", child_kind);
+ MyBackendType = child_type;
/* Read in the variables file */
read_backend_variables(argv[2], &startup_data, &startup_data_len);
@@ -676,7 +677,7 @@ SubPostmasterMain(int argc, char *argv[])
/* Restore basic shared memory pointers */
if (UsedShmemSegAddr != NULL)
- InitShmemAccess(UsedShmemSegAddr);
+ InitShmemAllocator(UsedShmemSegAddr);
/*
* Run the appropriate Main function
@@ -724,8 +725,6 @@ save_backend_variables(BackendParameters *param,
param->UsedShmemSegID = UsedShmemSegID;
param->UsedShmemSegAddr = UsedShmemSegAddr;
- param->ShmemLock = ShmemLock;
-
#ifdef USE_INJECTION_POINTS
param->ActiveInjectionPoints = ActiveInjectionPoints;
#endif
@@ -735,7 +734,6 @@ save_backend_variables(BackendParameters *param,
param->LWLockTrancheNames = LWLockTrancheNames;
param->LWLockCounter = LWLockCounter;
param->MainLWLockArray = MainLWLockArray;
- param->ProcStructLock = ProcStructLock;
param->ProcGlobal = ProcGlobal;
param->AuxiliaryProcs = AuxiliaryProcs;
param->PreparedXactProcs = PreparedXactProcs;
@@ -986,8 +984,6 @@ restore_backend_variables(BackendParameters *param)
UsedShmemSegID = param->UsedShmemSegID;
UsedShmemSegAddr = param->UsedShmemSegAddr;
- ShmemLock = param->ShmemLock;
-
#ifdef USE_INJECTION_POINTS
ActiveInjectionPoints = param->ActiveInjectionPoints;
#endif
@@ -997,7 +993,6 @@ restore_backend_variables(BackendParameters *param)
LWLockTrancheNames = param->LWLockTrancheNames;
LWLockCounter = param->LWLockCounter;
MainLWLockArray = param->MainLWLockArray;
- ProcStructLock = param->ProcStructLock;
ProcGlobal = param->ProcGlobal;
AuxiliaryProcs = param->AuxiliaryProcs;
PreparedXactProcs = param->PreparedXactProcs;
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 1a20387c4bd..82731e452fc 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -222,7 +222,6 @@ PgArchiverMain(const void *startup_data, size_t startup_data_len)
{
Assert(startup_data_len == 0);
- MyBackendType = B_ARCHIVER;
AuxiliaryProcessMainCommon();
/*
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index d6133bfebc6..3fac46c402b 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -625,7 +625,7 @@ PostmasterMain(int argc, char *argv[])
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("--%s must be first argument", optarg)));
- /* FALLTHROUGH */
+ pg_fallthrough;
case 'c':
{
char *name,
@@ -4217,19 +4217,18 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
case PM_RUN:
if (start_time == BgWorkerStart_RecoveryFinished)
return true;
- /* fall through */
+ pg_fallthrough;
case PM_HOT_STANDBY:
if (start_time == BgWorkerStart_ConsistentState)
return true;
- /* fall through */
+ pg_fallthrough;
case PM_RECOVERY:
case PM_STARTUP:
case PM_INIT:
if (start_time == BgWorkerStart_PostmasterStart)
return true;
- /* fall through */
}
return false;
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index a1a4f65f9a9..cdbe53dd262 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -217,7 +217,6 @@ StartupProcessMain(const void *startup_data, size_t startup_data_len)
{
Assert(startup_data_len == 0);
- MyBackendType = B_STARTUP;
AuxiliaryProcessMainCommon();
/* Arrange to clean up at startup process exit */
diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c
index 1c443b3d126..86c5e376b40 100644
--- a/src/backend/postmaster/syslogger.c
+++ b/src/backend/postmaster/syslogger.c
@@ -206,7 +206,6 @@ SysLoggerMain(const void *startup_data, size_t startup_data_len)
now = MyStartTime;
- MyBackendType = B_LOGGER;
init_ps_display(NULL);
/*
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index c3d56c866d3..742137edad6 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -234,7 +234,6 @@ WalSummarizerMain(const void *startup_data, size_t startup_data_len)
Assert(startup_data_len == 0);
- MyBackendType = B_WAL_SUMMARIZER;
AuxiliaryProcessMainCommon();
ereport(DEBUG1,
@@ -242,12 +241,9 @@ WalSummarizerMain(const void *startup_data, size_t startup_data_len)
/*
* Properly accept or ignore signals the postmaster might send us
- *
- * We have no particular use for SIGINT at the moment, but seems
- * reasonable to treat like SIGTERM.
*/
pqsignal(SIGHUP, SignalHandlerForConfigReload);
- pqsignal(SIGINT, SignalHandlerForShutdownRequest);
+ pqsignal(SIGINT, SIG_IGN); /* no query to cancel */
pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
/* SIGQUIT handler was already set up by InitPostmasterChild */
pqsignal(SIGALRM, SIG_IGN);
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 38ec8a4c8c7..7c0e2809c17 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -94,17 +94,13 @@ WalWriterMain(const void *startup_data, size_t startup_data_len)
Assert(startup_data_len == 0);
- MyBackendType = B_WAL_WRITER;
AuxiliaryProcessMainCommon();
/*
* Properly accept or ignore signals the postmaster might send us
- *
- * We have no particular use for SIGINT at the moment, but seems
- * reasonable to treat like SIGTERM.
*/
pqsignal(SIGHUP, SignalHandlerForConfigReload);
- pqsignal(SIGINT, SignalHandlerForShutdownRequest);
+ pqsignal(SIGINT, SIG_IGN); /* no query to cancel */
pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
/* SIGQUIT handler was already set up by InitPostmasterChild */
pqsignal(SIGALRM, SIG_IGN);
diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c
index 9087ef95af3..55df64f9ade 100644
--- a/src/backend/regex/regc_lex.c
+++ b/src/backend/regex/regc_lex.c
@@ -743,7 +743,7 @@ lexescape(struct vars *v)
/* oops, doesn't look like it's a backref after all... */
v->now = save;
/* and fall through into octal number */
- /* FALLTHROUGH */
+ pg_fallthrough;
case CHR('0'):
NOTE(REG_UUNPORT);
v->now--; /* put first digit back */
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 3e18e4a78a2..820995332ba 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -975,7 +975,7 @@ parseqatom(struct vars *v,
/* legal in EREs due to specification botch */
NOTE(REG_UPBOTCH);
/* fall through into case PLAIN */
- /* FALLTHROUGH */
+ pg_fallthrough;
case PLAIN:
onechr(v, v->nextvalue, lp, rp);
okcolors(v->nfa, v->cm);
diff --git a/src/backend/replication/logical/applyparallelworker.c b/src/backend/replication/logical/applyparallelworker.c
index 8a01f16a2ca..1730ace5490 100644
--- a/src/backend/replication/logical/applyparallelworker.c
+++ b/src/backend/replication/logical/applyparallelworker.c
@@ -879,7 +879,6 @@ ParallelApplyWorkerMain(Datum main_arg)
* receiving SIGTERM.
*/
pqsignal(SIGHUP, SignalHandlerForConfigReload);
- pqsignal(SIGTERM, die);
pqsignal(SIGUSR2, SignalHandlerForShutdownRequest);
BackgroundWorkerUnblockSignals();
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index 3ed86480be2..e6112e11ec2 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -1213,7 +1213,6 @@ ApplyLauncherMain(Datum main_arg)
/* Establish signal handlers. */
pqsignal(SIGHUP, SignalHandlerForConfigReload);
- pqsignal(SIGTERM, die);
BackgroundWorkerUnblockSignals();
/*
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index 85060d19a49..603a2b94d05 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -1986,16 +1986,22 @@ UpdateDecodingStats(LogicalDecodingContext *ctx)
}
/*
- * Read up to the end of WAL starting from the decoding slot's restart_lsn.
- * Return true if any meaningful/decodable WAL records are encountered,
- * otherwise false.
+ * Read WAL from the decoding slot's restart_lsn up to end_of_wal, checking
+ * whether any meaningful/decodable WAL records are encountered.
+ * scan_cutoff_lsn lets us terminate the scan early: once a decodable
+ * record is found at or beyond that LSN, we need not read further.
+ *
+ * Returns the LSN of the last decodable WAL record found, otherwise
+ * returns InvalidXLogRecPtr.
*/
-bool
-LogicalReplicationSlotHasPendingWal(XLogRecPtr end_of_wal)
+XLogRecPtr
+LogicalReplicationSlotCheckPendingWal(XLogRecPtr end_of_wal,
+ XLogRecPtr scan_cutoff_lsn)
{
- bool has_pending_wal = false;
+ XLogRecPtr last_pending_wal = InvalidXLogRecPtr;
Assert(MyReplicationSlot);
+ Assert(end_of_wal >= scan_cutoff_lsn);
PG_TRY();
{
@@ -2023,8 +2029,7 @@ LogicalReplicationSlotHasPendingWal(XLogRecPtr end_of_wal)
/* Invalidate non-timetravel entries */
InvalidateSystemCaches();
- /* Loop until the end of WAL or some changes are processed */
- while (!has_pending_wal && ctx->reader->EndRecPtr < end_of_wal)
+ while (ctx->reader->EndRecPtr < end_of_wal)
{
XLogRecord *record;
char *errm = NULL;
@@ -2037,7 +2042,20 @@ LogicalReplicationSlotHasPendingWal(XLogRecPtr end_of_wal)
if (record != NULL)
LogicalDecodingProcessRecord(ctx, ctx->reader);
- has_pending_wal = ctx->processing_required;
+ if (ctx->processing_required)
+ {
+ last_pending_wal = ctx->reader->ReadRecPtr;
+
+ /*
+ * If we find a decodable WAL record at or beyond scan_cutoff_lsn,
+ * we can terminate the scan early.
+ */
+ if (last_pending_wal >= scan_cutoff_lsn)
+ break;
+
+ /* Reset the flag and continue checking */
+ ctx->processing_required = false;
+ }
CHECK_FOR_INTERRUPTS();
}
@@ -2055,7 +2073,7 @@ LogicalReplicationSlotHasPendingWal(XLogRecPtr end_of_wal)
}
PG_END_TRY();
- return has_pending_wal;
+ return last_pending_wal;
}
/*
diff --git a/src/backend/replication/logical/logicalctl.c b/src/backend/replication/logical/logicalctl.c
index 9f787f3dc51..4e292951201 100644
--- a/src/backend/replication/logical/logicalctl.c
+++ b/src/backend/replication/logical/logicalctl.c
@@ -71,6 +71,7 @@
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
+#include "storage/procsignal.h"
#include "utils/injection_point.h"
/*
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index d84fa120b9f..e832fa0d8ea 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -182,8 +182,8 @@ typedef struct ReorderBufferToastEnt
Size num_chunks; /* number of chunks we've already seen */
Size size; /* combined size of chunks seen */
dlist_head chunks; /* linked list of chunks */
- struct varlena *reconstructed; /* reconstructed varlena now pointed to in
- * main tup */
+ varlena *reconstructed; /* reconstructed varlena now pointed to in
+ * main tup */
} ReorderBufferToastEnt;
/* Disk serialization support datastructures */
@@ -2322,6 +2322,7 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
change->action = REORDER_BUFFER_CHANGE_INSERT;
/* intentionally fall through */
+ pg_fallthrough;
case REORDER_BUFFER_CHANGE_INSERT:
case REORDER_BUFFER_CHANGE_UPDATE:
case REORDER_BUFFER_CHANGE_DELETE:
@@ -5133,13 +5134,13 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
{
CompactAttribute *attr = TupleDescCompactAttr(desc, natt);
ReorderBufferToastEnt *ent;
- struct varlena *varlena;
+ varlena *varlena_pointer;
/* va_rawsize is the size of the original datum -- including header */
- struct varatt_external toast_pointer;
- struct varatt_indirect redirect_pointer;
- struct varlena *new_datum = NULL;
- struct varlena *reconstructed;
+ varatt_external toast_pointer;
+ varatt_indirect redirect_pointer;
+ varlena *new_datum = NULL;
+ varlena *reconstructed;
dlist_iter it;
Size data_done = 0;
@@ -5155,13 +5156,13 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
continue;
/* ok, we know we have a toast datum */
- varlena = (struct varlena *) DatumGetPointer(attrs[natt]);
+ varlena_pointer = (varlena *) DatumGetPointer(attrs[natt]);
/* no need to do anything if the tuple isn't external */
- if (!VARATT_IS_EXTERNAL(varlena))
+ if (!VARATT_IS_EXTERNAL(varlena_pointer))
continue;
- VARATT_EXTERNAL_GET_POINTER(toast_pointer, varlena);
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, varlena_pointer);
/*
* Check whether the toast tuple changed, replace if so.
@@ -5175,7 +5176,7 @@ ReorderBufferToastReplace(ReorderBuffer *rb, ReorderBufferTXN *txn,
continue;
new_datum =
- (struct varlena *) palloc0(INDIRECT_POINTER_SIZE);
+ (varlena *) palloc0(INDIRECT_POINTER_SIZE);
free[natt] = true;
@@ -5361,7 +5362,7 @@ DisplayMapping(HTAB *tuplecid_data)
* transaction c) applied in LSN order.
*/
static void
-ApplyLogicalMappingFile(HTAB *tuplecid_data, Oid relid, const char *fname)
+ApplyLogicalMappingFile(HTAB *tuplecid_data, const char *fname)
{
char path[MAXPGPATH];
int fd;
@@ -5544,7 +5545,7 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname,
snapshot->subxip[0]);
- ApplyLogicalMappingFile(tuplecid_data, relid, f->fname);
+ ApplyLogicalMappingFile(tuplecid_data, f->fname);
pfree(f);
}
}
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 1c343d03d21..062a08ccb88 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -194,31 +194,40 @@ update_slotsync_skip_stats(SlotSyncSkipReason skip_reason)
*
* If no update was needed (the data of the remote slot is the same as the
* local slot) return false, otherwise true.
- *
- * *found_consistent_snapshot will be true iff the remote slot's LSN or xmin is
- * modified, and decoding from the corresponding LSN's can reach a
- * consistent snapshot.
- *
- * *remote_slot_precedes will be true if the remote slot's LSN or xmin
- * precedes locally reserved position.
*/
static bool
-update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
- bool *found_consistent_snapshot,
- bool *remote_slot_precedes)
+update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
{
ReplicationSlot *slot = MyReplicationSlot;
bool updated_xmin_or_lsn = false;
bool updated_config = false;
SlotSyncSkipReason skip_reason = SS_SKIP_NONE;
+ XLogRecPtr latestFlushPtr = GetStandbyFlushRecPtr(NULL);
Assert(slot->data.invalidated == RS_INVAL_NONE);
- if (found_consistent_snapshot)
- *found_consistent_snapshot = false;
+ /*
+ * Make sure that the relevant WAL has been received and flushed before
+ * syncing the slot to the target LSN received from the primary server.
+ */
+ if (remote_slot->confirmed_lsn > latestFlushPtr)
+ {
+ update_slotsync_skip_stats(SS_SKIP_WAL_NOT_FLUSHED);
- if (remote_slot_precedes)
- *remote_slot_precedes = false;
+ /*
+ * Can get here only if GUC 'synchronized_standby_slots' on the
+ * primary server was not configured correctly.
+ */
+ ereport(AmLogicalSlotSyncWorkerProcess() ? LOG : ERROR,
+ errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("skipping slot synchronization because the received slot sync"
+ " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
+ LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
+ remote_slot->name,
+ LSN_FORMAT_ARGS(latestFlushPtr)));
+
+ return false;
+ }
/*
* Don't overwrite if we already have a newer catalog_xmin and
@@ -262,9 +271,6 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
LSN_FORMAT_ARGS(slot->data.restart_lsn),
slot->data.catalog_xmin));
- if (remote_slot_precedes)
- *remote_slot_precedes = true;
-
/*
* Skip updating the configuration. This is required to avoid syncing
* two_phase_at without syncing confirmed_lsn. Otherwise, the prepared
@@ -304,14 +310,13 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
slot->data.confirmed_flush = remote_slot->confirmed_lsn;
slot->data.catalog_xmin = remote_slot->catalog_xmin;
SpinLockRelease(&slot->mutex);
-
- if (found_consistent_snapshot)
- *found_consistent_snapshot = true;
}
else
{
+ bool found_consistent_snapshot;
+
LogicalSlotAdvanceAndCheckSnapState(remote_slot->confirmed_lsn,
- found_consistent_snapshot);
+ &found_consistent_snapshot);
/* Sanity check */
if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
@@ -326,8 +331,18 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
* If we can't reach a consistent snapshot, the slot won't be
* persisted. See update_and_persist_local_synced_slot().
*/
- if (found_consistent_snapshot && !(*found_consistent_snapshot))
+ if (!found_consistent_snapshot)
+ {
+ Assert(MyReplicationSlot->data.persistency == RS_TEMPORARY);
+
+ ereport(LOG,
+ errmsg("could not synchronize replication slot \"%s\"",
+ remote_slot->name),
+ errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
+ LSN_FORMAT_ARGS(slot->data.restart_lsn)));
+
skip_reason = SS_SKIP_NO_CONSISTENT_SNAPSHOT;
+ }
}
updated_xmin_or_lsn = true;
@@ -619,27 +634,27 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
bool *slot_persistence_pending)
{
ReplicationSlot *slot = MyReplicationSlot;
- bool found_consistent_snapshot = false;
- bool remote_slot_precedes = false;
/* Slotsync skip stats are handled in function update_local_synced_slot() */
- (void) update_local_synced_slot(remote_slot, remote_dbid,
- &found_consistent_snapshot,
- &remote_slot_precedes);
+ (void) update_local_synced_slot(remote_slot, remote_dbid);
/*
- * Check if the primary server has caught up. Refer to the comment atop
- * the file for details on this check.
+ * Check if the slot cannot be synchronized. Refer to the comment atop the
+ * file for details on this check.
*/
- if (remote_slot_precedes)
+ if (slot->slotsync_skip_reason != SS_SKIP_NONE)
{
/*
- * The remote slot didn't catch up to locally reserved position.
+ * We reach this point when the remote slot didn't catch up to the
+ * locally reserved position, or it cannot reach the consistent point
+ * from the restart_lsn, or the WAL prior to the remote confirmed flush
+ * LSN has not been received and flushed.
*
- * We do not drop the slot because the restart_lsn can be ahead of the
- * current location when recreating the slot in the next cycle. It may
- * take more time to create such a slot. Therefore, we keep this slot
- * and attempt the synchronization in the next cycle.
+ * We do not drop the slot because the restart_lsn and confirmed_lsn
+ * can be ahead of the current location when recreating the slot in
+ * the next cycle. It may take more time to create such a slot or
+ * reach the consistent point. Therefore, we keep this slot and
+ * attempt the synchronization in the next cycle.
*
* We also update the slot_persistence_pending parameter, so the SQL
* function can retry.
@@ -650,24 +665,6 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
return false;
}
- /*
- * Don't persist the slot if it cannot reach the consistent point from the
- * restart_lsn. See comments atop this file.
- */
- if (!found_consistent_snapshot)
- {
- ereport(LOG,
- errmsg("could not synchronize replication slot \"%s\"", remote_slot->name),
- errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
- LSN_FORMAT_ARGS(slot->data.restart_lsn)));
-
- /* Set this, so that SQL function can retry */
- if (slot_persistence_pending)
- *slot_persistence_pending = true;
-
- return false;
- }
-
ReplicationSlotPersist();
ereport(LOG,
@@ -698,7 +695,6 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid,
bool *slot_persistence_pending)
{
ReplicationSlot *slot;
- XLogRecPtr latestFlushPtr = GetStandbyFlushRecPtr(NULL);
bool slot_updated = false;
/* Search for the named slot */
@@ -765,34 +761,6 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid,
return slot_updated;
}
- /*
- * Make sure that concerned WAL is received and flushed before syncing
- * slot to target lsn received from the primary server.
- *
- * Report statistics only after the slot has been acquired, ensuring
- * it cannot be dropped during the reporting process.
- */
- if (remote_slot->confirmed_lsn > latestFlushPtr)
- {
- update_slotsync_skip_stats(SS_SKIP_WAL_NOT_FLUSHED);
-
- /*
- * Can get here only if GUC 'synchronized_standby_slots' on the
- * primary server was not configured correctly.
- */
- ereport(AmLogicalSlotSyncWorkerProcess() ? LOG : ERROR,
- errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("skipping slot synchronization because the received slot sync"
- " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
- LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
- remote_slot->name,
- LSN_FORMAT_ARGS(latestFlushPtr)));
-
- ReplicationSlotRelease();
-
- return slot_updated;
- }
-
/* Slot not ready yet, let's attempt to make it sync-ready now. */
if (slot->data.persistency == RS_TEMPORARY)
{
@@ -819,8 +787,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid,
LSN_FORMAT_ARGS(slot->data.confirmed_flush),
LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
- slot_updated = update_local_synced_slot(remote_slot, remote_dbid,
- NULL, NULL);
+ slot_updated = update_local_synced_slot(remote_slot, remote_dbid);
}
}
/* Otherwise create the slot first. */
@@ -869,34 +836,6 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid,
LWLockRelease(ProcArrayLock);
LWLockRelease(ReplicationSlotControlLock);
- /*
- * Make sure that concerned WAL is received and flushed before syncing
- * slot to target lsn received from the primary server.
- *
- * Report statistics only after the slot has been acquired, ensuring
- * it cannot be dropped during the reporting process.
- */
- if (remote_slot->confirmed_lsn > latestFlushPtr)
- {
- update_slotsync_skip_stats(SS_SKIP_WAL_NOT_FLUSHED);
-
- /*
- * Can get here only if GUC 'synchronized_standby_slots' on the
- * primary server was not configured correctly.
- */
- ereport(AmLogicalSlotSyncWorkerProcess() ? LOG : ERROR,
- errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("skipping slot synchronization because the received slot sync"
- " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
- LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
- remote_slot->name,
- LSN_FORMAT_ARGS(latestFlushPtr)));
-
- ReplicationSlotRelease();
-
- return false;
- }
-
update_and_persist_local_synced_slot(remote_slot, remote_dbid,
slot_persistence_pending);
@@ -1541,8 +1480,6 @@ ReplSlotSyncWorkerMain(const void *startup_data, size_t startup_data_len)
Assert(startup_data_len == 0);
- MyBackendType = B_SLOTSYNC_WORKER;
-
init_ps_display(NULL);
Assert(GetProcessingMode() == InitProcessing);
@@ -1759,7 +1696,7 @@ update_synced_slots_inactive_since(void)
Assert(SlotIsLogical(s));
/* The slot must not be acquired by any process */
- Assert(s->active_pid == 0);
+ Assert(s->active_proc == INVALID_PROC_NUMBER);
/* Use the same inactive_since time for all the slots. */
if (now == 0)
diff --git a/src/backend/replication/logical/syncutils.c b/src/backend/replication/logical/syncutils.c
index 535ffb6f09e..ef61ca0437d 100644
--- a/src/backend/replication/logical/syncutils.c
+++ b/src/backend/replication/logical/syncutils.c
@@ -98,7 +98,8 @@ FinishSyncWorker(void)
* Callback from syscache invalidation.
*/
void
-InvalidateSyncingRelStates(Datum arg, int cacheid, uint32 hashvalue)
+InvalidateSyncingRelStates(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
relation_states_validity = SYNC_RELATIONS_STATE_NEEDS_REBUILD;
}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 32725c48623..adbdec49a0c 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -627,6 +627,8 @@ static inline void reset_apply_error_context_info(void);
static TransApplyAction get_transaction_apply_action(TransactionId xid,
ParallelApplyWorkerInfo **winfo);
+static void set_wal_receiver_timeout(void);
+
static void on_exit_clear_xact_state(int code, Datum arg);
/*
@@ -839,7 +841,7 @@ handle_streamed_transaction(LogicalRepMsgType action, StringInfo s)
*/
pa_switch_to_partial_serialize(winfo, false);
- /* fall through */
+ pg_fallthrough;
case TRANS_LEADER_PARTIAL_SERIALIZE:
stream_write_change(action, &original_msg);
@@ -1586,7 +1588,7 @@ apply_handle_stream_prepare(StringInfo s)
*/
pa_switch_to_partial_serialize(winfo, true);
- /* fall through */
+ pg_fallthrough;
case TRANS_LEADER_PARTIAL_SERIALIZE:
Assert(winfo);
@@ -1808,7 +1810,7 @@ apply_handle_stream_start(StringInfo s)
*/
pa_switch_to_partial_serialize(winfo, !first_segment);
- /* fall through */
+ pg_fallthrough;
case TRANS_LEADER_PARTIAL_SERIALIZE:
Assert(winfo);
@@ -1923,7 +1925,7 @@ apply_handle_stream_stop(StringInfo s)
*/
pa_switch_to_partial_serialize(winfo, true);
- /* fall through */
+ pg_fallthrough;
case TRANS_LEADER_PARTIAL_SERIALIZE:
stream_write_change(LOGICAL_REP_MSG_STREAM_STOP, s);
stream_stop_internal(stream_xid);
@@ -2169,7 +2171,7 @@ apply_handle_stream_abort(StringInfo s)
*/
pa_switch_to_partial_serialize(winfo, true);
- /* fall through */
+ pg_fallthrough;
case TRANS_LEADER_PARTIAL_SERIALIZE:
Assert(winfo);
@@ -2442,7 +2444,7 @@ apply_handle_stream_commit(StringInfo s)
*/
pa_switch_to_partial_serialize(winfo, true);
- /* fall through */
+ pg_fallthrough;
case TRANS_LEADER_PARTIAL_SERIALIZE:
Assert(winfo);
@@ -5154,17 +5156,53 @@ maybe_reread_subscription(void)
SetConfigOption("synchronous_commit", MySubscription->synccommit,
PGC_BACKEND, PGC_S_OVERRIDE);
+ /* Change wal_receiver_timeout according to the user's wishes */
+ set_wal_receiver_timeout();
+
if (started_tx)
CommitTransactionCommand();
MySubscriptionValid = true;
}
+/*
+ * Change wal_receiver_timeout to MySubscription->walrcvtimeout.
+ */
+static void
+set_wal_receiver_timeout(void)
+{
+ bool parsed;
+ int val;
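+ /* remember the current value so we can log any change below */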
+ int prev_timeout = wal_receiver_timeout;
+
+ /*
+ * Set the wal_receiver_timeout GUC to MySubscription->walrcvtimeout,
+ * which comes from the subscription's wal_receiver_timeout option. If the
+ * value is -1, reset the GUC to its default, meaning it will inherit from
+ * the server config, command line, or role/database settings.
+ */
+ parsed = parse_int(MySubscription->walrcvtimeout, &val, 0, NULL);
+ if (parsed && val == -1)
+ SetConfigOption("wal_receiver_timeout", NULL,
+ PGC_BACKEND, PGC_S_SESSION);
+ else
+ SetConfigOption("wal_receiver_timeout", MySubscription->walrcvtimeout,
+ PGC_BACKEND, PGC_S_SESSION);
+
+ /*
+ * Log the wal_receiver_timeout setting (in milliseconds) as a debug
+ * message when it changes, to verify it was set correctly.
+ */
+ if (prev_timeout != wal_receiver_timeout)
+ elog(DEBUG1, "logical replication worker for subscription \"%s\" wal_receiver_timeout: %d ms",
+ MySubscription->name, wal_receiver_timeout);
+}
+
/*
* Callback from subscription syscache invalidation.
*/
static void
-subscription_change_cb(Datum arg, int cacheid, uint32 hashvalue)
+subscription_change_cb(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
MySubscriptionValid = false;
}
@@ -5822,6 +5860,9 @@ InitializeLogRepWorker(void)
SetConfigOption("synchronous_commit", MySubscription->synccommit,
PGC_BACKEND, PGC_S_OVERRIDE);
+ /* Change wal_receiver_timeout according to the user's wishes */
+ set_wal_receiver_timeout();
+
/*
* Keep us informed about subscription or role changes. Note that the
* role's superuser privilege can be revoked.
@@ -5890,7 +5931,6 @@ SetupApplyOrSyncWorker(int worker_slot)
/* Setup signal handling */
pqsignal(SIGHUP, SignalHandlerForConfigReload);
- pqsignal(SIGTERM, die);
BackgroundWorkerUnblockSignals();
/*
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index e016f64e0b3..7a49185d29d 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -86,7 +86,7 @@ static void pgoutput_stream_prepare_txn(LogicalDecodingContext *ctx,
static bool publications_valid;
static List *LoadPublications(List *pubnames);
-static void publication_invalidation_cb(Datum arg, int cacheid,
+static void publication_invalidation_cb(Datum arg, SysCacheIdentifier cacheid,
uint32 hashvalue);
static void send_repl_origin(LogicalDecodingContext *ctx,
ReplOriginId origin_id, XLogRecPtr origin_lsn,
@@ -227,7 +227,7 @@ static void send_relation_and_attrs(Relation relation, TransactionId xid,
LogicalDecodingContext *ctx,
RelationSyncEntry *relentry);
static void rel_sync_cache_relation_cb(Datum arg, Oid relid);
-static void rel_sync_cache_publication_cb(Datum arg, int cacheid,
+static void rel_sync_cache_publication_cb(Datum arg, SysCacheIdentifier cacheid,
uint32 hashvalue);
static void set_schema_sent_in_streamed_txn(RelationSyncEntry *entry,
TransactionId xid);
@@ -1828,7 +1828,8 @@ LoadPublications(List *pubnames)
* Called for invalidations on pg_publication.
*/
static void
-publication_invalidation_cb(Datum arg, int cacheid, uint32 hashvalue)
+publication_invalidation_cb(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
publications_valid = false;
}
@@ -2431,7 +2432,8 @@ rel_sync_cache_relation_cb(Datum arg, Oid relid)
* Called for invalidations on pg_namespace.
*/
static void
-rel_sync_cache_publication_cb(Datum arg, int cacheid, uint32 hashvalue)
+rel_sync_cache_publication_cb(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
HASH_SEQ_STATUS status;
RelationSyncEntry *entry;
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 4c47261c7f9..28c7019402b 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -226,6 +226,7 @@ ReplicationSlotsShmemInit(void)
ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[i];
/* everything else is zeroed by the memset above */
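+ /* active_proc must be set explicitly: INVALID_PROC_NUMBER is not zero */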
+ slot->active_proc = INVALID_PROC_NUMBER;
SpinLockInit(&slot->mutex);
LWLockInitialize(&slot->io_in_progress_lock,
LWTRANCHE_REPLICATION_SLOT_IO);
@@ -461,7 +462,7 @@ ReplicationSlotCreate(const char *name, bool db_specific,
* be doing that. So it's safe to initialize the slot.
*/
Assert(!slot->in_use);
- Assert(slot->active_pid == 0);
+ Assert(slot->active_proc == INVALID_PROC_NUMBER);
/* first initialize persistent data */
memset(&slot->data, 0, sizeof(ReplicationSlotPersistentData));
@@ -505,8 +506,8 @@ ReplicationSlotCreate(const char *name, bool db_specific,
/* We can now mark the slot active, and that makes it our slot. */
SpinLockAcquire(&slot->mutex);
- Assert(slot->active_pid == 0);
- slot->active_pid = MyProcPid;
+ Assert(slot->active_proc == INVALID_PROC_NUMBER);
+ slot->active_proc = MyProcNumber;
SpinLockRelease(&slot->mutex);
MyReplicationSlot = slot;
@@ -620,6 +621,7 @@ void
ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
{
ReplicationSlot *s;
+ ProcNumber active_proc;
int active_pid;
Assert(name != NULL);
@@ -672,17 +674,18 @@ ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
* to inactive_since in InvalidatePossiblyObsoleteSlot.
*/
SpinLockAcquire(&s->mutex);
- if (s->active_pid == 0)
- s->active_pid = MyProcPid;
- active_pid = s->active_pid;
+ if (s->active_proc == INVALID_PROC_NUMBER)
+ s->active_proc = MyProcNumber;
+ active_proc = s->active_proc;
ReplicationSlotSetInactiveSince(s, 0, false);
SpinLockRelease(&s->mutex);
}
else
{
- s->active_pid = active_pid = MyProcPid;
+ s->active_proc = active_proc = MyProcNumber;
ReplicationSlotSetInactiveSince(s, 0, true);
}
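+ /* Translate the owning ProcNumber to a PID, for use in messages below */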
+ active_pid = GetPGProcByNumber(active_proc)->pid;
LWLockRelease(ReplicationSlotControlLock);
/*
@@ -690,7 +693,7 @@ ReplicationSlotAcquire(const char *name, bool nowait, bool error_if_invalid)
* wait until the owning process signals us that it's been released, or
* error out.
*/
- if (active_pid != MyProcPid)
+ if (active_proc != MyProcNumber)
{
if (!nowait)
{
@@ -762,7 +765,7 @@ ReplicationSlotRelease(void)
bool is_logical;
TimestampTz now = 0;
- Assert(slot != NULL && slot->active_pid != 0);
+ Assert(slot != NULL && slot->active_proc != INVALID_PROC_NUMBER);
is_logical = SlotIsLogical(slot);
@@ -815,7 +818,7 @@ ReplicationSlotRelease(void)
* disconnecting, but wake up others that may be waiting for it.
*/
SpinLockAcquire(&slot->mutex);
- slot->active_pid = 0;
+ slot->active_proc = INVALID_PROC_NUMBER;
ReplicationSlotSetInactiveSince(slot, now, false);
SpinLockRelease(&slot->mutex);
ConditionVariableBroadcast(&slot->active_cv);
@@ -877,7 +880,7 @@ ReplicationSlotCleanup(bool synced_only)
found_valid_logicalslot |=
(SlotIsLogical(s) && s->data.invalidated == RS_INVAL_NONE);
- if ((s->active_pid == MyProcPid &&
+ if ((s->active_proc == MyProcNumber &&
(!synced_only || s->data.synced)))
{
Assert(s->data.persistency == RS_TEMPORARY);
@@ -1088,7 +1091,7 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
bool fail_softly = slot->data.persistency != RS_PERSISTENT;
SpinLockAcquire(&slot->mutex);
- slot->active_pid = 0;
+ slot->active_proc = INVALID_PROC_NUMBER;
SpinLockRelease(&slot->mutex);
/* wake up anyone waiting on this slot */
@@ -1110,7 +1113,7 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
* Also wake up processes waiting for it.
*/
LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
- slot->active_pid = 0;
+ slot->active_proc = INVALID_PROC_NUMBER;
slot->in_use = false;
LWLockRelease(ReplicationSlotControlLock);
ConditionVariableBroadcast(&slot->active_cv);
@@ -1476,7 +1479,7 @@ ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive)
/* count slots with spinlock held */
SpinLockAcquire(&s->mutex);
(*nslots)++;
- if (s->active_pid != 0)
+ if (s->active_proc != INVALID_PROC_NUMBER)
(*nactive)++;
SpinLockRelease(&s->mutex);
}
@@ -1520,7 +1523,7 @@ ReplicationSlotsDropDBSlots(Oid dboid)
{
ReplicationSlot *s;
char *slotname;
- int active_pid;
+ ProcNumber active_proc;
s = &ReplicationSlotCtl->replication_slots[i];
@@ -1550,11 +1553,11 @@ ReplicationSlotsDropDBSlots(Oid dboid)
SpinLockAcquire(&s->mutex);
/* can't change while ReplicationSlotControlLock is held */
slotname = NameStr(s->data.name);
- active_pid = s->active_pid;
- if (active_pid == 0)
+ active_proc = s->active_proc;
+ if (active_proc == INVALID_PROC_NUMBER)
{
MyReplicationSlot = s;
- s->active_pid = MyProcPid;
+ s->active_proc = MyProcNumber;
}
SpinLockRelease(&s->mutex);
@@ -1579,11 +1582,11 @@ ReplicationSlotsDropDBSlots(Oid dboid)
* XXX: We can consider shutting down the slot sync worker before
* trying to drop synced temporary slots here.
*/
- if (active_pid)
+ if (active_proc != INVALID_PROC_NUMBER)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_IN_USE),
errmsg("replication slot \"%s\" is active for PID %d",
- slotname, active_pid)));
+ slotname, GetPGProcByNumber(active_proc)->pid)));
/*
* To avoid duplicating ReplicationSlotDropAcquired() and to avoid
@@ -1974,6 +1977,7 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
{
XLogRecPtr restart_lsn;
NameData slotname;
+ ProcNumber active_proc;
int active_pid = 0;
ReplicationSlotInvalidationCause invalidation_cause = RS_INVAL_NONE;
TimestampTz now = 0;
@@ -2027,7 +2031,7 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
}
slotname = s->data.name;
- active_pid = s->active_pid;
+ active_proc = s->active_proc;
/*
* If the slot can be acquired, do so and mark it invalidated
@@ -2039,10 +2043,10 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
* is terminated. So, the inactive slot can only be invalidated
* immediately without being terminated.
*/
- if (active_pid == 0)
+ if (active_proc == INVALID_PROC_NUMBER)
{
MyReplicationSlot = s;
- s->active_pid = MyProcPid;
+ s->active_proc = MyProcNumber;
s->data.invalidated = invalidation_cause;
/*
@@ -2058,6 +2062,11 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
/* Let caller know */
invalidated = true;
}
+ else
+ {
+ active_pid = GetPGProcByNumber(active_proc)->pid;
+ Assert(active_pid != 0);
+ }
SpinLockRelease(&s->mutex);
@@ -2073,7 +2082,7 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
&slot_idle_usecs);
}
- if (active_pid != 0)
+ if (active_proc != INVALID_PROC_NUMBER)
{
/*
* Prepare the sleep on the slot's condition variable before
@@ -2105,9 +2114,9 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
slot_idle_secs);
if (MyBackendType == B_STARTUP)
- (void) SendProcSignal(active_pid,
- PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT,
- INVALID_PROC_NUMBER);
+ (void) SignalRecoveryConflict(GetPGProcByNumber(active_proc),
+ active_pid,
+ RECOVERY_CONFLICT_LOGICALSLOT);
else
(void) kill(active_pid, SIGTERM);
@@ -2875,7 +2884,7 @@ RestoreSlotFromDisk(const char *name)
slot->candidate_restart_valid = InvalidXLogRecPtr;
slot->in_use = true;
- slot->active_pid = 0;
+ slot->active_proc = INVALID_PROC_NUMBER;
/*
* Set the time since the slot has become inactive after loading the
@@ -3158,7 +3167,7 @@ StandbySlotsHaveCaughtup(XLogRecPtr wait_for_lsn, int elevel)
SpinLockAcquire(&slot->mutex);
restart_lsn = slot->data.restart_lsn;
invalidated = slot->data.invalidated != RS_INVAL_NONE;
- inactive = slot->active_pid == 0;
+ inactive = slot->active_proc == INVALID_PROC_NUMBER;
SpinLockRelease(&slot->mutex);
if (invalidated)
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c
index 1ed2d80c2d2..9f5e4f998fe 100644
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -20,6 +20,7 @@
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/slotsync.h"
+#include "storage/proc.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/pg_lsn.h"
@@ -309,10 +310,10 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
values[i++] = ObjectIdGetDatum(slot_contents.data.database);
values[i++] = BoolGetDatum(slot_contents.data.persistency == RS_TEMPORARY);
- values[i++] = BoolGetDatum(slot_contents.active_pid != 0);
+ values[i++] = BoolGetDatum(slot_contents.active_proc != INVALID_PROC_NUMBER);
- if (slot_contents.active_pid != 0)
- values[i++] = Int32GetDatum(slot_contents.active_pid);
+ if (slot_contents.active_proc != INVALID_PROC_NUMBER)
+ values[i++] = Int32GetDatum(GetPGProcByNumber(slot_contents.active_proc)->pid);
else
nulls[i++] = true;
@@ -377,13 +378,13 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
*/
if (XLogRecPtrIsValid(slot_contents.data.restart_lsn))
{
- int pid;
+ ProcNumber procno;
SpinLockAcquire(&slot->mutex);
- pid = slot->active_pid;
+ procno = slot->active_proc;
slot_contents.data.restart_lsn = slot->data.restart_lsn;
SpinLockRelease(&slot->mutex);
- if (pid != 0)
+ if (procno != INVALID_PROC_NUMBER)
{
values[i++] = CStringGetTextDatum("unreserved");
walstate = WALAVAIL_UNRESERVED;
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index e7bee777532..d1582a5d711 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -355,7 +355,7 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
pg_read_barrier();
Assert(dlist_node_is_detached(&MyProc->syncRepLinks));
MyProc->syncRepState = SYNC_REP_NOT_WAITING;
- MyProc->waitLSN = 0;
+ MyProc->waitLSN = InvalidXLogRecPtr;
/* reset ps display to remove the suffix */
if (update_process_title)
@@ -1027,7 +1027,7 @@ SyncRepQueueIsOrderedByLSN(int mode)
Assert(mode >= 0 && mode < NUM_SYNC_REP_WAIT_MODE);
- lastLSN = 0;
+ lastLSN = InvalidXLogRecPtr;
dlist_foreach(iter, &WalSndCtl->SyncRepQueue[mode])
{
@@ -1077,6 +1077,7 @@ check_synchronous_standby_names(char **newval, void **extra, GucSource source)
if (syncrep_parse_error_msg)
GUC_check_errdetail("%s", syncrep_parse_error_msg);
else
+ /* translator: %s is a GUC name */
GUC_check_errdetail("\"%s\" parser failed.",
"synchronous_standby_names");
return false;
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 6970af3f3ff..7c1b8757d7d 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -169,7 +169,6 @@ WalReceiverMain(const void *startup_data, size_t startup_data_len)
Assert(startup_data_len == 0);
- MyBackendType = B_WAL_RECEIVER;
AuxiliaryProcessMainCommon();
/*
@@ -193,7 +192,7 @@ WalReceiverMain(const void *startup_data, size_t startup_data_len)
case WALRCV_STOPPING:
/* If we've already been requested to stop, don't start up. */
walrcv->walRcvState = WALRCV_STOPPED;
- /* fall through */
+ pg_fallthrough;
case WALRCV_STOPPED:
SpinLockRelease(&walrcv->mutex);
@@ -1122,8 +1121,8 @@ XLogWalRcvClose(XLogRecPtr recptr, TimeLineID tli)
static void
XLogWalRcvSendReply(bool force, bool requestReply)
{
- static XLogRecPtr writePtr = 0;
- static XLogRecPtr flushPtr = 0;
+ static XLogRecPtr writePtr = InvalidXLogRecPtr;
+ static XLogRecPtr flushPtr = InvalidXLogRecPtr;
XLogRecPtr applyPtr;
TimestampTz now;
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c
index 42e3e170bc0..e62e8a20420 100644
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -216,7 +216,7 @@ ShutdownWalRcv(void)
case WALRCV_WAITING:
case WALRCV_RESTARTING:
walrcv->walRcvState = WALRCV_STOPPING;
- /* fall through */
+ pg_fallthrough;
case WALRCV_STOPPING:
walrcvpid = walrcv->pid;
break;
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index a0e6a3d200c..2cde8ebc729 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1611,6 +1611,32 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
ProcessPendingWrites();
}
+/*
+ * Handle configuration reload.
+ *
+ * Process the pending configuration file reload and reinitialize
+ * synchronous replication settings. Also release any waiters that may
+ * now be satisfied due to changed synchronous replication requirements.
+ */
+static void
+WalSndHandleConfigReload(void)
+{
+ if (!ConfigReloadPending)
+ return;
+
+ ConfigReloadPending = false;
+ ProcessConfigFile(PGC_SIGHUP);
+ SyncRepInitConfig();
+
+ /*
+ * Recheck and release any now-satisfied waiters, since the config reload
+ * may have changed the synchronous replication requirements (e.g., by
+ * reducing the number of sync standbys or changing the standby names).
+ */
+ if (!am_cascading_walsender)
+ SyncRepReleaseWaiters();
+}
+
/*
* Wait until there is no pending write. Also process replies from the other
* side and check timeouts during that.
@@ -1646,12 +1672,7 @@ ProcessPendingWrites(void)
CHECK_FOR_INTERRUPTS();
/* Process any requests or signals received recently */
- if (ConfigReloadPending)
- {
- ConfigReloadPending = false;
- ProcessConfigFile(PGC_SIGHUP);
- SyncRepInitConfig();
- }
+ WalSndHandleConfigReload();
/* Try to flush pending output to the client */
if (pq_flush_if_writable() != 0)
@@ -1854,12 +1875,7 @@ WalSndWaitForWal(XLogRecPtr loc)
CHECK_FOR_INTERRUPTS();
/* Process any requests or signals received recently */
- if (ConfigReloadPending)
- {
- ConfigReloadPending = false;
- ProcessConfigFile(PGC_SIGHUP);
- SyncRepInitConfig();
- }
+ WalSndHandleConfigReload();
/* Check for input from the client */
ProcessRepliesIfAny();
@@ -2899,12 +2915,7 @@ WalSndLoop(WalSndSendDataCallback send_data)
CHECK_FOR_INTERRUPTS();
/* Process any requests or signals received recently */
- if (ConfigReloadPending)
- {
- ConfigReloadPending = false;
- ProcessConfigFile(PGC_SIGHUP);
- SyncRepInitConfig();
- }
+ WalSndHandleConfigReload();
/* Check for input from the client */
ProcessRepliesIfAny();
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 19dcce80ec4..7c99290be4d 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -658,6 +658,19 @@ rewriteRuleAction(Query *parsetree,
rule_action = sub_action;
}
+ /*
+ * If rule_action is INSERT .. ON CONFLICT DO SELECT, the parser should
+ * have verified that it has a RETURNING clause, but we must also check
+ * that the triggering query has a RETURNING clause.
+ */
+ if (rule_action->onConflict &&
+ rule_action->onConflict->action == ONCONFLICT_SELECT &&
+ (!rule_action->returningList || !parsetree->returningList))
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("ON CONFLICT DO SELECT requires a RETURNING clause"),
+ errdetail("A rule action is INSERT ... ON CONFLICT DO SELECT, which requires a RETURNING clause."));
+
/*
* If rule_action has a RETURNING clause, then either throw it away if the
* triggering query has no RETURNING clause, or rewrite it to emit what
@@ -3643,11 +3656,12 @@ rewriteTargetView(Query *parsetree, Relation view)
}
/*
- * For INSERT .. ON CONFLICT .. DO UPDATE, we must also update assorted
- * stuff in the onConflict data structure.
+ * For INSERT .. ON CONFLICT .. DO SELECT/UPDATE, we must also update
+ * assorted stuff in the onConflict data structure.
*/
if (parsetree->onConflict &&
- parsetree->onConflict->action == ONCONFLICT_UPDATE)
+ (parsetree->onConflict->action == ONCONFLICT_UPDATE ||
+ parsetree->onConflict->action == ONCONFLICT_SELECT))
{
Index old_exclRelIndex,
new_exclRelIndex;
@@ -3656,9 +3670,8 @@ rewriteTargetView(Query *parsetree, Relation view)
List *tmp_tlist;
/*
- * Like the INSERT/UPDATE code above, update the resnos in the
- * auxiliary UPDATE targetlist to refer to columns of the base
- * relation.
+ * For ON CONFLICT DO UPDATE, update the resnos in the auxiliary
+ * UPDATE targetlist to refer to columns of the base relation.
*/
foreach(lc, parsetree->onConflict->onConflictSet)
{
@@ -3677,7 +3690,7 @@ rewriteTargetView(Query *parsetree, Relation view)
}
/*
- * Also, create a new RTE for the EXCLUDED pseudo-relation, using the
+ * Create a new RTE for the EXCLUDED pseudo-relation, using the
* query's new base rel (which may well have a different column list
* from the view, hence we need a new column alias list). This should
* match transformOnConflictClause. In particular, note that the
diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c
index 93a205d02bc..e88a1bc1a89 100644
--- a/src/backend/rewrite/rowsecurity.c
+++ b/src/backend/rewrite/rowsecurity.c
@@ -301,40 +301,48 @@ get_row_security_policies(Query *root, RangeTblEntry *rte, int rt_index,
}
/*
- * For INSERT ... ON CONFLICT DO UPDATE we need additional policy
- * checks for the UPDATE which may be applied to the same RTE.
+ * For INSERT ... ON CONFLICT DO SELECT/UPDATE we need additional
+ * policy checks for the SELECT/UPDATE which may be applied to the
+ * same RTE.
*/
- if (commandType == CMD_INSERT &&
- root->onConflict && root->onConflict->action == ONCONFLICT_UPDATE)
+ if (commandType == CMD_INSERT && root->onConflict &&
+ (root->onConflict->action == ONCONFLICT_UPDATE ||
+ root->onConflict->action == ONCONFLICT_SELECT))
{
- List *conflict_permissive_policies;
- List *conflict_restrictive_policies;
+ List *conflict_permissive_policies = NIL;
+ List *conflict_restrictive_policies = NIL;
List *conflict_select_permissive_policies = NIL;
List *conflict_select_restrictive_policies = NIL;
- /* Get the policies that apply to the auxiliary UPDATE */
- get_policies_for_relation(rel, CMD_UPDATE, user_id,
- &conflict_permissive_policies,
- &conflict_restrictive_policies);
-
- /*
- * Enforce the USING clauses of the UPDATE policies using WCOs
- * rather than security quals. This ensures that an error is
- * raised if the conflicting row cannot be updated due to RLS,
- * rather than the change being silently dropped.
- */
- add_with_check_options(rel, rt_index,
- WCO_RLS_CONFLICT_CHECK,
- conflict_permissive_policies,
- conflict_restrictive_policies,
- withCheckOptions,
- hasSubLinks,
- true);
+ if (perminfo->requiredPerms & ACL_UPDATE)
+ {
+ /*
+ * Get the policies that apply to the auxiliary UPDATE or
+ * SELECT FOR UPDATE/SHARE.
+ */
+ get_policies_for_relation(rel, CMD_UPDATE, user_id,
+ &conflict_permissive_policies,
+ &conflict_restrictive_policies);
+
+ /*
+ * Enforce the USING clauses of the UPDATE policies using WCOs
+ * rather than security quals. This ensures that an error is
+ * raised if the conflicting row cannot be updated/locked due
+ * to RLS, rather than the change being silently dropped.
+ */
+ add_with_check_options(rel, rt_index,
+ WCO_RLS_CONFLICT_CHECK,
+ conflict_permissive_policies,
+ conflict_restrictive_policies,
+ withCheckOptions,
+ hasSubLinks,
+ true);
+ }
/*
* Get and add ALL/SELECT policies, as WCO_RLS_CONFLICT_CHECK WCOs
- * to ensure they are considered when taking the UPDATE path of an
- * INSERT .. ON CONFLICT DO UPDATE, if SELECT rights are required
+ * to ensure they are considered when taking the SELECT/UPDATE
+ * path of an INSERT .. ON CONFLICT, if SELECT rights are required
* for this relation, also as WCO policies, again, to avoid
* silently dropping data. See above.
*/
@@ -352,29 +360,36 @@ get_row_security_policies(Query *root, RangeTblEntry *rte, int rt_index,
true);
}
- /* Enforce the WITH CHECK clauses of the UPDATE policies */
- add_with_check_options(rel, rt_index,
- WCO_RLS_UPDATE_CHECK,
- conflict_permissive_policies,
- conflict_restrictive_policies,
- withCheckOptions,
- hasSubLinks,
- false);
-
/*
- * Add ALL/SELECT policies as WCO_RLS_UPDATE_CHECK WCOs, to ensure
- * that the final updated row is visible when taking the UPDATE
- * path of an INSERT .. ON CONFLICT DO UPDATE, if SELECT rights
- * are required for this relation.
+ * For INSERT .. ON CONFLICT DO UPDATE, add additional policies to
+ * be checked when the auxiliary UPDATE is executed.
*/
- if (perminfo->requiredPerms & ACL_SELECT)
+ if (root->onConflict->action == ONCONFLICT_UPDATE)
+ {
+ /* Enforce the WITH CHECK clauses of the UPDATE policies */
add_with_check_options(rel, rt_index,
WCO_RLS_UPDATE_CHECK,
- conflict_select_permissive_policies,
- conflict_select_restrictive_policies,
+ conflict_permissive_policies,
+ conflict_restrictive_policies,
withCheckOptions,
hasSubLinks,
- true);
+ false);
+
+ /*
+ * Add ALL/SELECT policies as WCO_RLS_UPDATE_CHECK WCOs, to
+ * ensure that the final updated row is visible when taking
+ * the UPDATE path of an INSERT .. ON CONFLICT, if SELECT
+ * rights are required for this relation.
+ */
+ if (perminfo->requiredPerms & ACL_SELECT)
+ add_with_check_options(rel, rt_index,
+ WCO_RLS_UPDATE_CHECK,
+ conflict_select_permissive_policies,
+ conflict_select_restrictive_policies,
+ withCheckOptions,
+ hasSubLinks,
+ true);
+ }
}
}
@@ -398,8 +413,8 @@ get_row_security_policies(Query *root, RangeTblEntry *rte, int rt_index,
* XXX We are setting up USING quals as WITH CHECK. If RLS prohibits
* UPDATE/DELETE on the target row, we shall throw an error instead of
* silently ignoring the row. This is different than how normal
- * UPDATE/DELETE works and more in line with INSERT ON CONFLICT DO UPDATE
- * handling.
+ * UPDATE/DELETE works and more in line with INSERT ON CONFLICT DO
+ * SELECT/UPDATE handling.
*/
if (commandType == CMD_MERGE)
{
@@ -784,9 +799,9 @@ add_security_quals(int rt_index,
* added by an INSERT or UPDATE are consistent with the specified RLS
* policies. Normally new data must satisfy the WITH CHECK clauses from the
* policies. If a policy has no explicit WITH CHECK clause, its USING clause
- * is used instead. In the special case of an UPDATE arising from an
- * INSERT ... ON CONFLICT DO UPDATE, existing records are first checked using
- * a WCO_RLS_CONFLICT_CHECK WithCheckOption, which always uses the USING
+ * is used instead. In the special case of a SELECT or UPDATE arising from an
+ * INSERT ... ON CONFLICT DO SELECT/UPDATE, existing records are first checked
+ * using a WCO_RLS_CONFLICT_CHECK WithCheckOption, which always uses the USING
* clauses from RLS policies.
*
* New WCOs are added to withCheckOptions, and hasSubLinks is set to true if
diff --git a/src/backend/statistics/extended_stats_funcs.c b/src/backend/statistics/extended_stats_funcs.c
index db107684607..479f74652be 100644
--- a/src/backend/statistics/extended_stats_funcs.c
+++ b/src/backend/statistics/extended_stats_funcs.c
@@ -347,9 +347,8 @@ extended_statistics_update(FunctionCallInfo fcinfo)
{
ereport(WARNING,
errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("could not find extended statistics object \"%s\".\"%s\"",
- quote_identifier(nspname),
- quote_identifier(stxname)));
+ errmsg("could not find extended statistics object \"%s.%s\"",
+ nspname, stxname));
success = false;
goto cleanup;
}
@@ -364,11 +363,9 @@ extended_statistics_update(FunctionCallInfo fcinfo)
{
ereport(WARNING,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("could not restore extended statistics object \"%s\".\"%s\": incorrect relation \"%s\".\"%s\" specified",
- quote_identifier(nspname),
- quote_identifier(stxname),
- quote_identifier(relnspname),
- quote_identifier(relname)));
+ errmsg("could not restore extended statistics object \"%s.%s\": incorrect relation \"%s.%s\" specified",
+ nspname, stxname,
+ relnspname, relname));
success = false;
goto cleanup;
@@ -420,9 +417,8 @@ extended_statistics_update(FunctionCallInfo fcinfo)
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot specify parameter \"%s\"",
extarginfo[NDISTINCT_ARG].argname),
- errhint("Extended statistics object \"%s\".\"%s\" does not support statistics of this type.",
- quote_identifier(nspname),
- quote_identifier(stxname)));
+ errhint("Extended statistics object \"%s.%s\" does not support statistics of this type.",
+ nspname, stxname));
has.ndistinct = false;
success = false;
@@ -438,9 +434,8 @@ extended_statistics_update(FunctionCallInfo fcinfo)
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot specify parameter \"%s\"",
extarginfo[DEPENDENCIES_ARG].argname),
- errhint("Extended statistics object \"%s\".\"%s\" does not support statistics of this type.",
- quote_identifier(nspname),
- quote_identifier(stxname)));
+ errhint("Extended statistics object \"%s.%s\" does not support statistics of this type.",
+ nspname, stxname));
has.dependencies = false;
success = false;
}
@@ -463,9 +458,8 @@ extended_statistics_update(FunctionCallInfo fcinfo)
extarginfo[MOST_COMMON_VALS_ARG].argname,
extarginfo[MOST_COMMON_FREQS_ARG].argname,
extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname),
- errhint("Extended statistics object \"%s\".\"%s\" does not support statistics of this type.",
- quote_identifier(nspname),
- quote_identifier(stxname)));
+ errhint("Extended statistics object \"%s.%s\" does not support statistics of this type.",
+ nspname, stxname));
has.mcv = false;
success = false;
@@ -539,7 +533,7 @@ extended_statistics_update(FunctionCallInfo fcinfo)
/*
* After all the positive number attnums in stxkeys come the negative
* numbers (if any) which represent expressions in the order that they
- * appear in stxdexprs. Because the expressions are always
+ * appear in stxdexpr. Because the expressions are always
* monotonically decreasing from -1, there is no point in looking at
* the values in stxkeys, it's enough to know how many of them there
* are.
@@ -888,7 +882,7 @@ pg_clear_extended_stats(PG_FUNCTION_ARGS)
table_close(pg_stext, RowExclusiveLock);
ereport(WARNING,
errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("could not find extended statistics object \"%s\".\"%s\"",
+ errmsg("could not find extended statistics object \"%s.%s\"",
nspname, stxname));
PG_RETURN_VOID();
}
@@ -904,7 +898,7 @@ pg_clear_extended_stats(PG_FUNCTION_ARGS)
table_close(pg_stext, RowExclusiveLock);
ereport(WARNING,
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("could not clear extended statistics object \"%s\".\"%s\": incorrect relation \"%s\".\"%s\" specified",
+ errmsg("could not clear extended statistics object \"%s.%s\": incorrect relation \"%s.%s\" specified",
get_namespace_name(nspoid), stxname,
relnspname, relname));
PG_RETURN_VOID();
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index d2c9cd6f20a..e4ae3031fef 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -622,7 +622,7 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
pgaio_method_ops->wait_one(ioh, ref_generation);
continue;
}
- /* fallthrough */
+ pg_fallthrough;
/* waiting for owner to submit */
case PGAIO_HS_DEFINED:
diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c
index d7c144cd8f7..d9617c20e76 100644
--- a/src/backend/storage/aio/method_worker.c
+++ b/src/backend/storage/aio/method_worker.c
@@ -390,7 +390,6 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
volatile int error_errno = 0;
char cmd[128];
- MyBackendType = B_IO_WORKER;
AuxiliaryProcessMainCommon();
pqsignal(SIGHUP, SignalHandlerForConfigReload);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6f935648ae9..d1babaff023 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -59,6 +59,7 @@
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/proclist.h"
+#include "storage/procsignal.h"
#include "storage/read_stream.h"
#include "storage/smgr.h"
#include "storage/standby.h"
@@ -5895,6 +5896,13 @@ BufferLockUnlock(Buffer buffer, BufferDesc *buf_hdr)
/*
* Acquire the content lock for the buffer, but only if we don't have to wait.
+ *
+ * It is allowed to try to conditionally acquire a lock on a buffer that this
+ * backend has already locked, but the lock acquisition will always fail, even
+ * if the new lock acquisition does not conflict with an already held lock
+ * (e.g. two share locks). This is because we currently do not have space to
+ * track multiple lock ownerships of the same buffer within one backend. That
+ * is ok for the current uses of BufferLockConditional().
*/
static bool
BufferLockConditional(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
@@ -5903,9 +5911,12 @@ BufferLockConditional(Buffer buffer, BufferDesc *buf_hdr, BufferLockMode mode)
bool mustwait;
/*
- * We better not already hold a lock on the buffer.
+ * As described above, if we're trying to lock a buffer that this backend
+ * has already locked, return false, regardless of the existing and
+ * desired lock levels.
*/
- Assert(entry->data.lockmode == BUFFER_LOCK_UNLOCK);
+ if (entry->data.lockmode != BUFFER_LOCK_UNLOCK)
+ return false;
/*
* Lock out cancel/die interrupts until we exit the code section protected
@@ -6560,7 +6571,7 @@ LockBufferForCleanup(Buffer buffer)
* deadlock_timeout for it.
*/
if (logged_recovery_conflict)
- LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+ LogRecoveryConflict(RECOVERY_CONFLICT_BUFFERPIN,
waitStart, GetCurrentTimestamp(),
NULL, false);
@@ -6611,7 +6622,7 @@ LockBufferForCleanup(Buffer buffer)
if (TimestampDifferenceExceeds(waitStart, now,
DeadlockTimeout))
{
- LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+ LogRecoveryConflict(RECOVERY_CONFLICT_BUFFERPIN,
waitStart, now, NULL, true);
logged_recovery_conflict = true;
}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 0f8083651de..5d07b64a1ef 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -164,6 +164,9 @@ bool data_sync_retry = false;
/* How SyncDataDirectory() should do its job. */
int recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+/* How data files should be bulk-extended with zeros. */
+int file_extend_method = DEFAULT_FILE_EXTEND_METHOD;
+
/* Which kinds of files should be opened with PG_O_DIRECT. */
int io_direct_flags;
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 2a3dfedf7e9..1f7e933d500 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -212,12 +212,10 @@ CreateSharedMemoryAndSemaphores(void)
Assert(strcmp("unknown",
GetConfigOption("huge_pages_status", false, false)) != 0);
- InitShmemAccess(seghdr);
-
/*
* Set up shared memory allocation mechanism
*/
- InitShmemAllocation();
+ InitShmemAllocator(seghdr);
/* Initialize subsystems */
CreateOrAttachShmemStructs();
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 6be565155ab..40312df2cac 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -60,6 +60,7 @@
#include "port/pg_lfind.h"
#include "storage/proc.h"
#include "storage/procarray.h"
+#include "storage/procsignal.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/injection_point.h"
@@ -708,7 +709,7 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
/* be sure this is cleared in abort */
proc->delayChkptFlags = 0;
- proc->recoveryConflictPending = false;
+ pg_atomic_write_u32(&proc->pendingRecoveryConflicts, 0);
/* must be cleared with xid/xmin: */
/* avoid unnecessarily dirtying shared cachelines */
@@ -750,7 +751,7 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
/* be sure this is cleared in abort */
proc->delayChkptFlags = 0;
- proc->recoveryConflictPending = false;
+ pg_atomic_write_u32(&proc->pendingRecoveryConflicts, 0);
/* must be cleared with xid/xmin: */
/* avoid unnecessarily dirtying shared cachelines */
@@ -933,7 +934,7 @@ ProcArrayClearTransaction(PGPROC *proc)
proc->vxid.lxid = InvalidLocalTransactionId;
proc->xmin = InvalidTransactionId;
- proc->recoveryConflictPending = false;
+ pg_atomic_write_u32(&proc->pendingRecoveryConflicts, 0);
Assert(!(proc->statusFlags & PROC_VACUUM_STATE_MASK));
Assert(!proc->delayChkptFlags);
@@ -3445,19 +3446,46 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
}
/*
- * CancelVirtualTransaction - used in recovery conflict processing
+ * SignalRecoveryConflict -- signal that a process is blocking recovery
*
- * Returns pid of the process signaled, or 0 if not found.
+ * The 'pid' is redundant with 'proc', but it acts as a cross-check to
+ * detect whether the process has exited and the PGPROC entry has been
+ * reused for a different process.
+ *
+ * Returns true if the process was signaled, or false if not found.
*/
-pid_t
-CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
+bool
+SignalRecoveryConflict(PGPROC *proc, pid_t pid, RecoveryConflictReason reason)
{
- return SignalVirtualTransaction(vxid, sigmode, true);
+ bool found = false;
+
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+ /*
+ * Signal the process if it's still here. If not, that's what we wanted,
+ * so ignore any errors.
+ */
+ if (proc->pid == pid)
+ {
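+ /*
+ * Atomically set the reason bit in the target's pending-conflicts
+ * bitmask, so its PROCSIG_RECOVERY_CONFLICT handler knows which
+ * conflicts are pending.
+ */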
+ (void) pg_atomic_fetch_or_u32(&proc->pendingRecoveryConflicts, (1 << reason));
+
+ /* wake up the process */
+ (void) SendProcSignal(pid, PROCSIG_RECOVERY_CONFLICT, GetNumberFromPGProc(proc));
+ found = true;
+ }
+
+ LWLockRelease(ProcArrayLock);
+
+ return found;
}
-pid_t
-SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
- bool conflictPending)
+/*
+ * SignalRecoveryConflictWithVirtualXID -- signal that a VXID is blocking recovery
+ *
+ * Like SignalRecoveryConflict, but the target is identified by its VXID.
+ */
+bool
+SignalRecoveryConflictWithVirtualXID(VirtualTransactionId vxid, RecoveryConflictReason reason)
{
ProcArrayStruct *arrayP = procArray;
int index;
@@ -3476,15 +3504,16 @@ SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
if (procvxid.procNumber == vxid.procNumber &&
procvxid.localTransactionId == vxid.localTransactionId)
{
- proc->recoveryConflictPending = conflictPending;
pid = proc->pid;
if (pid != 0)
{
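+ /* Record the conflict reason before signaling the process */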
+ (void) pg_atomic_fetch_or_u32(&proc->pendingRecoveryConflicts, (1 << reason));
+
/*
+ * Signal the process if it's still here. If not, that's what
+ * we wanted, so ignore any errors.
*/
- (void) SendProcSignal(pid, sigmode, vxid.procNumber);
+ (void) SendProcSignal(pid, PROCSIG_RECOVERY_CONFLICT, vxid.procNumber);
}
break;
}
@@ -3492,7 +3521,50 @@ SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
LWLockRelease(ProcArrayLock);
- return pid;
+ return pid != 0;
+}
+
+/*
+ * SignalRecoveryConflictWithDatabase --- signal all backends using the specified database
+ *
+ * Like SignalRecoveryConflict, but signals all backends using the database.
+ */
+void
+SignalRecoveryConflictWithDatabase(Oid databaseid, RecoveryConflictReason reason)
+{
+ ProcArrayStruct *arrayP = procArray;
+ int index;
+
+ /* tell all backends to die */
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+
+ for (index = 0; index < arrayP->numProcs; index++)
+ {
+ int pgprocno = arrayP->pgprocnos[index];
+ PGPROC *proc = &allProcs[pgprocno];
+
+ if (databaseid == InvalidOid || proc->databaseId == databaseid)
+ {
+ VirtualTransactionId procvxid;
+ pid_t pid;
+
+ GET_VXID_FROM_PGPROC(procvxid, *proc);
+
+ pid = proc->pid;
+ if (pid != 0)
+ {
+ (void) pg_atomic_fetch_or_u32(&proc->pendingRecoveryConflicts, (1 << reason));
+
+ /*
+ * Signal the process if it's still here. If not, that's what
+ * we wanted, so ignore any errors.
+ */
+ (void) SendProcSignal(pid, PROCSIG_RECOVERY_CONFLICT, procvxid.procNumber);
+ }
+ }
+ }
+
+ LWLockRelease(ProcArrayLock);
}
/*
@@ -3602,7 +3674,7 @@ CountDBConnections(Oid databaseid)
if (proc->pid == 0)
continue; /* do not count prepared xacts */
- if (!proc->isRegularBackend)
+ if (proc->backendType != B_BACKEND)
continue; /* count only regular backend processes */
if (!OidIsValid(databaseid) ||
proc->databaseId == databaseid)
@@ -3614,46 +3686,6 @@ CountDBConnections(Oid databaseid)
return count;
}
-/*
- * CancelDBBackends --- cancel backends that are using specified database
- */
-void
-CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
-{
- ProcArrayStruct *arrayP = procArray;
- int index;
-
- /* tell all backends to die */
- LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
-
- for (index = 0; index < arrayP->numProcs; index++)
- {
- int pgprocno = arrayP->pgprocnos[index];
- PGPROC *proc = &allProcs[pgprocno];
-
- if (databaseid == InvalidOid || proc->databaseId == databaseid)
- {
- VirtualTransactionId procvxid;
- pid_t pid;
-
- GET_VXID_FROM_PGPROC(procvxid, *proc);
-
- proc->recoveryConflictPending = conflictPending;
- pid = proc->pid;
- if (pid != 0)
- {
- /*
- * Kill the pid if it's still here. If not, that's what we
- * wanted so ignore any errors.
- */
- (void) SendProcSignal(pid, sigmode, procvxid.procNumber);
- }
- }
- }
-
- LWLockRelease(ProcArrayLock);
-}
-
/*
* CountUserBackends --- count backends that are used by specified user
* (only regular backends, not any type of background worker)
@@ -3674,7 +3706,7 @@ CountUserBackends(Oid roleid)
if (proc->pid == 0)
continue; /* do not count prepared xacts */
- if (!proc->isRegularBackend)
+ if (proc->backendType != B_BACKEND)
continue; /* count only regular backend processes */
if (proc->roleId == roleid)
count++;
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index 8e56922dcea..5d33559926a 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -697,26 +697,8 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
if (CheckProcSignal(PROCSIG_PARALLEL_APPLY_MESSAGE))
HandleParallelApplyMessageInterrupt();
- if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_DATABASE))
- HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_DATABASE);
-
- if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_TABLESPACE))
- HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_TABLESPACE);
-
- if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOCK))
- HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOCK);
-
- if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
- HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
-
- if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT))
- HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT);
-
- if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
- HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
-
- if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
- HandleRecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+ if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT))
+ HandleRecoveryConflictInterrupt();
SetLatch(MyLatch);
}
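/*
 * Editor's sketch (not part of the patch): the hunk above collapses seven
 * per-reason CheckProcSignal() tests into a single PROCSIG_RECOVERY_CONFLICT
 * check, because the reason now travels in the per-backend bitmask rather
 * than in the signal itself.  A standalone model of the receiving side --
 * the handler only observes the mask and sets a flag; all names are
 * illustrative:
 */
#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned pending_conflicts;      /* set by the signaling side */
static volatile sig_atomic_t interrupt_pending; /* polled by the main loop */

static void sigusr1_handler(int signo)
{
    (void) signo;
    /* async-signal-safe: no locks, no allocation, just read and set a flag */
    if (atomic_load(&pending_conflicts) != 0)
        interrupt_pending = 1;
}

int main(void)
{
    signal(SIGUSR1, sigusr1_handler);
    atomic_fetch_or(&pending_conflicts, 1u << 3);   /* "sender" marks a reason */
    raise(SIGUSR1);
    printf("interrupt_pending = %d\n", (int) interrupt_pending);
    return 0;
}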
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index 1b536363152..9f362ce8641 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -76,20 +76,33 @@
#include "storage/spin.h"
#include "utils/builtins.h"
+/*
+ * This is the first data structure stored in the shared memory segment, at
+ * the offset that PGShmemHeader->content_offset points to. Allocations by
+ * ShmemAlloc() are carved out of the space after this.
+ *
+ * For the base pointer and the total size of the shmem segment, we rely on
+ * the PGShmemHeader.
+ */
+typedef struct ShmemAllocatorData
+{
+ Size free_offset; /* offset to first free space from ShmemBase */
+ HTAB *index; /* copy of ShmemIndex */
+
+ /* protects shared memory and LWLock allocation */
+ slock_t shmem_lock;
+} ShmemAllocatorData;
+
static void *ShmemAllocRaw(Size size, Size *allocated_size);
-static void *ShmemAllocUnlocked(Size size);
/* shared memory global variables */
static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
-
static void *ShmemBase; /* start address of shared memory */
-
static void *ShmemEnd; /* end+1 address of shared memory */
-slock_t *ShmemLock; /* spinlock for shared memory and LWLock
- * allocation */
-
+static ShmemAllocatorData *ShmemAllocator;
+slock_t *ShmemLock; /* points to ShmemAllocator->shmem_lock */
static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
/* To get reliable results for NUMA inquiry we need to "touch pages" once */
@@ -98,49 +111,64 @@ static bool firstNumaTouch = true;
Datum pg_numa_available(PG_FUNCTION_ARGS);
/*
- * InitShmemAccess() --- set up basic pointers to shared memory.
+ * InitShmemAllocator() --- set up basic pointers to shared memory.
+ *
+ * Called at postmaster or stand-alone backend startup, to initialize the
+ * allocator's data structure in the shared memory segment. In EXEC_BACKEND,
+ * this is also called at backend startup, to set up pointers to the shared
+ * memory areas.
*/
void
-InitShmemAccess(PGShmemHeader *seghdr)
+InitShmemAllocator(PGShmemHeader *seghdr)
{
+ Assert(seghdr != NULL);
+
+ /*
+ * We assume the pointer and offset are MAXALIGNed. Not a hard requirement,
+ * but it's true today and keeps the math below simpler.
+ */
+ Assert(seghdr == (void *) MAXALIGN(seghdr));
+ Assert(seghdr->content_offset == MAXALIGN(seghdr->content_offset));
+
ShmemSegHdr = seghdr;
ShmemBase = seghdr;
ShmemEnd = (char *) ShmemBase + seghdr->totalsize;
-}
-/*
- * InitShmemAllocation() --- set up shared-memory space allocation.
- *
- * This should be called only in the postmaster or a standalone backend.
- */
-void
-InitShmemAllocation(void)
-{
- PGShmemHeader *shmhdr = ShmemSegHdr;
- char *aligned;
+#ifndef EXEC_BACKEND
+ Assert(!IsUnderPostmaster);
+#endif
+ if (IsUnderPostmaster)
+ {
+ PGShmemHeader *shmhdr = ShmemSegHdr;
- Assert(shmhdr != NULL);
+ ShmemAllocator = (ShmemAllocatorData *) ((char *) shmhdr + shmhdr->content_offset);
+ ShmemLock = &ShmemAllocator->shmem_lock;
+ }
+ else
+ {
+ Size offset;
- /*
- * Initialize the spinlock used by ShmemAlloc. We must use
- * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
- */
- ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t));
+ /*
+ * Allocations after this point should go through ShmemAlloc, which
+ * expects to allocate everything on cache line boundaries. Make sure
+ * the first allocation begins on a cache line boundary.
+ */
+ offset = CACHELINEALIGN(seghdr->content_offset + sizeof(ShmemAllocatorData));
+ if (offset > seghdr->totalsize)
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of shared memory (%zu bytes requested)",
+ offset)));
- SpinLockInit(ShmemLock);
+ ShmemAllocator = (ShmemAllocatorData *) ((char *) seghdr + seghdr->content_offset);
- /*
- * Allocations after this point should go through ShmemAlloc, which
- * expects to allocate everything on cache line boundaries. Make sure the
- * first allocation begins on a cache line boundary.
- */
- aligned = (char *)
- (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
- shmhdr->freeoffset = aligned - (char *) shmhdr;
-
- /* ShmemIndex can't be set up yet (need LWLocks first) */
- shmhdr->index = NULL;
- ShmemIndex = (HTAB *) NULL;
+ SpinLockInit(&ShmemAllocator->shmem_lock);
+ ShmemLock = &ShmemAllocator->shmem_lock;
+ ShmemAllocator->free_offset = offset;
+ /* ShmemIndex can't be set up yet (need LWLocks first) */
+ ShmemAllocator->index = NULL;
+ ShmemIndex = (HTAB *) NULL;
+ }
}
/*
@@ -209,13 +237,13 @@ ShmemAllocRaw(Size size, Size *allocated_size)
SpinLockAcquire(ShmemLock);
- newStart = ShmemSegHdr->freeoffset;
+ newStart = ShmemAllocator->free_offset;
newFree = newStart + size;
if (newFree <= ShmemSegHdr->totalsize)
{
newSpace = (char *) ShmemBase + newStart;
- ShmemSegHdr->freeoffset = newFree;
+ ShmemAllocator->free_offset = newFree;
}
else
newSpace = NULL;
@@ -228,45 +256,6 @@ ShmemAllocRaw(Size size, Size *allocated_size)
return newSpace;
}
-/*
- * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
- *
- * Allocate space without locking ShmemLock. This should be used for,
- * and only for, allocations that must happen before ShmemLock is ready.
- *
- * We consider maxalign, rather than cachealign, sufficient here.
- */
-static void *
-ShmemAllocUnlocked(Size size)
-{
- Size newStart;
- Size newFree;
- void *newSpace;
-
- /*
- * Ensure allocated space is adequately aligned.
- */
- size = MAXALIGN(size);
-
- Assert(ShmemSegHdr != NULL);
-
- newStart = ShmemSegHdr->freeoffset;
-
- newFree = newStart + size;
- if (newFree > ShmemSegHdr->totalsize)
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of shared memory (%zu bytes requested)",
- size)));
- ShmemSegHdr->freeoffset = newFree;
-
- newSpace = (char *) ShmemBase + newStart;
-
- Assert(newSpace == (void *) MAXALIGN(newSpace));
-
- return newSpace;
-}
-
/*
* ShmemAddrIsValid -- test if an address refers to shared memory
*
@@ -395,16 +384,14 @@ ShmemInitStruct(const char *name, Size size, bool *foundPtr)
if (!ShmemIndex)
{
- PGShmemHeader *shmemseghdr = ShmemSegHdr;
-
/* Must be trying to create/attach to ShmemIndex itself */
Assert(strcmp(name, "ShmemIndex") == 0);
if (IsUnderPostmaster)
{
/* Must be initializing a (non-standalone) backend */
- Assert(shmemseghdr->index != NULL);
- structPtr = shmemseghdr->index;
+ Assert(ShmemAllocator->index != NULL);
+ structPtr = ShmemAllocator->index;
*foundPtr = true;
}
else
@@ -417,9 +404,9 @@ ShmemInitStruct(const char *name, Size size, bool *foundPtr)
* index has been initialized. This should be OK because no other
* process can be accessing shared memory yet.
*/
- Assert(shmemseghdr->index == NULL);
+ Assert(ShmemAllocator->index == NULL);
structPtr = ShmemAlloc(size);
- shmemseghdr->index = structPtr;
+ ShmemAllocator->index = structPtr;
*foundPtr = false;
}
LWLockRelease(ShmemIndexLock);
@@ -553,15 +540,15 @@ pg_get_shmem_allocations(PG_FUNCTION_ARGS)
/* output shared memory allocated but not counted via the shmem index */
values[0] = CStringGetTextDatum("");
nulls[1] = true;
- values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated);
+ values[2] = Int64GetDatum(ShmemAllocator->free_offset - named_allocated);
values[3] = values[2];
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
/* output as-of-yet unused shared memory */
nulls[0] = true;
- values[1] = Int64GetDatum(ShmemSegHdr->freeoffset);
+ values[1] = Int64GetDatum(ShmemAllocator->free_offset);
nulls[1] = false;
- values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemSegHdr->freeoffset);
+ values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemAllocator->free_offset);
values[3] = values[2];
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
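/*
 * Editor's sketch (not part of the patch): ShmemAllocRaw, rewritten above to
 * use ShmemAllocator->free_offset, is a bump allocator -- grab the current
 * offset, advance it, fail when past the end of the segment.  A toy
 * single-process version of the same arithmetic (no spinlock, no real shmem);
 * ALIGN64 mimics CACHELINEALIGN and the 1 KB segment is arbitrary:
 */
#include <stddef.h>
#include <stdio.h>

#define ALIGN64(x) (((x) + 63) & ~(size_t) 63)

static char segment[1024];
static size_t free_offset;      /* stands in for ShmemAllocator->free_offset */

static void *bump_alloc(size_t size)
{
    size_t start = free_offset;
    size_t end = start + ALIGN64(size);

    if (end > sizeof(segment))
        return NULL;            /* "out of shared memory" */
    free_offset = end;
    return segment + start;
}

int main(void)
{
    void *a = bump_alloc(100);
    void *b = bump_alloc(900);  /* fails: only 1024 - 128 bytes remain */

    printf("a=%p b=%p used=%zu\n", a, b, free_offset);
    return 0;
}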
diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c
index 6f7759cd720..d48b4fe3799 100644
--- a/src/backend/storage/ipc/signalfuncs.c
+++ b/src/backend/storage/ipc/signalfuncs.c
@@ -87,10 +87,7 @@ pg_signal_backend(int pid, int sig)
*/
if (!OidIsValid(proc->roleId) || superuser_arg(proc->roleId))
{
- ProcNumber procNumber = GetNumberFromPGProc(proc);
- BackendType backendType = pgstat_get_backend_type_by_proc_number(procNumber);
-
- if (backendType == B_AUTOVAC_WORKER)
+ if (proc->backendType == B_AUTOVAC_WORKER)
{
if (!has_privs_of_role(GetUserId(), ROLE_PG_SIGNAL_AUTOVACUUM_WORKER))
return SIGNAL_BACKEND_NOAUTOVAC;
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index afffab77106..d83afbfb9d6 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -71,13 +71,13 @@ static volatile sig_atomic_t got_standby_delay_timeout = false;
static volatile sig_atomic_t got_standby_lock_timeout = false;
static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
- ProcSignalReason reason,
+ RecoveryConflictReason reason,
uint32 wait_event_info,
bool report_waiting);
-static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
+static void SendRecoveryConflictWithBufferPin(RecoveryConflictReason reason);
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
-static const char *get_recovery_conflict_desc(ProcSignalReason reason);
+static const char *get_recovery_conflict_desc(RecoveryConflictReason reason);
/*
* InitRecoveryTransactionEnvironment
@@ -271,7 +271,7 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
* to be resolved or not.
*/
void
-LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
+LogRecoveryConflict(RecoveryConflictReason reason, TimestampTz wait_start,
TimestampTz now, VirtualTransactionId *wait_list,
bool still_waiting)
{
@@ -358,7 +358,8 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
*/
static void
ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
- ProcSignalReason reason, uint32 wait_event_info,
+ RecoveryConflictReason reason,
+ uint32 wait_event_info,
bool report_waiting)
{
TimestampTz waitStart = 0;
@@ -384,19 +385,19 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
/* Is it time to kill it? */
if (WaitExceedsMaxStandbyDelay(wait_event_info))
{
- pid_t pid;
+ bool signaled;
/*
* Now find out who to throw out of the balloon.
*/
Assert(VirtualTransactionIdIsValid(*waitlist));
- pid = CancelVirtualTransaction(*waitlist, reason);
+ signaled = SignalRecoveryConflictWithVirtualXID(*waitlist, reason);
/*
* Wait a little bit for it to die so that we avoid flooding
* an unresponsive backend when system is heavily loaded.
*/
- if (pid != 0)
+ if (signaled)
pg_usleep(5000L);
}
@@ -489,7 +490,7 @@ ResolveRecoveryConflictWithSnapshot(TransactionId snapshotConflictHorizon,
backends = GetConflictingVirtualXIDs(snapshotConflictHorizon,
locator.dbOid);
ResolveRecoveryConflictWithVirtualXIDs(backends,
- PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
+ RECOVERY_CONFLICT_SNAPSHOT,
WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT,
true);
@@ -560,7 +561,7 @@ ResolveRecoveryConflictWithTablespace(Oid tsid)
temp_file_users = GetConflictingVirtualXIDs(InvalidTransactionId,
InvalidOid);
ResolveRecoveryConflictWithVirtualXIDs(temp_file_users,
- PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
+ RECOVERY_CONFLICT_TABLESPACE,
WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE,
true);
}
@@ -581,7 +582,7 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
*/
while (CountDBBackends(dbid) > 0)
{
- CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true);
+ SignalRecoveryConflictWithDatabase(dbid, RECOVERY_CONFLICT_DATABASE);
/*
* Wait awhile for them to die so that we avoid flooding an
@@ -665,7 +666,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
* because the caller, WaitOnLock(), has already reported that.
*/
ResolveRecoveryConflictWithVirtualXIDs(backends,
- PROCSIG_RECOVERY_CONFLICT_LOCK,
+ RECOVERY_CONFLICT_LOCK,
PG_WAIT_LOCK | locktag.locktag_type,
false);
}
@@ -723,9 +724,8 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
*/
while (VirtualTransactionIdIsValid(*backends))
{
- SignalVirtualTransaction(*backends,
- PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
- false);
+ (void) SignalRecoveryConflictWithVirtualXID(*backends,
+ RECOVERY_CONFLICT_STARTUP_DEADLOCK);
backends++;
}
@@ -803,7 +803,7 @@ ResolveRecoveryConflictWithBufferPin(void)
/*
* We're already behind, so clear a path as quickly as possible.
*/
- SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+ SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN);
}
else
{
@@ -843,7 +843,7 @@ ResolveRecoveryConflictWithBufferPin(void)
ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
if (got_standby_delay_timeout)
- SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+ SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN);
else if (got_standby_deadlock_timeout)
{
/*
@@ -859,7 +859,7 @@ ResolveRecoveryConflictWithBufferPin(void)
* not be so harmful because the period that the buffer is kept pinned
* is basically no so long. But we should fix this?
*/
- SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
}
/*
@@ -874,18 +874,18 @@ ResolveRecoveryConflictWithBufferPin(void)
}
static void
-SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
+SendRecoveryConflictWithBufferPin(RecoveryConflictReason reason)
{
- Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
- reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ Assert(reason == RECOVERY_CONFLICT_BUFFERPIN ||
+ reason == RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK);
/*
* We send signal to all backends to ask them if they are holding the
- * buffer pin which is delaying the Startup process. We must not set the
- * conflict flag yet, since most backends will be innocent. Let the
- * SIGUSR1 handling in each backend decide their own fate.
+ * buffer pin which is delaying the Startup process. Most of them will be
+ * innocent, but we let the SIGUSR1 handling in each backend decide its
+ * own fate.
*/
- CancelDBBackends(InvalidOid, reason, false);
+ SignalRecoveryConflictWithDatabase(InvalidOid, reason);
}
/*
@@ -1490,35 +1490,36 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
/* Return the description of recovery conflict */
static const char *
-get_recovery_conflict_desc(ProcSignalReason reason)
+get_recovery_conflict_desc(RecoveryConflictReason reason)
{
const char *reasonDesc = _("unknown reason");
switch (reason)
{
- case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+ case RECOVERY_CONFLICT_BUFFERPIN:
reasonDesc = _("recovery conflict on buffer pin");
break;
- case PROCSIG_RECOVERY_CONFLICT_LOCK:
+ case RECOVERY_CONFLICT_LOCK:
reasonDesc = _("recovery conflict on lock");
break;
- case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+ case RECOVERY_CONFLICT_TABLESPACE:
reasonDesc = _("recovery conflict on tablespace");
break;
- case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+ case RECOVERY_CONFLICT_SNAPSHOT:
reasonDesc = _("recovery conflict on snapshot");
break;
- case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
+ case RECOVERY_CONFLICT_LOGICALSLOT:
reasonDesc = _("recovery conflict on replication slot");
break;
- case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ reasonDesc = _("recovery conflict on deadlock");
+ break;
+ case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
reasonDesc = _("recovery conflict on buffer deadlock");
break;
- case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+ case RECOVERY_CONFLICT_DATABASE:
reasonDesc = _("recovery conflict on database");
break;
- default:
- break;
}
return reasonDesc;
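/*
 * Editor's note (not part of the patch): the RecoveryConflictReason type
 * used throughout this file is introduced elsewhere in the patch.  From the
 * names used above, its shape is presumably along these lines; member order
 * and the NUM_* terminator are assumptions, not quoted from the patch:
 */
typedef enum RecoveryConflictReason
{
    RECOVERY_CONFLICT_DATABASE,
    RECOVERY_CONFLICT_TABLESPACE,
    RECOVERY_CONFLICT_LOCK,
    RECOVERY_CONFLICT_SNAPSHOT,
    RECOVERY_CONFLICT_LOGICALSLOT,
    RECOVERY_CONFLICT_BUFFERPIN,
    RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK,
    RECOVERY_CONFLICT_STARTUP_DEADLOCK,
} RecoveryConflictReason;

/* reasons double as bit positions in the pendingRecoveryConflicts mask */
#define NUM_RECOVERY_CONFLICT_REASONS (RECOVERY_CONFLICT_STARTUP_DEADLOCK + 1)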
diff --git a/src/backend/storage/large_object/inv_api.c b/src/backend/storage/large_object/inv_api.c
index 466c1a856cb..a3cce496c20 100644
--- a/src/backend/storage/large_object/inv_api.c
+++ b/src/backend/storage/large_object/inv_api.c
@@ -142,7 +142,7 @@ getdatafield(Form_pg_largeobject tuple,
if (VARATT_IS_EXTENDED(datafield))
{
datafield = (bytea *)
- detoast_attr((struct varlena *) datafield);
+ detoast_attr((varlena *) datafield);
freeit = true;
}
len = VARSIZE(datafield) - VARHDRSZ;
diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
index 8334a887618..c0c4ed57d9e 100644
--- a/src/backend/storage/lmgr/deadlock.c
+++ b/src/backend/storage/lmgr/deadlock.c
@@ -135,10 +135,9 @@ static PGPROC *blocking_autovacuum_proc = NULL;
* This does per-backend initialization of the deadlock checker; primarily,
* allocation of working memory for DeadLockCheck. We do this per-backend
* since there's no percentage in making the kernel do copy-on-write
- * inheritance of workspace from the postmaster. We want to allocate the
- * space at startup because (a) the deadlock checker might be invoked when
- * there's no free memory left, and (b) the checker is normally run inside a
- * signal handler, which is a very dangerous place to invoke palloc from.
+ * inheritance of workspace from the postmaster. We allocate the space at
+ * startup because the deadlock checker is run with all the partitions of the
+ * lock table locked, and we want to keep that section as short as possible.
*/
void
InitDeadLockChecking(void)
@@ -192,11 +191,13 @@ InitDeadLockChecking(void)
* last MaxBackends entries in possibleConstraints[] are reserved as
* output workspace for FindLockCycle.
*/
- StaticAssertStmt(MAX_BACKENDS_BITS <= (32 - 3),
- "MAX_BACKENDS_BITS too big for * 4");
- maxPossibleConstraints = MaxBackends * 4;
- possibleConstraints =
- (EDGE *) palloc(maxPossibleConstraints * sizeof(EDGE));
+ {
+ StaticAssertDecl(MAX_BACKENDS_BITS <= (32 - 3),
+ "MAX_BACKENDS_BITS too big for * 4");
+ maxPossibleConstraints = MaxBackends * 4;
+ possibleConstraints =
+ (EDGE *) palloc(maxPossibleConstraints * sizeof(EDGE));
+ }
MemoryContextSwitchTo(oldcxt);
}
@@ -213,8 +214,7 @@ InitDeadLockChecking(void)
*
* On failure, deadlock details are recorded in deadlockDetails[] for
* subsequent printing by DeadLockReport(). That activity is separate
- * because (a) we don't want to do it while holding all those LWLocks,
- * and (b) we are typically invoked inside a signal handler.
+ * because we don't want to do it while holding all those LWLocks.
*/
DeadLockState
DeadLockCheck(PGPROC *proc)
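/*
 * Editor's note on the StaticAssertStmt -> StaticAssertDecl change in the
 * InitDeadLockChecking hunk above: wrapping the code in its own block lets a
 * declaration-form assertion sit at the start of a scope.  With C11, both
 * PostgreSQL macros can reduce to _Static_assert; a minimal standalone
 * equivalent (a sketch, not PostgreSQL's actual definition):
 */
#include <limits.h>

#define StaticAssertDecl(cond, msg) _Static_assert(cond, msg)

StaticAssertDecl(CHAR_BIT == 8, "this sketch assumes 8-bit bytes");

int main(void) { return 0; }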
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 7f0cd784f79..e1168ad3837 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -4148,7 +4148,6 @@ GetSingleProcBlockerStatusData(PGPROC *blocked_proc, BlockedProcsData *data)
if (queued_proc == blocked_proc)
break;
data->waiter_pids[data->npids++] = queued_proc->pid;
- queued_proc = (PGPROC *) queued_proc->links.next;
}
bproc->num_locks = data->nlocks - bproc->first_lock;
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 063826ae576..fd8318bdf3d 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -66,29 +66,18 @@ bool log_lock_waits = true;
/* Pointer to this process's PGPROC struct, if any */
PGPROC *MyProc = NULL;
-/*
- * This spinlock protects the freelist of recycled PGPROC structures.
- * We cannot use an LWLock because the LWLock manager depends on already
- * having a PGPROC and a wait semaphore! But these structures are touched
- * relatively infrequently (only at backend startup or shutdown) and not for
- * very long, so a spinlock is okay.
- */
-NON_EXEC_STATIC slock_t *ProcStructLock = NULL;
-
/* Pointers to shared-memory structures */
PROC_HDR *ProcGlobal = NULL;
NON_EXEC_STATIC PGPROC *AuxiliaryProcs = NULL;
PGPROC *PreparedXactProcs = NULL;
-static DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
-
/* Is a deadlock check pending? */
static volatile sig_atomic_t got_deadlock_timeout;
static void RemoveProcFromArray(int code, Datum arg);
static void ProcKill(int code, Datum arg);
static void AuxiliaryProcKill(int code, Datum arg);
-static void CheckDeadLock(void);
+static DeadLockState CheckDeadLock(void);
/*
@@ -216,6 +205,7 @@ InitProcGlobal(void)
* Initialize the data structures.
*/
ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY;
+ SpinLockInit(&ProcGlobal->freeProcsLock);
dlist_init(&ProcGlobal->freeProcs);
dlist_init(&ProcGlobal->autovacFreeProcs);
dlist_init(&ProcGlobal->bgworkerFreeProcs);
@@ -380,12 +370,6 @@ InitProcGlobal(void)
*/
AuxiliaryProcs = &procs[MaxBackends];
PreparedXactProcs = &procs[MaxBackends + NUM_AUXILIARY_PROCS];
-
- /* Create ProcStructLock spinlock, too */
- ProcStructLock = (slock_t *) ShmemInitStruct("ProcStructLock spinlock",
- sizeof(slock_t),
- &found);
- SpinLockInit(ProcStructLock);
}
/*
@@ -431,17 +415,17 @@ InitProcess(void)
* Try to get a proc struct from the appropriate free list. If this
* fails, we must be out of PGPROC structures (not to mention semaphores).
*
- * While we are holding the ProcStructLock, also copy the current shared
+ * While we are holding the spinlock, also copy the current shared
* estimate of spins_per_delay to local storage.
*/
- SpinLockAcquire(ProcStructLock);
+ SpinLockAcquire(&ProcGlobal->freeProcsLock);
set_spins_per_delay(ProcGlobal->spins_per_delay);
if (!dlist_is_empty(procgloballist))
{
MyProc = dlist_container(PGPROC, links, dlist_pop_head_node(procgloballist));
- SpinLockRelease(ProcStructLock);
+ SpinLockRelease(&ProcGlobal->freeProcsLock);
}
else
{
@@ -451,7 +435,7 @@ InitProcess(void)
* error message. XXX do we need to give a different failure message
* in the autovacuum case?
*/
- SpinLockRelease(ProcStructLock);
+ SpinLockRelease(&ProcGlobal->freeProcsLock);
if (AmWalSenderProcess())
ereport(FATAL,
(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
@@ -486,7 +470,7 @@ InitProcess(void)
MyProc->databaseId = InvalidOid;
MyProc->roleId = InvalidOid;
MyProc->tempNamespaceId = InvalidOid;
- MyProc->isRegularBackend = AmRegularBackendProcess();
+ MyProc->backendType = MyBackendType;
MyProc->delayChkptFlags = 0;
MyProc->statusFlags = 0;
/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
@@ -506,10 +490,10 @@ InitProcess(void)
Assert(dlist_is_empty(&(MyProc->myProcLocks[i])));
}
#endif
- MyProc->recoveryConflictPending = false;
+ pg_atomic_write_u32(&MyProc->pendingRecoveryConflicts, 0);
/* Initialize fields for sync rep */
- MyProc->waitLSN = 0;
+ MyProc->waitLSN = InvalidXLogRecPtr;
MyProc->syncRepState = SYNC_REP_NOT_WAITING;
dlist_node_init(&MyProc->syncRepLinks);
@@ -636,13 +620,13 @@ InitAuxiliaryProcess(void)
RegisterPostmasterChildActive();
/*
- * We use the ProcStructLock to protect assignment and releasing of
+ * We use the freeProcsLock to protect assignment and releasing of
* AuxiliaryProcs entries.
*
- * While we are holding the ProcStructLock, also copy the current shared
+ * While we are holding the spinlock, also copy the current shared
* estimate of spins_per_delay to local storage.
*/
- SpinLockAcquire(ProcStructLock);
+ SpinLockAcquire(&ProcGlobal->freeProcsLock);
set_spins_per_delay(ProcGlobal->spins_per_delay);
@@ -657,7 +641,7 @@ InitAuxiliaryProcess(void)
}
if (proctype >= NUM_AUXILIARY_PROCS)
{
- SpinLockRelease(ProcStructLock);
+ SpinLockRelease(&ProcGlobal->freeProcsLock);
elog(FATAL, "all AuxiliaryProcs are in use");
}
@@ -665,7 +649,7 @@ InitAuxiliaryProcess(void)
/* use volatile pointer to prevent code rearrangement */
((volatile PGPROC *) auxproc)->pid = MyProcPid;
- SpinLockRelease(ProcStructLock);
+ SpinLockRelease(&ProcGlobal->freeProcsLock);
MyProc = auxproc;
MyProcNumber = GetNumberFromPGProc(MyProc);
@@ -685,7 +669,7 @@ InitAuxiliaryProcess(void)
MyProc->databaseId = InvalidOid;
MyProc->roleId = InvalidOid;
MyProc->tempNamespaceId = InvalidOid;
- MyProc->isRegularBackend = false;
+ MyProc->backendType = MyBackendType;
MyProc->delayChkptFlags = 0;
MyProc->statusFlags = 0;
MyProc->lwWaiting = LW_WS_NOT_WAITING;
@@ -791,7 +775,7 @@ HaveNFreeProcs(int n, int *nfree)
Assert(n > 0);
Assert(nfree);
- SpinLockAcquire(ProcStructLock);
+ SpinLockAcquire(&ProcGlobal->freeProcsLock);
*nfree = 0;
dlist_foreach(iter, &ProcGlobal->freeProcs)
@@ -801,7 +785,7 @@ HaveNFreeProcs(int n, int *nfree)
break;
}
- SpinLockRelease(ProcStructLock);
+ SpinLockRelease(&ProcGlobal->freeProcsLock);
return (*nfree == n);
}
@@ -982,9 +966,9 @@ ProcKill(int code, Datum arg)
procgloballist = leader->procgloballist;
/* Leader exited first; return its PGPROC. */
- SpinLockAcquire(ProcStructLock);
+ SpinLockAcquire(&ProcGlobal->freeProcsLock);
dlist_push_head(procgloballist, &leader->links);
- SpinLockRelease(ProcStructLock);
+ SpinLockRelease(&ProcGlobal->freeProcsLock);
}
}
else if (leader != MyProc)
@@ -1015,7 +999,7 @@ ProcKill(int code, Datum arg)
proc->vxid.lxid = InvalidTransactionId;
procgloballist = proc->procgloballist;
- SpinLockAcquire(ProcStructLock);
+ SpinLockAcquire(&ProcGlobal->freeProcsLock);
/*
* If we're still a member of a locking group, that means we're a leader
@@ -1034,7 +1018,7 @@ ProcKill(int code, Datum arg)
/* Update shared estimate of spins_per_delay */
ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);
- SpinLockRelease(ProcStructLock);
+ SpinLockRelease(&ProcGlobal->freeProcsLock);
}
/*
@@ -1074,7 +1058,7 @@ AuxiliaryProcKill(int code, Datum arg)
MyProcNumber = INVALID_PROC_NUMBER;
DisownLatch(&proc->procLatch);
- SpinLockAcquire(ProcStructLock);
+ SpinLockAcquire(&ProcGlobal->freeProcsLock);
/* Mark auxiliary proc no longer in use */
proc->pid = 0;
@@ -1084,7 +1068,7 @@ AuxiliaryProcKill(int code, Datum arg)
/* Update shared estimate of spins_per_delay */
ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);
- SpinLockRelease(ProcStructLock);
+ SpinLockRelease(&ProcGlobal->freeProcsLock);
}
/*
@@ -1322,6 +1306,7 @@ ProcSleep(LOCALLOCK *locallock)
bool allow_autovacuum_cancel = true;
bool logged_recovery_conflict = false;
ProcWaitStatus myWaitStatus;
+ DeadLockState deadlock_state;
/* The caller must've armed the on-error cleanup mechanism */
Assert(GetAwaitedLock() == locallock);
@@ -1447,7 +1432,7 @@ ProcSleep(LOCALLOCK *locallock)
* because the startup process here has already waited
* longer than deadlock_timeout.
*/
- LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
+ LogRecoveryConflict(RECOVERY_CONFLICT_LOCK,
standbyWaitStart, now,
cnt > 0 ? vxids : NULL, true);
logged_recovery_conflict = true;
@@ -1462,7 +1447,7 @@ ProcSleep(LOCALLOCK *locallock)
/* check for deadlocks first, as that's probably log-worthy */
if (got_deadlock_timeout)
{
- CheckDeadLock();
+ deadlock_state = CheckDeadLock();
got_deadlock_timeout = false;
}
CHECK_FOR_INTERRUPTS();
@@ -1688,7 +1673,7 @@ ProcSleep(LOCALLOCK *locallock)
* startup process waited longer than deadlock_timeout for it.
*/
if (InHotStandby && logged_recovery_conflict)
- LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
+ LogRecoveryConflict(RECOVERY_CONFLICT_LOCK,
standbyWaitStart, GetCurrentTimestamp(),
NULL, false);
@@ -1785,14 +1770,14 @@ ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
*
* We only get to this routine, if DEADLOCK_TIMEOUT fired while waiting for a
* lock to be released by some other process. Check if there's a deadlock; if
- * not, just return. (But signal ProcSleep to log a message, if
- * log_lock_waits is true.) If we have a real deadlock, remove ourselves from
- * the lock's wait queue and signal an error to ProcSleep.
+ * not, just return. If we have a real deadlock, remove ourselves from the
+ * lock's wait queue.
*/
-static void
+static DeadLockState
CheckDeadLock(void)
{
int i;
+ DeadLockState result;
/*
* Acquire exclusive lock on the entire shared lock data structures. Must
@@ -1819,17 +1804,20 @@ CheckDeadLock(void)
*/
if (MyProc->links.prev == NULL ||
MyProc->links.next == NULL)
+ {
+ result = DS_NO_DEADLOCK;
goto check_done;
+ }
#ifdef LOCK_DEBUG
if (Debug_deadlocks)
DumpAllLocks();
#endif
- /* Run the deadlock check, and set deadlock_state for use by ProcSleep */
- deadlock_state = DeadLockCheck(MyProc);
+ /* Run the deadlock check */
+ result = DeadLockCheck(MyProc);
- if (deadlock_state == DS_HARD_DEADLOCK)
+ if (result == DS_HARD_DEADLOCK)
{
/*
* Oops. We have a deadlock.
@@ -1841,7 +1829,7 @@ CheckDeadLock(void)
*
* RemoveFromWaitQueue sets MyProc->waitStatus to
* PROC_WAIT_STATUS_ERROR, so ProcSleep will report an error after we
- * return from the signal handler.
+ * return.
*/
Assert(MyProc->waitLock != NULL);
RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
@@ -1868,6 +1856,8 @@ CheckDeadLock(void)
check_done:
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
LWLockRelease(LockHashPartitionLockByIndex(i));
+
+ return result;
}
/*
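/*
 * Editor's sketch (not part of the patch): the hunks above fold the old
 * ProcStructLock spinlock into ProcGlobal->freeProcsLock, but the access
 * pattern is unchanged -- pop or push a free-list entry under a short
 * spinlock hold.  A toy version using a pthread spinlock; all names are
 * illustrative (build with -pthread):
 */
#include <pthread.h>
#include <stdio.h>

typedef struct Proc
{
    struct Proc *next;
    int          id;
} Proc;

static pthread_spinlock_t free_procs_lock; /* stands in for freeProcsLock */
static Proc *free_procs;                   /* stands in for ProcGlobal->freeProcs */

static Proc *
acquire_proc(void)
{
    Proc *p;

    pthread_spin_lock(&free_procs_lock);
    p = free_procs;                 /* pop the head, or NULL if exhausted */
    if (p)
        free_procs = p->next;
    pthread_spin_unlock(&free_procs_lock);
    return p;
}

static void
release_proc(Proc *p)
{
    pthread_spin_lock(&free_procs_lock);
    p->next = free_procs;           /* push back onto the free list */
    free_procs = p;
    pthread_spin_unlock(&free_procs_lock);
}

int
main(void)
{
    static Proc pool[2] = {{&pool[1], 0}, {NULL, 1}};

    pthread_spin_init(&free_procs_lock, PTHREAD_PROCESS_PRIVATE);
    free_procs = pool;

    Proc *p = acquire_proc();
    printf("got proc %d\n", p->id);
    release_proc(p);
    return 0;
}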
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index a2625871185..443434e4ea8 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -602,13 +602,24 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
* that decision should be made though? For now just use a cutoff of
* 8, anything between 4 and 8 worked OK in some local testing.
*/
- if (numblocks > 8)
+ if (numblocks > 8 &&
+ file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS)
{
- int ret;
+ int ret = 0;
- ret = FileFallocate(v->mdfd_vfd,
- seekpos, (pgoff_t) BLCKSZ * numblocks,
- WAIT_EVENT_DATA_FILE_EXTEND);
+#ifdef HAVE_POSIX_FALLOCATE
+ if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE)
+ {
+ ret = FileFallocate(v->mdfd_vfd,
+ seekpos, (pgoff_t) BLCKSZ * numblocks,
+ WAIT_EVENT_DATA_FILE_EXTEND);
+ }
+ else
+#endif
+ {
+ elog(ERROR, "unsupported file_extend_method: %d",
+ file_extend_method);
+ }
if (ret != 0)
{
ereport(ERROR,
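/*
 * Editor's note (not part of the patch): file_extend_method and the
 * FILE_EXTEND_METHOD_* values tested above are defined elsewhere in the
 * patch, presumably as an enum-valued GUC.  A plausible shape -- the member
 * names are taken from the hunk, everything else is an assumption:
 */
typedef enum FileExtendMethod
{
    FILE_EXTEND_METHOD_POSIX_FALLOCATE, /* reserve blocks via posix_fallocate() */
    FILE_EXTEND_METHOD_WRITE_ZEROS,     /* extend by writing zero-filled blocks */
} FileExtendMethod;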
diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c
index 94a7b839563..c517115927c 100644
--- a/src/backend/tcop/backend_startup.c
+++ b/src/backend/tcop/backend_startup.c
@@ -846,10 +846,9 @@ ProcessStartupPacket(Port *port, bool ssl_done, bool gss_done)
if (strlen(port->user_name) >= NAMEDATALEN)
port->user_name[NAMEDATALEN - 1] = '\0';
+ Assert(MyBackendType == B_BACKEND || MyBackendType == B_DEAD_END_BACKEND);
if (am_walsender)
MyBackendType = B_WAL_SENDER;
- else
- MyBackendType = B_BACKEND;
/*
* Normal walsender backends, e.g. for streaming replication, are not
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index e54bf1e760f..d01a09dd0c4 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -67,6 +67,7 @@
#include "storage/proc.h"
#include "storage/procsignal.h"
#include "storage/sinval.h"
+#include "storage/standby.h"
#include "tcop/backend_startup.h"
#include "tcop/fastpath.h"
#include "tcop/pquery.h"
@@ -155,10 +156,6 @@ static const char *userDoption = NULL; /* -D switch */
static bool EchoQuery = false; /* -E switch */
static bool UseSemiNewlineNewline = false; /* -j switch */
-/* whether or not, and why, we were canceled by conflict with recovery */
-static volatile sig_atomic_t RecoveryConflictPending = false;
-static volatile sig_atomic_t RecoveryConflictPendingReasons[NUM_PROCSIGNALS];
-
/* reused buffer to pass to SendRowDescriptionMessage() */
static MemoryContext row_description_context = NULL;
static StringInfoData row_description_buf;
@@ -175,7 +172,6 @@ static void forbidden_in_wal_sender(char firstchar);
static bool check_log_statement(List *stmt_list);
static int errdetail_execute(List *raw_parsetree_list);
static int errdetail_params(ParamListInfo params);
-static int errdetail_abort(void);
static void bind_param_error_callback(void *arg);
static void start_xact_command(void);
static void finish_xact_command(void);
@@ -183,6 +179,9 @@ static bool IsTransactionExitStmt(Node *parsetree);
static bool IsTransactionExitStmtList(List *pstmts);
static bool IsTransactionStmtList(List *pstmts);
static void drop_unnamed_stmt(void);
+static void ProcessRecoveryConflictInterrupts(void);
+static void ProcessRecoveryConflictInterrupt(RecoveryConflictReason reason);
+static void report_recovery_conflict(RecoveryConflictReason reason);
static void log_disconnections(int code, Datum arg);
static void enable_statement_timeout(void);
static void disable_statement_timeout(void);
@@ -1117,7 +1116,7 @@ exec_simple_query(const char *query_string)
/*
* Get the command name for use in status display (it also becomes the
- * default completion tag, down inside PortalRun). Set ps_status and
+ * default completion tag, in PortalDefineQuery). Set ps_status and
* do any special start-of-SQL-command processing needed by the
* destination.
*/
@@ -1141,8 +1140,7 @@ exec_simple_query(const char *query_string)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
- "commands ignored until end of transaction block"),
- errdetail_abort()));
+ "commands ignored until end of transaction block")));
/* Make sure we are in a transaction command */
start_xact_command();
@@ -1498,8 +1496,7 @@ exec_parse_message(const char *query_string, /* string to execute */
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
- "commands ignored until end of transaction block"),
- errdetail_abort()));
+ "commands ignored until end of transaction block")));
/*
* Create the CachedPlanSource before we do parse analysis, since it
@@ -1750,8 +1747,7 @@ exec_bind_message(StringInfo input_message)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
- "commands ignored until end of transaction block"),
- errdetail_abort()));
+ "commands ignored until end of transaction block")));
/*
* Create the portal. Allow silent replacement of an existing portal only
@@ -2255,8 +2251,7 @@ exec_execute_message(const char *portal_name, long max_rows)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
- "commands ignored until end of transaction block"),
- errdetail_abort()));
+ "commands ignored until end of transaction block")));
/* Check for cancel signal before we start execution */
CHECK_FOR_INTERRUPTS();
@@ -2536,54 +2531,40 @@ errdetail_params(ParamListInfo params)
return 0;
}
-/*
- * errdetail_abort
- *
- * Add an errdetail() line showing abort reason, if any.
- */
-static int
-errdetail_abort(void)
-{
- if (MyProc->recoveryConflictPending)
- errdetail("Abort reason: recovery conflict");
-
- return 0;
-}
-
/*
* errdetail_recovery_conflict
*
* Add an errdetail() line showing conflict source.
*/
static int
-errdetail_recovery_conflict(ProcSignalReason reason)
+errdetail_recovery_conflict(RecoveryConflictReason reason)
{
switch (reason)
{
- case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+ case RECOVERY_CONFLICT_BUFFERPIN:
errdetail("User was holding shared buffer pin for too long.");
break;
- case PROCSIG_RECOVERY_CONFLICT_LOCK:
+ case RECOVERY_CONFLICT_LOCK:
errdetail("User was holding a relation lock for too long.");
break;
- case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+ case RECOVERY_CONFLICT_TABLESPACE:
errdetail("User was or might have been using tablespace that must be dropped.");
break;
- case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+ case RECOVERY_CONFLICT_SNAPSHOT:
errdetail("User query might have needed to see row versions that must be removed.");
break;
- case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
+ case RECOVERY_CONFLICT_LOGICALSLOT:
errdetail("User was using a logical replication slot that must be invalidated.");
break;
- case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ errdetail("User transaction caused deadlock with recovery.");
+ break;
+ case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
errdetail("User transaction caused buffer deadlock with recovery.");
break;
- case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+ case RECOVERY_CONFLICT_DATABASE:
errdetail("User was connected to a database that must be dropped.");
break;
- default:
- break;
- /* no errdetail */
}
return 0;
@@ -2692,8 +2673,7 @@ exec_describe_statement_message(const char *stmt_name)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
- "commands ignored until end of transaction block"),
- errdetail_abort()));
+ "commands ignored until end of transaction block")));
if (whereToSendOutput != DestRemote)
return; /* can't actually do anything... */
@@ -2769,8 +2749,7 @@ exec_describe_portal_message(const char *portal_name)
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
- "commands ignored until end of transaction block"),
- errdetail_abort()));
+ "commands ignored until end of transaction block")));
if (whereToSendOutput != DestRemote)
return; /* can't actually do anything... */
@@ -3088,15 +3067,14 @@ FloatExceptionHandler(SIGNAL_ARGS)
}
/*
- * Tell the next CHECK_FOR_INTERRUPTS() to check for a particular type of
- * recovery conflict. Runs in a SIGUSR1 handler.
+ * Tell the next CHECK_FOR_INTERRUPTS() to process recovery conflicts. Runs
+ * in a SIGUSR1 handler.
*/
void
-HandleRecoveryConflictInterrupt(ProcSignalReason reason)
+HandleRecoveryConflictInterrupt(void)
{
- RecoveryConflictPendingReasons[reason] = true;
- RecoveryConflictPending = true;
- InterruptPending = true;
+ if (pg_atomic_read_u32(&MyProc->pendingRecoveryConflicts) != 0)
+ InterruptPending = true;
/* latch will be set by procsignal_sigusr1_handler */
}
@@ -3104,49 +3082,73 @@ HandleRecoveryConflictInterrupt(ProcSignalReason reason)
* Check one individual conflict reason.
*/
static void
-ProcessRecoveryConflictInterrupt(ProcSignalReason reason)
+ProcessRecoveryConflictInterrupt(RecoveryConflictReason reason)
{
switch (reason)
{
- case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
/*
+ * The startup process is waiting on a lock held by us, and has
+ * requested us to check if it is a deadlock (i.e. the deadlock
+ * timeout expired).
+ *
* If we aren't waiting for a lock we can never deadlock.
*/
if (GetAwaitedLock() == NULL)
return;
- /* Intentional fall through to check wait for pin */
- /* FALLTHROUGH */
+ /* Set the flag so that ProcSleep() will check for deadlocks. */
+ CheckDeadLockAlert();
+ return;
- case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+ case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
/*
- * If PROCSIG_RECOVERY_CONFLICT_BUFFERPIN is requested but we
- * aren't blocking the Startup process there is nothing more to
- * do.
+ * The startup process is waiting on a buffer pin, and has
+ * requested us to check if there is a deadlock involving the pin.
*
- * When PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK is requested,
- * if we're waiting for locks and the startup process is not
- * waiting for buffer pin (i.e., also waiting for locks), we set
- * the flag so that ProcSleep() will check for deadlocks.
+ * If we're not waiting on a lock, there can be no deadlock.
+ */
+ if (GetAwaitedLock() == NULL)
+ return;
+
+ /*
+ * If we're not holding the buffer pin, also no deadlock. (The
+ * startup process doesn't know who's holding the pin, and sends
+ * this signal to *all* backends, so this is the common case.)
*/
if (!HoldingBufferPinThatDelaysRecovery())
- {
- if (reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK &&
- GetStartupBufferPinWaitBufId() < 0)
- CheckDeadLockAlert();
return;
- }
- MyProc->recoveryConflictPending = true;
+ /*
+ * Otherwise, we probably have a deadlock. Unfortunately the
+ * normal deadlock detector doesn't know about buffer pins, so we
+ * cannot perform a comprehensive deadlock check. Instead, we
+ * just assume that it is a deadlock if the above two conditions
+ * are met. In principle this can lead to false positives, but
+ * they are rare in practice because sessions on a hot standby
+ * server rarely hold locks that can block other backends.
+ */
+ report_recovery_conflict(reason);
+ return;
+
+ case RECOVERY_CONFLICT_BUFFERPIN:
- /* Intentional fall through to error handling */
- /* FALLTHROUGH */
+ /*
+ * Someone is holding a buffer pin that the startup process is
+ * waiting for, and it got tired of waiting. If that's us, error
+ * out to release the pin.
+ */
+ if (!HoldingBufferPinThatDelaysRecovery())
+ return;
- case PROCSIG_RECOVERY_CONFLICT_LOCK:
- case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
- case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+ report_recovery_conflict(reason);
+ return;
+
+ case RECOVERY_CONFLICT_LOCK:
+ case RECOVERY_CONFLICT_TABLESPACE:
+ case RECOVERY_CONFLICT_SNAPSHOT:
/*
* If we aren't in a transaction any longer then ignore.
@@ -3154,108 +3156,128 @@ ProcessRecoveryConflictInterrupt(ProcSignalReason reason)
if (!IsTransactionOrTransactionBlock())
return;
- /* FALLTHROUGH */
+ report_recovery_conflict(reason);
+ return;
- case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
+ case RECOVERY_CONFLICT_LOGICALSLOT:
+ report_recovery_conflict(reason);
+ return;
- /*
- * If we're not in a subtransaction then we are OK to throw an
- * ERROR to resolve the conflict. Otherwise drop through to the
- * FATAL case.
- *
- * PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT is a special case that
- * always throws an ERROR (ie never promotes to FATAL), though it
- * still has to respect QueryCancelHoldoffCount, so it shares this
- * code path. Logical decoding slots are only acquired while
- * performing logical decoding. During logical decoding no user
- * controlled code is run. During [sub]transaction abort, the
- * slot is released. Therefore user controlled code cannot
- * intercept an error before the replication slot is released.
- *
- * XXX other times that we can throw just an ERROR *may* be
- * PROCSIG_RECOVERY_CONFLICT_LOCK if no locks are held in parent
- * transactions
- *
- * PROCSIG_RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by
- * parent transactions and the transaction is not
- * transaction-snapshot mode
- *
- * PROCSIG_RECOVERY_CONFLICT_TABLESPACE if no temp files or
- * cursors open in parent transactions
- */
- if (reason == PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT ||
- !IsSubTransaction())
- {
- /*
- * If we already aborted then we no longer need to cancel. We
- * do this here since we do not wish to ignore aborted
- * subtransactions, which must cause FATAL, currently.
- */
- if (IsAbortedTransactionBlockState())
- return;
+ case RECOVERY_CONFLICT_DATABASE:
- /*
- * If a recovery conflict happens while we are waiting for
- * input from the client, the client is presumably just
- * sitting idle in a transaction, preventing recovery from
- * making progress. We'll drop through to the FATAL case
- * below to dislodge it, in that case.
- */
- if (!DoingCommandRead)
- {
- /* Avoid losing sync in the FE/BE protocol. */
- if (QueryCancelHoldoffCount != 0)
- {
- /*
- * Re-arm and defer this interrupt until later. See
- * similar code in ProcessInterrupts().
- */
- RecoveryConflictPendingReasons[reason] = true;
- RecoveryConflictPending = true;
- InterruptPending = true;
- return;
- }
+ /* The database is being dropped; terminate the session */
+ report_recovery_conflict(reason);
+ return;
+ }
+ elog(FATAL, "unrecognized conflict mode: %d", (int) reason);
+}
- /*
- * We are cleared to throw an ERROR. Either it's the
- * logical slot case, or we have a top-level transaction
- * that we can abort and a conflict that isn't inherently
- * non-retryable.
- */
- LockErrorCleanup();
- pgstat_report_recovery_conflict(reason);
- ereport(ERROR,
- (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("canceling statement due to conflict with recovery"),
- errdetail_recovery_conflict(reason)));
- break;
- }
- }
+/*
+ * This transaction or session is conflicting with recovery and needs to be
+ * killed. Roll back the transaction, if that's sufficient, or terminate the
+ * connection, or do nothing if we're already in an aborted state.
+ */
+static void
+report_recovery_conflict(RecoveryConflictReason reason)
+{
+ bool fatal;
- /* Intentional fall through to session cancel */
- /* FALLTHROUGH */
+ if (reason == RECOVERY_CONFLICT_DATABASE)
+ {
+ /* note: no hint about reconnecting, and different errcode */
+ pgstat_report_recovery_conflict(reason);
+ ereport(FATAL,
+ (errcode(ERRCODE_DATABASE_DROPPED),
+ errmsg("terminating connection due to conflict with recovery"),
+ errdetail_recovery_conflict(reason)));
+ }
+ if (reason == RECOVERY_CONFLICT_LOGICALSLOT)
+ {
+ /*
+ * RECOVERY_CONFLICT_LOGICALSLOT is a special case that always throws
+ * an ERROR (ie never promotes to FATAL), though it still has to
+ * respect QueryCancelHoldoffCount, so it shares this code path.
+ * Logical decoding slots are only acquired while performing logical
+ * decoding. During logical decoding no user controlled code is run.
+ * During [sub]transaction abort, the slot is released. Therefore
+ * user controlled code cannot intercept an error before the
+ * replication slot is released.
+ */
+ fatal = false;
+ }
+ else
+ {
+ fatal = IsSubTransaction();
+ }
- case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+ /*
+ * If we're not in a subtransaction then we are OK to throw an ERROR to
+ * resolve the conflict.
+ *
+ * XXX other times that we can throw just an ERROR *may* be
+ * RECOVERY_CONFLICT_LOCK if no locks are held in parent transactions
+ *
+ * RECOVERY_CONFLICT_SNAPSHOT if no snapshots are held by parent
+ * transactions and the transaction is not transaction-snapshot mode
+ *
+ * RECOVERY_CONFLICT_TABLESPACE if no temp files or cursors open in parent
+ * transactions
+ */
+ if (!fatal)
+ {
+ /*
+ * If we already aborted then we no longer need to cancel. We do this
+ * here since we do not wish to ignore aborted subtransactions, which
+ * must cause FATAL, currently.
+ */
+ if (IsAbortedTransactionBlockState())
+ return;
+
+ /*
+ * If a recovery conflict happens while we are waiting for input from
+ * the client, the client is presumably just sitting idle in a
+ * transaction, preventing recovery from making progress. In that
+ * case we'll drop through to the FATAL case below to dislodge it.
+ */
+ if (!DoingCommandRead)
+ {
+ /* Avoid losing sync in the FE/BE protocol. */
+ if (QueryCancelHoldoffCount != 0)
+ {
+ /*
+ * Re-arm and defer this interrupt until later. See similar
+ * code in ProcessInterrupts().
+ */
+ (void) pg_atomic_fetch_or_u32(&MyProc->pendingRecoveryConflicts, (1 << reason));
+ InterruptPending = true;
+ return;
+ }
/*
- * Retrying is not possible because the database is dropped, or we
- * decided above that we couldn't resolve the conflict with an
- * ERROR and fell through. Terminate the session.
+ * We are cleared to throw an ERROR. Either it's the logical slot
+ * case, or we have a top-level transaction that we can abort and
+ * a conflict that isn't inherently non-retryable.
*/
+ LockErrorCleanup();
pgstat_report_recovery_conflict(reason);
- ereport(FATAL,
- (errcode(reason == PROCSIG_RECOVERY_CONFLICT_DATABASE ?
- ERRCODE_DATABASE_DROPPED :
- ERRCODE_T_R_SERIALIZATION_FAILURE),
- errmsg("terminating connection due to conflict with recovery"),
- errdetail_recovery_conflict(reason),
- errhint("In a moment you should be able to reconnect to the"
- " database and repeat your command.")));
- break;
-
- default:
- elog(FATAL, "unrecognized conflict mode: %d", (int) reason);
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("canceling statement due to conflict with recovery"),
+ errdetail_recovery_conflict(reason)));
+ }
}
+
+ /*
+ * We couldn't resolve the conflict with ERROR, so terminate the whole
+ * session.
+ */
+ pgstat_report_recovery_conflict(reason);
+ ereport(FATAL,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("terminating connection due to conflict with recovery"),
+ errdetail_recovery_conflict(reason),
+ errhint("In a moment you should be able to reconnect to the"
+ " database and repeat your command.")));
}
/*
@@ -3264,6 +3286,8 @@ ProcessRecoveryConflictInterrupt(ProcSignalReason reason)
static void
ProcessRecoveryConflictInterrupts(void)
{
+ uint32 pending;
+
/*
* We don't need to worry about joggling the elbow of proc_exit, because
* proc_exit_prepare() holds interrupts, so ProcessInterrupts() won't call
@@ -3271,17 +3295,27 @@ ProcessRecoveryConflictInterrupts(void)
*/
Assert(!proc_exit_inprogress);
Assert(InterruptHoldoffCount == 0);
- Assert(RecoveryConflictPending);
- RecoveryConflictPending = false;
+ /* Are any recovery conflicts pending? */
+ pending = pg_atomic_read_membarrier_u32(&MyProc->pendingRecoveryConflicts);
+ if (pending == 0)
+ return;
- for (ProcSignalReason reason = PROCSIG_RECOVERY_CONFLICT_FIRST;
- reason <= PROCSIG_RECOVERY_CONFLICT_LAST;
+ /*
+ * Check the conflicts one by one, clearing each flag just before
+ * processing the corresponding conflict. This ensures that if an
+ * error is thrown while processing one of several pending
+ * conflicts, we come back here to process the remaining ones.
+ */
+ for (RecoveryConflictReason reason = 0;
+ reason < NUM_RECOVERY_CONFLICT_REASONS;
reason++)
{
- if (RecoveryConflictPendingReasons[reason])
+ if ((pending & (1 << reason)) != 0)
{
- RecoveryConflictPendingReasons[reason] = false;
+ /* clear the flag */
+ (void) pg_atomic_fetch_and_u32(&MyProc->pendingRecoveryConflicts, ~(1 << reason));
+
ProcessRecoveryConflictInterrupt(reason);
}
}
@@ -3472,7 +3506,7 @@ ProcessInterrupts(void)
}
}
- if (RecoveryConflictPending)
+ if (pg_atomic_read_u32(&MyProc->pendingRecoveryConflicts) != 0)
ProcessRecoveryConflictInterrupts();
if (IdleInTransactionSessionTimeoutPending)
@@ -3866,7 +3900,7 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("--%s must be first argument", optarg)));
- /* FALLTHROUGH */
+ pg_fallthrough;
case 'c':
{
char *name,
@@ -4990,7 +5024,7 @@ PostgresMain(const char *dbname, const char *username)
/* for the cumulative statistics system */
pgStatSessionEndCause = DISCONNECT_CLIENT_EOF;
- /* FALLTHROUGH */
+ pg_fallthrough;
case PqMsg_Terminate:
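/*
 * Editor's sketch (not part of the patch): when report_recovery_conflict
 * above must defer (the QueryCancelHoldoffCount case), it re-arms by OR-ing
 * the reason bit back into the pending mask.  A standalone model of why
 * clear-before-process plus re-arm never loses a request; all names are
 * illustrative:
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned pending;
static bool holdoff = true;     /* stands in for QueryCancelHoldoffCount != 0 */

static void process_one(int reason)
{
    atomic_fetch_and(&pending, ~(1u << reason));    /* clear before acting */
    if (holdoff)
    {
        atomic_fetch_or(&pending, 1u << reason);    /* defer: re-arm the bit */
        printf("deferred reason %d, mask=%#x\n", reason, atomic_load(&pending));
        return;
    }
    printf("resolved reason %d\n", reason);
}

int main(void)
{
    atomic_fetch_or(&pending, 1u << 2);
    process_one(2);             /* deferred while the holdoff is in effect */
    holdoff = false;
    process_one(2);             /* now resolved; the request was not lost */
    return 0;
}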
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 34dd6e18df5..bf707f2d57f 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -2001,7 +2001,7 @@ ExecDropStmt(DropStmt *stmt, bool isTopLevel)
if (stmt->concurrent)
PreventInTransactionBlock(isTopLevel,
"DROP INDEX CONCURRENTLY");
- /* fall through */
+ pg_fallthrough;
case OBJECT_TABLE:
case OBJECT_SEQUENCE:
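/*
 * Editor's note (not part of the patch): pg_fallthrough, which this patch
 * substitutes for the old FALLTHROUGH comments, is defined elsewhere in the
 * patch.  It presumably expands to the compiler's fallthrough attribute,
 * which -- unlike a comment -- is still recognized at the strictest
 * -Wimplicit-fallthrough levels.  A plausible definition (an assumption,
 * not the patch's actual one):
 */
#ifdef __has_attribute
#if __has_attribute(fallthrough)
#define pg_fallthrough __attribute__((fallthrough))
#endif
#endif
#ifndef pg_fallthrough
#define pg_fallthrough ((void) 0)
#endif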
diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c
index 6dee28ae525..3937f25bcc6 100644
--- a/src/backend/tsearch/dict_synonym.c
+++ b/src/backend/tsearch/dict_synonym.c
@@ -50,7 +50,7 @@ findwrd(char *in, char **end, uint16 *flags)
/* Skip leading spaces */
while (*in && isspace((unsigned char) *in))
- in += pg_mblen(in);
+ in += pg_mblen_cstr(in);
/* Return NULL on empty lines */
if (*in == '\0')
@@ -65,7 +65,7 @@ findwrd(char *in, char **end, uint16 *flags)
while (*in && !isspace((unsigned char) *in))
{
lastchar = in;
- in += pg_mblen(in);
+ in += pg_mblen_cstr(in);
}
if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c
index 7253f64e5f7..0fd4cf3dfa8 100644
--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -191,7 +191,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
/* is it a comment? */
while (*ptr && isspace((unsigned char) *ptr))
- ptr += pg_mblen(ptr);
+ ptr += pg_mblen_cstr(ptr);
if (t_iseq(ptr, '#') || *ptr == '\0' ||
t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
@@ -237,13 +237,13 @@ thesaurusRead(const char *filename, DictThesaurus *d)
{
useasis = true;
state = TR_INSUBS;
- beginwrd = ptr + pg_mblen(ptr);
+ beginwrd = ptr + pg_mblen_cstr(ptr);
}
else if (t_iseq(ptr, '\\'))
{
useasis = false;
state = TR_INSUBS;
- beginwrd = ptr + pg_mblen(ptr);
+ beginwrd = ptr + pg_mblen_cstr(ptr);
}
else if (!isspace((unsigned char) *ptr))
{
@@ -267,7 +267,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
else
elog(ERROR, "unrecognized thesaurus state: %d", state);
- ptr += pg_mblen(ptr);
+ ptr += pg_mblen_cstr(ptr);
}
if (state == TR_INSUBS)
diff --git a/src/backend/tsearch/regis.c b/src/backend/tsearch/regis.c
index 1c7d5c361f1..51ba78fabbc 100644
--- a/src/backend/tsearch/regis.c
+++ b/src/backend/tsearch/regis.c
@@ -37,7 +37,7 @@ RS_isRegis(const char *str)
{
if (state == RS_IN_WAIT)
{
- if (t_isalpha(c))
+ if (t_isalpha_cstr(c))
/* okay */ ;
else if (t_iseq(c, '['))
state = RS_IN_ONEOF;
@@ -48,14 +48,14 @@ RS_isRegis(const char *str)
{
if (t_iseq(c, '^'))
state = RS_IN_NONEOF;
- else if (t_isalpha(c))
+ else if (t_isalpha_cstr(c))
state = RS_IN_ONEOF_IN;
else
return false;
}
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
{
- if (t_isalpha(c))
+ if (t_isalpha_cstr(c))
/* okay */ ;
else if (t_iseq(c, ']'))
state = RS_IN_WAIT;
@@ -64,7 +64,7 @@ RS_isRegis(const char *str)
}
else
elog(ERROR, "internal error in RS_isRegis: state %d", state);
- c += pg_mblen(c);
+ c += pg_mblen_cstr(c);
}
return (state == RS_IN_WAIT);
@@ -96,15 +96,14 @@ RS_compile(Regis *r, bool issuffix, const char *str)
{
if (state == RS_IN_WAIT)
{
- if (t_isalpha(c))
+ if (t_isalpha_cstr(c))
{
if (ptr)
ptr = newRegisNode(ptr, len);
else
ptr = r->node = newRegisNode(NULL, len);
- COPYCHAR(ptr->data, c);
ptr->type = RSF_ONEOF;
- ptr->len = pg_mblen(c);
+ ptr->len = ts_copychar_cstr(ptr->data, c);
}
else if (t_iseq(c, '['))
{
@@ -125,10 +124,9 @@ RS_compile(Regis *r, bool issuffix, const char *str)
ptr->type = RSF_NONEOF;
state = RS_IN_NONEOF;
}
- else if (t_isalpha(c))
+ else if (t_isalpha_cstr(c))
{
- COPYCHAR(ptr->data, c);
- ptr->len = pg_mblen(c);
+ ptr->len = ts_copychar_cstr(ptr->data, c);
state = RS_IN_ONEOF_IN;
}
else /* shouldn't get here */
@@ -136,11 +134,8 @@ RS_compile(Regis *r, bool issuffix, const char *str)
}
else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
{
- if (t_isalpha(c))
- {
- COPYCHAR(ptr->data + ptr->len, c);
- ptr->len += pg_mblen(c);
- }
+ if (t_isalpha_cstr(c))
+ ptr->len += ts_copychar_cstr(ptr->data + ptr->len, c);
else if (t_iseq(c, ']'))
state = RS_IN_WAIT;
else /* shouldn't get here */
@@ -148,7 +143,7 @@ RS_compile(Regis *r, bool issuffix, const char *str)
}
else
elog(ERROR, "internal error in RS_compile: state %d", state);
- c += pg_mblen(c);
+ c += pg_mblen_cstr(c);
}
if (state != RS_IN_WAIT) /* shouldn't get here */
@@ -187,10 +182,10 @@ mb_strchr(char *str, char *c)
char *ptr = str;
bool res = false;
- clen = pg_mblen(c);
+ clen = pg_mblen_cstr(c);
while (*ptr && !res)
{
- plen = pg_mblen(ptr);
+ plen = pg_mblen_cstr(ptr);
if (plen == clen)
{
i = plen;
@@ -219,7 +214,7 @@ RS_execute(Regis *r, char *str)
while (*c)
{
len++;
- c += pg_mblen(c);
+ c += pg_mblen_cstr(c);
}
if (len < r->nchar)
@@ -230,7 +225,7 @@ RS_execute(Regis *r, char *str)
{
len -= r->nchar;
while (len-- > 0)
- c += pg_mblen(c);
+ c += pg_mblen_cstr(c);
}
@@ -250,7 +245,7 @@ RS_execute(Regis *r, char *str)
elog(ERROR, "unrecognized regis node type: %d", ptr->type);
}
ptr = ptr->next;
- c += pg_mblen(c);
+ c += pg_mblen_cstr(c);
}
return true;
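/*
 * Editor's sketch (not part of the patch): pg_mblen_cstr and
 * ts_copychar_cstr are introduced elsewhere in the patch.  Judging from the
 * call sites above, pg_mblen_cstr returns the byte length of the multibyte
 * character at a pointer without reading past a terminating NUL, and
 * ts_copychar_cstr copies one such character, returning the bytes copied so
 * the caller can advance.  A UTF-8-only toy model -- the real functions are
 * encoding-aware, and all semantics here are assumed:
 */
#include <stdio.h>
#include <string.h>

static int mblen_cstr(const char *s)
{
    unsigned char c = (unsigned char) *s;

    if (c == '\0')
        return 0;               /* never step past the NUL terminator */
    if (c < 0x80)
        return 1;               /* ASCII */
    if (c < 0xE0)
        return 2;               /* naive: assumes a well-formed lead byte */
    if (c < 0xF0)
        return 3;
    return 4;
}

static int copychar_cstr(char *dst, const char *src)
{
    int len = mblen_cstr(src);

    memcpy(dst, src, len);
    return len;                 /* caller advances both pointers by len */
}

int main(void)
{
    const char *s = "h\xC3\xA9llo";    /* "héllo" in UTF-8 */
    char buf[8] = {0};
    int n = copychar_cstr(buf, s + 1); /* copy the two-byte 'é' */

    printf("copied %d bytes: %s\n", n, buf);
    return 0;
}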
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
index ad0ceec37b0..a1bfd2a9f9b 100644
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -233,7 +233,7 @@ findchar(char *str, int c)
{
if (t_iseq(str, c))
return str;
- str += pg_mblen(str);
+ str += pg_mblen_cstr(str);
}
return NULL;
@@ -246,7 +246,7 @@ findchar2(char *str, int c1, int c2)
{
if (t_iseq(str, c1) || t_iseq(str, c2))
return str;
- str += pg_mblen(str);
+ str += pg_mblen_cstr(str);
}
return NULL;
@@ -353,6 +353,7 @@ getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
char *next;
const char *sbuf = *sflagset;
int maxstep;
+ int clen;
bool stop = false;
bool met_comma = false;
@@ -364,11 +365,11 @@ getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
{
case FM_LONG:
case FM_CHAR:
- COPYCHAR(sflag, *sflagset);
- sflag += pg_mblen(*sflagset);
+ clen = ts_copychar_cstr(sflag, *sflagset);
+ sflag += clen;
/* Go to start of the next flag */
- *sflagset += pg_mblen(*sflagset);
+ *sflagset += clen;
/* Check if we get all characters of flag */
maxstep--;
@@ -418,7 +419,7 @@ getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
*sflagset)));
}
- *sflagset += pg_mblen(*sflagset);
+ *sflagset += pg_mblen_cstr(*sflagset);
}
stop = true;
break;
@@ -544,7 +545,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
while (*s)
{
/* we allow only single encoded flags for faster works */
- if (pg_mblen(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
+ if (pg_mblen_cstr(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
s++;
else
{
@@ -565,7 +566,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
*s = '\0';
break;
}
- s += pg_mblen(s);
+ s += pg_mblen_cstr(s);
}
pstr = lowerstr_ctx(Conf, line);
@@ -797,17 +798,17 @@ get_nextfield(char **str, char *next)
while (**str)
{
+ int clen = pg_mblen_cstr(*str);
+
if (state == PAE_WAIT_MASK)
{
if (t_iseq(*str, '#'))
return false;
else if (!isspace((unsigned char) **str))
{
- int clen = pg_mblen(*str);
-
if (clen < avail)
{
- COPYCHAR(next, *str);
+ ts_copychar_with_len(next, *str, clen);
next += clen;
avail -= clen;
}
@@ -823,17 +824,15 @@ get_nextfield(char **str, char *next)
}
else
{
- int clen = pg_mblen(*str);
-
if (clen < avail)
{
- COPYCHAR(next, *str);
+ ts_copychar_with_len(next, *str, clen);
next += clen;
avail -= clen;
}
}
}
- *str += pg_mblen(*str);
+ *str += clen;
}
*next = '\0';
@@ -923,14 +922,15 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
while (*str)
{
+ int clen = pg_mblen_cstr(str);
+
if (state == PAE_WAIT_MASK)
{
if (t_iseq(str, '#'))
return false;
else if (!isspace((unsigned char) *str))
{
- COPYCHAR(pmask, str);
- pmask += pg_mblen(str);
+ pmask += ts_copychar_with_len(pmask, str, clen);
state = PAE_INMASK;
}
}
@@ -943,8 +943,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
}
else if (!isspace((unsigned char) *str))
{
- COPYCHAR(pmask, str);
- pmask += pg_mblen(str);
+ pmask += ts_copychar_with_len(pmask, str, clen);
}
}
else if (state == PAE_WAIT_FIND)
@@ -953,10 +952,9 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
{
state = PAE_INFIND;
}
- else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
+ else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* english 's */ )
{
- COPYCHAR(prepl, str);
- prepl += pg_mblen(str);
+ prepl += ts_copychar_with_len(prepl, str, clen);
state = PAE_INREPL;
}
else if (!isspace((unsigned char) *str))
@@ -971,10 +969,9 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
*pfind = '\0';
state = PAE_WAIT_REPL;
}
- else if (t_isalpha(str))
+ else if (t_isalpha_cstr(str))
{
- COPYCHAR(pfind, str);
- pfind += pg_mblen(str);
+ pfind += ts_copychar_with_len(pfind, str, clen);
}
else if (!isspace((unsigned char) *str))
ereport(ERROR,
@@ -987,10 +984,9 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
{
break; /* void repl */
}
- else if (t_isalpha(str))
+ else if (t_isalpha_cstr(str))
{
- COPYCHAR(prepl, str);
- prepl += pg_mblen(str);
+ prepl += ts_copychar_with_len(prepl, str, clen);
state = PAE_INREPL;
}
else if (!isspace((unsigned char) *str))
@@ -1005,10 +1001,9 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
*prepl = '\0';
break;
}
- else if (t_isalpha(str))
+ else if (t_isalpha_cstr(str))
{
- COPYCHAR(prepl, str);
- prepl += pg_mblen(str);
+ prepl += ts_copychar_with_len(prepl, str, clen);
}
else if (!isspace((unsigned char) *str))
ereport(ERROR,
@@ -1018,7 +1013,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
else
elog(ERROR, "unrecognized state in parse_affentry: %d", state);
- str += pg_mblen(str);
+ str += clen;
}
*pmask = *pfind = *prepl = '\0';
@@ -1071,10 +1066,9 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
CompoundAffixFlag *newValue;
char sbuf[BUFSIZ];
char *sflag;
- int clen;
while (*s && isspace((unsigned char) *s))
- s += pg_mblen(s);
+ s += pg_mblen_cstr(s);
if (!*s)
ereport(ERROR,
@@ -1085,8 +1079,8 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
sflag = sbuf;
while (*s && !isspace((unsigned char) *s) && *s != '\n')
{
- clen = pg_mblen(s);
- COPYCHAR(sflag, s);
+ int clen = ts_copychar_cstr(sflag, s);
+
sflag += clen;
s += clen;
}
@@ -1267,7 +1261,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
char *s = recoded + strlen("FLAG");
while (*s && isspace((unsigned char) *s))
- s += pg_mblen(s);
+ s += pg_mblen_cstr(s);
if (*s)
{
@@ -1466,11 +1460,11 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
if (s)
{
while (*s && !isspace((unsigned char) *s))
- s += pg_mblen(s);
+ s += pg_mblen_cstr(s);
while (*s && isspace((unsigned char) *s))
- s += pg_mblen(s);
+ s += pg_mblen_cstr(s);
- if (*s && pg_mblen(s) == 1)
+ if (*s && pg_mblen_cstr(s) == 1)
{
addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
Conf->usecompound = true;
@@ -1499,7 +1493,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
flagflags = 0;
while (*s && isspace((unsigned char) *s))
- s += pg_mblen(s);
+ s += pg_mblen_cstr(s);
if (*s == '*')
{
@@ -1520,12 +1514,11 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
* be followed by EOL, whitespace, or ':'. Otherwise this is a
* new-format flag command.
*/
- if (*s && pg_mblen(s) == 1)
+ if (*s && pg_mblen_cstr(s) == 1)
{
- COPYCHAR(flag, s);
+ flag[0] = *s++;
flag[1] = '\0';
- s++;
if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
isspace((unsigned char) *s))
{
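
The spell.c hunks fold the old COPYCHAR()-then-pg_mblen() pairs into single helpers that copy one multibyte character and return its byte length. A sketch of the presumed shape, assuming ts_copychar_with_len() trusts a caller-supplied length while ts_copychar_cstr() computes a NUL-safe one (both names are from the patch; the bodies are assumptions):

static inline int
ts_copychar_with_len_sketch(char *to, const char *from, int mblen)
{
	memcpy(to, from, mblen);
	return mblen;
}

static inline int
ts_copychar_cstr_sketch(char *to, const char *from)
{
	return ts_copychar_with_len_sketch(to, from, pg_mblen_cstr(from));
}
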
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 1e98f321957..df02ffb12fd 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -23,32 +23,40 @@ static void tsearch_readline_callback(void *arg);
/* space for a single character plus a trailing NUL */
#define WC_BUF_LEN 2
-int
-t_isalpha(const char *ptr)
-{
- pg_wchar wstr[WC_BUF_LEN];
- int wlen pg_attribute_unused();
-
- wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
- Assert(wlen <= 1);
-
- /* pass single character, or NUL if empty */
- return pg_iswalpha(wstr[0], pg_database_locale());
-}
-
-int
-t_isalnum(const char *ptr)
-{
- pg_wchar wstr[WC_BUF_LEN];
- int wlen pg_attribute_unused();
-
- wlen = pg_mb2wchar_with_len(ptr, wstr, pg_mblen(ptr));
- Assert(wlen <= 1);
-
- /* pass single character, or NUL if empty */
- return pg_iswalnum(wstr[0], pg_database_locale());
+#define GENERATE_T_ISCLASS_DEF(character_class) \
+/* mblen shall be that of the first character */ \
+int \
+t_is##character_class##_with_len(const char *ptr, int mblen) \
+{ \
+ pg_wchar wstr[WC_BUF_LEN]; \
+ int wlen pg_attribute_unused(); \
+ wlen = pg_mb2wchar_with_len(ptr, wstr, mblen); \
+ Assert(wlen <= 1); \
+ /* pass single character, or NUL if empty */ \
+ return pg_isw##character_class(wstr[0], pg_database_locale()); \
+} \
+\
+/* ptr shall point to a NUL-terminated string */ \
+int \
+t_is##character_class##_cstr(const char *ptr) \
+{ \
+ return t_is##character_class##_with_len(ptr, pg_mblen_cstr(ptr)); \
+} \
+/* ptr shall point to a string with pre-validated encoding */ \
+int \
+t_is##character_class##_unbounded(const char *ptr) \
+{ \
+ return t_is##character_class##_with_len(ptr, pg_mblen_unbounded(ptr)); \
+} \
+/* historical name for _unbounded */ \
+int \
+t_is##character_class(const char *ptr) \
+{ \
+ return t_is##character_class##_unbounded(ptr); \
}
+GENERATE_T_ISCLASS_DEF(alnum)
+GENERATE_T_ISCLASS_DEF(alpha)
/*
* Set up to read a file using tsearch_readline(). This facility is
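
For review convenience, here is what GENERATE_T_ISCLASS_DEF(alpha) expands to for the two most-used entry points, derived mechanically from the macro above and reformatted with whitespace:

int
t_isalpha_with_len(const char *ptr, int mblen)
{
	pg_wchar	wstr[WC_BUF_LEN];
	int			wlen pg_attribute_unused();

	wlen = pg_mb2wchar_with_len(ptr, wstr, mblen);
	Assert(wlen <= 1);
	/* pass single character, or NUL if empty */
	return pg_iswalpha(wstr[0], pg_database_locale());
}

int
t_isalpha_cstr(const char *ptr)
{
	return t_isalpha_with_len(ptr, pg_mblen_cstr(ptr));
}
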
diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c
index 5afa6e4bad8..64b60bb9513 100644
--- a/src/backend/tsearch/ts_selfuncs.c
+++ b/src/backend/tsearch/ts_selfuncs.c
@@ -108,12 +108,14 @@ tsmatchsel(PG_FUNCTION_ARGS)
* OK, there's a Var and a Const we're dealing with here. We need the
* Const to be a TSQuery, else we can't do anything useful. We have to
* check this because the Var might be the TSQuery not the TSVector.
+ *
+ * Also check that the Var really is a TSVector, in case this estimator is
+ * mistakenly attached to some other operator.
*/
- if (((Const *) other)->consttype == TSQUERYOID)
+ if (((Const *) other)->consttype == TSQUERYOID &&
+ vardata.vartype == TSVECTOROID)
{
/* tsvector @@ tsquery or the other way around */
- Assert(vardata.vartype == TSVECTOROID);
-
selec = tsquerysel(&vardata, ((Const *) other)->constvalue);
}
else
diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c
index 0c513d694e7..48ee050e37f 100644
--- a/src/backend/tsearch/ts_typanalyze.c
+++ b/src/backend/tsearch/ts_typanalyze.c
@@ -444,7 +444,7 @@ compute_tsvector_stats(VacAttrStats *stats,
stats->statypid[0] = TEXTOID;
stats->statyplen[0] = -1; /* typlen, -1 for varlena */
stats->statypbyval[0] = false;
- stats->statypalign[0] = 'i';
+ stats->statypalign[0] = TYPALIGN_INT;
}
}
else
diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c
index 9072d22423f..52cf65533e4 100644
--- a/src/backend/tsearch/ts_utils.c
+++ b/src/backend/tsearch/ts_utils.c
@@ -90,7 +90,7 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *, size
/* Trim trailing space */
while (*pbuf && !isspace((unsigned char) *pbuf))
- pbuf += pg_mblen(pbuf);
+ pbuf += pg_mblen_cstr(pbuf);
*pbuf = '\0';
/* Skip empty lines */
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index bfe8aa7fbce..8b9b34e762a 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -1683,7 +1683,8 @@ TParserGet(TParser *prs)
prs->state->charlen = 0;
else
prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen :
- pg_mblen(prs->str + prs->state->posbyte);
+ pg_mblen_range(prs->str + prs->state->posbyte,
+ prs->str + prs->lenstr);
Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr);
Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null);
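
This is the first use of pg_mblen_range(), the bounds-limited sibling of pg_mblen_cstr(); it recurs in encode.c, formatting.c, jsonfuncs.c, and levenshtein.c below, and pg_mblen_with_len() in the like.c hunks plays the same role with an explicit remaining-length argument. Its presumed contract, as a sketch (the implementation is an assumption):

static inline int
pg_mblen_range_sketch(const char *s, const char *end)
{
	int			claimed = pg_mblen(s);	/* length implied by the leading bytes */

	/* never report a length extending past 'end' */
	return Min(claimed, (int) (end - s));
}
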
diff --git a/src/backend/utils/.gitignore b/src/backend/utils/.gitignore
index 303c01d0515..fa9cfb39693 100644
--- a/src/backend/utils/.gitignore
+++ b/src/backend/utils/.gitignore
@@ -5,3 +5,6 @@
/guc_tables.inc.c
/probes.h
/errcodes.h
+/pgstat_wait_event.c
+/wait_event_funcs_data.c
+/wait_event_types.h
diff --git a/src/backend/utils/Makefile b/src/backend/utils/Makefile
index 6df31504f32..81b4a956bda 100644
--- a/src/backend/utils/Makefile
+++ b/src/backend/utils/Makefile
@@ -43,7 +43,7 @@ generated-header-symlinks: $(top_builddir)/src/include/utils/header-stamp submak
submake-adt-headers:
$(MAKE) -C adt jsonpath_gram.h
-$(SUBDIRS:%=%-recursive): fmgr-stamp errcodes.h guc_tables.inc.c
+$(SUBDIRS:%=%-recursive): fmgr-stamp errcodes.h guc_tables.inc.c pgstat_wait_event.c wait_event_funcs_data.c wait_event_types.h
# fmgr-stamp records the last time we ran Gen_fmgrtab.pl. We don't rely on
# the timestamps of the individual output files, because the Perl script
@@ -58,6 +58,12 @@ errcodes.h: $(top_srcdir)/src/backend/utils/errcodes.txt generate-errcodes.pl
guc_tables.inc.c: $(top_srcdir)/src/backend/utils/misc/guc_parameters.dat $(top_srcdir)/src/backend/utils/misc/gen_guc_tables.pl
$(PERL) $(top_srcdir)/src/backend/utils/misc/gen_guc_tables.pl $< $@
+pgstat_wait_event.c: wait_event_types.h
+wait_event_funcs_data.c: wait_event_types.h
+
+wait_event_types.h: $(top_srcdir)/src/backend/utils/activity/wait_event_names.txt $(top_srcdir)/src/backend/utils/activity/generate-wait_event_types.pl
+ $(PERL) $(top_srcdir)/src/backend/utils/activity/generate-wait_event_types.pl --code $<
+
ifeq ($(enable_dtrace), yes)
probes.h: postprocess_dtrace.sed probes.h.tmp
sed -f $^ >$@
@@ -73,8 +79,8 @@ endif
# These generated headers must be symlinked into src/include/.
# We use header-stamp to record that we've done this because the symlinks
# themselves may appear older than fmgr-stamp.
-$(top_builddir)/src/include/utils/header-stamp: fmgr-stamp errcodes.h probes.h guc_tables.inc.c
- cd '$(dir $@)' && for file in fmgroids.h fmgrprotos.h errcodes.h probes.h guc_tables.inc.c; do \
+$(top_builddir)/src/include/utils/header-stamp: fmgr-stamp errcodes.h probes.h guc_tables.inc.c pgstat_wait_event.c wait_event_funcs_data.c wait_event_types.h
+ cd '$(dir $@)' && for file in fmgroids.h fmgrprotos.h errcodes.h probes.h guc_tables.inc.c pgstat_wait_event.c wait_event_funcs_data.c wait_event_types.h; do \
rm -f $$file && $(LN_S) "../../../$(subdir)/$$file" . ; \
done
touch $@
@@ -93,3 +99,4 @@ uninstall-data:
clean:
rm -f probes.h probes.h.tmp
rm -f fmgroids.h fmgrprotos.h fmgrtab.c fmgr-stamp errcodes.h guc_tables.inc.c
+ rm -f wait_event_types.h pgstat_wait_event.c wait_event_funcs_data.c
diff --git a/src/backend/utils/activity/.gitignore b/src/backend/utils/activity/.gitignore
deleted file mode 100644
index bd0c0c77729..00000000000
--- a/src/backend/utils/activity/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-/pgstat_wait_event.c
-/wait_event_types.h
-/wait_event_funcs_data.c
diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile
index 0eb29ee78aa..c37bfb350bb 100644
--- a/src/backend/utils/activity/Makefile
+++ b/src/backend/utils/activity/Makefile
@@ -36,17 +36,8 @@ OBJS = \
wait_event.o \
wait_event_funcs.o
-include $(top_srcdir)/src/backend/common.mk
-
-wait_event_funcs.o: wait_event_funcs_data.c
-wait_event_funcs_data.c: wait_event_types.h
-
-wait_event.o: pgstat_wait_event.c
-pgstat_wait_event.c: wait_event_types.h
- touch $@
+# Force these dependencies to be known even when automatic dependency info hasn't been built yet:
+wait_event.o: wait_event.c $(top_builddir)/src/backend/utils/pgstat_wait_event.c
+wait_event_funcs.o: wait_event_funcs.c $(top_builddir)/src/backend/utils/wait_event_funcs_data.c
-wait_event_types.h: $(top_srcdir)/src/backend/utils/activity/wait_event_names.txt generate-wait_event_types.pl
- $(PERL) $(srcdir)/generate-wait_event_types.pl --code $<
-
-clean:
- rm -f wait_event_types.h pgstat_wait_event.c wait_event_funcs_data.c
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index c84e6536580..cd087129469 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -1164,31 +1164,6 @@ pgstat_get_my_plan_id(void)
return MyBEEntry->st_plan_id;
}
-/* ----------
- * pgstat_get_backend_type_by_proc_number() -
- *
- * Return the type of the backend with the specified ProcNumber. This looks
- * directly at the BackendStatusArray, so the return value may be out of date.
- * The only current use of this function is in pg_signal_backend(), which is
- * inherently racy, so we don't worry too much about this.
- *
- * It is the caller's responsibility to use this wisely; at minimum, callers
- * should ensure that procNumber is valid and perform the required permissions
- * checks.
- * ----------
- */
-BackendType
-pgstat_get_backend_type_by_proc_number(ProcNumber procNumber)
-{
- volatile PgBackendStatus *status = &BackendStatusArray[procNumber];
-
- /*
- * We bypass the changecount mechanism since fetching and storing an int
- * is almost certainly atomic.
- */
- return status->st_backendType;
-}
-
/* ----------
* cmp_lbestatus
*
diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build
index 9f48d5970e1..53bd5a246ca 100644
--- a/src/backend/utils/activity/meson.build
+++ b/src/backend/utils/activity/meson.build
@@ -30,7 +30,6 @@ waitevent_sources = files(
wait_event = static_library('wait_event_names',
waitevent_sources,
dependencies: [backend_code],
- include_directories: include_directories('../../../include/utils'),
kwargs: internal_lib_args,
)
diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c
index 1350f5f62f1..f2f8d3ff75f 100644
--- a/src/backend/utils/activity/pgstat_backend.c
+++ b/src/backend/utils/activity/pgstat_backend.c
@@ -326,7 +326,7 @@ pgstat_create_backend(ProcNumber procnum)
PgStatShared_Backend *shstatent;
entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_BACKEND, InvalidOid,
- MyProcNumber, false);
+ procnum, false);
shstatent = (PgStatShared_Backend *) entry_ref->shared_stats;
/*
diff --git a/src/backend/utils/activity/pgstat_database.c b/src/backend/utils/activity/pgstat_database.c
index d7f6d4c5ee6..933dcb5cae5 100644
--- a/src/backend/utils/activity/pgstat_database.c
+++ b/src/backend/utils/activity/pgstat_database.c
@@ -17,7 +17,7 @@
#include "postgres.h"
-#include "storage/procsignal.h"
+#include "storage/standby.h"
#include "utils/pgstat_internal.h"
#include "utils/timestamp.h"
@@ -88,31 +88,41 @@ pgstat_report_recovery_conflict(int reason)
dbentry = pgstat_prep_database_pending(MyDatabaseId);
- switch (reason)
+ switch ((RecoveryConflictReason) reason)
{
- case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+ case RECOVERY_CONFLICT_DATABASE:
/*
* Since we drop the information about the database as soon as it
* replicates, there is no point in counting these conflicts.
*/
break;
- case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+ case RECOVERY_CONFLICT_TABLESPACE:
dbentry->conflict_tablespace++;
break;
- case PROCSIG_RECOVERY_CONFLICT_LOCK:
+ case RECOVERY_CONFLICT_LOCK:
dbentry->conflict_lock++;
break;
- case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+ case RECOVERY_CONFLICT_SNAPSHOT:
dbentry->conflict_snapshot++;
break;
- case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+ case RECOVERY_CONFLICT_BUFFERPIN:
dbentry->conflict_bufferpin++;
break;
- case PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT:
+ case RECOVERY_CONFLICT_LOGICALSLOT:
dbentry->conflict_logicalslot++;
break;
- case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ case RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ dbentry->conflict_startup_deadlock++;
+ break;
+ case RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK:
+
+ /*
+ * The difference between RECOVERY_CONFLICT_STARTUP_DEADLOCK and
+ * RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK is merely whether a buffer
+ * pin was part of the deadlock. We use the same counter for both
+ * reasons.
+ */
dbentry->conflict_startup_deadlock++;
break;
}
@@ -190,7 +200,7 @@ pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount)
Assert(entry_ref);
if (!entry_ref)
{
- elog(WARNING, "could not report %d conflicts for DB %u",
+ elog(WARNING, "could not report %d checksum failures for database %u",
failurecount, dboid);
return;
}
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index e4f2c440257..aca2c8fc742 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -503,4 +503,4 @@ pgstat_get_wait_event(uint32 wait_event_info)
return event_name;
}
-#include "pgstat_wait_event.c"
+#include "utils/pgstat_wait_event.c"
diff --git a/src/backend/utils/activity/wait_event_funcs.c b/src/backend/utils/activity/wait_event_funcs.c
index b62ee83ef73..fa10a80b088 100644
--- a/src/backend/utils/activity/wait_event_funcs.c
+++ b/src/backend/utils/activity/wait_event_funcs.c
@@ -31,7 +31,7 @@ static const struct
waitEventData[] =
{
-#include "wait_event_funcs_data.c"
+#include "utils/wait_event_funcs_data.c"
/* end of list */
{NULL, NULL, NULL}
};
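
For context, the generated file included just above expands to brace rows matching those three fields, terminated by the NULL sentinel; an illustrative row (values invented for illustration, not actual generated output):

	{"LWLock", "ShmemIndex", "Waiting to find or allocate space in shared memory."},
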
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 5537a2d2530..4aa864fe3c3 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -14,13 +14,13 @@
#
# The files generated from this one are:
#
-# src/backend/utils/activity/wait_event_types.h
+# wait_event_types.h
# typedef enum definitions for wait events.
#
-# src/backend/utils/activity/pgstat_wait_event.c
+# pgstat_wait_event.c
# C functions to get the wait event name based on the enum.
#
-# src/backend/utils/activity/wait_event_types.sgml
+# wait_event_types.sgml
# SGML tables of wait events for inclusion in the documentation.
#
# When adding a new wait event, make sure it is placed in the appropriate
@@ -213,6 +213,8 @@ CONTROL_FILE_WRITE_UPDATE "Waiting for a write to update the pg_contro
COPY_FILE_COPY "Waiting for a file copy operation."
COPY_FILE_READ "Waiting for a read during a file copy operation."
COPY_FILE_WRITE "Waiting for a write during a file copy operation."
+COPY_FROM_READ	"Waiting to read data from a pipe, a file, or a program during COPY FROM."
+COPY_TO_WRITE	"Waiting to write data to a pipe, a file, or a program during COPY TO."
DATA_FILE_EXTEND "Waiting for a relation data file to be extended."
DATA_FILE_FLUSH "Waiting for a relation data file to reach durable storage."
DATA_FILE_IMMEDIATE_SYNC "Waiting for an immediate synchronization of a relation data file to durable storage."
diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c
index 3a6905f9546..641673f0b0e 100644
--- a/src/backend/utils/adt/acl.c
+++ b/src/backend/utils/adt/acl.c
@@ -130,7 +130,8 @@ static AclMode convert_largeobject_priv_string(text *priv_type_text);
static AclMode convert_role_priv_string(text *priv_type_text);
static AclResult pg_role_aclcheck(Oid role_oid, Oid roleid, AclMode mode);
-static void RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue);
+static void RoleMembershipCacheCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
/*
@@ -5067,7 +5068,8 @@ initialize_acl(void)
* Syscache inval callback function
*/
static void
-RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
+RoleMembershipCacheCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
if (cacheid == DATABASEOID &&
hashvalue != cached_db_hash &&
diff --git a/src/backend/utils/adt/array_expanded.c b/src/backend/utils/adt/array_expanded.c
index 01e3dddcbbb..7e8352af52b 100644
--- a/src/backend/utils/adt/array_expanded.c
+++ b/src/backend/utils/adt/array_expanded.c
@@ -238,6 +238,7 @@ EA_get_flat_size(ExpandedObjectHeader *eohptr)
Datum *dvalues;
bool *dnulls;
Size nbytes;
+ uint8 typalignby;
int i;
Assert(eah->ea_magic == EA_MAGIC);
@@ -261,12 +262,13 @@ EA_get_flat_size(ExpandedObjectHeader *eohptr)
dvalues = eah->dvalues;
dnulls = eah->dnulls;
nbytes = 0;
+ typalignby = typalign_to_alignby(eah->typalign);
for (i = 0; i < nelems; i++)
{
if (dnulls && dnulls[i])
continue;
nbytes = att_addlength_datum(nbytes, eah->typlen, dvalues[i]);
- nbytes = att_align_nominal(nbytes, eah->typalign);
+ nbytes = att_nominal_alignby(nbytes, typalignby);
/* check for overflow of total request */
if (!AllocSizeIsValid(nbytes))
ereport(ERROR,
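
array_expanded.c introduces the pattern repeated throughout the arrayfuncs.c hunks below: convert the char typalign code to a numeric byte alignment once, then use att_nominal_alignby() inside loops. A sketch of what typalign_to_alignby() presumably does — the switch body is an assumption, not the patch's code:

static inline uint8
typalign_to_alignby_sketch(char typalign)
{
	switch (typalign)
	{
		case TYPALIGN_CHAR:
			return 1;
		case TYPALIGN_SHORT:
			return ALIGNOF_SHORT;
		case TYPALIGN_INT:
			return ALIGNOF_INT;
		case TYPALIGN_DOUBLE:
			return ALIGNOF_DOUBLE;
		default:
			elog(ERROR, "unrecognized typalign: %c", typalign);
			return 0;			/* keep compiler quiet */
	}
}
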
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index e71d32773b5..734e5fea45e 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -75,6 +75,7 @@ typedef struct ArrayIteratorData
int16 typlen; /* element type's length */
bool typbyval; /* element type's byval property */
char typalign; /* element type's align property */
+ uint8 typalignby; /* typalign mapped to numeric alignment */
/* information about the requested slice size */
int slice_ndim; /* slice dimension, or 0 if not slicing */
@@ -123,7 +124,7 @@ static bool array_get_isnull(const bits8 *nullbitmap, int offset);
static void array_set_isnull(bits8 *nullbitmap, int offset, bool isNull);
static Datum ArrayCast(char *value, bool byval, int len);
static int ArrayCastAndSet(Datum src,
- int typlen, bool typbyval, char typalign,
+ int typlen, bool typbyval, uint8 typalignby,
char *dest);
static char *array_seek(char *ptr, int offset, bits8 *nullbitmap, int nitems,
int typlen, bool typbyval, char typalign);
@@ -187,6 +188,7 @@ array_in(PG_FUNCTION_ARGS)
int typlen;
bool typbyval;
char typalign;
+ uint8 typalignby;
char typdelim;
Oid typioparam;
char *p;
@@ -232,6 +234,7 @@ array_in(PG_FUNCTION_ARGS)
typlen = my_extra->typlen;
typbyval = my_extra->typbyval;
typalign = my_extra->typalign;
+ typalignby = typalign_to_alignby(typalign);
typdelim = my_extra->typdelim;
typioparam = my_extra->typioparam;
@@ -328,7 +331,7 @@ array_in(PG_FUNCTION_ARGS)
if (typlen == -1)
values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
nbytes = att_addlength_datum(nbytes, typlen, values[i]);
- nbytes = att_align_nominal(nbytes, typalign);
+ nbytes = att_nominal_alignby(nbytes, typalignby);
/* check for overflow of total request */
if (!AllocSizeIsValid(nbytes))
ereturn(escontext, (Datum) 0,
@@ -972,6 +975,7 @@ CopyArrayEls(ArrayType *array,
bits8 *bitmap = ARR_NULLBITMAP(array);
int bitval = 0;
int bitmask = 1;
+ uint8 typalignby = typalign_to_alignby(typalign);
int i;
if (typbyval)
@@ -988,7 +992,7 @@ CopyArrayEls(ArrayType *array,
else
{
bitval |= bitmask;
- p += ArrayCastAndSet(values[i], typlen, typbyval, typalign, p);
+ p += ArrayCastAndSet(values[i], typlen, typbyval, typalignby, p);
if (freedata)
pfree(DatumGetPointer(values[i]));
}
@@ -1112,7 +1116,7 @@ array_out(PG_FUNCTION_ARGS)
needquotes = (bool *) palloc(nitems * sizeof(bool));
overall_length = 0;
- array_iter_setup(&iter, v);
+ array_iter_setup(&iter, v, typlen, typbyval, typalign);
for (i = 0; i < nitems; i++)
{
@@ -1121,8 +1125,7 @@ array_out(PG_FUNCTION_ARGS)
bool needquote;
/* Get source element, checking for NULL */
- itemvalue = array_iter_next(&iter, &isnull, i,
- typlen, typbyval, typalign);
+ itemvalue = array_iter_next(&iter, &isnull, i);
if (isnull)
{
@@ -1468,6 +1471,7 @@ ReadArrayBinary(StringInfo buf,
int i;
bool hasnull;
int32 totbytes;
+ uint8 typalignby = typalign_to_alignby(typalign);
for (i = 0; i < nitems; i++)
{
@@ -1526,7 +1530,7 @@ ReadArrayBinary(StringInfo buf,
if (typlen == -1)
values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
totbytes = att_addlength_datum(totbytes, typlen, values[i]);
- totbytes = att_align_nominal(totbytes, typalign);
+ totbytes = att_nominal_alignby(totbytes, typalignby);
/* check for overflow of total request */
if (!AllocSizeIsValid(totbytes))
ereport(ERROR,
@@ -1614,7 +1618,7 @@ array_send(PG_FUNCTION_ARGS)
}
/* Send the array elements using the element's own sendproc */
- array_iter_setup(&iter, v);
+ array_iter_setup(&iter, v, typlen, typbyval, typalign);
for (i = 0; i < nitems; i++)
{
@@ -1622,8 +1626,7 @@ array_send(PG_FUNCTION_ARGS)
bool isnull;
/* Get source element, checking for NULL */
- itemvalue = array_iter_next(&iter, &isnull, i,
- typlen, typbyval, typalign);
+ itemvalue = array_iter_next(&iter, &isnull, i);
if (isnull)
{
@@ -2231,6 +2234,7 @@ array_set_element(Datum arraydatum,
addedafter,
lenbefore,
lenafter;
+ uint8 elmalignby = typalign_to_alignby(elmalign);
if (arraytyplen > 0)
{
@@ -2258,7 +2262,7 @@ array_set_element(Datum arraydatum,
resultarray = (char *) palloc(arraytyplen);
memcpy(resultarray, DatumGetPointer(arraydatum), arraytyplen);
elt_ptr = resultarray + indx[0] * elmlen;
- ArrayCastAndSet(dataValue, elmlen, elmbyval, elmalign, elt_ptr);
+ ArrayCastAndSet(dataValue, elmlen, elmbyval, elmalignby, elt_ptr);
return PointerGetDatum(resultarray);
}
@@ -2416,7 +2420,7 @@ array_set_element(Datum arraydatum,
else
{
olditemlen = att_addlength_pointer(0, elmlen, elt_ptr);
- olditemlen = att_align_nominal(olditemlen, elmalign);
+ olditemlen = att_nominal_alignby(olditemlen, elmalignby);
}
lenafter = olddatasize - lenbefore - olditemlen;
}
@@ -2426,7 +2430,7 @@ array_set_element(Datum arraydatum,
else
{
newitemlen = att_addlength_datum(0, elmlen, dataValue);
- newitemlen = att_align_nominal(newitemlen, elmalign);
+ newitemlen = att_nominal_alignby(newitemlen, elmalignby);
}
newsize = overheadlen + lenbefore + newitemlen + lenafter;
@@ -2449,7 +2453,7 @@ array_set_element(Datum arraydatum,
(char *) array + oldoverheadlen,
lenbefore);
if (!isNull)
- ArrayCastAndSet(dataValue, elmlen, elmbyval, elmalign,
+ ArrayCastAndSet(dataValue, elmlen, elmbyval, elmalignby,
(char *) newarray + overheadlen + lenbefore);
memcpy((char *) newarray + overheadlen + lenbefore + newitemlen,
(char *) array + oldoverheadlen + lenbefore + olditemlen,
@@ -3221,6 +3225,7 @@ array_map(Datum arrayd,
int typlen;
bool typbyval;
char typalign;
+ uint8 typalignby;
array_iter iter;
ArrayMetaState *inp_extra;
ArrayMetaState *ret_extra;
@@ -3270,21 +3275,21 @@ array_map(Datum arrayd,
typlen = ret_extra->typlen;
typbyval = ret_extra->typbyval;
typalign = ret_extra->typalign;
+ typalignby = typalign_to_alignby(typalign);
/* Allocate temporary arrays for new values */
values = (Datum *) palloc(nitems * sizeof(Datum));
nulls = (bool *) palloc(nitems * sizeof(bool));
/* Loop over source data */
- array_iter_setup(&iter, v);
+ array_iter_setup(&iter, v, inp_typlen, inp_typbyval, inp_typalign);
hasnulls = false;
for (i = 0; i < nitems; i++)
{
/* Get source element, checking for NULL */
*transform_source =
- array_iter_next(&iter, transform_source_isnull, i,
- inp_typlen, inp_typbyval, inp_typalign);
+ array_iter_next(&iter, transform_source_isnull, i);
/* Apply the given expression to source element */
values[i] = ExecEvalExpr(exprstate, econtext, &nulls[i]);
@@ -3298,7 +3303,7 @@ array_map(Datum arrayd,
values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
/* Update total result size */
nbytes = att_addlength_datum(nbytes, typlen, values[i]);
- nbytes = att_align_nominal(nbytes, typalign);
+ nbytes = att_nominal_alignby(nbytes, typalignby);
/* check for overflow of total request */
if (!AllocSizeIsValid(nbytes))
ereport(ERROR,
@@ -3505,6 +3510,7 @@ construct_md_array(Datum *elems,
int32 dataoffset;
int i;
int nelems;
+ uint8 elmalignby = typalign_to_alignby(elmalign);
if (ndims < 0) /* we do allow zero-dimension arrays */
ereport(ERROR,
@@ -3538,7 +3544,7 @@ construct_md_array(Datum *elems,
if (elmlen == -1)
elems[i] = PointerGetDatum(PG_DETOAST_DATUM(elems[i]));
nbytes = att_addlength_datum(nbytes, elmlen, elems[i]);
- nbytes = att_align_nominal(nbytes, elmalign);
+ nbytes = att_nominal_alignby(nbytes, elmalignby);
/* check for overflow of total request */
if (!AllocSizeIsValid(nbytes))
ereport(ERROR,
@@ -3641,6 +3647,7 @@ deconstruct_array(const ArrayType *array,
bits8 *bitmap;
int bitmask;
int i;
+ uint8 elmalignby = typalign_to_alignby(elmalign);
Assert(ARR_ELEMTYPE(array) == elmtype);
@@ -3673,7 +3680,7 @@ deconstruct_array(const ArrayType *array,
{
elems[i] = fetch_att(p, elmbyval, elmlen);
p = att_addlength_pointer(p, elmlen, p);
- p = (char *) att_align_nominal(p, elmalign);
+ p = (char *) att_nominal_alignby(p, elmalignby);
}
/* advance bitmap pointer if any */
@@ -3729,6 +3736,12 @@ deconstruct_array_builtin(const ArrayType *array,
elmalign = TYPALIGN_SHORT;
break;
+ case INT4OID:
+ elmlen = sizeof(int32);
+ elmbyval = true;
+ elmalign = TYPALIGN_INT;
+ break;
+
case OIDOID:
elmlen = sizeof(Oid);
elmbyval = true;
@@ -3878,8 +3891,8 @@ array_eq(PG_FUNCTION_ARGS)
/* Loop over source data */
nitems = ArrayGetNItems(ndims1, dims1);
- array_iter_setup(&it1, array1);
- array_iter_setup(&it2, array2);
+ array_iter_setup(&it1, array1, typlen, typbyval, typalign);
+ array_iter_setup(&it2, array2, typlen, typbyval, typalign);
for (i = 0; i < nitems; i++)
{
@@ -3890,10 +3903,8 @@ array_eq(PG_FUNCTION_ARGS)
bool oprresult;
/* Get elements, checking for NULL */
- elt1 = array_iter_next(&it1, &isnull1, i,
- typlen, typbyval, typalign);
- elt2 = array_iter_next(&it2, &isnull2, i,
- typlen, typbyval, typalign);
+ elt1 = array_iter_next(&it1, &isnull1, i);
+ elt2 = array_iter_next(&it2, &isnull2, i);
/*
* We consider two NULLs equal; NULL and not-NULL are unequal.
@@ -4042,8 +4053,8 @@ array_cmp(FunctionCallInfo fcinfo)
/* Loop over source data */
min_nitems = Min(nitems1, nitems2);
- array_iter_setup(&it1, array1);
- array_iter_setup(&it2, array2);
+ array_iter_setup(&it1, array1, typlen, typbyval, typalign);
+ array_iter_setup(&it2, array2, typlen, typbyval, typalign);
for (i = 0; i < min_nitems; i++)
{
@@ -4054,8 +4065,8 @@ array_cmp(FunctionCallInfo fcinfo)
int32 cmpresult;
/* Get elements, checking for NULL */
- elt1 = array_iter_next(&it1, &isnull1, i, typlen, typbyval, typalign);
- elt2 = array_iter_next(&it2, &isnull2, i, typlen, typbyval, typalign);
+ elt1 = array_iter_next(&it1, &isnull1, i);
+ elt2 = array_iter_next(&it2, &isnull2, i);
/*
* We consider two NULLs equal; NULL > not-NULL.
@@ -4238,7 +4249,7 @@ hash_array(PG_FUNCTION_ARGS)
/* Loop over source data */
nitems = ArrayGetNItems(ndims, dims);
- array_iter_setup(&iter, array);
+ array_iter_setup(&iter, array, typlen, typbyval, typalign);
for (i = 0; i < nitems; i++)
{
@@ -4247,7 +4258,7 @@ hash_array(PG_FUNCTION_ARGS)
uint32 elthash;
/* Get element, checking for NULL */
- elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign);
+ elt = array_iter_next(&iter, &isnull, i);
if (isnull)
{
@@ -4328,7 +4339,7 @@ hash_array_extended(PG_FUNCTION_ARGS)
/* Loop over source data */
nitems = ArrayGetNItems(ndims, dims);
- array_iter_setup(&iter, array);
+ array_iter_setup(&iter, array, typlen, typbyval, typalign);
for (i = 0; i < nitems; i++)
{
@@ -4337,7 +4348,7 @@ hash_array_extended(PG_FUNCTION_ARGS)
uint64 elthash;
/* Get element, checking for NULL */
- elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign);
+ elt = array_iter_next(&iter, &isnull, i);
if (isnull)
{
@@ -4451,7 +4462,7 @@ array_contain_compare(AnyArrayType *array1, AnyArrayType *array2, Oid collation,
/* Loop over source data */
nelems1 = ArrayGetNItems(AARR_NDIM(array1), AARR_DIMS(array1));
- array_iter_setup(&it1, array1);
+ array_iter_setup(&it1, array1, typlen, typbyval, typalign);
for (i = 0; i < nelems1; i++)
{
@@ -4459,7 +4470,7 @@ array_contain_compare(AnyArrayType *array1, AnyArrayType *array2, Oid collation,
bool isnull1;
/* Get element, checking for NULL */
- elt1 = array_iter_next(&it1, &isnull1, i, typlen, typbyval, typalign);
+ elt1 = array_iter_next(&it1, &isnull1, i);
/*
* We assume that the comparison operator is strict, so a NULL can't
@@ -4626,6 +4637,7 @@ array_create_iterator(ArrayType *arr, int slice_ndim, ArrayMetaState *mstate)
&iterator->typlen,
&iterator->typbyval,
&iterator->typalign);
+ iterator->typalignby = typalign_to_alignby(iterator->typalign);
/*
* Remember the slicing parameters.
@@ -4700,7 +4712,7 @@ array_iterate(ArrayIterator iterator, Datum *value, bool *isnull)
/* Move our data pointer forward to the next element */
p = att_addlength_pointer(p, iterator->typlen, p);
- p = (char *) att_align_nominal(p, iterator->typalign);
+ p = (char *) att_nominal_alignby(p, iterator->typalignby);
iterator->data_ptr = p;
}
}
@@ -4730,7 +4742,7 @@ array_iterate(ArrayIterator iterator, Datum *value, bool *isnull)
/* Move our data pointer forward to the next element */
p = att_addlength_pointer(p, iterator->typlen, p);
- p = (char *) att_align_nominal(p, iterator->typalign);
+ p = (char *) att_nominal_alignby(p, iterator->typalignby);
}
}
@@ -4828,7 +4840,7 @@ static int
ArrayCastAndSet(Datum src,
int typlen,
bool typbyval,
- char typalign,
+ uint8 typalignby,
char *dest)
{
int inc;
@@ -4839,14 +4851,14 @@ ArrayCastAndSet(Datum src,
store_att_byval(dest, src, typlen);
else
memmove(dest, DatumGetPointer(src), typlen);
- inc = att_align_nominal(typlen, typalign);
+ inc = att_nominal_alignby(typlen, typalignby);
}
else
{
Assert(!typbyval);
inc = att_addlength_datum(0, typlen, src);
memmove(dest, DatumGetPointer(src), inc);
- inc = att_align_nominal(inc, typalign);
+ inc = att_nominal_alignby(inc, typalignby);
}
return inc;
@@ -4867,12 +4879,13 @@ static char *
array_seek(char *ptr, int offset, bits8 *nullbitmap, int nitems,
int typlen, bool typbyval, char typalign)
{
+ uint8 typalignby = typalign_to_alignby(typalign);
int bitmask;
int i;
/* easy if fixed-size elements and no NULLs */
if (typlen > 0 && !nullbitmap)
- return ptr + nitems * ((Size) att_align_nominal(typlen, typalign));
+ return ptr + nitems * ((Size) att_nominal_alignby(typlen, typalignby));
/* seems worth having separate loops for NULL and no-NULLs cases */
if (nullbitmap)
@@ -4885,7 +4898,7 @@ array_seek(char *ptr, int offset, bits8 *nullbitmap, int nitems,
if (*nullbitmap & bitmask)
{
ptr = att_addlength_pointer(ptr, typlen, ptr);
- ptr = (char *) att_align_nominal(ptr, typalign);
+ ptr = (char *) att_nominal_alignby(ptr, typalignby);
}
bitmask <<= 1;
if (bitmask == 0x100)
@@ -4900,7 +4913,7 @@ array_seek(char *ptr, int offset, bits8 *nullbitmap, int nitems,
for (i = 0; i < nitems; i++)
{
ptr = att_addlength_pointer(ptr, typlen, ptr);
- ptr = (char *) att_align_nominal(ptr, typalign);
+ ptr = (char *) att_nominal_alignby(ptr, typalignby);
}
}
return ptr;
@@ -5050,12 +5063,13 @@ array_slice_size(char *arraydataptr, bits8 *arraynullsptr,
j,
inc;
int count = 0;
+ uint8 typalignby = typalign_to_alignby(typalign);
mda_get_range(ndim, span, st, endp);
/* Pretty easy for fixed element length without nulls ... */
if (typlen > 0 && !arraynullsptr)
- return ArrayGetNItems(ndim, span) * att_align_nominal(typlen, typalign);
+ return ArrayGetNItems(ndim, span) * att_nominal_alignby(typlen, typalignby);
/* Else gotta do it the hard way */
src_offset = ArrayGetOffset(ndim, dim, lb, st);
@@ -5077,7 +5091,7 @@ array_slice_size(char *arraydataptr, bits8 *arraynullsptr,
if (!array_get_isnull(arraynullsptr, src_offset))
{
inc = att_addlength_pointer(0, typlen, ptr);
- inc = att_align_nominal(inc, typalign);
+ inc = att_nominal_alignby(inc, typalignby);
ptr += inc;
count += inc;
}
@@ -6096,6 +6110,7 @@ array_fill_internal(ArrayType *dims, ArrayType *lbs,
int16 elmlen;
bool elmbyval;
char elmalign;
+ uint8 elmalignby;
ArrayMetaState *my_extra;
/*
@@ -6190,6 +6205,7 @@ array_fill_internal(ArrayType *dims, ArrayType *lbs,
elmlen = my_extra->typlen;
elmbyval = my_extra->typbyval;
elmalign = my_extra->typalign;
+ elmalignby = typalign_to_alignby(elmalign);
/* compute required space */
if (!isnull)
@@ -6204,7 +6220,7 @@ array_fill_internal(ArrayType *dims, ArrayType *lbs,
value = PointerGetDatum(PG_DETOAST_DATUM(value));
nbytes = att_addlength_datum(0, elmlen, value);
- nbytes = att_align_nominal(nbytes, elmalign);
+ nbytes = att_nominal_alignby(nbytes, elmalignby);
Assert(nbytes > 0);
totbytes = nbytes * nitems;
@@ -6228,7 +6244,7 @@ array_fill_internal(ArrayType *dims, ArrayType *lbs,
p = ARR_DATA_PTR(result);
for (i = 0; i < nitems; i++)
- p += ArrayCastAndSet(value, elmlen, elmbyval, elmalign, p);
+ p += ArrayCastAndSet(value, elmlen, elmbyval, elmalignby, p);
}
else
{
@@ -6259,9 +6275,6 @@ array_unnest(PG_FUNCTION_ARGS)
array_iter iter;
int nextelem;
int numelems;
- int16 elmlen;
- bool elmbyval;
- char elmalign;
} array_unnest_fctx;
FuncCallContext *funcctx;
@@ -6272,6 +6285,9 @@ array_unnest(PG_FUNCTION_ARGS)
if (SRF_IS_FIRSTCALL())
{
AnyArrayType *arr;
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
/* create a function context for cross-call persistence */
funcctx = SRF_FIRSTCALL_INIT();
@@ -6293,23 +6309,24 @@ array_unnest(PG_FUNCTION_ARGS)
/* allocate memory for user context */
fctx = palloc_object(array_unnest_fctx);
- /* initialize state */
- array_iter_setup(&fctx->iter, arr);
- fctx->nextelem = 0;
- fctx->numelems = ArrayGetNItems(AARR_NDIM(arr), AARR_DIMS(arr));
-
+ /* get element-type data */
if (VARATT_IS_EXPANDED_HEADER(arr))
{
/* we can just grab the type data from expanded array */
- fctx->elmlen = arr->xpn.typlen;
- fctx->elmbyval = arr->xpn.typbyval;
- fctx->elmalign = arr->xpn.typalign;
+ elmlen = arr->xpn.typlen;
+ elmbyval = arr->xpn.typbyval;
+ elmalign = arr->xpn.typalign;
}
else
get_typlenbyvalalign(AARR_ELEMTYPE(arr),
- &fctx->elmlen,
- &fctx->elmbyval,
- &fctx->elmalign);
+ &elmlen,
+ &elmbyval,
+ &elmalign);
+
+ /* initialize state */
+ array_iter_setup(&fctx->iter, arr, elmlen, elmbyval, elmalign);
+ fctx->nextelem = 0;
+ fctx->numelems = ArrayGetNItems(AARR_NDIM(arr), AARR_DIMS(arr));
funcctx->user_fctx = fctx;
MemoryContextSwitchTo(oldcontext);
@@ -6324,8 +6341,7 @@ array_unnest(PG_FUNCTION_ARGS)
int offset = fctx->nextelem++;
Datum elem;
- elem = array_iter_next(&fctx->iter, &fcinfo->isnull, offset,
- fctx->elmlen, fctx->elmbyval, fctx->elmalign);
+ elem = array_iter_next(&fctx->iter, &fcinfo->isnull, offset);
SRF_RETURN_NEXT(funcctx, elem);
}
@@ -6401,6 +6417,7 @@ array_replace_internal(ArrayType *array,
int typlen;
bool typbyval;
char typalign;
+ uint8 typalignby;
char *arraydataptr;
bits8 *bitmap;
int bitmask;
@@ -6445,6 +6462,7 @@ array_replace_internal(ArrayType *array,
typlen = typentry->typlen;
typbyval = typentry->typbyval;
typalign = typentry->typalign;
+ typalignby = typalign_to_alignby(typalign);
/*
* Detoast values if they are toasted. The replacement value must be
@@ -6506,7 +6524,7 @@ array_replace_internal(ArrayType *array,
isNull = false;
elt = fetch_att(arraydataptr, typbyval, typlen);
arraydataptr = att_addlength_datum(arraydataptr, typlen, elt);
- arraydataptr = (char *) att_align_nominal(arraydataptr, typalign);
+ arraydataptr = (char *) att_nominal_alignby(arraydataptr, typalignby);
if (search_isnull)
{
@@ -6553,7 +6571,7 @@ array_replace_internal(ArrayType *array,
{
/* Update total result size */
nbytes = att_addlength_datum(nbytes, typlen, values[nresult]);
- nbytes = att_align_nominal(nbytes, typalign);
+ nbytes = att_nominal_alignby(nbytes, typalignby);
/* check for overflow of total request */
if (!AllocSizeIsValid(nbytes))
ereport(ERROR,
@@ -6860,6 +6878,7 @@ width_bucket_array_variable(Datum operand,
int typlen = typentry->typlen;
bool typbyval = typentry->typbyval;
char typalign = typentry->typalign;
+ uint8 typalignby = typalign_to_alignby(typalign);
int left;
int right;
@@ -6883,7 +6902,7 @@ width_bucket_array_variable(Datum operand,
for (i = left; i < mid; i++)
{
ptr = att_addlength_pointer(ptr, typlen, ptr);
- ptr = (char *) att_align_nominal(ptr, typalign);
+ ptr = (char *) att_nominal_alignby(ptr, typalignby);
}
locfcinfo->args[0].value = operand;
@@ -6908,7 +6927,7 @@ width_bucket_array_variable(Datum operand,
* ensures we do only O(N) array indexing work, not O(N^2).
*/
ptr = att_addlength_pointer(ptr, typlen, ptr);
- thresholds_data = (char *) att_align_nominal(ptr, typalign);
+ thresholds_data = (char *) att_nominal_alignby(ptr, typalignby);
}
}
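
Stepping back from the individual hunks: the arrayfuncs.c changes move element-type metadata from every array_iter_next() call into array_iter_setup(). The resulting caller pattern, reconstructed from the call sites above (the wrapper function and its name are illustrative):

static void
walk_array_sketch(AnyArrayType *arr, int16 typlen, bool typbyval, char typalign)
{
	array_iter	iter;
	int			nitems = ArrayGetNItems(AARR_NDIM(arr), AARR_DIMS(arr));
	int			i;

	array_iter_setup(&iter, arr, typlen, typbyval, typalign);
	for (i = 0; i < nitems; i++)
	{
		bool		isnull;
		Datum		elt = array_iter_next(&iter, &isnull, i);

		(void) elt;				/* a real caller would process elt here */
	}
}
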
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 8dc0ac50625..90946db72ff 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -3594,7 +3594,7 @@ DecodeInterval(char **field, int *ftype, int nf, int range,
* handle signed float numbers and signed year-month values.
*/
- /* FALLTHROUGH */
+ pg_fallthrough;
case DTK_DATE:
case DTK_NUMBER:
@@ -4028,7 +4028,7 @@ DecodeISO8601Interval(char *str,
continue;
}
/* Else fall through to extended alternative format */
- /* FALLTHROUGH */
+ pg_fallthrough;
case '-': /* ISO 8601 4.4.3.3 Alternative Format,
* Extended */
if (havefield)
@@ -4111,7 +4111,7 @@ DecodeISO8601Interval(char *str,
return 0;
}
/* Else fall through to extended alternative format */
- /* FALLTHROUGH */
+ pg_fallthrough;
case ':': /* ISO 8601 4.4.3.3 Alternative Format,
* Extended */
if (havefield)
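
The datetime.c hunks above, and several files below, replace /* FALLTHROUGH */ comments with a pg_fallthrough statement. Its definition is not shown in this excerpt; presumably it resolves to the fallthrough attribute where the compiler supports it, along these lines (a sketch, not the patch's actual definition):

#if defined(__has_attribute)
#if __has_attribute(fallthrough)
#define pg_fallthrough __attribute__((fallthrough))
#endif
#endif
#ifndef pg_fallthrough
#define pg_fallthrough ((void) 0)
#endif
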
diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c
index cc26bd67a53..8832785540f 100644
--- a/src/backend/utils/adt/datum.c
+++ b/src/backend/utils/adt/datum.c
@@ -26,7 +26,7 @@
* The number of significant bytes are always equal to the typlen.
*
* C) if a type is not "byVal" and has typlen == -1,
- * then the "Datum" always points to a "struct varlena".
+ * then the "Datum" always points to a "varlena".
* This varlena structure has information about the actual length of this
* particular instance of the type and about its value.
*
@@ -82,7 +82,7 @@ datumGetSize(Datum value, bool typByVal, int typLen)
else if (typLen == -1)
{
/* It is a varlena datatype */
- struct varlena *s = (struct varlena *) DatumGetPointer(value);
+ varlena *s = (varlena *) DatumGetPointer(value);
if (!s)
ereport(ERROR,
@@ -138,7 +138,7 @@ datumCopy(Datum value, bool typByVal, int typLen)
else if (typLen == -1)
{
/* It is a varlena datatype */
- struct varlena *vl = (struct varlena *) DatumGetPointer(value);
+ varlena *vl = (varlena *) DatumGetPointer(value);
if (VARATT_IS_EXTERNAL_EXPANDED(vl))
{
@@ -288,8 +288,8 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
result = false;
else
{
- struct varlena *arg1val;
- struct varlena *arg2val;
+ varlena *arg1val;
+ varlena *arg2val;
arg1val = PG_DETOAST_DATUM_PACKED(value1);
arg2val = PG_DETOAST_DATUM_PACKED(value2);
@@ -346,7 +346,7 @@ datum_image_hash(Datum value, bool typByVal, int typLen)
result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
else if (typLen == -1)
{
- struct varlena *val;
+ varlena *val;
len = toast_raw_datum_size(value);
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 3c7f54f2638..f5f835e944a 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -290,7 +290,7 @@ hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext)
ereturn(escontext, 0,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid hexadecimal digit: \"%.*s\"",
- pg_mblen(s), s)));
+ pg_mblen_range(s, srcend), s)));
s++;
if (s >= srcend)
ereturn(escontext, 0,
@@ -300,7 +300,7 @@ hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext)
ereturn(escontext, 0,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid hexadecimal digit: \"%.*s\"",
- pg_mblen(s), s)));
+ pg_mblen_range(s, srcend), s)));
s++;
*p++ = (v1 << 4) | v2;
}
@@ -564,7 +564,7 @@ pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid symbol \"%.*s\" found while decoding %s sequence",
- pg_mblen(s - 1), s - 1,
+ pg_mblen_range(s - 1, srcend), s - 1,
url ? "base64url" : "base64")));
}
}
diff --git a/src/backend/utils/adt/expandedrecord.c b/src/backend/utils/adt/expandedrecord.c
index d21ef9d8c08..123792aa725 100644
--- a/src/backend/utils/adt/expandedrecord.c
+++ b/src/backend/utils/adt/expandedrecord.c
@@ -1159,7 +1159,7 @@ expanded_record_set_field_internal(ExpandedRecordHeader *erh, int fnumber,
{
/* Detoasting should be done in short-lived context. */
oldcxt = MemoryContextSwitchTo(get_short_term_cxt(erh));
- newValue = PointerGetDatum(detoast_external_attr((struct varlena *) DatumGetPointer(newValue)));
+ newValue = PointerGetDatum(detoast_external_attr((varlena *) DatumGetPointer(newValue)));
MemoryContextSwitchTo(oldcxt);
}
else
@@ -1305,7 +1305,7 @@ expanded_record_set_fields(ExpandedRecordHeader *erh,
if (expand_external)
{
/* Detoast as requested while copying the value */
- newValue = PointerGetDatum(detoast_external_attr((struct varlena *) DatumGetPointer(newValue)));
+ newValue = PointerGetDatum(detoast_external_attr((varlena *) DatumGetPointer(newValue)));
}
else
{
diff --git a/src/backend/utils/adt/format_type.c b/src/backend/utils/adt/format_type.c
index 544205ca067..3cd5053d118 100644
--- a/src/backend/utils/adt/format_type.c
+++ b/src/backend/utils/adt/format_type.c
@@ -448,11 +448,15 @@ oidvectortypes(PG_FUNCTION_ARGS)
{
oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0);
char *result;
- int numargs = oidArray->dim1;
+ int numargs;
int num;
size_t total;
size_t left;
+ /* validate input before fetching dim1 */
+ check_valid_oidvector(oidArray);
+ numargs = oidArray->dim1;
+
total = 20 * numargs + 1;
result = palloc(total);
result[0] = '\0';
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index cf580c63c78..0716aff22b6 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1236,7 +1236,7 @@ NUMDesc_prepare(NUMDesc *num, FormatNode *n)
case NUM_D:
num->flag |= NUM_F_LDECIMAL;
num->need_locale = true;
- /* FALLTHROUGH */
+ pg_fallthrough;
case NUM_DEC:
if (IS_DECIMAL(num))
ereport(ERROR,
@@ -1438,7 +1438,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
ereport(ERROR,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
errmsg("invalid datetime format separator: \"%s\"",
- pnstrdup(str, pg_mblen(str)))));
+ pnstrdup(str, pg_mblen_cstr(str)))));
if (*str == ' ')
n->type = NODE_TYPE_SPACE;
@@ -1468,7 +1468,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
/* backslash quotes the next character, if any */
if (*str == '\\' && *(str + 1))
str++;
- chlen = pg_mblen(str);
+ chlen = pg_mblen_cstr(str);
n->type = NODE_TYPE_CHAR;
memcpy(n->character, str, chlen);
n->character[chlen] = '\0';
@@ -1486,7 +1486,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
*/
if (*str == '\\' && *(str + 1) == '"')
str++;
- chlen = pg_mblen(str);
+ chlen = pg_mblen_cstr(str);
if ((flags & DCH_FLAG) && is_separator_char(str))
n->type = NODE_TYPE_SEPARATOR;
@@ -1992,8 +1992,8 @@ asc_toupper_z(const char *buff)
do { \
if (IS_SUFFIX_THth(_suf)) \
{ \
- if (*(ptr)) (ptr) += pg_mblen(ptr); \
- if (*(ptr)) (ptr) += pg_mblen(ptr); \
+ if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
+ if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \
} \
} while (0)
@@ -3022,7 +3022,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
s += strlen(s);
break;
case DCH_RM:
- /* FALLTHROUGH */
+ pg_fallthrough;
case DCH_rm:
/*
@@ -3183,7 +3183,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
* insist that the consumed character match the format's
* character.
*/
- s += pg_mblen(s);
+ s += pg_mblen_cstr(s);
}
continue;
}
@@ -3205,11 +3205,11 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
if (extra_skip > 0)
extra_skip--;
else
- s += pg_mblen(s);
+ s += pg_mblen_cstr(s);
}
else
{
- int chlen = pg_mblen(s);
+ int chlen = pg_mblen_cstr(s);
/*
* Standard mode requires strict match of format characters.
@@ -3300,7 +3300,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
case DCH_FF5:
case DCH_FF6:
out->ff = n->key->id - DCH_FF1 + 1;
- /* FALLTHROUGH */
+ pg_fallthrough;
case DCH_US: /* microsecond */
len = from_char_parse_int_len(&out->us, &s,
n->key->id == DCH_US ? 6 :
@@ -3354,7 +3354,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
}
/* otherwise parse it like OF */
}
- /* FALLTHROUGH */
+ pg_fallthrough;
case DCH_OF:
/* OF is equivalent to TZH or TZH:TZM */
/* see TZH comments below */
@@ -5724,13 +5724,15 @@ NUM_numpart_to_char(NUMProc *Np, int id)
static void
NUM_eat_non_data_chars(NUMProc *Np, int n, size_t input_len)
{
+ const char *end = Np->inout + input_len;
+
while (n-- > 0)
{
if (OVERLOAD_TEST)
break; /* end of input */
if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
break; /* it's a data character */
- Np->inout_p += pg_mblen(Np->inout_p);
+ Np->inout_p += pg_mblen_range(Np->inout_p, end);
}
}
@@ -6167,7 +6169,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
}
else
{
- Np->inout_p += pg_mblen(Np->inout_p);
+ Np->inout_p += pg_mblen_range(Np->inout_p, Np->inout + input_len);
}
continue;
}
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index d2302626585..ff54d50ea9d 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -134,6 +134,30 @@ buildint2vector(const int16 *int2s, int n)
return result;
}
+/*
+ * validate that an array object meets the restrictions of int2vector
+ *
+ * We need this because there are pathways by which a general int2[] array can
+ * be cast to int2vector, allowing the type's restrictions to be violated.
+ * All code that receives an int2vector as a SQL parameter should check this.
+ */
+static void
+check_valid_int2vector(const int2vector *int2Array)
+{
+ /*
+ * We insist on ndim == 1 and dataoffset == 0 (that is, no nulls) because
+ * otherwise the array's layout will not be what calling code expects. We
+ * needn't be picky about the index lower bound though. Checking elemtype
+ * is just paranoia.
+ */
+ if (int2Array->ndim != 1 ||
+ int2Array->dataoffset != 0 ||
+ int2Array->elemtype != INT2OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("array is not a valid int2vector")));
+}
+
/*
* int2vectorin - converts "num num ..." to internal form
*/
@@ -208,10 +232,14 @@ int2vectorout(PG_FUNCTION_ARGS)
{
int2vector *int2Array = (int2vector *) PG_GETARG_POINTER(0);
int num,
- nnums = int2Array->dim1;
+ nnums;
char *rp;
char *result;
+ /* validate input before fetching dim1 */
+ check_valid_int2vector(int2Array);
+ nnums = int2Array->dim1;
+
/* assumes sign, 5 digits, ' ' */
rp = result = (char *) palloc(nnums * 7 + 1);
for (num = 0; num < nnums; num++)
@@ -272,6 +300,7 @@ int2vectorrecv(PG_FUNCTION_ARGS)
Datum
int2vectorsend(PG_FUNCTION_ARGS)
{
+ /* We don't do check_valid_int2vector, since array_send won't care */
return array_send(fcinfo);
}
diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c
index 28e7f80d77f..0a3a77ee786 100644
--- a/src/backend/utils/adt/jsonb.c
+++ b/src/backend/utils/adt/jsonb.c
@@ -772,7 +772,7 @@ datum_to_jsonb_internal(Datum val, bool is_null, JsonbInState *result,
case JSONTYPE_CAST:
/* cast to JSON, and then process as JSON */
val = OidFunctionCall1(outfuncoid, val);
- /* FALL THROUGH */
+ pg_fallthrough;
case JSONTYPE_JSON:
{
/* parse the json right into the existing result object */
diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c
index e085042f912..91fb9ea09bf 100644
--- a/src/backend/utils/adt/jsonb_util.c
+++ b/src/backend/utils/adt/jsonb_util.c
@@ -721,7 +721,7 @@ pushJsonbValueScalar(JsonbInState *pstate, JsonbIteratorToken seq,
uniqueifyJsonbObject(&ppstate->contVal,
ppstate->unique_keys,
ppstate->skip_nulls);
- /* fall through! */
+ pg_fallthrough;
case WJB_END_ARRAY:
/* Steps here common to WJB_END_OBJECT case */
Assert(!scalarVal);
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 1e5b60801e4..d5b64d7fca5 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -695,7 +695,7 @@ report_json_context(JsonLexContext *lex)
{
/* Advance to next multibyte character */
if (IS_HIGHBIT_SET(*context_start))
- context_start += pg_mblen(context_start);
+ context_start += pg_mblen_range(context_start, context_end);
else
context_start++;
}
diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
index 18a8046d6cf..d70ff1eaa54 100644
--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
@@ -351,7 +351,7 @@ flattenJsonPathParseItem(StringInfo buf, int *result, struct Node *escontext,
break;
case jpiFilter:
argNestingLevel++;
- /* FALLTHROUGH */
+ pg_fallthrough;
case jpiIsUnknown:
case jpiNot:
case jpiPlus:
@@ -487,13 +487,13 @@ alignStringInfoInt(StringInfo buf)
{
case 3:
appendStringInfoCharMacro(buf, 0);
- /* FALLTHROUGH */
+ pg_fallthrough;
case 2:
appendStringInfoCharMacro(buf, 0);
- /* FALLTHROUGH */
+ pg_fallthrough;
case 1:
appendStringInfoCharMacro(buf, 0);
- /* FALLTHROUGH */
+ pg_fallthrough;
default:
break;
}
@@ -1021,7 +1021,7 @@ jspInitByBuffer(JsonPathItem *v, char *base, int32 pos)
case jpiKey:
case jpiVariable:
read_int32(v->content.value.datalen, base, pos);
- /* FALLTHROUGH */
+ pg_fallthrough;
case jpiNumeric:
case jpiBool:
v->content.value.data = base + pos;
@@ -1433,7 +1433,7 @@ jspIsMutableWalker(JsonPathItem *jpi, struct JsonPathMutableContext *cxt)
jspIsMutableWalker(&from, cxt);
}
- /* FALLTHROUGH */
+ pg_fallthrough;
case jpiAnyArray:
if (!cxt->lax)
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
index 4543626ffc8..87070235d11 100644
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -599,7 +599,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.",
- pg_mblen(flags->val + i), flags->val + i)));
+ pg_mblen_range(flags->val + i, flags->val + flags->len),
+ flags->val + i)));
break;
}
}
diff --git a/src/backend/utils/adt/levenshtein.c b/src/backend/utils/adt/levenshtein.c
index fb2ba591acd..5b3d84029f6 100644
--- a/src/backend/utils/adt/levenshtein.c
+++ b/src/backend/utils/adt/levenshtein.c
@@ -83,6 +83,8 @@ varstr_levenshtein(const char *source, int slen,
int *s_char_len = NULL;
int j;
const char *y;
+ const char *send = source + slen;
+ const char *tend = target + tlen;
/*
* For varstr_levenshtein_less_equal, we have real variables called
@@ -183,10 +185,10 @@ varstr_levenshtein(const char *source, int slen,
#endif
/*
- * In order to avoid calling pg_mblen() repeatedly on each character in s,
- * we cache all the lengths before starting the main loop -- but if all
- * the characters in both strings are single byte, then we skip this and
- * use a fast-path in the main loop. If only one string contains
+ * In order to avoid calling pg_mblen_range() repeatedly on each character
+ * in s, we cache all the lengths before starting the main loop -- but if
+ * all the characters in both strings are single byte, then we skip this
+ * and use a fast-path in the main loop. If only one string contains
* multi-byte characters, we still build the array, so that the fast-path
* needn't deal with the case where the array hasn't been initialized.
*/
@@ -198,7 +200,7 @@ varstr_levenshtein(const char *source, int slen,
s_char_len = (int *) palloc((m + 1) * sizeof(int));
for (i = 0; i < m; ++i)
{
- s_char_len[i] = pg_mblen(cp);
+ s_char_len[i] = pg_mblen_range(cp, send);
cp += s_char_len[i];
}
s_char_len[i] = 0;
@@ -224,7 +226,7 @@ varstr_levenshtein(const char *source, int slen,
{
int *temp;
const char *x = source;
- int y_char_len = n != tlen + 1 ? pg_mblen(y) : 1;
+ int y_char_len = n != tlen + 1 ? pg_mblen_range(y, tend) : 1;
int i;
#ifdef LEVENSHTEIN_LESS_EQUAL
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 2143d8658e8..350bc07f210 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -55,20 +55,20 @@ static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
*--------------------
*/
static inline int
-wchareq(const char *p1, const char *p2)
+wchareq(const char *p1, int p1len, const char *p2, int p2len)
{
- int p1_len;
+ int p1clen;
/* Optimization: quickly compare the first byte. */
if (*p1 != *p2)
return 0;
- p1_len = pg_mblen(p1);
- if (pg_mblen(p2) != p1_len)
+ p1clen = pg_mblen_with_len(p1, p1len);
+ if (pg_mblen_with_len(p2, p2len) != p1clen)
return 0;
/* They are the same length */
- while (p1_len--)
+ while (p1clen--)
{
if (*p1++ != *p2++)
return 0;
@@ -93,11 +93,11 @@ wchareq(const char *p1, const char *p2)
#define NextByte(p, plen) ((p)++, (plen)--)
/* Set up to compile like_match.c for multibyte characters */
-#define CHAREQ(p1, p2) wchareq((p1), (p2))
+#define CHAREQ(p1, p1len, p2, p2len) wchareq((p1), (p1len), (p2), (p2len))
#define NextChar(p, plen) \
- do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
+ do { int __l = pg_mblen_with_len((p), (plen)); (p) +=__l; (plen) -=__l; } while (0)
#define CopyAdvChar(dst, src, srclen) \
- do { int __l = pg_mblen(src); \
+ do { int __l = pg_mblen_with_len((src), (srclen)); \
(srclen) -= __l; \
while (__l-- > 0) \
*(dst)++ = *(src)++; \
@@ -109,7 +109,7 @@ wchareq(const char *p1, const char *p2)
#include "like_match.c"
/* Set up to compile like_match.c for single-byte characters */
-#define CHAREQ(p1, p2) (*(p1) == *(p2))
+#define CHAREQ(p1, p1len, p2, p2len) (*(p1) == *(p2))
#define NextChar(p, plen) NextByte((p), (plen))
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c
index 02990ca9a1b..f5f72b82e21 100644
--- a/src/backend/utils/adt/like_match.c
+++ b/src/backend/utils/adt/like_match.c
@@ -442,6 +442,7 @@ do_like_escape(text *pat, text *esc)
errhint("Escape string must be empty or one character.")));
e = VARDATA_ANY(esc);
+ elen = VARSIZE_ANY_EXHDR(esc);
/*
* If specified escape is '\', just copy the pattern as-is.
@@ -460,7 +461,7 @@ do_like_escape(text *pat, text *esc)
afterescape = false;
while (plen > 0)
{
- if (CHAREQ(p, e) && !afterescape)
+ if (CHAREQ(p, plen, e, elen) && !afterescape)
{
*r++ = '\\';
NextChar(p, plen);
diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c
index 12b8d4cefaf..c7f7b8bc2dd 100644
--- a/src/backend/utils/adt/mcxtfuncs.c
+++ b/src/backend/utils/adt/mcxtfuncs.c
@@ -19,6 +19,7 @@
#include "mb/pg_wchar.h"
#include "storage/proc.h"
#include "storage/procarray.h"
+#include "storage/procsignal.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
diff --git a/src/backend/utils/adt/multirangetypes.c b/src/backend/utils/adt/multirangetypes.c
index 07e2a81d46a..9548989d782 100644
--- a/src/backend/utils/adt/multirangetypes.c
+++ b/src/backend/utils/adt/multirangetypes.c
@@ -485,8 +485,9 @@ multirange_canonicalize(TypeCacheEntry *rangetyp, int32 input_range_count,
int32 output_range_count = 0;
/* Sort the ranges so we can find the ones that overlap/meet. */
- qsort_arg(ranges, input_range_count, sizeof(RangeType *), range_compare,
- rangetyp);
+ if (ranges != NULL)
+ qsort_arg(ranges, input_range_count, sizeof(RangeType *),
+ range_compare, rangetyp);
/* Now merge where possible: */
for (i = 0; i < input_range_count; i++)
@@ -572,21 +573,22 @@ multirange_size_estimate(TypeCacheEntry *rangetyp, int32 range_count,
RangeType **ranges)
{
char elemalign = rangetyp->rngelemtype->typalign;
+ uint8 elemalignby = typalign_to_alignby(elemalign);
Size size;
int32 i;
/*
* Count space for MultirangeType struct, items and flags.
*/
- size = att_align_nominal(sizeof(MultirangeType) +
- Max(range_count - 1, 0) * sizeof(uint32) +
- range_count * sizeof(uint8), elemalign);
+ size = att_nominal_alignby(sizeof(MultirangeType) +
+ Max(range_count - 1, 0) * sizeof(uint32) +
+ range_count * sizeof(uint8), elemalignby);
/* Count space for range bounds */
for (i = 0; i < range_count; i++)
- size += att_align_nominal(VARSIZE(ranges[i]) -
- sizeof(RangeType) -
- sizeof(char), elemalign);
+ size += att_nominal_alignby(VARSIZE(ranges[i]) -
+ sizeof(RangeType) -
+ sizeof(char), elemalignby);
return size;
}
@@ -605,6 +607,7 @@ write_multirange_data(MultirangeType *multirange, TypeCacheEntry *rangetyp,
const char *begin;
char *ptr;
char elemalign = rangetyp->rngelemtype->typalign;
+ uint8 elemalignby = typalign_to_alignby(elemalign);
items = MultirangeGetItemsPtr(multirange);
flags = MultirangeGetFlagsPtr(multirange);
@@ -630,7 +633,7 @@ write_multirange_data(MultirangeType *multirange, TypeCacheEntry *rangetyp,
flags[i] = *((char *) ranges[i] + VARSIZE(ranges[i]) - sizeof(char));
len = VARSIZE(ranges[i]) - sizeof(RangeType) - sizeof(char);
memcpy(ptr, ranges[i] + 1, len);
- ptr += att_align_nominal(len, elemalign);
+ ptr += att_nominal_alignby(len, elemalignby);
}
}
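
att_nominal_alignby() and typalign_to_alignby() are likewise introduced by this patch: the typalign char is decoded once per call of the outer function, and the per-range loop then does plain power-of-two rounding on a numeric alignment. Assumed definitions, mirroring the classic att_align_nominal()/TYPEALIGN arithmetic:

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	static uint8_t
	typalign_to_alignby_sketch(char typalign)
	{
		switch (typalign)
		{
			case 'c':			/* TYPALIGN_CHAR */
				return 1;
			case 's':			/* TYPALIGN_SHORT */
				return 2;
			case 'i':			/* TYPALIGN_INT */
				return 4;
			case 'd':			/* TYPALIGN_DOUBLE */
				return 8;
		}
		assert(!"unexpected typalign");
		return 1;
	}

	/* round len up to the next multiple of alignby, a power of two */
	static size_t
	att_nominal_alignby_sketch(size_t len, uint8_t alignby)
	{
		return (len + alignby - 1) & ~((size_t) alignby - 1);
	}

With alignby 8, lengths 1 through 8 all round up to 8, which is exactly the per-bound padding multirange_size_estimate() reserves above.
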
diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c
index 902f9c25db0..2a8d2ded907 100644
--- a/src/backend/utils/adt/network_selfuncs.c
+++ b/src/backend/utils/adt/network_selfuncs.c
@@ -43,9 +43,9 @@
/* Maximum number of items to consider in join selectivity calculations */
#define MAX_CONSIDERED_ELEMS 1024
-static Selectivity networkjoinsel_inner(Oid operator,
+static Selectivity networkjoinsel_inner(Oid operator, int opr_codenum,
VariableStatData *vardata1, VariableStatData *vardata2);
-static Selectivity networkjoinsel_semi(Oid operator,
+static Selectivity networkjoinsel_semi(Oid operator, int opr_codenum,
VariableStatData *vardata1, VariableStatData *vardata2);
static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues);
static Selectivity inet_hist_value_sel(const Datum *values, int nvalues,
@@ -82,6 +82,7 @@ networksel(PG_FUNCTION_ARGS)
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
+ int opr_codenum;
VariableStatData vardata;
Node *other;
bool varonleft;
@@ -95,6 +96,14 @@ networksel(PG_FUNCTION_ARGS)
nullfrac;
FmgrInfo proc;
+ /*
+ * Before all else, verify that the operator is one of the ones supported
+ * by this function, which in turn proves that the input datatypes are
+ * what we expect. Otherwise, attaching this selectivity function to some
+ * unexpected operator could cause trouble.
+ */
+ opr_codenum = inet_opr_codenum(operator);
+
/*
* If expression is not (variable op something) or (something op
* variable), then punt and return a default estimate.
@@ -150,13 +159,12 @@ networksel(PG_FUNCTION_ARGS)
STATISTIC_KIND_HISTOGRAM, InvalidOid,
ATTSTATSSLOT_VALUES))
{
- int opr_codenum = inet_opr_codenum(operator);
+ int h_codenum;
/* Commute if needed, so we can consider histogram to be on the left */
- if (!varonleft)
- opr_codenum = -opr_codenum;
+ h_codenum = varonleft ? opr_codenum : -opr_codenum;
non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues,
- constvalue, opr_codenum);
+ constvalue, h_codenum);
free_attstatsslot(&hslot);
}
@@ -203,10 +211,19 @@ networkjoinsel(PG_FUNCTION_ARGS)
#endif
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
double selec;
+ int opr_codenum;
VariableStatData vardata1;
VariableStatData vardata2;
bool join_is_reversed;
+ /*
+ * Before all else, verify that the operator is one of the ones supported
+ * by this function, which in turn proves that the input datatypes are
+ * what we expect. Otherwise, attaching this selectivity function to some
+ * unexpected operator could cause trouble.
+ */
+ opr_codenum = inet_opr_codenum(operator);
+
get_join_variables(root, args, sjinfo,
&vardata1, &vardata2, &join_is_reversed);
@@ -220,15 +237,18 @@ networkjoinsel(PG_FUNCTION_ARGS)
* Selectivity for left/full join is not exactly the same as inner
* join, but we neglect the difference, as eqjoinsel does.
*/
- selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
+ selec = networkjoinsel_inner(operator, opr_codenum,
+ &vardata1, &vardata2);
break;
case JOIN_SEMI:
case JOIN_ANTI:
/* Here, it's important that we pass the outer var on the left. */
if (!join_is_reversed)
- selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
+ selec = networkjoinsel_semi(operator, opr_codenum,
+ &vardata1, &vardata2);
else
selec = networkjoinsel_semi(get_commutator(operator),
+ -opr_codenum,
&vardata2, &vardata1);
break;
default:
@@ -260,7 +280,7 @@ networkjoinsel(PG_FUNCTION_ARGS)
* Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
*/
static Selectivity
-networkjoinsel_inner(Oid operator,
+networkjoinsel_inner(Oid operator, int opr_codenum,
VariableStatData *vardata1, VariableStatData *vardata2)
{
Form_pg_statistic stats;
@@ -273,7 +293,6 @@ networkjoinsel_inner(Oid operator,
mcv2_exists = false,
hist1_exists = false,
hist2_exists = false;
- int opr_codenum;
int mcv1_length = 0,
mcv2_length = 0;
AttStatsSlot mcv1_slot;
@@ -325,8 +344,6 @@ networkjoinsel_inner(Oid operator,
memset(&hist2_slot, 0, sizeof(hist2_slot));
}
- opr_codenum = inet_opr_codenum(operator);
-
/*
* Calculate selectivity for MCV vs MCV matches.
*/
@@ -387,7 +404,7 @@ networkjoinsel_inner(Oid operator,
* histogram selectivity for semi/anti join cases.
*/
static Selectivity
-networkjoinsel_semi(Oid operator,
+networkjoinsel_semi(Oid operator, int opr_codenum,
VariableStatData *vardata1, VariableStatData *vardata2)
{
Form_pg_statistic stats;
@@ -401,7 +418,6 @@ networkjoinsel_semi(Oid operator,
mcv2_exists = false,
hist1_exists = false,
hist2_exists = false;
- int opr_codenum;
FmgrInfo proc;
int i,
mcv1_length = 0,
@@ -455,7 +471,6 @@ networkjoinsel_semi(Oid operator,
memset(&hist2_slot, 0, sizeof(hist2_slot));
}
- opr_codenum = inet_opr_codenum(operator);
fmgr_info(get_opcode(operator), &proc);
/* Estimate number of input rows represented by RHS histogram. */
@@ -827,6 +842,9 @@ inet_semi_join_sel(Datum lhs_value,
/*
* Assign useful code numbers for the subnet inclusion/overlap operators
*
+ * This will throw an error if the operator is not one of the ones we
+ * support in networksel() and networkjoinsel().
+ *
* Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend
* on the exact codes assigned here; but many other places in this file
* know that they can negate a code to obtain the code for the commutator
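
Hoisting inet_opr_codenum() to the top of networksel() and networkjoinsel() lets it do double duty: it now rejects unsupported operators before any other work, and its result is threaded down to the helper functions instead of being re-derived there. The negation trick relies on the code numbers being symmetric around zero; a sketch with illustrative values (the real assignments live in inet_opr_codenum() and are not shown in these hunks):

	enum
	{
		CODENUM_SUB = -2,		/* <<  : left is a strict subnet */
		CODENUM_SUBEQ = -1,		/* <<= */
		CODENUM_OVERLAP = 0,	/* &&  : its own commutator */
		CODENUM_SUPEQ = 1,		/* >>= */
		CODENUM_SUP = 2			/* >>  */
	};

	static int
	commute_codenum(int opr_codenum)
	{
		/* << becomes >>, <<= becomes >>=, && stays put */
		return -opr_codenum;
	}

This is why the semi-join branch above can pass -opr_codenum alongside get_commutator(operator) without deriving the commutated code a second time.
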
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 891ae6ba7fe..d25b8ad505d 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -48,8 +48,8 @@
* Uncomment the following to enable compilation of dump_numeric()
* and dump_var() and to get a dump of any result produced by make_result().
* ----------
-#define NUMERIC_DEBUG
*/
+/* #define NUMERIC_DEBUG */
/* ----------
@@ -2378,13 +2378,13 @@ numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss)
{
default:
result |= ((int64) var->digits[3]);
- /* FALLTHROUGH */
+ pg_fallthrough;
case 3:
result |= ((int64) var->digits[2]) << 14;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 2:
result |= ((int64) var->digits[1]) << 28;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 1:
result |= ((int64) var->digits[0]) << 42;
break;
@@ -8818,22 +8818,22 @@ mul_var_short(const NumericVar *var1, const NumericVar *var2,
term = PRODSUM5(var1digits, 0, var2digits, 4) + carry;
res_digits[5] = (NumericDigit) (term % NBASE);
carry = term / NBASE;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 5:
term = PRODSUM4(var1digits, 0, var2digits, 3) + carry;
res_digits[4] = (NumericDigit) (term % NBASE);
carry = term / NBASE;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 4:
term = PRODSUM3(var1digits, 0, var2digits, 2) + carry;
res_digits[3] = (NumericDigit) (term % NBASE);
carry = term / NBASE;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 3:
term = PRODSUM2(var1digits, 0, var2digits, 1) + carry;
res_digits[2] = (NumericDigit) (term % NBASE);
carry = term / NBASE;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 2:
term = PRODSUM1(var1digits, 0, var2digits, 0) + carry;
res_digits[1] = (NumericDigit) (term % NBASE);
diff --git a/src/backend/utils/adt/oid.c b/src/backend/utils/adt/oid.c
index 6f4c299dee9..a3419728971 100644
--- a/src/backend/utils/adt/oid.c
+++ b/src/backend/utils/adt/oid.c
@@ -107,6 +107,30 @@ buildoidvector(const Oid *oids, int n)
return result;
}
+/*
+ * validate that an array object meets the restrictions of oidvector
+ *
+ * We need this because there are pathways by which a general oid[] array can
+ * be cast to oidvector, allowing the type's restrictions to be violated.
+ * All code that receives an oidvector as a SQL parameter should check this.
+ */
+void
+check_valid_oidvector(const oidvector *oidArray)
+{
+ /*
+ * We insist on ndim == 1 and dataoffset == 0 (that is, no nulls) because
+ * otherwise the array's layout will not be what calling code expects. We
+ * needn't be picky about the index lower bound though. Checking elemtype
+ * is just paranoia.
+ */
+ if (oidArray->ndim != 1 ||
+ oidArray->dataoffset != 0 ||
+ oidArray->elemtype != OIDOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("array is not a valid oidvector")));
+}
+
/*
* oidvectorin - converts "num num ..." to internal form
*/
@@ -159,10 +183,14 @@ oidvectorout(PG_FUNCTION_ARGS)
{
oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0);
int num,
- nnums = oidArray->dim1;
+ nnums;
char *rp;
char *result;
+ /* validate input before fetching dim1 */
+ check_valid_oidvector(oidArray);
+ nnums = oidArray->dim1;
+
/* assumes sign, 10 digits, ' ' */
rp = result = (char *) palloc(nnums * 12 + 1);
for (num = 0; num < nnums; num++)
@@ -225,6 +253,7 @@ oidvectorrecv(PG_FUNCTION_ARGS)
Datum
oidvectorsend(PG_FUNCTION_ARGS)
{
+ /* We don't do check_valid_oidvector, since array_send won't care */
return array_send(fcinfo);
}
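
With oidvectorout() validating before it trusts dim1, a general oid[] value cast to oidvector now draws a clean error instead of a misread header. A self-contained sketch of the check's logic, using stand-in fields (an assumption) for the parts of the array header it inspects:

	#include <stdio.h>
	#include <stdlib.h>

	#define OIDOID 26			/* pg_type OID of type "oid" */

	typedef struct
	{
		int			ndim;		/* must be 1 */
		int			dataoffset; /* must be 0, i.e. no nulls bitmap */
		unsigned int elemtype;	/* must be OIDOID */
		int			dim1;		/* only now safe to trust */
	} oidvector_hdr;

	static void
	check_valid_oidvector_sketch(const oidvector_hdr *v)
	{
		if (v->ndim != 1 || v->dataoffset != 0 || v->elemtype != OIDOID)
		{
			fprintf(stderr, "array is not a valid oidvector\n");
			exit(EXIT_FAILURE);
		}
	}

	int
	main(void)
	{
		oidvector_hdr ok = {1, 0, OIDOID, 3};
		oidvector_hdr bad = {2, 0, OIDOID, 3};	/* a 2-D array cast in */

		check_valid_oidvector_sketch(&ok);	/* passes */
		check_valid_oidvector_sketch(&bad); /* reports the error */
		return 0;
	}
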
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index a003f90066c..5b0d098bd07 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -169,8 +169,8 @@ lpad(PG_FUNCTION_ARGS)
char *ptr1,
*ptr2,
*ptr2start,
- *ptr2end,
*ptr_ret;
+ const char *ptr2end;
int m,
s1len,
s2len;
@@ -215,7 +215,7 @@ lpad(PG_FUNCTION_ARGS)
while (m--)
{
- int mlen = pg_mblen(ptr2);
+ int mlen = pg_mblen_range(ptr2, ptr2end);
memcpy(ptr_ret, ptr2, mlen);
ptr_ret += mlen;
@@ -228,7 +228,7 @@ lpad(PG_FUNCTION_ARGS)
while (s1len--)
{
- int mlen = pg_mblen(ptr1);
+ int mlen = pg_mblen_unbounded(ptr1);
memcpy(ptr_ret, ptr1, mlen);
ptr_ret += mlen;
@@ -267,8 +267,8 @@ rpad(PG_FUNCTION_ARGS)
char *ptr1,
*ptr2,
*ptr2start,
- *ptr2end,
*ptr_ret;
+ const char *ptr2end;
int m,
s1len,
s2len;
@@ -308,11 +308,12 @@ rpad(PG_FUNCTION_ARGS)
m = len - s1len;
ptr1 = VARDATA_ANY(string1);
+
ptr_ret = VARDATA(ret);
while (s1len--)
{
- int mlen = pg_mblen(ptr1);
+ int mlen = pg_mblen_unbounded(ptr1);
memcpy(ptr_ret, ptr1, mlen);
ptr_ret += mlen;
@@ -324,7 +325,7 @@ rpad(PG_FUNCTION_ARGS)
while (m--)
{
- int mlen = pg_mblen(ptr2);
+ int mlen = pg_mblen_range(ptr2, ptr2end);
memcpy(ptr_ret, ptr2, mlen);
ptr_ret += mlen;
@@ -409,6 +410,7 @@ dotrim(const char *string, int stringlen,
*/
const char **stringchars;
const char **setchars;
+ const char *setend;
int *stringmblen;
int *setmblen;
int stringnchars;
@@ -416,6 +418,7 @@ dotrim(const char *string, int stringlen,
int resultndx;
int resultnchars;
const char *p;
+ const char *pend;
int len;
int mblen;
const char *str_pos;
@@ -426,10 +429,11 @@ dotrim(const char *string, int stringlen,
stringnchars = 0;
p = string;
len = stringlen;
+ pend = p + len;
while (len > 0)
{
stringchars[stringnchars] = p;
- stringmblen[stringnchars] = mblen = pg_mblen(p);
+ stringmblen[stringnchars] = mblen = pg_mblen_range(p, pend);
stringnchars++;
p += mblen;
len -= mblen;
@@ -440,10 +444,11 @@ dotrim(const char *string, int stringlen,
setnchars = 0;
p = set;
len = setlen;
+ setend = set + setlen;
while (len > 0)
{
setchars[setnchars] = p;
- setmblen[setnchars] = mblen = pg_mblen(p);
+ setmblen[setnchars] = mblen = pg_mblen_range(p, setend);
setnchars++;
p += mblen;
len -= mblen;
@@ -821,6 +826,8 @@ translate(PG_FUNCTION_ARGS)
*to_end;
char *source,
*target;
+ const char *source_end;
+ const char *from_end;
int m,
fromlen,
tolen,
@@ -835,9 +842,11 @@ translate(PG_FUNCTION_ARGS)
if (m <= 0)
PG_RETURN_TEXT_P(string);
source = VARDATA_ANY(string);
+ source_end = source + m;
fromlen = VARSIZE_ANY_EXHDR(from);
from_ptr = VARDATA_ANY(from);
+ from_end = from_ptr + fromlen;
tolen = VARSIZE_ANY_EXHDR(to);
to_ptr = VARDATA_ANY(to);
to_end = to_ptr + tolen;
@@ -861,12 +870,12 @@ translate(PG_FUNCTION_ARGS)
while (m > 0)
{
- source_len = pg_mblen(source);
+ source_len = pg_mblen_range(source, source_end);
from_index = 0;
for (i = 0; i < fromlen; i += len)
{
- len = pg_mblen(&from_ptr[i]);
+ len = pg_mblen_range(&from_ptr[i], from_end);
if (len == source_len &&
memcmp(source, &from_ptr[i], len) == 0)
break;
@@ -882,11 +891,11 @@ translate(PG_FUNCTION_ARGS)
{
if (p >= to_end)
break;
- p += pg_mblen(p);
+ p += pg_mblen_range(p, to_end);
}
if (p < to_end)
{
- len = pg_mblen(p);
+ len = pg_mblen_range(p, to_end);
memcpy(target, p, len);
target += len;
retlen += len;
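
lpad() and rpad() show the split this patch makes in the pg_mblen() family. string1 was already validated when the result buffer was sized, so walking it uses pg_mblen_unbounded(); the pad string is walked against an explicit ptr2end bound because it is traversed cyclically. The contracts below are assumptions inferred from call sites, not from a header shown in these hunks:

	/*
	 * pg_mblen_range(p, end)    - bounded; never counts bytes at or
	 *                             beyond "end"
	 * pg_mblen_with_len(p, len) - same, with the bound as a byte count
	 * pg_mblen_cstr(s)          - bounded by the terminating NUL
	 * pg_mblen_unbounded(p)     - caller already verified a complete
	 *                             character is present
	 */

	/* sketch of the bounded, cyclic pad walk used by lpad()/rpad() */
	while (m-- > 0)
	{
		int			mlen = pg_mblen_range(ptr2, ptr2end);

		memcpy(ptr_ret, ptr2, mlen);
		ptr_ret += mlen;
		ptr2 += mlen;
		if (ptr2 == ptr2end)	/* wrap to reuse the pad string */
			ptr2 = ptr2start;
	}
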
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 2f96e889595..78f6ea161a0 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -527,11 +527,11 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
result_size = wchar2char(result, workspace, max_size + 1, loc);
- if (result_size + 1 > destsize)
- return result_size;
-
- memcpy(dest, result, result_size);
- dest[result_size] = '\0';
+ if (destsize >= result_size + 1)
+ {
+ memcpy(dest, result, result_size);
+ dest[result_size] = '\0';
+ }
pfree(workspace);
pfree(result);
@@ -638,11 +638,11 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
result_size = wchar2char(result, workspace, max_size + 1, loc);
- if (result_size + 1 > destsize)
- return result_size;
-
- memcpy(dest, result, result_size);
- dest[result_size] = '\0';
+ if (destsize >= result_size + 1)
+ {
+ memcpy(dest, result, result_size);
+ dest[result_size] = '\0';
+ }
pfree(workspace);
pfree(result);
@@ -725,11 +725,11 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
result_size = wchar2char(result, workspace, max_size + 1, loc);
- if (result_size + 1 > destsize)
- return result_size;
-
- memcpy(dest, result, result_size);
- dest[result_size] = '\0';
+ if (destsize >= result_size + 1)
+ {
+ memcpy(dest, result, result_size);
+ dest[result_size] = '\0';
+ }
pfree(workspace);
pfree(result);
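
The reshaped tails of strlower_libc_mb(), strtitle_libc_mb(), and strupper_libc_mb() plug a leak: the old early return on an undersized destination skipped both pfree() calls. The new shape copies only when the result fits and always reaches cleanup; the required size is still reported either way (the trailing return is outside these hunks and is assumed):

	result_size = wchar2char(result, workspace, max_size + 1, loc);

	/* copy out only if it fits, NUL terminator included */
	if (destsize >= result_size + 1)
	{
		memcpy(dest, result, result_size);
		dest[result_size] = '\0';
	}

	pfree(workspace);			/* now reached on every path */
	pfree(result);

	return result_size;			/* assumed: caller retries when too small */
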
diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c
index 697143aec44..b505a6b4fee 100644
--- a/src/backend/utils/adt/pg_upgrade_support.c
+++ b/src/backend/utils/adt/pg_upgrade_support.c
@@ -282,11 +282,12 @@ binary_upgrade_set_missing_value(PG_FUNCTION_ARGS)
* upgraded without data loss.
*/
Datum
-binary_upgrade_logical_slot_has_caught_up(PG_FUNCTION_ARGS)
+binary_upgrade_check_logical_slot_pending_wal(PG_FUNCTION_ARGS)
{
Name slot_name;
XLogRecPtr end_of_wal;
- bool found_pending_wal;
+ XLogRecPtr scan_cutoff_lsn;
+ XLogRecPtr last_pending_wal;
CHECK_IS_BINARY_UPGRADE;
@@ -297,6 +298,7 @@ binary_upgrade_logical_slot_has_caught_up(PG_FUNCTION_ARGS)
Assert(has_rolreplication(GetUserId()));
slot_name = PG_GETARG_NAME(0);
+ scan_cutoff_lsn = PG_GETARG_LSN(1);
/* Acquire the given slot */
ReplicationSlotAcquire(NameStr(*slot_name), true, true);
@@ -307,12 +309,16 @@ binary_upgrade_logical_slot_has_caught_up(PG_FUNCTION_ARGS)
Assert(MyReplicationSlot->data.invalidated == RS_INVAL_NONE);
end_of_wal = GetFlushRecPtr(NULL);
- found_pending_wal = LogicalReplicationSlotHasPendingWal(end_of_wal);
+ last_pending_wal = LogicalReplicationSlotCheckPendingWal(end_of_wal,
+ scan_cutoff_lsn);
/* Clean up */
ReplicationSlotRelease();
- PG_RETURN_BOOL(!found_pending_wal);
+ if (XLogRecPtrIsValid(last_pending_wal))
+ PG_RETURN_LSN(last_pending_wal);
+ else
+ PG_RETURN_NULL();
}
/*
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 73ca0bb0b7f..b1df96e7b0b 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -824,8 +824,14 @@ pg_stat_get_backend_wait_event_type(PG_FUNCTION_ARGS)
wait_event_type = "";
else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
wait_event_type = "";
- else if ((proc = BackendPidGetProc(beentry->st_procpid)) != NULL)
- wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info);
+ else
+ {
+ proc = BackendPidGetProc(beentry->st_procpid);
+ if (!proc)
+ proc = AuxiliaryPidGetProc(beentry->st_procpid);
+ if (proc)
+ wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info);
+ }
if (!wait_event_type)
PG_RETURN_NULL();
@@ -845,8 +851,14 @@ pg_stat_get_backend_wait_event(PG_FUNCTION_ARGS)
wait_event = "";
else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
wait_event = "";
- else if ((proc = BackendPidGetProc(beentry->st_procpid)) != NULL)
- wait_event = pgstat_get_wait_event(proc->wait_event_info);
+ else
+ {
+ proc = BackendPidGetProc(beentry->st_procpid);
+ if (!proc)
+ proc = AuxiliaryPidGetProc(beentry->st_procpid);
+ if (proc)
+ wait_event = pgstat_get_wait_event(proc->wait_event_info);
+ }
if (!wait_event)
PG_RETURN_NULL();
diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c
index 38d12dedbc5..278d4e6941a 100644
--- a/src/backend/utils/adt/rangetypes_typanalyze.c
+++ b/src/backend/utils/adt/rangetypes_typanalyze.c
@@ -398,7 +398,7 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
stats->statypid[slot_idx] = FLOAT8OID;
stats->statyplen[slot_idx] = sizeof(float8);
stats->statypbyval[slot_idx] = true;
- stats->statypalign[slot_idx] = 'd';
+ stats->statypalign[slot_idx] = TYPALIGN_DOUBLE;
/* Store the fraction of empty ranges */
emptyfrac = palloc_object(float4);
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 94cd15bbab1..311b9877bbb 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -443,7 +443,7 @@ parse_re_flags(pg_re_flags *flags, text *opts)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid regular expression option: \"%.*s\"",
- pg_mblen(opt_p + i), opt_p + i)));
+ pg_mblen_range(opt_p + i, opt_p + opt_len), opt_p + i)));
break;
}
}
@@ -673,12 +673,13 @@ textregexreplace(PG_FUNCTION_ARGS)
if (VARSIZE_ANY_EXHDR(opt) > 0)
{
char *opt_p = VARDATA_ANY(opt);
+ const char *end_p = opt_p + VARSIZE_ANY_EXHDR(opt);
if (*opt_p >= '0' && *opt_p <= '9')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid regular expression option: \"%.*s\"",
- pg_mblen(opt_p), opt_p),
+ pg_mblen_range(opt_p, end_p), opt_p),
errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
}
@@ -772,6 +773,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
*r;
int plen,
elen;
+ const char *pend;
bool afterescape = false;
int nquotes = 0;
int bracket_depth = 0; /* square bracket nesting level */
@@ -779,6 +781,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
p = VARDATA_ANY(pat_text);
plen = VARSIZE_ANY_EXHDR(pat_text);
+ pend = p + plen;
if (esc_text == NULL)
{
/* No ESCAPE clause provided; default to backslash as escape */
@@ -878,7 +881,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
if (elen > 1)
{
- int mblen = pg_mblen(p);
+ int mblen = pg_mblen_range(p, pend);
if (mblen > 1)
{
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index bbadecef5f9..d22b8ef7f3c 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -213,7 +213,8 @@ static bool ri_CompareWithCast(Oid eq_opr, Oid typeid, Oid collid,
Datum lhs, Datum rhs);
static void ri_InitHashTables(void);
-static void InvalidateConstraintCacheCallBack(Datum arg, int cacheid, uint32 hashvalue);
+static void InvalidateConstraintCacheCallBack(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
static SPIPlanPtr ri_FetchPreparedPlan(RI_QueryKey *key);
static void ri_HashPreparedPlan(RI_QueryKey *key, SPIPlanPtr plan);
static RI_CompareHashEntry *ri_HashCompareOp(Oid eq_opr, Oid typeid);
@@ -2397,7 +2398,8 @@ get_ri_constraint_root(Oid constrOid)
* data from changing under it --- but we may get cache flushes anyway.)
*/
static void
-InvalidateConstraintCacheCallBack(Datum arg, int cacheid, uint32 hashvalue)
+InvalidateConstraintCacheCallBack(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
dlist_mutable_iter iter;
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index db67e86e760..e4eb7111ee7 100644
--- a/src/backend/utils/adt/rowtypes.c
+++ b/src/backend/utils/adt/rowtypes.c
@@ -1515,8 +1515,8 @@ record_image_cmp(FunctionCallInfo fcinfo)
{
Size len1,
len2;
- struct varlena *arg1val;
- struct varlena *arg2val;
+ varlena *arg1val;
+ varlena *arg2val;
len1 = toast_raw_datum_size(values1[i1]);
len2 = toast_raw_datum_size(values2[i2]);
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index b5a7ad9066e..f16f1535785 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -426,6 +426,7 @@ static void get_update_query_targetlist_def(Query *query, List *targetList,
static void get_delete_query_def(Query *query, deparse_context *context);
static void get_merge_query_def(Query *query, deparse_context *context);
static void get_utility_query_def(Query *query, deparse_context *context);
+static const char *get_lock_clause_strength(LockClauseStrength strength);
static void get_basic_select_query(Query *query, deparse_context *context);
static void get_target_list(List *targetList, deparse_context *context);
static void get_returning_clause(Query *query, deparse_context *context);
@@ -5186,10 +5187,10 @@ set_deparse_plan(deparse_namespace *dpns, Plan *plan)
* source, and all INNER_VAR Vars in other parts of the query refer to its
* targetlist.
*
- * For ON CONFLICT .. UPDATE we just need the inner tlist to point to the
- * excluded expression's tlist. (Similar to the SubqueryScan we don't want
- * to reuse OUTER, it's used for RETURNING in some modify table cases,
- * although not INSERT .. CONFLICT).
+ * For ON CONFLICT DO SELECT/UPDATE we just need the inner tlist to point
+ * to the excluded expression's tlist. (As with SubqueryScan, we don't
+ * want to reuse OUTER: it's used for RETURNING in some ModifyTable
+ * cases, although not for INSERT .. ON CONFLICT.)
*/
if (IsA(plan, SubqueryScan))
dpns->inner_plan = ((SubqueryScan *) plan)->subplan;
@@ -5997,30 +5998,9 @@ get_select_query_def(Query *query, deparse_context *context)
if (rc->pushedDown)
continue;
- switch (rc->strength)
- {
- case LCS_NONE:
- /* we intentionally throw an error for LCS_NONE */
- elog(ERROR, "unrecognized LockClauseStrength %d",
- (int) rc->strength);
- break;
- case LCS_FORKEYSHARE:
- appendContextKeyword(context, " FOR KEY SHARE",
- -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
- break;
- case LCS_FORSHARE:
- appendContextKeyword(context, " FOR SHARE",
- -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
- break;
- case LCS_FORNOKEYUPDATE:
- appendContextKeyword(context, " FOR NO KEY UPDATE",
- -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
- break;
- case LCS_FORUPDATE:
- appendContextKeyword(context, " FOR UPDATE",
- -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
- break;
- }
+ appendContextKeyword(context,
+ get_lock_clause_strength(rc->strength),
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
appendStringInfo(buf, " OF %s",
quote_identifier(get_rtable_name(rc->rti,
@@ -6033,6 +6013,28 @@ get_select_query_def(Query *query, deparse_context *context)
}
}
+static const char *
+get_lock_clause_strength(LockClauseStrength strength)
+{
+ switch (strength)
+ {
+ case LCS_NONE:
+ /* we intentionally throw an error for LCS_NONE */
+ elog(ERROR, "unrecognized LockClauseStrength %d",
+ (int) strength);
+ break;
+ case LCS_FORKEYSHARE:
+ return " FOR KEY SHARE";
+ case LCS_FORSHARE:
+ return " FOR SHARE";
+ case LCS_FORNOKEYUPDATE:
+ return " FOR NO KEY UPDATE";
+ case LCS_FORUPDATE:
+ return " FOR UPDATE";
+ }
+ return NULL; /* keep compiler quiet */
+}
+
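
Two subtleties of the extracted helper are easy to miss: each keyword string carries its leading space so that both call sites can append it directly, and the switch deliberately has no default label, so adding a new LockClauseStrength value draws a compiler warning here rather than silently deparsing nothing. A minimal self-contained rendering of the idiom:

	#include <stddef.h>

	enum strength { S_SHARE, S_UPDATE };

	static const char *
	keyword(enum strength s)
	{
		switch (s)				/* deliberately no default: label */
		{
			case S_SHARE:
				return " FOR SHARE";	/* leading space is intentional */
			case S_UPDATE:
				return " FOR UPDATE";
		}
		return NULL;			/* keep compiler quiet */
	}
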
/*
* Detect whether query looks like SELECT ... FROM VALUES(),
* with no need to rename the output columns of the VALUES RTE.
@@ -7125,7 +7127,7 @@ get_insert_query_def(Query *query, deparse_context *context)
{
appendStringInfoString(buf, " DO NOTHING");
}
- else
+ else if (confl->action == ONCONFLICT_UPDATE)
{
appendStringInfoString(buf, " DO UPDATE SET ");
/* Deparse targetlist */
@@ -7140,6 +7142,23 @@ get_insert_query_def(Query *query, deparse_context *context)
get_rule_expr(confl->onConflictWhere, context, false);
}
}
+ else
+ {
+ Assert(confl->action == ONCONFLICT_SELECT);
+ appendStringInfoString(buf, " DO SELECT");
+
+ /* Add FOR [KEY] UPDATE/SHARE clause if present */
+ if (confl->lockStrength != LCS_NONE)
+ appendStringInfoString(buf, get_lock_clause_strength(confl->lockStrength));
+
+ /* Add a WHERE clause if given */
+ if (confl->onConflictWhere != NULL)
+ {
+ appendContextKeyword(context, " WHERE ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_rule_expr(confl->onConflictWhere, context, false);
+ }
+ }
}
/* Add RETURNING if present */
@@ -8982,7 +9001,7 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
}
/* else do the same stuff as for T_SubLink et al. */
}
- /* FALLTHROUGH */
+ pg_fallthrough;
case T_SubLink:
case T_NullTest:
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 8deb2369471..e2603183f1c 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -4744,14 +4744,14 @@ timestamp_trunc(PG_FUNCTION_ARGS)
tm->tm_year = ((tm->tm_year + 999) / 1000) * 1000 - 999;
else
tm->tm_year = -((999 - (tm->tm_year - 1)) / 1000) * 1000 + 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_CENTURY:
/* see comments in timestamptz_trunc */
if (tm->tm_year > 0)
tm->tm_year = ((tm->tm_year + 99) / 100) * 100 - 99;
else
tm->tm_year = -((99 - (tm->tm_year - 1)) / 100) * 100 + 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_DECADE:
/* see comments in timestamptz_trunc */
if (val != DTK_MILLENNIUM && val != DTK_CENTURY)
@@ -4761,25 +4761,25 @@ timestamp_trunc(PG_FUNCTION_ARGS)
else
tm->tm_year = -((8 - (tm->tm_year - 1)) / 10) * 10;
}
- /* FALL THRU */
+ pg_fallthrough;
case DTK_YEAR:
tm->tm_mon = 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_QUARTER:
tm->tm_mon = (3 * ((tm->tm_mon - 1) / 3)) + 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_MONTH:
tm->tm_mday = 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_DAY:
tm->tm_hour = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_HOUR:
tm->tm_min = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_MINUTE:
tm->tm_sec = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_SECOND:
fsec = 0;
break;
@@ -4990,14 +4990,14 @@ timestamptz_trunc_internal(text *units, TimestampTz timestamp, pg_tz *tzp)
tm->tm_year = ((tm->tm_year + 999) / 1000) * 1000 - 999;
else
tm->tm_year = -((999 - (tm->tm_year - 1)) / 1000) * 1000 + 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_CENTURY:
/* truncating to the century? as above: -100, 1, 101... */
if (tm->tm_year > 0)
tm->tm_year = ((tm->tm_year + 99) / 100) * 100 - 99;
else
tm->tm_year = -((99 - (tm->tm_year - 1)) / 100) * 100 + 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_DECADE:
/*
@@ -5011,26 +5011,26 @@ timestamptz_trunc_internal(text *units, TimestampTz timestamp, pg_tz *tzp)
else
tm->tm_year = -((8 - (tm->tm_year - 1)) / 10) * 10;
}
- /* FALL THRU */
+ pg_fallthrough;
case DTK_YEAR:
tm->tm_mon = 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_QUARTER:
tm->tm_mon = (3 * ((tm->tm_mon - 1) / 3)) + 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_MONTH:
tm->tm_mday = 1;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_DAY:
tm->tm_hour = 0;
redotz = true; /* for all cases >= DAY */
- /* FALL THRU */
+ pg_fallthrough;
case DTK_HOUR:
tm->tm_min = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_MINUTE:
tm->tm_sec = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_SECOND:
fsec = 0;
break;
@@ -5171,33 +5171,33 @@ interval_trunc(PG_FUNCTION_ARGS)
case DTK_MILLENNIUM:
/* caution: C division may have negative remainder */
tm->tm_year = (tm->tm_year / 1000) * 1000;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_CENTURY:
/* caution: C division may have negative remainder */
tm->tm_year = (tm->tm_year / 100) * 100;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_DECADE:
/* caution: C division may have negative remainder */
tm->tm_year = (tm->tm_year / 10) * 10;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_YEAR:
tm->tm_mon = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_QUARTER:
tm->tm_mon = 3 * (tm->tm_mon / 3);
- /* FALL THRU */
+ pg_fallthrough;
case DTK_MONTH:
tm->tm_mday = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_DAY:
tm->tm_hour = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_HOUR:
tm->tm_min = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_MINUTE:
tm->tm_sec = 0;
- /* FALL THRU */
+ pg_fallthrough;
case DTK_SECOND:
tm->tm_usec = 0;
break;
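
The long fallthrough chains in the *_trunc() functions implement cascading truncation: entering the switch at one unit resets that unit's field and then falls through every finer unit below it, so truncating 2024-08-17 14:35:27 to 'year' yields 2024-01-01 00:00:00. A standalone miniature of the idiom (field handling simplified, and the marker spelled as a comment so it compiles anywhere):

	#include <stdio.h>

	enum unit { YEAR, MONTH, DAY, HOUR, MINUTE, SECOND };

	struct parts { int year, mon, mday, hour, min, sec; };

	static void
	trunc_to(struct parts *t, enum unit u)
	{
		switch (u)
		{
			case YEAR:
				t->mon = 1;
				/* falls through */
			case MONTH:
				t->mday = 1;
				/* falls through */
			case DAY:
				t->hour = 0;
				/* falls through */
			case HOUR:
				t->min = 0;
				/* falls through */
			case MINUTE:
				t->sec = 0;
				/* falls through */
			case SECOND:
				break;
		}
	}

	int
	main(void)
	{
		struct parts t = {2024, 8, 17, 14, 35, 27};

		trunc_to(&t, YEAR);
		printf("%04d-%02d-%02d %02d:%02d:%02d\n",
			   t.year, t.mon, t.mday, t.hour, t.min, t.sec);
		return 0;				/* prints 2024-01-01 00:00:00 */
	}
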
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index e3bf1fbbfd7..7e54f36c2a7 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -120,7 +120,7 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
return buf;
buf++;
- while (*buf && pg_mblen(buf) == 1)
+ while (*buf && pg_mblen_cstr(buf) == 1)
{
switch (*buf)
{
@@ -259,12 +259,12 @@ parse_or_operator(TSQueryParserState pstate)
return false;
/* it shouldn't be a part of any word */
- if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr))
+ if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum_cstr(ptr))
return false;
for (;;)
{
- ptr += pg_mblen(ptr);
+ ptr += pg_mblen_cstr(ptr);
if (*ptr == '\0') /* got end of string without operand */
return false;
@@ -390,7 +390,7 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator,
break;
}
- state->buf += pg_mblen(state->buf);
+ state->buf += pg_mblen_cstr(state->buf);
}
}
@@ -502,7 +502,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
break;
}
- state->buf += pg_mblen(state->buf);
+ state->buf += pg_mblen_cstr(state->buf);
}
}
@@ -1014,9 +1014,8 @@ infix(INFIX *in, int parentPriority, bool rightPhraseOp)
*(in->cur) = '\\';
in->cur++;
}
- COPYCHAR(in->cur, op);
- clen = pg_mblen(op);
+ clen = ts_copychar_cstr(in->cur, op);
op += clen;
in->cur += clen;
}
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 38342298a5d..024f5160cd4 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -319,9 +319,9 @@ tsvectorout(PG_FUNCTION_ARGS)
lenbuf = 0,
pp;
WordEntry *ptr = ARRPTR(out);
- char *curbegin,
- *curin,
+ char *curin,
*curout;
+ const char *curend;
lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
for (i = 0; i < out->size; i++)
@@ -334,13 +334,14 @@ tsvectorout(PG_FUNCTION_ARGS)
curout = outbuf = (char *) palloc(lenbuf);
for (i = 0; i < out->size; i++)
{
- curbegin = curin = STRPTR(out) + ptr->pos;
+ curin = STRPTR(out) + ptr->pos;
+ curend = curin + ptr->len;
if (i != 0)
*curout++ = ' ';
*curout++ = '\'';
- while (curin - curbegin < ptr->len)
+ while (curin < curend)
{
- int len = pg_mblen(curin);
+ int len = pg_mblen_range(curin, curend);
if (t_iseq(curin, '\''))
*curout++ = '\'';
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 94e0fed8309..71c7c7d3b3c 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -2604,11 +2604,15 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
if (ws)
{
char *buf;
+ const char *end;
buf = VARDATA_ANY(ws);
- while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
+ end = buf + VARSIZE_ANY_EXHDR(ws);
+ while (buf < end)
{
- if (pg_mblen(buf) == 1)
+ int len = pg_mblen_range(buf, end);
+
+ if (len == 1)
{
switch (*buf)
{
@@ -2632,7 +2636,7 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
stat->weight |= 0;
}
}
- buf += pg_mblen(buf);
+ buf += len;
}
}
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
index b3c04f6344f..efeaeb55334 100644
--- a/src/backend/utils/adt/tsvector_parser.c
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -208,8 +208,7 @@ gettoken_tsvector(TSVectorParseState state,
PRSSYNTAXERROR;
else if (!isspace((unsigned char) *state->prsbuf))
{
- COPYCHAR(curpos, state->prsbuf);
- curpos += pg_mblen(state->prsbuf);
+ curpos += ts_copychar_cstr(curpos, state->prsbuf);
statecode = WAITENDWORD;
}
}
@@ -223,8 +222,7 @@ gettoken_tsvector(TSVectorParseState state,
else
{
RESIZEPRSBUF;
- COPYCHAR(curpos, state->prsbuf);
- curpos += pg_mblen(state->prsbuf);
+ curpos += ts_copychar_cstr(curpos, state->prsbuf);
Assert(oldstate != 0);
statecode = oldstate;
}
@@ -259,8 +257,7 @@ gettoken_tsvector(TSVectorParseState state,
else
{
RESIZEPRSBUF;
- COPYCHAR(curpos, state->prsbuf);
- curpos += pg_mblen(state->prsbuf);
+ curpos += ts_copychar_cstr(curpos, state->prsbuf);
}
}
else if (statecode == WAITENDCMPLX)
@@ -279,8 +276,7 @@ gettoken_tsvector(TSVectorParseState state,
else
{
RESIZEPRSBUF;
- COPYCHAR(curpos, state->prsbuf);
- curpos += pg_mblen(state->prsbuf);
+ curpos += ts_copychar_cstr(curpos, state->prsbuf);
}
}
else if (statecode == WAITCHARCMPLX)
@@ -288,8 +284,7 @@ gettoken_tsvector(TSVectorParseState state,
if (!state->is_web && t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
- COPYCHAR(curpos, state->prsbuf);
- curpos += pg_mblen(state->prsbuf);
+ curpos += ts_copychar_cstr(curpos, state->prsbuf);
statecode = WAITENDCMPLX;
}
else
@@ -300,7 +295,7 @@ gettoken_tsvector(TSVectorParseState state,
PRSSYNTAXERROR;
if (state->oprisdelim)
{
- /* state->prsbuf+=pg_mblen(state->prsbuf); */
+ /* state->prsbuf+=pg_mblen_cstr(state->prsbuf); */
RETURN_TOKEN;
}
else
@@ -383,6 +378,6 @@ gettoken_tsvector(TSVectorParseState state,
statecode);
/* get next char */
- state->prsbuf += pg_mblen(state->prsbuf);
+ state->prsbuf += pg_mblen_cstr(state->prsbuf);
}
}
diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c
index 50ffee679b9..65ad1bfe18f 100644
--- a/src/backend/utils/adt/varbit.c
+++ b/src/backend/utils/adt/varbit.c
@@ -232,7 +232,7 @@ bit_in(PG_FUNCTION_ARGS)
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%.*s\" is not a valid binary digit",
- pg_mblen(sp), sp)));
+ pg_mblen_cstr(sp), sp)));
x >>= 1;
if (x == 0)
@@ -257,7 +257,7 @@ bit_in(PG_FUNCTION_ARGS)
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%.*s\" is not a valid hexadecimal digit",
- pg_mblen(sp), sp)));
+ pg_mblen_cstr(sp), sp)));
if (bc)
{
@@ -533,7 +533,7 @@ varbit_in(PG_FUNCTION_ARGS)
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%.*s\" is not a valid binary digit",
- pg_mblen(sp), sp)));
+ pg_mblen_cstr(sp), sp)));
x >>= 1;
if (x == 0)
@@ -558,7 +558,7 @@ varbit_in(PG_FUNCTION_ARGS)
ereturn(escontext, (Datum) 0,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%.*s\" is not a valid hexadecimal digit",
- pg_mblen(sp), sp)));
+ pg_mblen_cstr(sp), sp)));
if (bc)
{
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 6c1ebb0866d..7caf700fd61 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -42,7 +42,7 @@
#include "utils/sortsupport.h"
#include "utils/varlena.h"
-typedef struct varlena VarString;
+typedef varlena VarString;
/*
* State for text_position_* functions.
@@ -133,6 +133,7 @@ static text *text_substring(Datum str,
int32 start,
int32 length,
bool length_not_specified);
+static int pg_mbcharcliplen_chars(const char *mbstr, int len, int limit);
static text *text_overlay(text *t1, text *t2, int sp, int sl);
static int text_position(text *t1, text *t2, Oid collid);
static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
@@ -494,8 +495,11 @@ text_catenate(text *t1, text *t2)
* charlen_to_bytelen()
* Compute the number of bytes occupied by n characters starting at *p
*
- * It is caller's responsibility that there actually are n characters;
- * the string need not be null-terminated.
+ * The caller shall ensure there are n complete characters; callers achieve
+ * this by deriving "n" from regmatch_t results of searching a wchar array.
+ * pg_mb2wchar_with_len() skips any trailing incomplete character, so regex
+ * matches will end no later than the last complete character. (The string
+ * need not be null-terminated.)
*/
static int
charlen_to_bytelen(const char *p, int n)
@@ -510,7 +514,7 @@ charlen_to_bytelen(const char *p, int n)
const char *s;
for (s = p; n > 0; n--)
- s += pg_mblen(s);
+ s += pg_mblen_unbounded(s); /* caller verified encoding */
return s - p;
}
@@ -583,7 +587,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
int32 S = start; /* start position */
int32 S1; /* adjusted start position */
int32 L1; /* adjusted substring length */
- int32 E; /* end position */
+ int32 E; /* end position, exclusive */
/*
* SQL99 says S can be zero or negative (which we don't document), but we
@@ -644,6 +648,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
int32 slice_start;
int32 slice_size;
int32 slice_strlen;
+ int32 slice_len;
text *slice;
int32 E1;
int32 i;
@@ -660,14 +665,14 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
if (length_not_specified) /* special case - get length to end of
* string */
- slice_size = L1 = -1;
+ E = slice_size = L1 = -1;
else if (length < 0)
{
/* SQL99 says to throw an error for E < S, i.e., negative length */
ereport(ERROR,
(errcode(ERRCODE_SUBSTRING_ERROR),
errmsg("negative substring length not allowed")));
- slice_size = L1 = -1; /* silence stupider compilers */
+ E = slice_size = L1 = -1; /* silence stupider compilers */
}
else if (pg_add_s32_overflow(S, length, &E))
{
@@ -680,11 +685,11 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
else
{
/*
- * A zero or negative value for the end position can happen if the
- * start was negative or one. SQL99 says to return a zero-length
- * string.
+ * Ending at position 1, exclusive, obviously yields an empty
+ * string. A zero or negative value can happen if the start was
+ * negative or one. SQL99 says to return a zero-length string.
*/
- if (E < 1)
+ if (E <= 1)
return cstring_to_text("");
/*
@@ -694,11 +699,11 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
L1 = E - S1;
/*
- * Total slice size in bytes can't be any longer than the start
- * position plus substring length times the encoding max length.
- * If that overflows, we can just use -1.
+ * Total slice size in bytes can't be any longer than the
+ * inclusive end position times the encoding max length. If that
+ * overflows, we can just use -1.
*/
- if (pg_mul_s32_overflow(E, eml, &slice_size))
+ if (pg_mul_s32_overflow(E - 1, eml, &slice_size))
slice_size = -1;
}
@@ -713,16 +718,25 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
slice = (text *) DatumGetPointer(str);
/* see if we got back an empty string */
- if (VARSIZE_ANY_EXHDR(slice) == 0)
+ slice_len = VARSIZE_ANY_EXHDR(slice);
+ if (slice_len == 0)
{
if (slice != (text *) DatumGetPointer(str))
pfree(slice);
return cstring_to_text("");
}
- /* Now we can get the actual length of the slice in MB characters */
- slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
- VARSIZE_ANY_EXHDR(slice));
+ /*
+ * Now we can get the actual length of the slice in MB characters,
+ * stopping at the end of the substring. Continuing beyond the
+ * substring end could find an incomplete character attributable
+ * solely to DatumGetTextPSlice() chopping in the middle of a
+ * character, and it would be superfluous work at best.
+ */
+ slice_strlen =
+ (slice_size == -1 ?
+ pg_mbstrlen_with_len(VARDATA_ANY(slice), slice_len) :
+ pg_mbcharcliplen_chars(VARDATA_ANY(slice), slice_len, E - 1));
/*
* Check that the start position wasn't > slice_strlen. If so, SQL99
@@ -749,7 +763,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
*/
p = VARDATA_ANY(slice);
for (i = 0; i < S1 - 1; i++)
- p += pg_mblen(p);
+ p += pg_mblen_unbounded(p);
/* hang onto a pointer to our start position */
s = p;
@@ -759,7 +773,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
* length.
*/
for (i = S1; i < E1; i++)
- p += pg_mblen(p);
+ p += pg_mblen_unbounded(p);
ret = (text *) palloc(VARHDRSZ + (p - s));
SET_VARSIZE(ret, VARHDRSZ + (p - s));
@@ -777,6 +791,35 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
return NULL;
}
+/*
+ * pg_mbcharcliplen_chars -
+ * Mirror pg_mbcharcliplen(), except return value unit is chars, not bytes.
+ *
+ * This mirrors all the dubious historical behavior, so it's static to
+ * discourage proliferation. The assertions are specific to the one caller.
+ */
+static int
+pg_mbcharcliplen_chars(const char *mbstr, int len, int limit)
+{
+ int nch = 0;
+ int l;
+
+ Assert(len > 0);
+ Assert(limit > 0);
+ Assert(pg_database_encoding_max_length() > 1);
+
+ while (len > 0 && *mbstr)
+ {
+ l = pg_mblen_with_len(mbstr, len);
+ nch++;
+ if (nch == limit)
+ break;
+ len -= l;
+ mbstr += l;
+ }
+ return nch;
+}
+
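
A worked example of the new helper, assuming a UTF-8 database encoding (its assertions require some multibyte encoding):

	const char slice[] = "h\xC3\xA9llo";	/* 'h', 'e'-acute, "llo": 6 bytes */

	pg_mbcharcliplen_chars(slice, 6, 2);	/* counts two chars, returns 2 */
	pg_mbcharcliplen_chars(slice, 6, 9);	/* exhausts len, returns 5 */

Stopping the count at the requested character limit means an incomplete trailing character, left by DatumGetTextPSlice() chopping mid-character, is never even examined; that is the "superfluous work" the comment in text_substring() refers to.
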
/*
* textoverlay
* Replace specified substring of first string with second
@@ -1064,6 +1107,8 @@ text_position_next(TextPositionState *state)
*/
if (state->is_multibyte_char_in_char && state->locale->deterministic)
{
+ const char *haystack_end = state->str1 + state->len1;
+
/* Walk one character at a time, until we reach the match. */
/* the search should never move backwards. */
@@ -1072,7 +1117,7 @@ text_position_next(TextPositionState *state)
while (state->refpoint < matchptr)
{
/* step to next character. */
- state->refpoint += pg_mblen(state->refpoint);
+ state->refpoint += pg_mblen_range(state->refpoint, haystack_end);
state->refpos++;
/*
@@ -1160,7 +1205,7 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
test_end = hptr;
do
{
- test_end += pg_mblen(test_end);
+ test_end += pg_mblen_range(test_end, haystack_end);
if (pg_strncoll(hptr, (test_end - hptr), needle, needle_len, state->locale) == 0)
{
state->last_match_len_tmp = (test_end - hptr);
@@ -1173,7 +1218,7 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
if (result_hptr)
break;
- hptr += pg_mblen(hptr);
+ hptr += pg_mblen_range(hptr, haystack_end);
}
return (char *) result_hptr;
@@ -3767,6 +3812,8 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
}
else
{
+ const char *end_ptr;
+
/*
* When fldsep is NULL, each character in the input string becomes a
* separate element in the result set. The separator is effectively
* an empty string.
@@ -3775,10 +3822,11 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
start_ptr = VARDATA_ANY(inputstring);
+ end_ptr = start_ptr + inputstring_len;
while (inputstring_len > 0)
{
- int chunk_len = pg_mblen(start_ptr);
+ int chunk_len = pg_mblen_range(start_ptr, end_ptr);
CHECK_FOR_INTERRUPTS();
@@ -3898,6 +3946,7 @@ array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
int typlen;
bool typbyval;
char typalign;
+ uint8 typalignby;
StringInfoData buf;
bool printed = false;
char *p;
@@ -3947,6 +3996,7 @@ array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
typlen = my_extra->typlen;
typbyval = my_extra->typbyval;
typalign = my_extra->typalign;
+ typalignby = typalign_to_alignby(typalign);
p = ARR_DATA_PTR(v);
bitmap = ARR_NULLBITMAP(v);
@@ -3983,7 +4033,7 @@ array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
printed = true;
p = att_addlength_pointer(p, typlen, p);
- p = (char *) att_align_nominal(p, typalign);
+ p = (char *) att_nominal_alignby(p, typalignby);
}
/* advance bitmap pointer if any */
@@ -4167,7 +4217,7 @@ pg_column_compression(PG_FUNCTION_ARGS)
PG_RETURN_NULL();
/* get the compression method id stored in the compressed varlena */
- cmid = toast_get_compression_id((struct varlena *)
+ cmid = toast_get_compression_id((varlena *)
DatumGetPointer(PG_GETARG_DATUM(0)));
if (cmid == TOAST_INVALID_COMPRESSION_ID)
PG_RETURN_NULL();
@@ -4196,8 +4246,8 @@ Datum
pg_column_toast_chunk_id(PG_FUNCTION_ARGS)
{
int typlen;
- struct varlena *attr;
- struct varatt_external toast_pointer;
+ varlena *attr;
+ varatt_external toast_pointer;
/* On first call, get the input type's typlen, and save at *fn_extra */
if (fcinfo->flinfo->fn_extra == NULL)
@@ -4219,7 +4269,7 @@ pg_column_toast_chunk_id(PG_FUNCTION_ARGS)
if (typlen != -1)
PG_RETURN_NULL();
- attr = (struct varlena *) DatumGetPointer(PG_GETARG_DATUM(0));
+ attr = (varlena *) DatumGetPointer(PG_GETARG_DATUM(0));
if (!VARATT_IS_EXTERNAL_ONDISK(attr))
PG_RETURN_NULL();
@@ -4682,7 +4732,7 @@ text_reverse(PG_FUNCTION_ARGS)
{
int sz;
- sz = pg_mblen(p);
+ sz = pg_mblen_range(p, endp);
dst -= sz;
memcpy(dst, p, sz);
p += sz;
@@ -4843,7 +4893,7 @@ text_format(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized format() type specifier \"%.*s\"",
- pg_mblen(cp), cp),
+ pg_mblen_range(cp, end_ptr), cp),
errhint("For a single \"%%\" use \"%%%%\".")));
/* If indirect width was specified, get its value */
@@ -4964,7 +5014,7 @@ text_format(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized format() type specifier \"%.*s\"",
- pg_mblen(cp), cp),
+ pg_mblen_range(cp, end_ptr), cp),
errhint("For a single \"%%\" use \"%%%%\".")));
break;
}
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index f69dc68286c..ac675d50212 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -2186,7 +2186,7 @@ xml_errorHandler(void *data, PgXmlErrorPtr error)
if (error->code == XML_ERR_NOT_WELL_BALANCED &&
xmlerrcxt->err_occurred)
return;
- /* fall through */
+ pg_fallthrough;
case XML_FROM_NONE:
case XML_FROM_MEMORY:
@@ -2376,8 +2376,7 @@ sqlchar_to_unicode(const char *s)
char *utf8string;
pg_wchar ret[2]; /* need space for trailing zero */
- /* note we're not assuming s is null-terminated */
- utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
+ utf8string = pg_server_to_any(s, pg_mblen_cstr(s), PG_UTF8);
pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
pg_encoding_mblen(PG_UTF8, utf8string));
@@ -2430,7 +2429,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
initStringInfo(&buf);
- for (p = ident; *p; p += pg_mblen(p))
+ for (p = ident; *p; p += pg_mblen_cstr(p))
{
if (*p == ':' && (p == ident || fully_escaped))
appendStringInfoString(&buf, "_x003A_");
@@ -2455,7 +2454,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
: !is_valid_xml_namechar(u))
appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
else
- appendBinaryStringInfo(&buf, p, pg_mblen(p));
+ appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
}
}
@@ -2478,7 +2477,7 @@ map_xml_name_to_sql_identifier(const char *name)
initStringInfo(&buf);
- for (p = name; *p; p += pg_mblen(p))
+ for (p = name; *p; p += pg_mblen_cstr(p))
{
if (*p == '_' && *(p + 1) == 'x'
&& isxdigit((unsigned char) *(p + 2))
@@ -2496,7 +2495,7 @@ map_xml_name_to_sql_identifier(const char *name)
p += 6;
}
else
- appendBinaryStringInfo(&buf, p, pg_mblen(p));
+ appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
}
return buf.data;
diff --git a/src/backend/utils/cache/attoptcache.c b/src/backend/utils/cache/attoptcache.c
index 72edc8f665b..9244a23013e 100644
--- a/src/backend/utils/cache/attoptcache.c
+++ b/src/backend/utils/cache/attoptcache.c
@@ -50,7 +50,8 @@ typedef struct
* for that attribute.
*/
static void
-InvalidateAttoptCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
+InvalidateAttoptCacheCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
HASH_SEQ_STATUS status;
AttoptCacheEntry *attopt;
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 681aa923403..519089322f4 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -357,15 +357,15 @@ CatalogCacheComputeHashValue(CatCache *cache, int nkeys,
case 4:
oneHash = (cc_hashfunc[3]) (v4);
hashValue ^= pg_rotate_left32(oneHash, 24);
- /* FALLTHROUGH */
+ pg_fallthrough;
case 3:
oneHash = (cc_hashfunc[2]) (v3);
hashValue ^= pg_rotate_left32(oneHash, 16);
- /* FALLTHROUGH */
+ pg_fallthrough;
case 2:
oneHash = (cc_hashfunc[1]) (v2);
hashValue ^= pg_rotate_left32(oneHash, 8);
- /* FALLTHROUGH */
+ pg_fallthrough;
case 1:
oneHash = (cc_hashfunc[0]) (v1);
hashValue ^= oneHash;
@@ -403,21 +403,21 @@ CatalogCacheComputeTupleHashValue(CatCache *cache, int nkeys, HeapTuple tuple)
cc_tupdesc,
&isNull);
Assert(!isNull);
- /* FALLTHROUGH */
+ pg_fallthrough;
case 3:
v3 = fastgetattr(tuple,
cc_keyno[2],
cc_tupdesc,
&isNull);
Assert(!isNull);
- /* FALLTHROUGH */
+ pg_fallthrough;
case 2:
v2 = fastgetattr(tuple,
cc_keyno[1],
cc_tupdesc,
&isNull);
Assert(!isNull);
- /* FALLTHROUGH */
+ pg_fallthrough;
case 1:
v1 = fastgetattr(tuple,
cc_keyno[0],
diff --git a/src/backend/utils/cache/evtcache.c b/src/backend/utils/cache/evtcache.c
index 2b4453e54a7..3fe89c9c98f 100644
--- a/src/backend/utils/cache/evtcache.c
+++ b/src/backend/utils/cache/evtcache.c
@@ -49,7 +49,8 @@ static EventTriggerCacheStateType EventTriggerCacheState = ETCS_NEEDS_REBUILD;
static void BuildEventTriggerCache(void);
static void InvalidateEventCacheCallback(Datum arg,
- int cacheid, uint32 hashvalue);
+ SysCacheIdentifier cacheid,
+ uint32 hashvalue);
static Bitmapset *DecodeTextArrayToBitmapset(Datum array);
/*
@@ -254,7 +255,8 @@ DecodeTextArrayToBitmapset(Datum array)
* memory leaks.
*/
static void
-InvalidateEventCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
+InvalidateEventCacheCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
/*
* If the cache isn't valid, then there might be a rebuild in progress, so
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index bf465a295e3..d59216b28f1 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -1813,7 +1813,7 @@ CacheInvalidateRelmap(Oid databaseId)
* flush all cached state anyway.
*/
void
-CacheRegisterSyscacheCallback(int cacheid,
+CacheRegisterSyscacheCallback(SysCacheIdentifier cacheid,
SyscacheCallbackFunction func,
Datum arg)
{
@@ -1895,7 +1895,7 @@ CacheRegisterRelSyncCallback(RelSyncCallbackFunction func,
* this module from knowing which catcache IDs correspond to which catalogs.
*/
void
-CallSyscacheCallbacks(int cacheid, uint32 hashvalue)
+CallSyscacheCallbacks(SysCacheIdentifier cacheid, uint32 hashvalue)
{
int i;
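
The signature change ripples through every syscache invalidation callback in the tree: the cache ID parameter becomes SysCacheIdentifier (presumably the enum of syscache IDs) instead of a bare int, so a mismatched callback signature now fails to compile instead of silently converting. A sketch of a conforming callback and its registration; invalidate_my_module_cache() is a hypothetical helper:

	static void
	my_cache_callback(Datum arg, SysCacheIdentifier cacheid,
					  uint32 hashvalue)
	{
		/* hashvalue == 0 conventionally means "flush everything" */
		invalidate_my_module_cache(hashvalue);	/* hypothetical */
	}

	static void
	my_module_init(void)
	{
		/* PROCOID is the syscache over pg_proc */
		CacheRegisterSyscacheCallback(PROCOID, my_cache_callback,
									  (Datum) 0);
	}
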
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index b924a2d900b..1913b009d40 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -2492,6 +2492,7 @@ get_type_io_data(Oid typid,
{
Oid typinput;
Oid typoutput;
+ Oid typcollation;
boot_get_type_io_data(typid,
typlen,
@@ -2500,7 +2501,8 @@ get_type_io_data(Oid typid,
typdelim,
typioparam,
&typinput,
- &typoutput);
+ &typoutput,
+ &typcollation);
switch (which_func)
{
case IOFunc_input:
diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c
index 37d5d73b7fb..812e2265734 100644
--- a/src/backend/utils/cache/plancache.c
+++ b/src/backend/utils/cache/plancache.c
@@ -106,8 +106,10 @@ static void ScanQueryForLocks(Query *parsetree, bool acquire);
static bool ScanQueryWalker(Node *node, bool *acquire);
static TupleDesc PlanCacheComputeResultDesc(List *stmt_list);
static void PlanCacheRelCallback(Datum arg, Oid relid);
-static void PlanCacheObjectCallback(Datum arg, int cacheid, uint32 hashvalue);
-static void PlanCacheSysCallback(Datum arg, int cacheid, uint32 hashvalue);
+static void PlanCacheObjectCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
+static void PlanCacheSysCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
/* ResourceOwner callbacks to track plancache references */
static void ResOwnerReleaseCachedPlan(Datum res);
@@ -2201,7 +2203,7 @@ PlanCacheRelCallback(Datum arg, Oid relid)
* or all plans mentioning any member of this cache if hashvalue == 0.
*/
static void
-PlanCacheObjectCallback(Datum arg, int cacheid, uint32 hashvalue)
+PlanCacheObjectCallback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
dlist_iter iter;
@@ -2310,7 +2312,7 @@ PlanCacheObjectCallback(Datum arg, int cacheid, uint32 hashvalue)
* Just invalidate everything...
*/
static void
-PlanCacheSysCallback(Datum arg, int cacheid, uint32 hashvalue)
+PlanCacheSysCallback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
ResetPlanCache();
}
diff --git a/src/backend/utils/cache/spccache.c b/src/backend/utils/cache/spccache.c
index 8f1a5e69595..362169b7d97 100644
--- a/src/backend/utils/cache/spccache.c
+++ b/src/backend/utils/cache/spccache.c
@@ -52,7 +52,8 @@ typedef struct
* tablespaces, nor do we expect them to be frequently modified.
*/
static void
-InvalidateTableSpaceCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
+InvalidateTableSpaceCacheCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue)
{
HASH_SEQ_STATUS status;
TableSpaceCacheEntry *spc;
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index ae3d18e0e74..007a9a15d71 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -109,7 +109,7 @@ static int oid_compare(const void *a, const void *b);
void
InitCatalogCache(void)
{
- int cacheId;
+ SysCacheIdentifier cacheId;
Assert(!CacheInitialized);
@@ -179,7 +179,7 @@ InitCatalogCache(void)
void
InitCatalogCachePhase2(void)
{
- int cacheId;
+ SysCacheIdentifier cacheId;
Assert(CacheInitialized);
@@ -205,7 +205,7 @@ InitCatalogCachePhase2(void)
* CAUTION: The tuple that is returned must NOT be freed by the caller!
*/
HeapTuple
-SearchSysCache(int cacheId,
+SearchSysCache(SysCacheIdentifier cacheId,
Datum key1,
Datum key2,
Datum key3,
@@ -217,7 +217,7 @@ SearchSysCache(int cacheId,
}
HeapTuple
-SearchSysCache1(int cacheId,
+SearchSysCache1(SysCacheIdentifier cacheId,
Datum key1)
{
Assert(cacheId >= 0 && cacheId < SysCacheSize && SysCache[cacheId]);
@@ -227,7 +227,7 @@ SearchSysCache1(int cacheId,
}
HeapTuple
-SearchSysCache2(int cacheId,
+SearchSysCache2(SysCacheIdentifier cacheId,
Datum key1, Datum key2)
{
Assert(cacheId >= 0 && cacheId < SysCacheSize && SysCache[cacheId]);
@@ -237,7 +237,7 @@ SearchSysCache2(int cacheId,
}
HeapTuple
-SearchSysCache3(int cacheId,
+SearchSysCache3(SysCacheIdentifier cacheId,
Datum key1, Datum key2, Datum key3)
{
Assert(cacheId >= 0 && cacheId < SysCacheSize && SysCache[cacheId]);
@@ -247,7 +247,7 @@ SearchSysCache3(int cacheId,
}
HeapTuple
-SearchSysCache4(int cacheId,
+SearchSysCache4(SysCacheIdentifier cacheId,
Datum key1, Datum key2, Datum key3, Datum key4)
{
Assert(cacheId >= 0 && cacheId < SysCacheSize && SysCache[cacheId]);
@@ -279,7 +279,7 @@ ReleaseSysCache(HeapTuple tuple)
* doesn't prevent the "tuple concurrently updated" error.
*/
HeapTuple
-SearchSysCacheLocked1(int cacheId,
+SearchSysCacheLocked1(SysCacheIdentifier cacheId,
Datum key1)
{
CatCache *cache = SysCache[cacheId];
@@ -371,7 +371,7 @@ SearchSysCacheLocked1(int cacheId,
* heap_freetuple() the result when done with it.
*/
HeapTuple
-SearchSysCacheCopy(int cacheId,
+SearchSysCacheCopy(SysCacheIdentifier cacheId,
Datum key1,
Datum key2,
Datum key3,
@@ -396,7 +396,7 @@ SearchSysCacheCopy(int cacheId,
* heap_freetuple().
*/
HeapTuple
-SearchSysCacheLockedCopy1(int cacheId,
+SearchSysCacheLockedCopy1(SysCacheIdentifier cacheId,
Datum key1)
{
HeapTuple tuple,
@@ -417,7 +417,7 @@ SearchSysCacheLockedCopy1(int cacheId,
* No lock is retained on the syscache entry.
*/
bool
-SearchSysCacheExists(int cacheId,
+SearchSysCacheExists(SysCacheIdentifier cacheId,
Datum key1,
Datum key2,
Datum key3,
@@ -440,7 +440,7 @@ SearchSysCacheExists(int cacheId,
* No lock is retained on the syscache entry.
*/
Oid
-GetSysCacheOid(int cacheId,
+GetSysCacheOid(SysCacheIdentifier cacheId,
AttrNumber oidcol,
Datum key1,
Datum key2,
@@ -592,7 +592,7 @@ SearchSysCacheCopyAttNum(Oid relid, int16 attnum)
* a different cache for the same catalog the tuple was fetched from.
*/
Datum
-SysCacheGetAttr(int cacheId, HeapTuple tup,
+SysCacheGetAttr(SysCacheIdentifier cacheId, HeapTuple tup,
AttrNumber attributeNumber,
bool *isNull)
{
@@ -622,7 +622,7 @@ SysCacheGetAttr(int cacheId, HeapTuple tup,
* be NULL.
*/
Datum
-SysCacheGetAttrNotNull(int cacheId, HeapTuple tup,
+SysCacheGetAttrNotNull(SysCacheIdentifier cacheId, HeapTuple tup,
AttrNumber attributeNumber)
{
bool isnull;
@@ -652,7 +652,7 @@ SysCacheGetAttrNotNull(int cacheId, HeapTuple tup,
* catcache code that need to be able to compute the hash values.
*/
uint32
-GetSysCacheHashValue(int cacheId,
+GetSysCacheHashValue(SysCacheIdentifier cacheId,
Datum key1,
Datum key2,
Datum key3,
@@ -668,7 +668,7 @@ GetSysCacheHashValue(int cacheId,
* List-search interface
*/
struct catclist *
-SearchSysCacheList(int cacheId, int nkeys,
+SearchSysCacheList(SysCacheIdentifier cacheId, int nkeys,
Datum key1, Datum key2, Datum key3)
{
if (cacheId < 0 || cacheId >= SysCacheSize || !SysCache[cacheId])
@@ -687,7 +687,7 @@ SearchSysCacheList(int cacheId, int nkeys,
* This routine is only quasi-public: it should only be used by inval.c.
*/
void
-SysCacheInvalidate(int cacheId, uint32 hashValue)
+SysCacheInvalidate(SysCacheIdentifier cacheId, uint32 hashValue)
{
if (cacheId < 0 || cacheId >= SysCacheSize)
elog(ERROR, "invalid cache ID: %d", cacheId);
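The syscache search and invalidation entry points now take a typed SysCacheIdentifier instead of a bare int. Extension code with its own invalidation callbacks follows the same mechanical change; a minimal sketch (hypothetical extension code, not part of this patch):

#include "postgres.h"

#include "utils/inval.h"
#include "utils/syscache.h"

/* hypothetical private cache invalidated through a syscache callback */
static void
my_role_cache_callback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
	/* hashvalue == 0 means "flush everything cached from this syscache" */
	/* ... reset private state here ... */
}

void
my_extension_init(void)
{
	CacheRegisterSyscacheCallback(AUTHOID, my_role_cache_callback, (Datum) 0);
}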
diff --git a/src/backend/utils/cache/ts_cache.c b/src/backend/utils/cache/ts_cache.c
index 71e49b2b919..744c8e71d71 100644
--- a/src/backend/utils/cache/ts_cache.c
+++ b/src/backend/utils/cache/ts_cache.c
@@ -91,7 +91,7 @@ static Oid TSCurrentConfigCache = InvalidOid;
* table address as the "arg".
*/
static void
-InvalidateTSCacheCallBack(Datum arg, int cacheid, uint32 hashvalue)
+InvalidateTSCacheCallBack(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
HTAB *hash = (HTAB *) DatumGetPointer(arg);
HASH_SEQ_STATUS status;
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index dc4b1a56414..627e534609a 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -337,9 +337,12 @@ static bool multirange_element_has_hashing(TypeCacheEntry *typentry);
static bool multirange_element_has_extended_hashing(TypeCacheEntry *typentry);
static void cache_multirange_element_properties(TypeCacheEntry *typentry);
static void TypeCacheRelCallback(Datum arg, Oid relid);
-static void TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue);
-static void TypeCacheOpcCallback(Datum arg, int cacheid, uint32 hashvalue);
-static void TypeCacheConstrCallback(Datum arg, int cacheid, uint32 hashvalue);
+static void TypeCacheTypCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
+static void TypeCacheOpcCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
+static void TypeCacheConstrCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
static void load_enum_cache_data(TypeCacheEntry *tcache);
static EnumItem *find_enumitem(TypeCacheEnumData *enumdata, Oid arg);
static int enum_oid_cmp(const void *left, const void *right);
@@ -2512,7 +2515,7 @@ TypeCacheRelCallback(Datum arg, Oid relid)
* it as needing to be reloaded.
*/
static void
-TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue)
+TypeCacheTypCallback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
HASH_SEQ_STATUS status;
TypeCacheEntry *typentry;
@@ -2569,7 +2572,7 @@ TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue)
* of members are not going to get cached here.
*/
static void
-TypeCacheOpcCallback(Datum arg, int cacheid, uint32 hashvalue)
+TypeCacheOpcCallback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
HASH_SEQ_STATUS status;
TypeCacheEntry *typentry;
@@ -2607,7 +2610,7 @@ TypeCacheOpcCallback(Datum arg, int cacheid, uint32 hashvalue)
* approach to domain constraints.
*/
static void
-TypeCacheConstrCallback(Datum arg, int cacheid, uint32 hashvalue)
+TypeCacheConstrCallback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
TypeCacheEntry *typentry;
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index aa530d3685e..cb1c9d85ffe 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -182,6 +182,7 @@ static bool matches_backtrace_functions(const char *funcname);
static pg_noinline void set_backtrace(ErrorData *edata, int num_skip);
static void set_errdata_field(MemoryContextData *cxt, char **ptr, const char *str);
static void FreeErrorDataContents(ErrorData *edata);
+static int log_min_messages_cmp(const ListCell *a, const ListCell *b);
static void write_console(const char *line, int len);
static const char *process_log_prefix_padding(const char *p, int *ppadding);
static void log_line_prefix(StringInfo buf, ErrorData *edata);
@@ -235,7 +236,7 @@ is_log_level_output(int elevel, int log_min_level)
static inline bool
should_output_to_server(int elevel)
{
- return is_log_level_output(elevel, log_min_messages);
+ return is_log_level_output(elevel, log_min_messages[MyBackendType]);
}
/*
@@ -2170,6 +2171,251 @@ DebugFileOpen(void)
}
+/*
+ * GUC check_hook for log_min_messages
+ *
+ * This value is parsed as a comma-separated list of zero or more TYPE:LEVEL
+ * elements. For each element, TYPE corresponds to a bk_category value (see
+ * postmaster/proctypelist.h); LEVEL is one of server_message_level_options.
+ *
+ * In addition, there must be a single LEVEL element (with no TYPE part)
+ * which sets the default level for process types that aren't specified.
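+ *
+ * For example (an illustrative value, not taken from this patch):
+ *
+ *     'warning, checkpointer:debug1, autovacuum:log'
+ *
+ * sets DEBUG1 for the checkpointer, LOG for autovacuum processes, and
+ * WARNING for every other process type.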
+ */
+bool
+check_log_min_messages(char **newval, void **extra, GucSource source)
+{
+ char *rawstring;
+ List *elemlist;
+ StringInfoData buf;
+ char *result;
+ int newlevel[BACKEND_NUM_TYPES];
+ bool assigned[BACKEND_NUM_TYPES] = {0};
+ int defaultlevel = -1; /* -1 means not assigned */
+
+ const char *const process_types[] = {
+#define PG_PROCTYPE(bktype, bkcategory, description, main_func, shmem_attach) \
+ [bktype] = bkcategory,
+#include "postmaster/proctypelist.h"
+#undef PG_PROCTYPE
+ };
+
+ /* Need a modifiable copy of string. */
+ rawstring = guc_strdup(LOG, *newval);
+ if (rawstring == NULL)
+ return false;
+
+ /* Parse the string into a list. */
+ if (!SplitGUCList(rawstring, ',', &elemlist))
+ {
+ /* syntax error in list */
+ GUC_check_errdetail("List syntax is invalid.");
+ list_free(elemlist);
+ guc_free(rawstring);
+ return false;
+ }
+
+ /* Validate and assign log level and process type. */
+ foreach_ptr(char, elem, elemlist)
+ {
+ char *sep = strchr(elem, ':');
+
+ /*
+ * If there's no ':' separator in the entry, this is the default log
+ * level. Otherwise it's a process type-specific entry.
+ */
+ if (sep == NULL)
+ {
+ const struct config_enum_entry *entry;
+ bool found;
+
+ /* Reject duplicates for default log level. */
+ if (defaultlevel != -1)
+ {
+ GUC_check_errdetail("Redundant specification of default log level.");
+ goto lmm_fail;
+ }
+
+ /* Validate the log level */
+ found = false;
+ for (entry = server_message_level_options; entry && entry->name; entry++)
+ {
+ if (pg_strcasecmp(entry->name, elem) == 0)
+ {
+ defaultlevel = entry->val;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ GUC_check_errdetail("Unrecognized log level: \"%s\".", elem);
+ goto lmm_fail;
+ }
+ }
+ else
+ {
+ char *loglevel = sep + 1;
+ char *ptype = elem;
+ bool found;
+ int level;
+ const struct config_enum_entry *entry;
+
+ /*
+ * Temporarily clobber the ':' with a string terminator, so that
+ * we can validate it. We restore this at the bottom.
+ */
+ *sep = '\0';
+
+ /* Validate the log level */
+ found = false;
+ for (entry = server_message_level_options; entry && entry->name; entry++)
+ {
+ if (pg_strcasecmp(entry->name, loglevel) == 0)
+ {
+ level = entry->val;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ GUC_check_errdetail("Unrecognized log level for process type \"%s\": \"%s\".",
+ ptype, loglevel);
+ goto lmm_fail;
+ }
+
+ /* Is the process type name valid and unique? */
+ found = false;
+ for (int i = 0; i < BACKEND_NUM_TYPES; i++)
+ {
+ if (pg_strcasecmp(process_types[i], ptype) == 0)
+ {
+ /* Reject duplicates for a process type. */
+ if (assigned[i])
+ {
+ GUC_check_errdetail("Redundant log level specification for process type \"%s\".",
+ ptype);
+ goto lmm_fail;
+ }
+
+ newlevel[i] = level;
+ assigned[i] = true;
+ found = true;
+
+ /*
+ * Note: we must keep looking! Some process types appear
+ * multiple times in proctypelist.h.
+ */
+ }
+ }
+
+ if (!found)
+ {
+ GUC_check_errdetail("Unrecognized process type \"%s\".", ptype);
+ goto lmm_fail;
+ }
+
+ /* Put the separator back in place */
+ *sep = ':';
+ }
+
+ /* all good */
+ continue;
+
+lmm_fail:
+ guc_free(rawstring);
+ list_free(elemlist);
+ return false;
+ }
+
+ /*
+ * The default log level must be specified. It is the fallback value.
+ */
+ if (defaultlevel == -1)
+ {
+ GUC_check_errdetail("Default log level was not defined.");
+ guc_free(rawstring);
+ list_free(elemlist);
+ return false;
+ }
+
+ /* Apply the default log level to all processes not listed. */
+ for (int i = 0; i < BACKEND_NUM_TYPES; i++)
+ {
+ if (!assigned[i])
+ newlevel[i] = defaultlevel;
+ }
+
+ /*
+ * Save an ordered representation of the user-specified string, for the
+ * show_hook.
+ */
+ list_sort(elemlist, log_min_messages_cmp);
+
+ initStringInfoExt(&buf, strlen(rawstring) + 1);
+ foreach_ptr(char, elem, elemlist)
+ {
+ if (foreach_current_index(elem) == 0)
+ appendStringInfoString(&buf, elem);
+ else
+ appendStringInfo(&buf, ", %s", elem);
+ }
+
+ result = guc_strdup(LOG, buf.data);
+ if (!result)
+ {
+ guc_free(rawstring);
+ list_free(elemlist);
+ pfree(buf.data);
+ return false;
+ }
+
+ guc_free(*newval);
+ *newval = result;
+
+ guc_free(rawstring);
+ list_free(elemlist);
+ pfree(buf.data);
+
+ /*
+ * Pass back data for assign_log_min_messages to use.
+ */
+ *extra = guc_malloc(LOG, BACKEND_NUM_TYPES * sizeof(int));
+ if (!*extra)
+ return false;
+ memcpy(*extra, newlevel, BACKEND_NUM_TYPES * sizeof(int));
+
+ return true;
+}
+
+/*
+ * list_sort() callback for check_log_min_messages. The default element
+ * goes first; the rest are ordered by strcmp() of the process type.
+ */
+static int
+log_min_messages_cmp(const ListCell *a, const ListCell *b)
+{
+ const char *s = lfirst(a);
+ const char *t = lfirst(b);
+
+ if (strchr(s, ':') == NULL)
+ return -1;
+ else if (strchr(t, ':') == NULL)
+ return 1;
+ else
+ return strcmp(s, t);
+}
+
+/*
+ * GUC assign_hook for log_min_messages
+ */
+void
+assign_log_min_messages(const char *newval, void *extra)
+{
+ for (int i = 0; i < BACKEND_NUM_TYPES; i++)
+ log_min_messages[i] = ((int *) extra)[i];
+}
+
/*
* GUC check_hook for backtrace_functions
*
@@ -2779,7 +3025,12 @@ get_backend_type_for_log(void)
if (MyProcPid == PostmasterPid)
backend_type_str = "postmaster";
else if (MyBackendType == B_BG_WORKER)
- backend_type_str = MyBgworkerEntry->bgw_type;
+ {
+ if (MyBgworkerEntry)
+ backend_type_str = MyBgworkerEntry->bgw_type;
+ else
+ backend_type_str = "early bgworker";
+ }
else
backend_type_str = GetBackendTypeDesc(MyBackendType);
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 05984e7ef26..4e26df7c63a 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -1793,8 +1793,8 @@ OidSendFunctionCall(Oid functionId, Datum val)
*-------------------------------------------------------------------------
*/
-struct varlena *
-pg_detoast_datum(struct varlena *datum)
+varlena *
+pg_detoast_datum(varlena *datum)
{
if (VARATT_IS_EXTENDED(datum))
return detoast_attr(datum);
@@ -1802,8 +1802,8 @@ pg_detoast_datum(struct varlena *datum)
return datum;
}
-struct varlena *
-pg_detoast_datum_copy(struct varlena *datum)
+varlena *
+pg_detoast_datum_copy(varlena *datum)
{
if (VARATT_IS_EXTENDED(datum))
return detoast_attr(datum);
@@ -1811,22 +1811,22 @@ pg_detoast_datum_copy(struct varlena *datum)
{
/* Make a modifiable copy of the varlena object */
Size len = VARSIZE(datum);
- struct varlena *result = (struct varlena *) palloc(len);
+ varlena *result = (varlena *) palloc(len);
memcpy(result, datum, len);
return result;
}
}
-struct varlena *
-pg_detoast_datum_slice(struct varlena *datum, int32 first, int32 count)
+varlena *
+pg_detoast_datum_slice(varlena *datum, int32 first, int32 count)
{
/* Only get the specified portion from the toast rel */
return detoast_attr_slice(datum, first, count);
}
-struct varlena *
-pg_detoast_datum_packed(struct varlena *datum)
+varlena *
+pg_detoast_datum_packed(varlena *datum)
{
if (VARATT_IS_COMPRESSED(datum) || VARATT_IS_EXTERNAL(datum))
return detoast_attr(datum);
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index 563f20374ff..03f6c8479f2 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -266,7 +266,7 @@ GetBackendTypeDesc(BackendType backendType)
switch (backendType)
{
-#define PG_PROCTYPE(bktype, description, main_func, shmem_attach) \
+#define PG_PROCTYPE(bktype, bkcategory, description, main_func, shmem_attach) \
case bktype: backendDesc = description; break;
#include "postmaster/proctypelist.h"
#undef PG_PROCTYPE
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 3f401faf3de..b59e08605cc 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -70,6 +70,13 @@
#include "utils/syscache.h"
#include "utils/timeout.h"
+/* has this backend called EmitConnectionWarnings()? */
+static bool ConnectionWarningsEmitted;
+
+/* content of warnings to send via EmitConnectionWarnings() */
+static List *ConnectionWarningMessages;
+static List *ConnectionWarningDetails;
+
static HeapTuple GetDatabaseTuple(const char *dbname);
static HeapTuple GetDatabaseTupleByOid(Oid dboid);
static void PerformAuthentication(Port *port);
@@ -85,6 +92,7 @@ static void ClientCheckTimeoutHandler(void);
static bool ThereIsAtLeastOneRole(void);
static void process_startup_options(Port *port, bool am_superuser);
static void process_settings(Oid databaseid, Oid roleid);
+static void EmitConnectionWarnings(void);
/*** InitPostgres support ***/
@@ -987,6 +995,9 @@ InitPostgres(const char *in_dbname, Oid dboid,
/* close the transaction we started above */
CommitTransactionCommand();
+ /* send any WARNINGs we've accumulated during initialization */
+ EmitConnectionWarnings();
+
return;
}
@@ -1232,6 +1243,9 @@ InitPostgres(const char *in_dbname, Oid dboid,
/* close the transaction we started above */
if (!bootstrap)
CommitTransactionCommand();
+
+ /* send any WARNINGs we've accumulated during initialization */
+ EmitConnectionWarnings();
}
/*
@@ -1446,3 +1460,58 @@ ThereIsAtLeastOneRole(void)
return result;
}
+
+/*
+ * Stores a warning message to be sent later via EmitConnectionWarnings().
+ * Both msg and detail must be non-NULL.
+ *
+ * NB: Caller should ensure the strings are allocated in a long-lived context
+ * like TopMemoryContext.
+ */
+void
+StoreConnectionWarning(char *msg, char *detail)
+{
+ MemoryContext oldcontext;
+
+ Assert(msg);
+ Assert(detail);
+
+ if (ConnectionWarningsEmitted)
+ elog(ERROR, "StoreConnectionWarning() called after EmitConnectionWarnings()");
+
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+ ConnectionWarningMessages = lappend(ConnectionWarningMessages, msg);
+ ConnectionWarningDetails = lappend(ConnectionWarningDetails, detail);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Sends the warning messages saved via StoreConnectionWarning() and frees the
+ * strings and lists.
+ *
+ * NB: This can only be called once per backend.
+ */
+static void
+EmitConnectionWarnings(void)
+{
+ ListCell *lc_msg;
+ ListCell *lc_detail;
+
+ if (ConnectionWarningsEmitted)
+ elog(ERROR, "EmitConnectionWarnings() called more than once");
+ else
+ ConnectionWarningsEmitted = true;
+
+ forboth(lc_msg, ConnectionWarningMessages,
+ lc_detail, ConnectionWarningDetails)
+ {
+ ereport(WARNING,
+ (errmsg("%s", (char *) lfirst(lc_msg)),
+ errdetail("%s", (char *) lfirst(lc_detail))));
+ }
+
+ list_free_deep(ConnectionWarningMessages);
+ list_free_deep(ConnectionWarningDetails);
+}
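A hypothetical call site for the new API, sketched to show the allocation discipline the comments ask for (the message text here is invented):

/* sometime during startup, before InitPostgres() emits the warnings */
StoreConnectionWarning(MemoryContextStrdup(TopMemoryContext,
										   "role password expires soon"),
					   MemoryContextStrdup(TopMemoryContext,
										   "Use ALTER ROLE to set a new password."));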
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 6950e743d03..78f4d5e202c 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -38,6 +38,7 @@
#include "catalog/namespace.h"
#include "mb/pg_wchar.h"
#include "utils/fmgrprotos.h"
+#include "utils/memdebug.h"
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "varatt.h"
@@ -97,6 +98,13 @@ static char *perform_default_encoding_conversion(const char *src,
int len, bool is_client_to_server);
static int cliplen(const char *str, int len, int limit);
+pg_noreturn
+static void report_invalid_encoding_int(int encoding, const char *mbstr,
+ int mblen, int len);
+
+pg_noreturn
+static void report_invalid_encoding_db(const char *mbstr, int mblen, int len);
+
/*
* Prepare for a future call to SetClientEncoding. Success should mean
@@ -1021,11 +1029,128 @@ pg_encoding_wchar2mb_with_len(int encoding,
return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
}
-/* returns the byte length of a multibyte character */
+/*
+ * Returns the byte length of a multibyte character sequence in a
+ * null-terminated string. Raises an illegal-byte-sequence error if the
+ * sequence would run past the null terminator.
+ *
+ * The caller is expected to have checked for a terminator (*mbstr == 0)
+ * before calling, but some callers rely on getting 1 in that case, so this
+ * function preserves that behavior.
+ *
+ * This must only be used with null-terminated strings, since the terminator
+ * is what enables bounds detection.
+ */
+int
+pg_mblen_cstr(const char *mbstr)
+{
+ int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+
+ /*
+ * The .mblen functions return 1 when given a pointer to a terminator.
+ * Some callers depend on that, so we tolerate it for now. Well-behaved
+ * callers check the leading byte for a terminator *before* calling.
+ */
+ for (int i = 1; i < length; ++i)
+ if (unlikely(mbstr[i] == 0))
+ report_invalid_encoding_db(mbstr, length, i);
+
+ /*
+ * String should be NUL-terminated, but checking that would make typical
+ * callers O(N^2), tripling Valgrind check-world time. Unless
+ * VALGRIND_EXPENSIVE, check 1 byte after each actual character. (If we
+ * found a character, not a terminator, the next byte must be a terminator
+ * or the start of the next character.) If the caller iterates the whole
+ * string, the last call will diagnose a missing terminator.
+ */
+ if (mbstr[0] != '\0')
+ {
+#ifdef VALGRIND_EXPENSIVE
+ VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, strlen(mbstr));
+#else
+ VALGRIND_CHECK_MEM_IS_DEFINED(mbstr + length, 1);
+#endif
+ }
+
+ return length;
+}
+
+/*
+ * Returns the byte length of a multibyte character sequence bounded by a range
+ * [mbstr, end) of at least one byte in size. Raises an illegal byte sequence
+ * error if the sequence would exceed the range.
+ */
+int
+pg_mblen_range(const char *mbstr, const char *end)
+{
+ int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+
+ Assert(end > mbstr);
+
+ if (unlikely(mbstr + length > end))
+ report_invalid_encoding_db(mbstr, length, end - mbstr);
+
+#ifdef VALGRIND_EXPENSIVE
+ VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, end - mbstr);
+#else
+ VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
+#endif
+
+ return length;
+}
+
+/*
+ * Returns the byte length of a multibyte character sequence bounded by a range
+ * extending for 'limit' bytes, which must be at least one. Raises an illegal
+ * byte sequence error if the sequence would exceed the range.
+ */
+int
+pg_mblen_with_len(const char *mbstr, int limit)
+{
+ int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+
+ Assert(limit >= 1);
+
+ if (unlikely(length > limit))
+ report_invalid_encoding_db(mbstr, length, limit);
+
+#ifdef VALGRIND_EXPENSIVE
+ VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, limit);
+#else
+ VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
+#endif
+
+ return length;
+}
+
+/*
+ * Returns the length of a multibyte character sequence, without any
+ * validation of bounds.
+ *
+ * PLEASE NOTE: This function can only be used safely if the caller has
+ * already verified the input string, since otherwise there is a risk of
+ * overrunning the buffer if the string is invalid. A prior call to a
+ * pg_mbstrlen* function suffices.
+ */
+int
+pg_mblen_unbounded(const char *mbstr)
+{
+ int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+
+ VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length);
+
+ return length;
+}
+
+/*
+ * Historical name for pg_mblen_unbounded(). It should not be used in new
+ * code and will be removed in a later version.
+ */
int
pg_mblen(const char *mbstr)
{
- return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
+ return pg_mblen_unbounded(mbstr);
}
/* returns the display length of a multibyte character */
@@ -1047,14 +1172,14 @@ pg_mbstrlen(const char *mbstr)
while (*mbstr)
{
- mbstr += pg_mblen(mbstr);
+ mbstr += pg_mblen_cstr(mbstr);
len++;
}
return len;
}
/* returns the length (counted in wchars) of a multibyte string
- * (not necessarily NULL terminated)
+ * (stops after "limit" bytes or at a NUL, whichever comes first)
*/
int
pg_mbstrlen_with_len(const char *mbstr, int limit)
@@ -1067,7 +1192,7 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
while (limit > 0 && *mbstr)
{
- int l = pg_mblen(mbstr);
+ int l = pg_mblen_with_len(mbstr, limit);
limit -= l;
mbstr += l;
@@ -1137,7 +1262,7 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
while (len > 0 && *mbstr)
{
- l = pg_mblen(mbstr);
+ l = pg_mblen_with_len(mbstr, len);
nch++;
if (nch > limit)
break;
@@ -1376,7 +1501,7 @@ pg_utf8_increment(unsigned char *charptr, int length)
charptr[3]++;
break;
}
- /* FALL THRU */
+ pg_fallthrough;
case 3:
a = charptr[2];
if (a < 0xBF)
@@ -1384,7 +1509,7 @@ pg_utf8_increment(unsigned char *charptr, int length)
charptr[2]++;
break;
}
- /* FALL THRU */
+ pg_fallthrough;
case 2:
a = charptr[1];
switch (*charptr)
@@ -1404,7 +1529,7 @@ pg_utf8_increment(unsigned char *charptr, int length)
charptr[1]++;
break;
}
- /* FALL THRU */
+ pg_fallthrough;
case 1:
a = *charptr;
if (a == 0x7F || a == 0xDF || a == 0xEF || a == 0xF4)
@@ -1701,12 +1826,19 @@ void
report_invalid_encoding(int encoding, const char *mbstr, int len)
{
int l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len);
+
+ report_invalid_encoding_int(encoding, mbstr, l, len);
+}
+
+static void
+report_invalid_encoding_int(int encoding, const char *mbstr, int mblen, int len)
+{
char buf[8 * 5 + 1];
char *p = buf;
int j,
jlimit;
- jlimit = Min(l, len);
+ jlimit = Min(mblen, len);
jlimit = Min(jlimit, 8); /* prevent buffer overrun */
for (j = 0; j < jlimit; j++)
@@ -1723,6 +1855,12 @@ report_invalid_encoding(int encoding, const char *mbstr, int len)
buf)));
}
+static void
+report_invalid_encoding_db(const char *mbstr, int mblen, int len)
+{
+ report_invalid_encoding_int(GetDatabaseEncoding(), mbstr, mblen, len);
+}
+
/*
* report_untranslatable_char: complain about untranslatable character
*
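To make the new calling conventions concrete, here is a minimal sketch of walking a NUL-terminated string with the bounds-checked variant; it mirrors the pg_mbstrlen() loop above (the function name is illustrative):

/* count characters in a NUL-terminated multibyte string (illustrative) */
static int
count_mb_chars(const char *s)
{
	int			n = 0;

	while (*s)
	{
		/* errors out, rather than overrunning, on a truncated sequence */
		s += pg_mblen_cstr(s);
		n++;
	}
	return n;
}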
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ae9d5f3fb70..d77502838c4 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3415,7 +3415,7 @@ set_config_with_handle(const char *name, config_handle *handle,
}
}
/* fall through to process the same as PGC_BACKEND */
- /* FALLTHROUGH */
+ pg_fallthrough;
case PGC_BACKEND:
if (context == PGC_SIGHUP)
{
diff --git a/src/backend/utils/misc/guc_funcs.c b/src/backend/utils/misc/guc_funcs.c
index 4f3e40bf470..8524dd3a981 100644
--- a/src/backend/utils/misc/guc_funcs.c
+++ b/src/backend/utils/misc/guc_funcs.c
@@ -139,7 +139,7 @@ ExecSetVariableStmt(VariableSetStmt *stmt, bool isTopLevel)
case VAR_SET_DEFAULT:
if (stmt->is_local)
WarnNoTransactionBlock(isTopLevel, "SET LOCAL");
- /* fall through */
+ pg_fallthrough;
case VAR_RESET:
(void) set_config_option(stmt->name,
NULL,
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index f0260e6e412..9507778415d 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -1042,6 +1042,13 @@
options => 'file_copy_method_options',
},
+{ name => 'file_extend_method', type => 'enum', context => 'PGC_SIGHUP', group => 'RESOURCES_DISK',
+ short_desc => 'Selects the method used for extending data files.',
+ variable => 'file_extend_method',
+ boot_val => 'DEFAULT_FILE_EXTEND_METHOD',
+ options => 'file_extend_method_options',
+},
+
{ name => 'from_collapse_limit', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER',
short_desc => 'Sets the FROM-list size beyond which subqueries are not collapsed.',
long_desc => 'The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.',
@@ -1686,12 +1693,14 @@
options => 'server_message_level_options',
},
-{ name => 'log_min_messages', type => 'enum', context => 'PGC_SUSET', group => 'LOGGING_WHEN',
+{ name => 'log_min_messages', type => 'string', context => 'PGC_SUSET', group => 'LOGGING_WHEN',
short_desc => 'Sets the message levels that are logged.',
long_desc => 'Each level includes all the levels that follow it. The later the level, the fewer messages are sent.',
- variable => 'log_min_messages',
- boot_val => 'WARNING',
- options => 'server_message_level_options',
+ flags => 'GUC_LIST_INPUT',
+ variable => 'log_min_messages_string',
+ boot_val => '"warning"',
+ check_hook => 'check_log_min_messages',
+ assign_hook => 'assign_log_min_messages',
},
{ name => 'log_parameter_max_length', type => 'int', context => 'PGC_SUSET', group => 'LOGGING_WHAT',
@@ -2242,6 +2251,16 @@
options => 'password_encryption_options',
},
+{ name => 'password_expiration_warning_threshold', type => 'int', context => 'PGC_SIGHUP', group => 'CONN_AUTH_AUTH',
+ short_desc => 'Threshold for password expiration warnings.',
+ long_desc => '0 means not to emit these warnings.',
+ flags => 'GUC_UNIT_S',
+ variable => 'password_expiration_warning_threshold',
+ boot_val => '604800',
+ min => '0',
+ max => 'INT_MAX',
+},
+
{ name => 'plan_cache_mode', type => 'enum', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER',
short_desc => 'Controls the planner\'s selection of custom or generic plan.',
long_desc => 'Prepared statements can have custom and generic plans, and the planner will attempt to choose which is better. This can be set to override the default behavior.',
@@ -3394,7 +3413,7 @@
max => 'INT_MAX / 1000',
},
-{ name => 'wal_receiver_timeout', type => 'int', context => 'PGC_SIGHUP', group => 'REPLICATION_STANDBY',
+{ name => 'wal_receiver_timeout', type => 'int', context => 'PGC_USERSET', group => 'REPLICATION_STANDBY',
short_desc => 'Sets the maximum wait time to receive data from the sending server.',
long_desc => '0 disables the timeout.',
flags => 'GUC_UNIT_MS',
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 13c569d8790..741fce8dede 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -80,6 +80,7 @@
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/copydir.h"
+#include "storage/fd.h"
#include "storage/io_worker.h"
#include "storage/large_object.h"
#include "storage/pg_shmem.h"
@@ -146,7 +147,7 @@ static const struct config_enum_entry client_message_level_options[] = {
{NULL, 0, false}
};
-static const struct config_enum_entry server_message_level_options[] = {
+const struct config_enum_entry server_message_level_options[] = {
{"debug5", DEBUG5, false},
{"debug4", DEBUG4, false},
{"debug3", DEBUG3, false},
@@ -491,6 +492,14 @@ static const struct config_enum_entry file_copy_method_options[] = {
{NULL, 0, false}
};
+static const struct config_enum_entry file_extend_method_options[] = {
+#ifdef HAVE_POSIX_FALLOCATE
+ {"posix_fallocate", FILE_EXTEND_METHOD_POSIX_FALLOCATE, false},
+#endif
+ {"write_zeros", FILE_EXTEND_METHOD_WRITE_ZEROS, false},
+ {NULL, 0, false}
+};
+
/*
* Options for enum values stored in other modules
*/
@@ -537,7 +546,6 @@ static bool standard_conforming_strings = true;
bool current_role_is_superuser;
int log_min_error_statement = ERROR;
-int log_min_messages = WARNING;
int client_min_messages = NOTICE;
int log_min_duration_sample = -1;
int log_min_duration_statement = -1;
@@ -595,6 +603,7 @@ static char *server_version_string;
static int server_version_num;
static char *debug_io_direct_string;
static char *restrict_nonsystem_relation_kind_string;
+static char *log_min_messages_string;
#ifdef HAVE_SYSLOG
#define DEFAULT_SYSLOG_FACILITY LOG_LOCAL0
@@ -647,6 +656,15 @@ char *role_string;
/* should be static, but guc.c needs to get at this */
bool in_hot_standby_guc;
+/*
+ * set default log_min_messages to WARNING for all process types
+ */
+int log_min_messages[] = {
+#define PG_PROCTYPE(bktype, bkcategory, description, main_func, shmem_attach) \
+ [bktype] = WARNING,
+#include "postmaster/proctypelist.h"
+#undef PG_PROCTYPE
+};
/*
* Displayable names for context types (enum GucContext)
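The two file_extend_method options correspond to familiar file-extension strategies. A hedged standalone sketch of what they boil down to — the helper names are invented, and the backend's real implementation (not shown in this hunk) handles errors and interrupts more carefully:

#include <fcntl.h>
#include <unistd.h>

/* posix_fallocate: ask the filesystem to reserve the range; no data written */
static int
extend_posix_fallocate(int fd, off_t offset, off_t len)
{
	/* returns 0 on success, or an errno value on failure */
	return posix_fallocate(fd, offset, len);
}

/* write_zeros: portable fallback that physically writes zero-filled blocks */
static int
extend_write_zeros(int fd, off_t offset, off_t len)
{
	static const char zbuf[8192];	/* implicitly zero-initialized */

	while (len > 0)
	{
		size_t		chunk = len > (off_t) sizeof(zbuf) ? sizeof(zbuf) : (size_t) len;
		ssize_t		n = pwrite(fd, zbuf, chunk, offset);

		if (n < 0)
			return -1;
		offset += n;
		len -= n;
	}
	return 0;
}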
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index c4f92fcdac8..f938cc65a3a 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -96,7 +96,8 @@
#authentication_timeout = 1min # 1s-600s
#password_encryption = scram-sha-256 # scram-sha-256 or (deprecated) md5
#scram_iterations = 4096
-#md5_password_warnings = on # display md5 deprecation warnings?
+#password_expiration_warning_threshold = 7d # threshold for expiration warnings
+#md5_password_warnings = on # display md5 deprecation warnings?
#oauth_validator_libraries = '' # comma-separated list of trusted validator modules
# GSSAPI using Kerberos
@@ -179,6 +180,10 @@
# in kilobytes, or -1 for no limit
#file_copy_method = copy # copy, clone (if supported by OS)
+#file_extend_method = posix_fallocate # the default is the first option supported
+ # by the operating system:
+ # posix_fallocate (most Unix-like systems)
+ # write_zeros
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
# for NOTIFY / LISTEN queue
@@ -528,7 +533,21 @@
# - When to Log -
-#log_min_messages = warning # values in order of decreasing detail:
+#log_min_messages = 'warning' # comma-separated list of
+ # process_type:level entries, plus
+ # one freestanding level as default.
+ # Valid process types are:
+ # archiver autovacuum
+ # backend bgworker
+ # bgwriter checkpointer
+ # ioworker postmaster
+ # slotsyncworker startup
+ # syslogger walreceiver
+ # walsummarizer walwriter
+ # walsender
+ #
+ # Level values in order of decreasing
+ # detail:
# debug5
# debug4
# debug3
diff --git a/src/backend/utils/misc/superuser.c b/src/backend/utils/misc/superuser.c
index 7821624687a..b9c3a0ceaa8 100644
--- a/src/backend/utils/misc/superuser.c
+++ b/src/backend/utils/misc/superuser.c
@@ -36,7 +36,8 @@ static Oid last_roleid = InvalidOid; /* InvalidOid == cache not valid */
static bool last_roleid_is_super = false;
static bool roleid_callback_registered = false;
-static void RoleidCallback(Datum arg, int cacheid, uint32 hashvalue);
+static void RoleidCallback(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
/*
@@ -100,7 +101,7 @@ superuser_arg(Oid roleid)
* Syscache inval callback function
*/
static void
-RoleidCallback(Datum arg, int cacheid, uint32 hashvalue)
+RoleidCallback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
{
/* Invalidate our local cache in case role's superuserness changed */
last_roleid = InvalidOid;
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index ae7d1647aea..161c2e2d3df 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -87,6 +87,10 @@
#define ALLOC_CHUNK_FRACTION 4
/* We allow chunks to be at most 1/4 of maxBlockSize (less overhead) */
+/* ALLOC_CHUNK_LIMIT must be equal to ALLOCSET_SEPARATE_THRESHOLD */
+StaticAssertDecl(ALLOC_CHUNK_LIMIT == ALLOCSET_SEPARATE_THRESHOLD,
+ "ALLOC_CHUNK_LIMIT != ALLOCSET_SEPARATE_THRESHOLD");
+
/*--------------------
* The first block allocated for an allocset has size initBlockSize.
* Each time we have to allocate another block, we double the block size
@@ -501,12 +505,6 @@ AllocSetContextCreateInternal(MemoryContext parent,
* requests that are all the maximum chunk size we will waste at most
* 1/8th of the allocated space.
*
- * Also, allocChunkLimit must not exceed ALLOCSET_SEPARATE_THRESHOLD.
- */
- StaticAssertStmt(ALLOC_CHUNK_LIMIT == ALLOCSET_SEPARATE_THRESHOLD,
- "ALLOC_CHUNK_LIMIT != ALLOCSET_SEPARATE_THRESHOLD");
-
- /*
* Determine the maximum size that a chunk can be before we allocate an
* entire AllocBlock dedicated for that chunk. We set the absolute limit
* of that size as ALLOC_CHUNK_LIMIT but we reduce it further so that we
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index 4fa4d432021..c1a53e658cb 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -294,9 +294,8 @@ PortalDefineQuery(Portal portal,
portal->prepStmtName = prepStmtName;
portal->sourceText = sourceText;
- portal->qc.commandTag = commandTag;
- portal->qc.nprocessed = 0;
portal->commandTag = commandTag;
+ SetQueryCompletion(&portal->qc, commandTag, 0);
portal->stmts = stmts;
portal->cplan = cplan;
portal->status = PORTAL_DEFINED;
diff --git a/src/backend/utils/sort/sharedtuplestore.c b/src/backend/utils/sort/sharedtuplestore.c
index 8f35a255263..04189f708fa 100644
--- a/src/backend/utils/sort/sharedtuplestore.c
+++ b/src/backend/utils/sort/sharedtuplestore.c
@@ -323,7 +323,8 @@ sts_puttuple(SharedTuplestoreAccessor *accessor, void *meta_data,
/* Do we have space? */
size = accessor->sts->meta_data_size + tuple->t_len;
- if (accessor->write_pointer + size > accessor->write_end)
+ if (accessor->write_pointer == NULL ||
+ accessor->write_pointer + size > accessor->write_end)
{
if (accessor->write_chunk == NULL)
{
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 1edcad89c88..1fc440ea6ca 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -7,8 +7,8 @@
* applied to different kinds of sortable objects. Implementation of
* the particular sorting variants is given in tuplesortvariants.c.
* This module works efficiently for both small and large amounts
- * of data. Small amounts are sorted in-memory using qsort(). Large
- * amounts are sorted using temporary files and a standard external sort
+ * of data. Small amounts are sorted in-memory. Large amounts are
+ * sorted using temporary files and a standard external sort
* algorithm.
*
* See Knuth, volume 3, for more than you want to know about external
@@ -26,16 +26,16 @@
* Historically, we divided the input into sorted runs using replacement
* selection, in the form of a priority tree implemented as a heap
* (essentially Knuth's Algorithm 5.2.3H), but now we always use quicksort
- * for run generation.
+ * or radix sort for run generation.
*
* The approximate amount of memory allowed for any one sort operation
* is specified in kilobytes by the caller (most pass work_mem). Initially,
* we absorb tuples and simply store them in an unsorted array as long as
* we haven't exceeded workMem. If we reach the end of the input without
- * exceeding workMem, we sort the array using qsort() and subsequently return
+ * exceeding workMem, we sort the array in memory and subsequently return
* tuples just by scanning the tuple array sequentially. If we do exceed
* workMem, we begin to emit tuples into sorted runs in temporary tapes.
- * When tuples are dumped in batch after quicksorting, we begin a new run
+ * When tuples are dumped in batch after in-memory sorting, we begin a new run
* with a new output tape. If we reach the max number of tapes, we write
* subsequent runs on the existing tapes in a round-robin fashion. We will
* need multiple merge passes to finish the merge in that case. After the
@@ -476,121 +476,15 @@ static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup);
static void tuplesort_free(Tuplesortstate *state);
static void tuplesort_updatemax(Tuplesortstate *state);
-/*
- * Specialized comparators that we can inline into specialized sorts. The goal
- * is to try to sort two tuples without having to follow the pointers to the
- * comparator or the tuple.
- *
- * XXX: For now, there is no specialization for cases where datum1 is
- * authoritative and we don't even need to fall back to a callback at all (that
- * would be true for types like int4/int8/timestamp/date, but not true for
- * abbreviations of text or multi-key sorts. There could be! Is it worth it?
- */
-
-/* Used if first key's comparator is ssup_datum_unsigned_cmp */
-static pg_attribute_always_inline int
-qsort_tuple_unsigned_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
-{
- int compare;
-
- compare = ApplyUnsignedSortComparator(a->datum1, a->isnull1,
- b->datum1, b->isnull1,
- &state->base.sortKeys[0]);
- if (compare != 0)
- return compare;
-
- /*
- * No need to waste effort calling the tiebreak function when there are no
- * other keys to sort on.
- */
- if (state->base.onlyKey != NULL)
- return 0;
-
- return state->base.comparetup_tiebreak(a, b, state);
-}
-
-/* Used if first key's comparator is ssup_datum_signed_cmp */
-static pg_attribute_always_inline int
-qsort_tuple_signed_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
-{
- int compare;
-
- compare = ApplySignedSortComparator(a->datum1, a->isnull1,
- b->datum1, b->isnull1,
- &state->base.sortKeys[0]);
-
- if (compare != 0)
- return compare;
-
- /*
- * No need to waste effort calling the tiebreak function when there are no
- * other keys to sort on.
- */
- if (state->base.onlyKey != NULL)
- return 0;
-
- return state->base.comparetup_tiebreak(a, b, state);
-}
-
-/* Used if first key's comparator is ssup_datum_int32_cmp */
-static pg_attribute_always_inline int
-qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
-{
- int compare;
-
- compare = ApplyInt32SortComparator(a->datum1, a->isnull1,
- b->datum1, b->isnull1,
- &state->base.sortKeys[0]);
-
- if (compare != 0)
- return compare;
-
- /*
- * No need to waste effort calling the tiebreak function when there are no
- * other keys to sort on.
- */
- if (state->base.onlyKey != NULL)
- return 0;
-
- return state->base.comparetup_tiebreak(a, b, state);
-}
/*
* Special versions of qsort just for SortTuple objects. qsort_tuple() sorts
* any variant of SortTuples, using the appropriate comparetup function.
* qsort_ssup() is specialized for the case where the comparetup function
* reduces to ApplySortComparator(), that is single-key MinimalTuple sorts
- * and Datum sorts. qsort_tuple_{unsigned,signed,int32} are specialized for
- * common comparison functions on pass-by-value leading datums.
+ * and Datum sorts.
*/
-#define ST_SORT qsort_tuple_unsigned
-#define ST_ELEMENT_TYPE SortTuple
-#define ST_COMPARE(a, b, state) qsort_tuple_unsigned_compare(a, b, state)
-#define ST_COMPARE_ARG_TYPE Tuplesortstate
-#define ST_CHECK_FOR_INTERRUPTS
-#define ST_SCOPE static
-#define ST_DEFINE
-#include "lib/sort_template.h"
-
-#define ST_SORT qsort_tuple_signed
-#define ST_ELEMENT_TYPE SortTuple
-#define ST_COMPARE(a, b, state) qsort_tuple_signed_compare(a, b, state)
-#define ST_COMPARE_ARG_TYPE Tuplesortstate
-#define ST_CHECK_FOR_INTERRUPTS
-#define ST_SCOPE static
-#define ST_DEFINE
-#include "lib/sort_template.h"
-
-#define ST_SORT qsort_tuple_int32
-#define ST_ELEMENT_TYPE SortTuple
-#define ST_COMPARE(a, b, state) qsort_tuple_int32_compare(a, b, state)
-#define ST_COMPARE_ARG_TYPE Tuplesortstate
-#define ST_CHECK_FOR_INTERRUPTS
-#define ST_SCOPE static
-#define ST_DEFINE
-#include "lib/sort_template.h"
-
#define ST_SORT qsort_tuple
#define ST_ELEMENT_TYPE SortTuple
#define ST_COMPARE_RUNTIME_POINTER
@@ -612,6 +506,23 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
#define ST_DEFINE
#include "lib/sort_template.h"
+/* state for radix sort */
+typedef struct RadixSortInfo
+{
+ union
+ {
+ size_t count;
+ size_t offset;
+ };
+ size_t next_offset;
+} RadixSortInfo;
+
+/*
+ * Threshold below which qsort_tuple() is generally faster than a radix sort.
+ */
+#define QSORT_THRESHOLD 40
+
+
/*
* tuplesort_begin_xxx
*
@@ -1363,7 +1274,7 @@ tuplesort_performsort(Tuplesortstate *state)
*/
if (SERIAL(state))
{
- /* Just qsort 'em and we're done */
+ /* Sort in memory and we're done */
tuplesort_sort_memtuples(state);
state->status = TSS_SORTEDINMEM;
}
@@ -2337,7 +2248,7 @@ dumptuples(Tuplesortstate *state, bool alltuples)
/*
* Sort all tuples accumulated within the allowed amount of memory for
- * this run using quicksort
+ * this run.
*/
tuplesort_sort_memtuples(state);
@@ -2652,10 +2563,396 @@ sort_bounded_heap(Tuplesortstate *state)
state->boundUsed = true;
}
+
+/* radix sort routines */
+
+/*
+ * Retrieve byte from datum, indexed by 'level': 0 for MSB, 7 for LSB
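+ *
+ * For example, with an 8-byte Datum holding 0x0011223344556677, level 0
+ * yields 0x00 and level 7 yields 0x77.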
+ */
+static inline uint8
+current_byte(Datum key, int level)
+{
+ int shift = (sizeof(Datum) - 1 - level) * BITS_PER_BYTE;
+
+ return (key >> shift) & 0xFF;
+}
+
/*
- * Sort all memtuples using specialized qsort() routines.
+ * Normalize datum such that unsigned comparison is order-preserving,
+ * taking ASC/DESC into account as well.
+ */
+static inline Datum
+normalize_datum(Datum orig, SortSupport ssup)
+{
+ Datum norm_datum1;
+
+ if (ssup->comparator == ssup_datum_signed_cmp)
+ {
+ norm_datum1 = orig + ((uint64) PG_INT64_MAX) + 1;
+ }
+ else if (ssup->comparator == ssup_datum_int32_cmp)
+ {
+ /*
+ * First truncate to uint32. Technically, we don't need to do this,
+ * but it forces the upper half of the datum to be zero regardless of
+ * sign.
+ */
+ uint32 u32 = DatumGetUInt32(orig) + ((uint32) PG_INT32_MAX) + 1;
+
+ norm_datum1 = UInt32GetDatum(u32);
+ }
+ else
+ {
+ Assert(ssup->comparator == ssup_datum_unsigned_cmp);
+ norm_datum1 = orig;
+ }
+
+ if (ssup->ssup_reverse)
+ norm_datum1 = ~norm_datum1;
+
+ return norm_datum1;
+}
+
+/*
+ * radix_sort_recursive
+ *
+ * Radix sort by (pass-by-value) datum1, diverting to qsort_tuple()
+ * for tiebreaks.
+ *
+ * This is a modification of
+ * ska_byte_sort() from https://github.com/skarupke/ska_sort
+ * The original copyright notice follows:
+ *
+ * Copyright Malte Skarupke 2016.
+ * Distributed under the Boost Software License, Version 1.0.
+ *
+ * Boost Software License - Version 1.0 - August 17th, 2003
+ *
+ * Permission is hereby granted, free of charge, to any person or organization
+ * obtaining a copy of the software and accompanying documentation covered by
+ * this license (the "Software") to use, reproduce, display, distribute,
+ * execute, and transmit the Software, and to prepare derivative works of the
+ * Software, and to permit third-parties to whom the Software is furnished to
+ * do so, all subject to the following:
+ *
+ * The copyright notices in the Software and this entire statement, including
+ * the above license grant, this restriction and the following disclaimer,
+ * must be included in all copies of the Software, in whole or in part, and
+ * all derivative works of the Software, unless such copies or derivative
+ * works are solely in the form of machine-executable object code generated by
+ * a source language processor.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+ * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+static void
+radix_sort_recursive(SortTuple *begin, size_t n_elems, int level, Tuplesortstate *state)
+{
+ RadixSortInfo partitions[256] = {0};
+ uint8 remaining_partitions[256];
+ size_t total = 0;
+ int num_partitions = 0;
+ int num_remaining;
+ SortSupport ssup = &state->base.sortKeys[0];
+ size_t start_offset = 0;
+ SortTuple *partition_begin = begin;
+
+ /* count number of occurrences of each byte */
+ for (SortTuple *st = begin; st < begin + n_elems; st++)
+ {
+ uint8 this_partition;
+
+ /* extract the byte for this level from the normalized datum */
+ this_partition = current_byte(normalize_datum(st->datum1, ssup),
+ level);
+
+ /* save it for the permutation step */
+ st->curbyte = this_partition;
+
+ partitions[this_partition].count++;
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ /* compute partition offsets */
+ for (int i = 0; i < 256; i++)
+ {
+ size_t count = partitions[i].count;
+
+ if (count != 0)
+ {
+ partitions[i].offset = total;
+ total += count;
+ remaining_partitions[num_partitions] = i;
+ num_partitions++;
+ }
+ partitions[i].next_offset = total;
+ }
+
+ /*
+ * Swap tuples to correct partition.
+ *
+ * In traditional American flag sort, a swap sends the current element to
+ * the correct partition, but the array pointer only advances if the
+ * partner of the swap happens to be an element that belongs in the
+ * current partition. That only requires one pass through the array, but
+ * the disadvantage is we don't know if the pointer can advance until the
+ * swap completes. Here lies the most interesting innovation from the
+ * upstream ska_byte_sort: After initiating the swap, we immediately
+ * proceed to the next element. This makes better use of CPU pipelining,
+ * but also means that we will often need multiple iterations of this
+ * loop. ska_byte_sort() maintains a separate list of which partitions
+ * haven't finished, which is updated every loop iteration. Here we simply
+ * check each partition during every iteration.
+ *
+ * If we started with a single partition, there is nothing to do. If a
+ * previous loop iteration results in only one partition that hasn't been
+ * counted as sorted, we know it's actually sorted and can exit the loop.
+ */
+ num_remaining = num_partitions;
+ while (num_remaining > 1)
+ {
+ /* start the count over */
+ num_remaining = num_partitions;
+
+ for (int i = 0; i < num_partitions; i++)
+ {
+ uint8 idx = remaining_partitions[i];
+
+ for (SortTuple *st = begin + partitions[idx].offset;
+ st < begin + partitions[idx].next_offset;
+ st++)
+ {
+ size_t offset = partitions[st->curbyte].offset++;
+ SortTuple tmp;
+
+ /* swap current tuple with destination position */
+ Assert(offset < n_elems);
+ tmp = *st;
+ *st = begin[offset];
+ begin[offset] = tmp;
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ /* Is this partition sorted? */
+ if (partitions[idx].offset == partitions[idx].next_offset)
+ num_remaining--;
+ }
+ }
+
+ /* recurse */
+ for (uint8 *rp = remaining_partitions;
+ rp < remaining_partitions + num_partitions;
+ rp++)
+ {
+ size_t end_offset = partitions[*rp].next_offset;
+ SortTuple *partition_end = begin + end_offset;
+ size_t num_elements = end_offset - start_offset;
+
+ if (num_elements > 1)
+ {
+ if (level < sizeof(Datum) - 1)
+ {
+ if (num_elements < QSORT_THRESHOLD)
+ {
+ qsort_tuple(partition_begin,
+ num_elements,
+ state->base.comparetup,
+ state);
+ }
+ else
+ {
+ radix_sort_recursive(partition_begin,
+ num_elements,
+ level + 1,
+ state);
+ }
+ }
+ else if (state->base.onlyKey == NULL)
+ {
+ /*
+ * We've finished radix sort on all bytes of the pass-by-value
+ * datum (possibly abbreviated); now sort using the tiebreak
+ * comparator.
+ */
+ qsort_tuple(partition_begin,
+ num_elements,
+ state->base.comparetup_tiebreak,
+ state);
+ }
+ }
+
+ start_offset = end_offset;
+ partition_begin = partition_end;
+ }
+}
+
+/*
+ * Entry point for radix_sort_recursive
*
- * Quicksort is used for small in-memory sorts, and external sort runs.
+ * Partition tuples by isnull1, then sort both partitions, using
+ * radix sort on the NOT NULL partition if it's large enough.
+ */
+static void
+radix_sort_tuple(SortTuple *data, size_t n, Tuplesortstate *state)
+{
+ bool nulls_first = state->base.sortKeys[0].ssup_nulls_first;
+ SortTuple *null_start;
+ SortTuple *not_null_start;
+ size_t d1 = 0,
+ d2,
+ null_count,
+ not_null_count;
+
+ /*
+ * Find the first NOT NULL if NULLS FIRST, or first NULL if NULLS LAST.
+ * This also serves as a quick check for the common case where all tuples
+ * are NOT NULL in the first sort key.
+ */
+ while (d1 < n && data[d1].isnull1 == nulls_first)
+ {
+ d1++;
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ /*
+ * If we have more than one tuple left after the quick check, partition
+ * the remainder using branchless cyclic permutation, based on
+ * https://orlp.net/blog/branchless-lomuto-partitioning/
+ */
+ Assert(n > 0);
+ if (d1 < n - 1)
+ {
+ size_t i = d1,
+ j = d1;
+ SortTuple tmp = data[d1]; /* create gap at front */
+
+ while (j < n - 1)
+ {
+ /* gap is at j, move i's element to gap */
+ data[j] = data[i];
+ /* advance j to the first unknown element */
+ j += 1;
+ /* move the first unknown element back to i */
+ data[i] = data[j];
+ /* advance i if this element belongs in the left partition */
+ i += (data[i].isnull1 == nulls_first);
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ /* place gap between left and right partitions */
+ data[j] = data[i];
+ /* restore the saved element */
+ data[i] = tmp;
+ /* assign it to the correct partition */
+ i += (data[i].isnull1 == nulls_first);
+
+ /* d1 is now the number of elements in the left partition */
+ d1 = i;
+ }
+
+ d2 = n - d1;
+
+ /* set pointers and counts for each partition */
+ if (nulls_first)
+ {
+ null_start = data;
+ null_count = d1;
+ not_null_start = data + d1;
+ not_null_count = d2;
+ }
+ else
+ {
+ not_null_start = data;
+ not_null_count = d1;
+ null_start = data + d1;
+ null_count = d2;
+ }
+
+ for (SortTuple *st = null_start;
+ st < null_start + null_count;
+ st++)
+ Assert(st->isnull1 == true);
+ for (SortTuple *st = not_null_start;
+ st < not_null_start + not_null_count;
+ st++)
+ Assert(st->isnull1 == false);
+
+ /*
+ * Sort the NULL partition using the tiebreak comparator, if necessary.
+ */
+ if (state->base.onlyKey == NULL && null_count > 1)
+ {
+ qsort_tuple(null_start,
+ null_count,
+ state->base.comparetup_tiebreak,
+ state);
+ }
+
+ /*
+ * Sort the NOT NULL partition, using radix sort if large enough,
+ * otherwise fall back to quicksort.
+ */
+ if (not_null_count < QSORT_THRESHOLD)
+ {
+ qsort_tuple(not_null_start,
+ not_null_count,
+ state->base.comparetup,
+ state);
+ }
+ else
+ {
+ bool presorted = true;
+
+ for (SortTuple *st = not_null_start + 1;
+ st < not_null_start + not_null_count;
+ st++)
+ {
+ if (COMPARETUP(state, st - 1, st) > 0)
+ {
+ presorted = false;
+ break;
+ }
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ if (presorted)
+ return;
+ else
+ {
+ radix_sort_recursive(not_null_start,
+ not_null_count,
+ 0,
+ state);
+ }
+ }
+}
+
+/* Verify in-memory sort using standard comparator. */
+static void
+verify_memtuples_sorted(Tuplesortstate *state)
+{
+#ifdef USE_ASSERT_CHECKING
+ for (SortTuple *st = state->memtuples + 1;
+ st < state->memtuples + state->memtupcount;
+ st++)
+ Assert(COMPARETUP(state, st - 1, st) <= 0);
+#endif
+}
+
+/*
+ * Sort all memtuples using specialized routines.
+ *
+ * Quicksort or radix sort is used both for fully in-memory sorts
+ * and for sorting individual external sort runs.
*/
static void
tuplesort_sort_memtuples(Tuplesortstate *state)
@@ -2665,30 +2962,22 @@ tuplesort_sort_memtuples(Tuplesortstate *state)
if (state->memtupcount > 1)
{
/*
- * Do we have the leading column's value or abbreviation in datum1,
- * and is there a specialization for its comparator?
+ * Do we have the leading column's value or abbreviation in datum1?
*/
if (state->base.haveDatum1 && state->base.sortKeys)
{
- if (state->base.sortKeys[0].comparator == ssup_datum_unsigned_cmp)
- {
- qsort_tuple_unsigned(state->memtuples,
- state->memtupcount,
- state);
- return;
- }
- else if (state->base.sortKeys[0].comparator == ssup_datum_signed_cmp)
- {
- qsort_tuple_signed(state->memtuples,
- state->memtupcount,
- state);
- return;
- }
- else if (state->base.sortKeys[0].comparator == ssup_datum_int32_cmp)
+ SortSupport ssup = &state->base.sortKeys[0];
+
+ /* Does it compare as an integer? */
+ if (state->memtupcount >= QSORT_THRESHOLD &&
+ (ssup->comparator == ssup_datum_unsigned_cmp ||
+ ssup->comparator == ssup_datum_signed_cmp ||
+ ssup->comparator == ssup_datum_int32_cmp))
{
- qsort_tuple_int32(state->memtuples,
- state->memtupcount,
- state);
+ radix_sort_tuple(state->memtuples,
+ state->memtupcount,
+ state);
+ verify_memtuples_sorted(state);
return;
}
}
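The correctness of the radix approach rests on normalize_datum() producing a monotonic unsigned mapping. That property can be checked in isolation; a small self-contained program (illustrative only, mirroring the signed-key bias used above):

#include <assert.h>
#include <stdint.h>

/* mirror of the signed-key normalization: bias by 2^63 */
static uint64_t
norm_signed(int64_t v)
{
	return (uint64_t) v + (uint64_t) INT64_MAX + 1;
}

int
main(void)
{
	/* unsigned order of the normalized keys matches signed order */
	assert(norm_signed(INT64_MIN) == 0);
	assert(norm_signed(-1) < norm_signed(0));
	assert(norm_signed(INT64_MAX) == UINT64_MAX);
	return 0;
}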
diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index e57fa5bca64..afba82f28a2 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -1024,7 +1024,7 @@ tuplestore_gettuple(Tuplestorestate *state, bool forward,
(errcode_for_file_access(),
errmsg("could not seek in tuplestore temporary file")));
state->status = TSS_READFILE;
- /* FALLTHROUGH */
+ pg_fallthrough;
case TSS_READFILE:
*should_free = true;
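
The stricter fallthrough warning levels recognize only the attribute, not a comment, which is presumably why the annotation becomes a macro here. A plausible definition (assumed for illustration; the macro itself is introduced elsewhere in this patchset) is:

    #if defined(__has_attribute)
    #if __has_attribute(fallthrough)
    #define pg_fallthrough __attribute__((fallthrough))
    #endif
    #endif
    #ifndef pg_fallthrough
    #define pg_fallthrough ((void) 0)
    #endif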
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index a3980e5535f..7c49dd433a7 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1463,9 +1463,6 @@ setup_config(void)
conflines = readfile(hba_file);
- conflines = replace_token(conflines, "@remove-line-for-nolocal@", "");
-
-
/*
* Probe to see if there is really any platform support for IPv6, and
* comment out the relevant pg_hba line if not. This avoids runtime
diff --git a/src/bin/pg_combinebackup/copy_file.c b/src/bin/pg_combinebackup/copy_file.c
index dd3c0dc1c89..0287d6e87df 100644
--- a/src/bin/pg_combinebackup/copy_file.c
+++ b/src/bin/pg_combinebackup/copy_file.c
@@ -210,7 +210,7 @@ copy_file_blocks(const char *src, const char *dst,
}
if (rb < 0)
- pg_fatal("could not read from file \"%s\": %m", dst);
+ pg_fatal("could not read from file \"%s\": %m", src);
pg_free(buffer);
close(src_fd);
diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c
index 918b8b35646..b9f26ce782e 100644
--- a/src/bin/pg_combinebackup/pg_combinebackup.c
+++ b/src/bin/pg_combinebackup/pg_combinebackup.c
@@ -501,7 +501,7 @@ add_tablespace_mapping(cb_options *opt, char *arg)
tsmap->old_dir);
if (!is_absolute_path(tsmap->new_dir))
- pg_fatal("old directory is not an absolute path in tablespace mapping: %s",
+ pg_fatal("new directory is not an absolute path in tablespace mapping: %s",
tsmap->new_dir);
/* Canonicalize paths to avoid spurious failures when comparing. */
diff --git a/src/bin/pg_dump/compress_gzip.c b/src/bin/pg_dump/compress_gzip.c
index 41a3d059f98..c9ce8a53aaa 100644
--- a/src/bin/pg_dump/compress_gzip.c
+++ b/src/bin/pg_dump/compress_gzip.c
@@ -57,8 +57,8 @@ DeflateCompressorInit(CompressorState *cs)
GzipCompressorState *gzipcs;
z_streamp zp;
- gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState));
- zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream));
+ gzipcs = pg_malloc0_object(GzipCompressorState);
+ zp = gzipcs->zp = pg_malloc_object(z_stream);
zp->zalloc = Z_NULL;
zp->zfree = Z_NULL;
zp->opaque = Z_NULL;
@@ -178,7 +178,7 @@ ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs)
char *buf;
size_t buflen;
- zp = (z_streamp) pg_malloc(sizeof(z_stream));
+ zp = pg_malloc_object(z_stream);
zp->zalloc = Z_NULL;
zp->zfree = Z_NULL;
zp->opaque = Z_NULL;
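
The typed allocator macros used throughout the following hunks keep the cast and the sizeof in agreement by construction; as defined in common/fe_memutils.h they are essentially:

    #define pg_malloc_object(type) ((type *) pg_malloc(sizeof(type)))
    #define pg_malloc0_object(type) ((type *) pg_malloc0(sizeof(type)))
    #define pg_malloc_array(type, count) \
        ((type *) pg_malloc(sizeof(type) * (count)))
    #define pg_malloc0_array(type, count) \
        ((type *) pg_malloc0(sizeof(type) * (count)))
    #define pg_realloc_array(pointer, type, count) \
        ((type *) pg_realloc(pointer, sizeof(type) * (count)))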
diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c
index af47ef88839..52652b0d979 100644
--- a/src/bin/pg_dump/compress_io.c
+++ b/src/bin/pg_dump/compress_io.c
@@ -125,7 +125,7 @@ AllocateCompressor(const pg_compress_specification compression_spec,
{
CompressorState *cs;
- cs = (CompressorState *) pg_malloc0(sizeof(CompressorState));
+ cs = pg_malloc0_object(CompressorState);
cs->readF = readF;
cs->writeF = writeF;
@@ -195,7 +195,7 @@ InitCompressFileHandle(const pg_compress_specification compression_spec)
{
CompressFileHandle *CFH;
- CFH = pg_malloc0(sizeof(CompressFileHandle));
+ CFH = pg_malloc0_object(CompressFileHandle);
if (compression_spec.algorithm == PG_COMPRESSION_NONE)
InitCompressFileHandleNone(CFH, compression_spec);
diff --git a/src/bin/pg_dump/compress_lz4.c b/src/bin/pg_dump/compress_lz4.c
index 20a8741d3ca..b72bad130ad 100644
--- a/src/bin/pg_dump/compress_lz4.c
+++ b/src/bin/pg_dump/compress_lz4.c
@@ -305,7 +305,7 @@ InitCompressorLZ4(CompressorState *cs, const pg_compress_specification compressi
if (cs->readF)
return;
- state = pg_malloc0(sizeof(*state));
+ state = pg_malloc0_object(LZ4State);
if (cs->compression_spec.level >= 0)
state->prefs.compressionLevel = cs->compression_spec.level;
@@ -754,7 +754,7 @@ InitCompressFileHandleLZ4(CompressFileHandle *CFH,
CFH->get_error_func = LZ4Stream_get_error;
CFH->compression_spec = compression_spec;
- state = pg_malloc0(sizeof(*state));
+ state = pg_malloc0_object(LZ4State);
if (CFH->compression_spec.level >= 0)
state->prefs.compressionLevel = CFH->compression_spec.level;
diff --git a/src/bin/pg_dump/compress_none.c b/src/bin/pg_dump/compress_none.c
index 9997519e351..d862d8ca6e9 100644
--- a/src/bin/pg_dump/compress_none.c
+++ b/src/bin/pg_dump/compress_none.c
@@ -124,7 +124,7 @@ InitCompressorNone(CompressorState *cs,
{
NoneCompressorState *nonecs;
- nonecs = (NoneCompressorState *) pg_malloc(sizeof(NoneCompressorState));
+ nonecs = pg_malloc_object(NoneCompressorState);
nonecs->buflen = DEFAULT_IO_BUFFER_SIZE;
nonecs->buffer = pg_malloc(nonecs->buflen);
nonecs->bufdata = 0;
diff --git a/src/bin/pg_dump/compress_zstd.c b/src/bin/pg_dump/compress_zstd.c
index 889691aa0c2..cf2db2649ac 100644
--- a/src/bin/pg_dump/compress_zstd.c
+++ b/src/bin/pg_dump/compress_zstd.c
@@ -219,7 +219,7 @@ InitCompressorZstd(CompressorState *cs,
cs->compression_spec = compression_spec;
- zstdcs = (ZstdCompressorState *) pg_malloc0(sizeof(*zstdcs));
+ zstdcs = pg_malloc0_object(ZstdCompressorState);
cs->private_data = zstdcs;
/* We expect that exactly one of readF/writeF is specified */
diff --git a/src/bin/pg_dump/connectdb.c b/src/bin/pg_dump/connectdb.c
index 388d29d0aeb..f3ce8b1cfb1 100644
--- a/src/bin/pg_dump/connectdb.c
+++ b/src/bin/pg_dump/connectdb.c
@@ -89,8 +89,8 @@ ConnectDatabase(const char *dbname, const char *connection_string,
argcount++;
}
- keywords = pg_malloc0((argcount + 1) * sizeof(*keywords));
- values = pg_malloc0((argcount + 1) * sizeof(*values));
+ keywords = pg_malloc0_array(const char *, (argcount + 1));
+ values = pg_malloc0_array(const char *, (argcount + 1));
for (conn_opt = conn_opts; conn_opt->keyword != NULL; conn_opt++)
{
@@ -105,8 +105,8 @@ ConnectDatabase(const char *dbname, const char *connection_string,
}
else
{
- keywords = pg_malloc0((argcount + 1) * sizeof(*keywords));
- values = pg_malloc0((argcount + 1) * sizeof(*values));
+ keywords = pg_malloc0_array(const char *, (argcount + 1));
+ values = pg_malloc0_array(const char *, (argcount + 1));
}
if (pghost)
diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c
index acfa3f22cc8..5bc77fed974 100644
--- a/src/bin/pg_dump/dumputils.c
+++ b/src/bin/pg_dump/dumputils.c
@@ -160,7 +160,7 @@ buildACLCommands(const char *name, const char *subname, const char *nspname,
* Besides, a false mismatch will just cause the output to be a little
* more verbose than it really needed to be.
*/
- grantitems = (char **) pg_malloc(naclitems * sizeof(char *));
+ grantitems = pg_malloc_array(char *, naclitems);
for (i = 0; i < naclitems; i++)
{
bool found = false;
@@ -176,7 +176,7 @@ buildACLCommands(const char *name, const char *subname, const char *nspname,
if (!found)
grantitems[ngrantitems++] = aclitems[i];
}
- revokeitems = (char **) pg_malloc(nbaseitems * sizeof(char *));
+ revokeitems = pg_malloc_array(char *, nbaseitems);
for (i = 0; i < nbaseitems; i++)
{
bool found = false;
@@ -774,8 +774,8 @@ SplitGUCList(char *rawstring, char separator,
* overestimate of the number of pointers we could need. Allow one for
* list terminator.
*/
- *namelist = nextptr = (char **)
- pg_malloc((strlen(rawstring) / 2 + 2) * sizeof(char *));
+ *namelist = nextptr =
+ pg_malloc_array(char *, (strlen(rawstring) / 2 + 2));
*nextptr = NULL;
while (isspace((unsigned char) *nextp))
diff --git a/src/bin/pg_dump/parallel.c b/src/bin/pg_dump/parallel.c
index ddaf08faa30..56cb2c1f32d 100644
--- a/src/bin/pg_dump/parallel.c
+++ b/src/bin/pg_dump/parallel.c
@@ -469,7 +469,7 @@ WaitForTerminatingWorkers(ParallelState *pstate)
}
#else /* WIN32 */
/* On Windows, we must use WaitForMultipleObjects() */
- HANDLE *lpHandles = pg_malloc(sizeof(HANDLE) * pstate->numWorkers);
+ HANDLE *lpHandles = pg_malloc_array(HANDLE, pstate->numWorkers);
int nrun = 0;
DWORD ret;
uintptr_t hThread;
@@ -903,7 +903,7 @@ ParallelBackupStart(ArchiveHandle *AH)
Assert(AH->public.numWorkers > 0);
- pstate = (ParallelState *) pg_malloc(sizeof(ParallelState));
+ pstate = pg_malloc_object(ParallelState);
pstate->numWorkers = AH->public.numWorkers;
pstate->te = NULL;
@@ -913,10 +913,10 @@ ParallelBackupStart(ArchiveHandle *AH)
return pstate;
/* Create status arrays, being sure to initialize all fields to 0 */
- pstate->te = (TocEntry **)
- pg_malloc0(pstate->numWorkers * sizeof(TocEntry *));
- pstate->parallelSlot = (ParallelSlot *)
- pg_malloc0(pstate->numWorkers * sizeof(ParallelSlot));
+ pstate->te =
+ pg_malloc0_array(TocEntry *, pstate->numWorkers);
+ pstate->parallelSlot =
+ pg_malloc0_array(ParallelSlot, pstate->numWorkers);
#ifdef WIN32
/* Make fmtId() and fmtQualifiedId() use thread-local storage */
@@ -969,7 +969,7 @@ ParallelBackupStart(ArchiveHandle *AH)
#ifdef WIN32
/* Create transient structure to pass args to worker function */
- wi = (WorkerInfo *) pg_malloc(sizeof(WorkerInfo));
+ wi = pg_malloc_object(WorkerInfo);
wi->AH = AH;
wi->slot = slot;
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 35d3a07915d..7afcc0859c8 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -134,7 +134,7 @@ static void StrictNamesCheck(RestoreOptions *ropt);
DumpOptions *
NewDumpOptions(void)
{
- DumpOptions *opts = (DumpOptions *) pg_malloc(sizeof(DumpOptions));
+ DumpOptions *opts = pg_malloc_object(DumpOptions);
InitDumpOptions(opts);
return opts;
@@ -1107,7 +1107,7 @@ NewRestoreOptions(void)
{
RestoreOptions *opts;
- opts = (RestoreOptions *) pg_malloc0(sizeof(RestoreOptions));
+ opts = pg_malloc0_object(RestoreOptions);
/* set any fields that shouldn't default to zeroes */
opts->format = archUnknown;
@@ -1244,7 +1244,7 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
ArchiveHandle *AH = (ArchiveHandle *) AHX;
TocEntry *newToc;
- newToc = (TocEntry *) pg_malloc0(sizeof(TocEntry));
+ newToc = pg_malloc0_object(TocEntry);
AH->tocCount++;
if (dumpId > AH->maxDumpId)
@@ -1272,7 +1272,7 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
if (opts->nDeps > 0)
{
- newToc->dependencies = (DumpId *) pg_malloc(opts->nDeps * sizeof(DumpId));
+ newToc->dependencies = pg_malloc_array(DumpId, opts->nDeps);
memcpy(newToc->dependencies, opts->deps, opts->nDeps * sizeof(DumpId));
newToc->nDeps = opts->nDeps;
}
@@ -1575,7 +1575,7 @@ SortTocFromFile(Archive *AHX)
StringInfoData linebuf;
/* Allocate space for the 'wanted' array, and init it */
- ropt->idWanted = (bool *) pg_malloc0(sizeof(bool) * AH->maxDumpId);
+ ropt->idWanted = pg_malloc0_array(bool, AH->maxDumpId);
/* Setup the file */
fh = fopen(ropt->tocFile, PG_BINARY_R);
@@ -1990,8 +1990,8 @@ buildTocEntryArrays(ArchiveHandle *AH)
DumpId maxDumpId = AH->maxDumpId;
TocEntry *te;
- AH->tocsByDumpId = (TocEntry **) pg_malloc0((maxDumpId + 1) * sizeof(TocEntry *));
- AH->tableDataId = (DumpId *) pg_malloc0((maxDumpId + 1) * sizeof(DumpId));
+ AH->tocsByDumpId = pg_malloc0_array(TocEntry *, (maxDumpId + 1));
+ AH->tableDataId = pg_malloc0_array(DumpId, (maxDumpId + 1));
for (te = AH->toc->next; te != AH->toc; te = te->next)
{
@@ -2385,7 +2385,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
pg_log_debug("allocating AH for %s, format %d",
FileSpec ? FileSpec : "(stdio)", fmt);
- AH = (ArchiveHandle *) pg_malloc0(sizeof(ArchiveHandle));
+ AH = pg_malloc0_object(ArchiveHandle);
AH->version = K_VERS_SELF;
@@ -2422,7 +2422,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
AH->currTablespace = NULL; /* ditto */
AH->currTableAm = NULL; /* ditto */
- AH->toc = (TocEntry *) pg_malloc0(sizeof(TocEntry));
+ AH->toc = pg_malloc0_object(TocEntry);
AH->toc->next = AH->toc;
AH->toc->prev = AH->toc;
@@ -2509,7 +2509,7 @@ WriteDataChunks(ArchiveHandle *AH, ParallelState *pstate)
TocEntry **tes;
int ntes;
- tes = (TocEntry **) pg_malloc(AH->tocCount * sizeof(TocEntry *));
+ tes = pg_malloc_array(TocEntry *, AH->tocCount);
ntes = 0;
for (te = AH->toc->next; te != AH->toc; te = te->next)
{
@@ -2720,7 +2720,7 @@ ReadToc(ArchiveHandle *AH)
for (i = 0; i < AH->tocCount; i++)
{
- te = (TocEntry *) pg_malloc0(sizeof(TocEntry));
+ te = pg_malloc0_object(TocEntry);
te->dumpId = ReadInt(AH);
if (te->dumpId > AH->maxDumpId)
@@ -2817,7 +2817,7 @@ ReadToc(ArchiveHandle *AH)
if (AH->version >= K_VERS_1_5)
{
depSize = 100;
- deps = (DumpId *) pg_malloc(sizeof(DumpId) * depSize);
+ deps = pg_malloc_array(DumpId, depSize);
depIdx = 0;
for (;;)
{
@@ -2827,7 +2827,7 @@ ReadToc(ArchiveHandle *AH)
if (depIdx >= depSize)
{
depSize *= 2;
- deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depSize);
+ deps = pg_realloc_array(deps, DumpId, depSize);
}
sscanf(tmp, "%d", &deps[depIdx]);
free(tmp);
@@ -2836,7 +2836,7 @@ ReadToc(ArchiveHandle *AH)
if (depIdx > 0) /* We have a non-null entry */
{
- deps = (DumpId *) pg_realloc(deps, sizeof(DumpId) * depIdx);
+ deps = pg_realloc_array(deps, DumpId, depIdx);
te->dependencies = deps;
te->nDeps = depIdx;
}
@@ -2991,12 +2991,9 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
/*
* For binary upgrade mode, dump pg_largeobject_metadata and the
* associated pg_shdepend rows. This is faster to restore than the
- * equivalent set of large object commands. We can only do this for
- * upgrades from v12 and newer; in older versions, pg_largeobject_metadata
- * was created WITH OIDS, so the OID column is hidden and won't be dumped.
+ * equivalent set of large object commands.
*/
- if (ropt->binary_upgrade && AH->public.remoteVersion >= 120000 &&
- strcmp(te->desc, "TABLE DATA") == 0 &&
+ if (ropt->binary_upgrade && strcmp(te->desc, "TABLE DATA") == 0 &&
(te->catalogId.oid == LargeObjectMetadataRelationId ||
te->catalogId.oid == SharedDependRelationId))
return REQ_DATA;
@@ -4882,7 +4879,7 @@ fix_dependencies(ArchiveHandle *AH)
{
if (strcmp(te2->desc, "BLOBS") == 0)
{
- te->dependencies = (DumpId *) pg_malloc(sizeof(DumpId));
+ te->dependencies = pg_malloc_object(DumpId);
te->dependencies[0] = te2->dumpId;
te->nDeps++;
te->depCount++;
@@ -4925,7 +4922,7 @@ fix_dependencies(ArchiveHandle *AH)
for (te = AH->toc->next; te != AH->toc; te = te->next)
{
if (te->nRevDeps > 0)
- te->revDeps = (DumpId *) pg_malloc(te->nRevDeps * sizeof(DumpId));
+ te->revDeps = pg_malloc_array(DumpId, te->nRevDeps);
te->nRevDeps = 0;
}
@@ -5040,7 +5037,7 @@ identify_locking_dependencies(ArchiveHandle *AH, TocEntry *te)
* difference between a dependency on a table and a dependency on its
* data, so that closer analysis would be needed here.
*/
- lockids = (DumpId *) pg_malloc(te->nDeps * sizeof(DumpId));
+ lockids = pg_malloc_array(DumpId, te->nDeps);
nlockids = 0;
for (i = 0; i < te->nDeps; i++)
{
@@ -5058,7 +5055,7 @@ identify_locking_dependencies(ArchiveHandle *AH, TocEntry *te)
return;
}
- te->lockDeps = pg_realloc(lockids, nlockids * sizeof(DumpId));
+ te->lockDeps = pg_realloc_array(lockids, DumpId, nlockids);
te->nLockDeps = nlockids;
}
@@ -5148,11 +5145,11 @@ CloneArchive(ArchiveHandle *AH)
ArchiveHandle *clone;
/* Make a "flat" copy */
- clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
+ clone = pg_malloc_object(ArchiveHandle);
memcpy(clone, AH, sizeof(ArchiveHandle));
/* Likewise flat-copy the RestoreOptions, so we can alter them locally */
- clone->public.ropt = (RestoreOptions *) pg_malloc(sizeof(RestoreOptions));
+ clone->public.ropt = pg_malloc_object(RestoreOptions);
memcpy(clone->public.ropt, AH->public.ropt, sizeof(RestoreOptions));
/* Handle format-independent fields */
diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c
index 2226520dffc..52990620940 100644
--- a/src/bin/pg_dump/pg_backup_custom.c
+++ b/src/bin/pg_dump/pg_backup_custom.c
@@ -136,7 +136,7 @@ InitArchiveFmt_Custom(ArchiveHandle *AH)
AH->WorkerJobRestorePtr = _WorkerJobRestoreCustom;
/* Set up a private area. */
- ctx = (lclContext *) pg_malloc0(sizeof(lclContext));
+ ctx = pg_malloc0_object(lclContext);
AH->formatData = ctx;
/*
@@ -199,7 +199,7 @@ _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
{
lclTocEntry *ctx;
- ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
+ ctx = pg_malloc0_object(lclTocEntry);
if (te->dataDumper)
ctx->dataState = K_OFFSET_POS_NOT_SET;
else
@@ -240,7 +240,7 @@ _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
if (ctx == NULL)
{
- ctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
+ ctx = pg_malloc0_object(lclTocEntry);
te->formatData = ctx;
}
@@ -893,7 +893,7 @@ _Clone(ArchiveHandle *AH)
/*
* Each thread must have private lclContext working state.
*/
- AH->formatData = (lclContext *) pg_malloc(sizeof(lclContext));
+ AH->formatData = pg_malloc_object(lclContext);
memcpy(AH->formatData, ctx, sizeof(lclContext));
ctx = (lclContext *) AH->formatData;
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index cd4036ead82..d6a1428c67a 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -140,7 +140,7 @@ InitArchiveFmt_Directory(ArchiveHandle *AH)
AH->WorkerJobDumpPtr = _WorkerJobDumpDirectory;
/* Set up our private context */
- ctx = (lclContext *) pg_malloc0(sizeof(lclContext));
+ ctx = pg_malloc0_object(lclContext);
AH->formatData = ctx;
ctx->dataFH = NULL;
@@ -200,7 +200,7 @@ _ArchiveEntry(ArchiveHandle *AH, TocEntry *te)
lclTocEntry *tctx;
char fn[MAXPGPATH];
- tctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
+ tctx = pg_malloc0_object(lclTocEntry);
if (strcmp(te->desc, "BLOBS") == 0)
{
snprintf(fn, MAXPGPATH, "blobs_%d.toc", te->dumpId);
@@ -252,7 +252,7 @@ _ReadExtraToc(ArchiveHandle *AH, TocEntry *te)
if (tctx == NULL)
{
- tctx = (lclTocEntry *) pg_malloc0(sizeof(lclTocEntry));
+ tctx = pg_malloc0_object(lclTocEntry);
te->formatData = tctx;
}
@@ -769,7 +769,7 @@ _Clone(ArchiveHandle *AH)
{
lclContext *ctx = (lclContext *) AH->formatData;
- AH->formatData = (lclContext *) pg_malloc(sizeof(lclContext));
+ AH->formatData = pg_malloc_object(lclContext);
memcpy(AH->formatData, ctx, sizeof(lclContext));
ctx = (lclContext *) AH->formatData;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 2bebefd0ba2..450cec285b3 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -214,12 +214,6 @@ static int nbinaryUpgradeClassOids = 0;
static SequenceItem *sequences = NULL;
static int nsequences = 0;
-/*
- * For binary upgrade, the dump ID of pg_largeobject_metadata is saved for use
- * as a dependency for pg_shdepend and any large object comments/seclabels.
- */
-static DumpId lo_metadata_dumpId;
-
/* Maximum number of relations to fetch in a fetchAttributeStats() call. */
#define MAX_ATTR_STATS_RELS 64
@@ -1121,27 +1115,18 @@ main(int argc, char **argv)
getTableData(&dopt, tblinfo, numTables, RELKIND_SEQUENCE);
/*
- * For binary upgrade mode, dump pg_largeobject_metadata and the
- * associated pg_shdepend rows. This is faster to restore than the
- * equivalent set of large object commands. We can only do this for
- * upgrades from v12 and newer; in older versions, pg_largeobject_metadata
- * was created WITH OIDS, so the OID column is hidden and won't be dumped.
+ * For binary upgrade mode, dump the pg_shdepend rows for large objects
+ * and maybe even pg_largeobject_metadata (see comment below for details).
+ * This is faster to restore than the equivalent set of large object
+ * commands.
*/
- if (dopt.binary_upgrade && fout->remoteVersion >= 120000)
+ if (dopt.binary_upgrade)
{
- TableInfo *lo_metadata = findTableByOid(LargeObjectMetadataRelationId);
- TableInfo *shdepend = findTableByOid(SharedDependRelationId);
+ TableInfo *shdepend;
- makeTableDataInfo(&dopt, lo_metadata);
+ shdepend = findTableByOid(SharedDependRelationId);
makeTableDataInfo(&dopt, shdepend);
- /*
- * Save pg_largeobject_metadata's dump ID for use as a dependency for
- * pg_shdepend and any large object comments/seclabels.
- */
- lo_metadata_dumpId = lo_metadata->dataObj->dobj.dumpId;
- addObjectDependency(&shdepend->dataObj->dobj, lo_metadata_dumpId);
-
/*
* Only dump large object shdepend rows for this database.
*/
@@ -1150,21 +1135,19 @@ main(int argc, char **argv)
" WHERE datname = current_database())";
/*
- * If upgrading from v16 or newer, only dump large objects with
- * comments/seclabels. For these upgrades, pg_upgrade can copy/link
- * pg_largeobject_metadata's files (which is usually faster) but we
- * still need to dump LOs with comments/seclabels here so that the
- * subsequent COMMENT and SECURITY LABEL commands work. pg_upgrade
- * can't copy/link the files from older versions because aclitem
- * (needed by pg_largeobject_metadata.lomacl) changed its storage
- * format in v16.
+ * For binary upgrades from v16 and newer versions, we can copy
+ * pg_largeobject_metadata's files from the old cluster, so we don't
+ * need to dump its contents. pg_upgrade can't copy/link the files
+ * from older versions because aclitem (needed by
+ * pg_largeobject_metadata.lomacl) changed its storage format in v16.
*/
- if (fout->remoteVersion >= 160000)
- lo_metadata->dataObj->filtercond = "WHERE oid IN "
- "(SELECT objoid FROM pg_description "
- "WHERE classoid = " CppAsString2(LargeObjectRelationId) " "
- "UNION SELECT objoid FROM pg_seclabel "
- "WHERE classoid = " CppAsString2(LargeObjectRelationId) ")";
+ if (fout->remoteVersion < 160000)
+ {
+ TableInfo *lo_metadata;
+
+ lo_metadata = findTableByOid(LargeObjectMetadataRelationId);
+ makeTableDataInfo(&dopt, lo_metadata);
+ }
}
/*
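
Combined with the COPY (SELECT ...) special case in dumpTableData_copy() further below, the per-version behavior for pg_largeobject_metadata in binary-upgrade mode comes out as follows (a sketch; the helper name is invented for illustration):

    /*
     * Illustrative summary of this patch's strategy:
     *   old < v12 : catalog was created WITH OIDS, so the OID column is
     *               hidden; dump it via COPY (SELECT ...) TO
     *   v12..v15  : dump the catalog's contents with plain COPY
     *   >= v16    : pg_upgrade copies/links the catalog's files (aclitem's
     *               storage format changed in v16, blocking file transfer
     *               from older versions), so no data dump is needed
     */
    static bool
    lo_metadata_needs_dump(int remoteVersion)
    {
        return remoteVersion < 160000;
    }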
@@ -1556,7 +1539,7 @@ setup_connection(Archive *AH, const char *dumpencoding,
* Initialize prepared-query state to "nothing prepared". We do this here
* so that a parallel dump worker will have its own state.
*/
- AH->is_prepared = (bool *) pg_malloc0(NUM_PREP_QUERIES * sizeof(bool));
+ AH->is_prepared = pg_malloc0_array(bool, NUM_PREP_QUERIES);
/*
* Start transaction-snapshot mode transaction to dump consistent data.
@@ -2421,11 +2404,14 @@ dumpTableData_copy(Archive *fout, const void *dcontext)
column_list = fmtCopyColumnList(tbinfo, clistBuf);
/*
- * Use COPY (SELECT ...) TO when dumping a foreign table's data, and when
- * a filter condition was specified. For other cases a simple COPY
- * suffices.
+ * Use COPY (SELECT ...) TO when dumping a foreign table's data, when a
+ * filter condition was specified, and when in binary upgrade mode and
+ * dumping an old pg_largeobject_metadata defined WITH OIDS. For other
+ * cases a simple COPY suffices.
*/
- if (tdinfo->filtercond || tbinfo->relkind == RELKIND_FOREIGN_TABLE)
+ if (tdinfo->filtercond || tbinfo->relkind == RELKIND_FOREIGN_TABLE ||
+ (fout->dopt->binary_upgrade && fout->remoteVersion < 120000 &&
+ tbinfo->dobj.catId.oid == LargeObjectMetadataRelationId))
{
/* Temporary allows to access to foreign tables to dump data */
if (tbinfo->relkind == RELKIND_FOREIGN_TABLE)
@@ -2584,7 +2570,7 @@ dumpTableData_insert(Archive *fout, const void *dcontext)
* actual column value --- but we can save a few cycles by fetching nulls
* rather than the uninteresting-to-us value.
*/
- attgenerated = (char *) pg_malloc(tbinfo->numatts * sizeof(char));
+ attgenerated = pg_malloc_array(char, tbinfo->numatts);
appendPQExpBufferStr(q, "DECLARE _pg_dump_cursor CURSOR FOR SELECT ");
nfields = 0;
for (i = 0; i < tbinfo->numatts; i++)
@@ -3085,7 +3071,7 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo)
return;
/* OK, let's dump it */
- tdinfo = (TableDataInfo *) pg_malloc(sizeof(TableDataInfo));
+ tdinfo = pg_malloc_object(TableDataInfo);
if (tbinfo->relkind == RELKIND_MATVIEW)
tdinfo->dobj.objType = DO_REFRESH_MATVIEW;
@@ -3979,7 +3965,24 @@ getLOs(Archive *fout)
appendPQExpBufferStr(loQry,
"SELECT oid, lomowner, lomacl, "
"acldefault('L', lomowner) AS acldefault "
- "FROM pg_largeobject_metadata "
+ "FROM pg_largeobject_metadata ");
+
+ /*
+ * For binary upgrades, we transfer pg_largeobject_metadata via COPY or by
+ * copying/linking its files from the old cluster. On such upgrades, we
+ * only need to consider large objects that have comments or security
+ * labels, since we still restore those objects via COMMENT/SECURITY LABEL
+ * commands.
+ */
+ if (dopt->binary_upgrade)
+ appendPQExpBufferStr(loQry,
+ "WHERE oid IN "
+ "(SELECT objoid FROM pg_description "
+ "WHERE classoid = " CppAsString2(LargeObjectRelationId) " "
+ "UNION SELECT objoid FROM pg_seclabel "
+ "WHERE classoid = " CppAsString2(LargeObjectRelationId) ") ");
+
+ appendPQExpBufferStr(loQry,
"ORDER BY lomowner, lomacl::pg_catalog.text, oid");
res = ExecuteSqlQuery(fout, loQry->data, PGRES_TUPLES_OK);
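
With binary_upgrade set, the assembled query therefore reads roughly as below (a sketch with the CppAsString2() macros expanded; 2613 is LargeObjectRelationId, i.e. pg_largeobject):

    static const char *const lo_query_binary_upgrade =
        "SELECT oid, lomowner, lomacl, "
        "acldefault('L', lomowner) AS acldefault "
        "FROM pg_largeobject_metadata "
        "WHERE oid IN "
        "(SELECT objoid FROM pg_description "
        "WHERE classoid = 2613 "
        "UNION SELECT objoid FROM pg_seclabel "
        "WHERE classoid = 2613) "
        "ORDER BY lomowner, lomacl::pg_catalog.text, oid";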
@@ -4060,54 +4063,26 @@ getLOs(Archive *fout)
loinfo->dobj.components |= DUMP_COMPONENT_ACL;
/*
- * In binary-upgrade mode for LOs, we do *not* dump out the LO data,
- * as it will be copied by pg_upgrade, which simply copies the
- * pg_largeobject table. We *do* however dump out anything but the
- * data, as pg_upgrade copies just pg_largeobject, but not
- * pg_largeobject_metadata, after the dump is restored. In versions
- * before v12, this is done via proper large object commands. In
- * newer versions, we dump the content of pg_largeobject_metadata and
- * any associated pg_shdepend rows, which is faster to restore.
- */
- if (dopt->binary_upgrade)
- {
- if (fout->remoteVersion >= 120000)
- {
- /*
- * We should've saved pg_largeobject_metadata's dump ID before
- * this point.
- */
- Assert(lo_metadata_dumpId);
-
- loinfo->dobj.dump &= ~(DUMP_COMPONENT_DATA | DUMP_COMPONENT_ACL | DUMP_COMPONENT_DEFINITION);
-
- /*
- * Mark the large object as dependent on
- * pg_largeobject_metadata so that any large object
- * comments/seclables are dumped after it.
- */
- loinfo->dobj.dependencies = (DumpId *) pg_malloc(sizeof(DumpId));
- loinfo->dobj.dependencies[0] = lo_metadata_dumpId;
- loinfo->dobj.nDeps = loinfo->dobj.allocDeps = 1;
- }
- else
- loinfo->dobj.dump &= ~DUMP_COMPONENT_DATA;
- }
+ * In binary-upgrade mode for LOs, we do *not* dump out the LO data or
+ * the per-object metadata: pg_upgrade copies pg_largeobject wholesale,
+ * and pg_largeobject_metadata is transferred separately (see the query
+ * above). We still dump any comments and security labels.
+ */
+ if (dopt->binary_upgrade)
+ loinfo->dobj.dump &= ~(DUMP_COMPONENT_DATA | DUMP_COMPONENT_ACL | DUMP_COMPONENT_DEFINITION);
/*
* Create a "BLOBS" data item for the group, too. This is just a
* placeholder for sorting; it carries no data now.
*/
- lodata = (DumpableObject *) pg_malloc(sizeof(DumpableObject));
+ lodata = pg_malloc_object(DumpableObject);
lodata->objType = DO_LARGE_OBJECT_DATA;
lodata->catId = nilCatalogId;
AssignDumpId(lodata);
lodata->name = pg_strdup(namebuf);
lodata->components |= DUMP_COMPONENT_DATA;
/* Set up explicit dependency from data to metadata */
- lodata->dependencies = (DumpId *) pg_malloc(sizeof(DumpId));
+ lodata->dependencies = pg_malloc_object(DumpId);
lodata->dependencies[0] = loinfo->dobj.dumpId;
lodata->nDeps = lodata->allocDeps = 1;
}
@@ -4323,7 +4298,7 @@ getPolicies(Archive *fout, TableInfo tblinfo[], int numTables)
* Note: use tableoid 0 so that this object won't be mistaken for
* something that pg_depend entries apply to.
*/
- polinfo = pg_malloc(sizeof(PolicyInfo));
+ polinfo = pg_malloc_object(PolicyInfo);
polinfo->dobj.objType = DO_POLICY;
polinfo->dobj.catId.tableoid = 0;
polinfo->dobj.catId.oid = tbinfo->dobj.catId.oid;
@@ -4379,7 +4354,7 @@ getPolicies(Archive *fout, TableInfo tblinfo[], int numTables)
i_polqual = PQfnumber(res, "polqual");
i_polwithcheck = PQfnumber(res, "polwithcheck");
- polinfo = pg_malloc(ntups * sizeof(PolicyInfo));
+ polinfo = pg_malloc_array(PolicyInfo, ntups);
for (j = 0; j < ntups; j++)
{
@@ -4621,7 +4596,7 @@ getPublications(Archive *fout)
i_pubviaroot = PQfnumber(res, "pubviaroot");
i_pubgencols = PQfnumber(res, "pubgencols");
- pubinfo = pg_malloc(ntups * sizeof(PublicationInfo));
+ pubinfo = pg_malloc_array(PublicationInfo, ntups);
for (i = 0; i < ntups; i++)
{
@@ -4800,7 +4775,7 @@ getPublicationNamespaces(Archive *fout)
i_pnnspid = PQfnumber(res, "pnnspid");
/* this allocation may be more than we need */
- pubsinfo = pg_malloc(ntups * sizeof(PublicationSchemaInfo));
+ pubsinfo = pg_malloc_array(PublicationSchemaInfo, ntups);
j = 0;
for (i = 0; i < ntups; i++)
@@ -4899,7 +4874,7 @@ getPublicationTables(Archive *fout, TableInfo tblinfo[], int numTables)
i_prattrs = PQfnumber(res, "prattrs");
/* this allocation may be more than we need */
- pubrinfo = pg_malloc(ntups * sizeof(PublicationRelInfo));
+ pubrinfo = pg_malloc_array(PublicationRelInfo, ntups);
j = 0;
for (i = 0; i < ntups; i++)
@@ -5137,6 +5112,7 @@ getSubscriptions(Archive *fout)
int i_subconninfo;
int i_subslotname;
int i_subsynccommit;
+ int i_subwalrcvtimeout;
int i_subpublications;
int i_suborigin;
int i_suboriginremotelsn;
@@ -5230,10 +5206,17 @@ getSubscriptions(Archive *fout)
if (fout->remoteVersion >= 190000)
appendPQExpBufferStr(query,
- " s.submaxretention\n");
+ " s.submaxretention,\n");
else
appendPQExpBuffer(query,
- " 0 AS submaxretention\n");
+ " 0 AS submaxretention,\n");
+
+ if (fout->remoteVersion >= 190000)
+ appendPQExpBufferStr(query,
+ " s.subwalrcvtimeout\n");
+ else
+ appendPQExpBufferStr(query,
+ " '-1' AS subwalrcvtimeout\n");
appendPQExpBufferStr(query,
"FROM pg_subscription s\n");
@@ -5272,11 +5255,12 @@ getSubscriptions(Archive *fout)
i_subconninfo = PQfnumber(res, "subconninfo");
i_subslotname = PQfnumber(res, "subslotname");
i_subsynccommit = PQfnumber(res, "subsynccommit");
+ i_subwalrcvtimeout = PQfnumber(res, "subwalrcvtimeout");
i_subpublications = PQfnumber(res, "subpublications");
i_suborigin = PQfnumber(res, "suborigin");
i_suboriginremotelsn = PQfnumber(res, "suboriginremotelsn");
- subinfo = pg_malloc(ntups * sizeof(SubscriptionInfo));
+ subinfo = pg_malloc_array(SubscriptionInfo, ntups);
for (i = 0; i < ntups; i++)
{
@@ -5315,6 +5299,8 @@ getSubscriptions(Archive *fout)
pg_strdup(PQgetvalue(res, i, i_subslotname));
subinfo[i].subsynccommit =
pg_strdup(PQgetvalue(res, i, i_subsynccommit));
+ subinfo[i].subwalrcvtimeout =
+ pg_strdup(PQgetvalue(res, i, i_subwalrcvtimeout));
subinfo[i].subpublications =
pg_strdup(PQgetvalue(res, i, i_subpublications));
subinfo[i].suborigin = pg_strdup(PQgetvalue(res, i, i_suborigin));
@@ -5370,7 +5356,7 @@ getSubscriptionRelations(Archive *fout)
i_srsubstate = PQfnumber(res, "srsubstate");
i_srsublsn = PQfnumber(res, "srsublsn");
- subrinfo = pg_malloc(ntups * sizeof(SubRelInfo));
+ subrinfo = pg_malloc_array(SubRelInfo, ntups);
for (int i = 0; i < ntups; i++)
{
Oid cur_srsubid = atooid(PQgetvalue(res, i, i_srsubid));
@@ -5573,6 +5559,9 @@ dumpSubscription(Archive *fout, const SubscriptionInfo *subinfo)
if (strcmp(subinfo->subsynccommit, "off") != 0)
appendPQExpBuffer(query, ", synchronous_commit = %s", fmtId(subinfo->subsynccommit));
+ if (strcmp(subinfo->subwalrcvtimeout, "-1") != 0)
+ appendPQExpBuffer(query, ", wal_receiver_timeout = %s", fmtId(subinfo->subwalrcvtimeout));
+
if (pg_strcasecmp(subinfo->suborigin, LOGICALREP_ORIGIN_ANY) != 0)
appendPQExpBuffer(query, ", origin = %s", subinfo->suborigin);
@@ -5850,8 +5839,8 @@ collectBinaryUpgradeClassOids(Archive *fout)
res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK);
nbinaryUpgradeClassOids = PQntuples(res);
- binaryUpgradeClassOids = (BinaryUpgradeClassOidItem *)
- pg_malloc(nbinaryUpgradeClassOids * sizeof(BinaryUpgradeClassOidItem));
+ binaryUpgradeClassOids =
+ pg_malloc_array(BinaryUpgradeClassOidItem, nbinaryUpgradeClassOids);
for (int i = 0; i < nbinaryUpgradeClassOids; i++)
{
@@ -6032,7 +6021,7 @@ getNamespaces(Archive *fout)
ntups = PQntuples(res);
- nsinfo = (NamespaceInfo *) pg_malloc(ntups * sizeof(NamespaceInfo));
+ nsinfo = pg_malloc_array(NamespaceInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6164,7 +6153,7 @@ getExtensions(Archive *fout, int *numExtensions)
if (ntups == 0)
goto cleanup;
- extinfo = (ExtensionInfo *) pg_malloc(ntups * sizeof(ExtensionInfo));
+ extinfo = pg_malloc_array(ExtensionInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6263,7 +6252,7 @@ getTypes(Archive *fout)
ntups = PQntuples(res);
- tyinfo = (TypeInfo *) pg_malloc(ntups * sizeof(TypeInfo));
+ tyinfo = pg_malloc_array(TypeInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6349,7 +6338,7 @@ getTypes(Archive *fout)
(tyinfo[i].typtype == TYPTYPE_BASE ||
tyinfo[i].typtype == TYPTYPE_RANGE))
{
- stinfo = (ShellTypeInfo *) pg_malloc(sizeof(ShellTypeInfo));
+ stinfo = pg_malloc_object(ShellTypeInfo);
stinfo->dobj.objType = DO_SHELL_TYPE;
stinfo->dobj.catId = nilCatalogId;
AssignDumpId(&stinfo->dobj);
@@ -6412,7 +6401,7 @@ getOperators(Archive *fout)
ntups = PQntuples(res);
- oprinfo = (OprInfo *) pg_malloc(ntups * sizeof(OprInfo));
+ oprinfo = pg_malloc_array(OprInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6484,7 +6473,7 @@ getCollations(Archive *fout)
ntups = PQntuples(res);
- collinfo = (CollInfo *) pg_malloc(ntups * sizeof(CollInfo));
+ collinfo = pg_malloc_array(CollInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6548,7 +6537,7 @@ getConversions(Archive *fout)
ntups = PQntuples(res);
- convinfo = (ConvInfo *) pg_malloc(ntups * sizeof(ConvInfo));
+ convinfo = pg_malloc_array(ConvInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6621,7 +6610,7 @@ getAccessMethods(Archive *fout)
ntups = PQntuples(res);
- aminfo = (AccessMethodInfo *) pg_malloc(ntups * sizeof(AccessMethodInfo));
+ aminfo = pg_malloc_array(AccessMethodInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6683,7 +6672,7 @@ getOpclasses(Archive *fout)
ntups = PQntuples(res);
- opcinfo = (OpclassInfo *) pg_malloc(ntups * sizeof(OpclassInfo));
+ opcinfo = pg_malloc_array(OpclassInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6748,7 +6737,7 @@ getOpfamilies(Archive *fout)
ntups = PQntuples(res);
- opfinfo = (OpfamilyInfo *) pg_malloc(ntups * sizeof(OpfamilyInfo));
+ opfinfo = pg_malloc_array(OpfamilyInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6866,7 +6855,7 @@ getAggregates(Archive *fout)
ntups = PQntuples(res);
- agginfo = (AggInfo *) pg_malloc(ntups * sizeof(AggInfo));
+ agginfo = pg_malloc_array(AggInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -6899,7 +6888,7 @@ getAggregates(Archive *fout)
agginfo[i].aggfn.argtypes = NULL;
else
{
- agginfo[i].aggfn.argtypes = (Oid *) pg_malloc(agginfo[i].aggfn.nargs * sizeof(Oid));
+ agginfo[i].aggfn.argtypes = pg_malloc_array(Oid, agginfo[i].aggfn.nargs);
parseOidArray(PQgetvalue(res, i, i_proargtypes),
agginfo[i].aggfn.argtypes,
agginfo[i].aggfn.nargs);
@@ -7057,7 +7046,7 @@ getFuncs(Archive *fout)
ntups = PQntuples(res);
- finfo = (FuncInfo *) pg_malloc0(ntups * sizeof(FuncInfo));
+ finfo = pg_malloc0_array(FuncInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -7092,7 +7081,7 @@ getFuncs(Archive *fout)
finfo[i].argtypes = NULL;
else
{
- finfo[i].argtypes = (Oid *) pg_malloc(finfo[i].nargs * sizeof(Oid));
+ finfo[i].argtypes = pg_malloc_array(Oid, finfo[i].nargs);
parseOidArray(PQgetvalue(res, i, i_proargtypes),
finfo[i].argtypes, finfo[i].nargs);
}
@@ -7134,14 +7123,14 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages,
(relkind == RELKIND_MATVIEW ||
relkind == RELKIND_FOREIGN_TABLE))
{
- RelStatsInfo *info = pg_malloc0(sizeof(RelStatsInfo));
+ RelStatsInfo *info = pg_malloc0_object(RelStatsInfo);
DumpableObject *dobj = &info->dobj;
dobj->objType = DO_REL_STATS;
dobj->catId.tableoid = 0;
dobj->catId.oid = 0;
AssignDumpId(dobj);
- dobj->dependencies = (DumpId *) pg_malloc(sizeof(DumpId));
+ dobj->dependencies = pg_malloc_object(DumpId);
dobj->dependencies[0] = rel->dumpId;
dobj->nDeps = 1;
dobj->allocDeps = 1;
@@ -7426,7 +7415,7 @@ getTables(Archive *fout, int *numTables)
* only one, because we don't yet know which tables might be inheritance
* ancestors of the target table.
*/
- tblinfo = (TableInfo *) pg_malloc0(ntups * sizeof(TableInfo));
+ tblinfo = pg_malloc0_array(TableInfo, ntups);
i_reltableoid = PQfnumber(res, "tableoid");
i_reloid = PQfnumber(res, "oid");
@@ -7758,7 +7747,7 @@ getInherits(Archive *fout, int *numInherits)
*numInherits = ntups;
- inhinfo = (InhInfo *) pg_malloc(ntups * sizeof(InhInfo));
+ inhinfo = pg_malloc_array(InhInfo, ntups);
i_inhrelid = PQfnumber(res, "inhrelid");
i_inhparent = PQfnumber(res, "inhparent");
@@ -8070,7 +8059,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
i_indstatcols = PQfnumber(res, "indstatcols");
i_indstatvals = PQfnumber(res, "indstatvals");
- indxinfo = (IndxInfo *) pg_malloc(ntups * sizeof(IndxInfo));
+ indxinfo = pg_malloc_array(IndxInfo, ntups);
/*
* Outer loop iterates once per table, not once per row. Incrementing of
@@ -8136,7 +8125,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
indxinfo[j].indreloptions = pg_strdup(PQgetvalue(res, j, i_indreloptions));
indxinfo[j].indstatcols = pg_strdup(PQgetvalue(res, j, i_indstatcols));
indxinfo[j].indstatvals = pg_strdup(PQgetvalue(res, j, i_indstatvals));
- indxinfo[j].indkeys = (Oid *) pg_malloc(indxinfo[j].indnattrs * sizeof(Oid));
+ indxinfo[j].indkeys = pg_malloc_array(Oid, indxinfo[j].indnattrs);
parseOidArray(PQgetvalue(res, j, i_indkey),
indxinfo[j].indkeys, indxinfo[j].indnattrs);
indxinfo[j].indisclustered = (PQgetvalue(res, j, i_indisclustered)[0] == 't');
@@ -8174,7 +8163,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
*/
ConstraintInfo *constrinfo;
- constrinfo = (ConstraintInfo *) pg_malloc(sizeof(ConstraintInfo));
+ constrinfo = pg_malloc_object(ConstraintInfo);
constrinfo->dobj.objType = DO_CONSTRAINT;
constrinfo->dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_contableoid));
constrinfo->dobj.catId.oid = atooid(PQgetvalue(res, j, i_conoid));
@@ -8265,7 +8254,7 @@ getExtendedStatistics(Archive *fout)
i_stxrelid = PQfnumber(res, "stxrelid");
i_stattarget = PQfnumber(res, "stxstattarget");
- statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo));
+ statsextinfo = pg_malloc_array(StatsExtInfo, ntups);
for (i = 0; i < ntups; i++)
{
@@ -8376,7 +8365,7 @@ getConstraints(Archive *fout, TableInfo tblinfo[], int numTables)
i_conindid = PQfnumber(res, "conindid");
i_condef = PQfnumber(res, "condef");
- constrinfo = (ConstraintInfo *) pg_malloc(ntups * sizeof(ConstraintInfo));
+ constrinfo = pg_malloc_array(ConstraintInfo, ntups);
curtblindx = -1;
for (int j = 0; j < ntups; j++)
@@ -8537,7 +8526,7 @@ getDomainConstraints(Archive *fout, TypeInfo *tyinfo)
i_convalidated = PQfnumber(res, "convalidated");
i_contype = PQfnumber(res, "contype");
- constrinfo = (ConstraintInfo *) pg_malloc(ntups * sizeof(ConstraintInfo));
+ constrinfo = pg_malloc_array(ConstraintInfo, ntups);
tyinfo->domChecks = constrinfo;
/* 'i' tracks result rows; 'j' counts CHECK constraints */
@@ -8625,7 +8614,7 @@ getRules(Archive *fout)
ntups = PQntuples(res);
- ruleinfo = (RuleInfo *) pg_malloc(ntups * sizeof(RuleInfo));
+ ruleinfo = pg_malloc_array(RuleInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -8831,7 +8820,7 @@ getTriggers(Archive *fout, TableInfo tblinfo[], int numTables)
i_tgispartition = PQfnumber(res, "tgispartition");
i_tgdef = PQfnumber(res, "tgdef");
- tginfo = (TriggerInfo *) pg_malloc(ntups * sizeof(TriggerInfo));
+ tginfo = pg_malloc_array(TriggerInfo, ntups);
/*
* Outer loop iterates once per table, not once per row. Incrementing of
@@ -8928,7 +8917,7 @@ getEventTriggers(Archive *fout)
ntups = PQntuples(res);
- evtinfo = (EventTriggerInfo *) pg_malloc(ntups * sizeof(EventTriggerInfo));
+ evtinfo = pg_malloc_array(EventTriggerInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -9002,7 +8991,7 @@ getProcLangs(Archive *fout)
ntups = PQntuples(res);
- planginfo = (ProcLangInfo *) pg_malloc(ntups * sizeof(ProcLangInfo));
+ planginfo = pg_malloc_array(ProcLangInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -9094,7 +9083,7 @@ getCasts(Archive *fout)
ntups = PQntuples(res);
- castinfo = (CastInfo *) pg_malloc(ntups * sizeof(CastInfo));
+ castinfo = pg_malloc_array(CastInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -9193,7 +9182,7 @@ getTransforms(Archive *fout)
ntups = PQntuples(res);
- transforminfo = (TransformInfo *) pg_malloc(ntups * sizeof(TransformInfo));
+ transforminfo = pg_malloc_array(TransformInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -9309,12 +9298,10 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
* pg_shdepend so that the columns names are collected for the
* corresponding COPY commands. Restoring the data for those catalogs
* is faster than restoring the equivalent set of large object
- * commands. We can only do this for upgrades from v12 and newer; in
- * older versions, pg_largeobject_metadata was created WITH OIDS, so
- * the OID column is hidden and won't be dumped.
+ * commands.
*/
if (!tbinfo->interesting &&
- !(fout->dopt->binary_upgrade && fout->remoteVersion >= 120000 &&
+ !(fout->dopt->binary_upgrade &&
(tbinfo->dobj.catId.oid == LargeObjectMetadataRelationId ||
tbinfo->dobj.catId.oid == SharedDependRelationId)))
continue;
@@ -9455,7 +9442,18 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
"(pt.classoid = co.tableoid AND pt.objoid = co.oid)\n");
appendPQExpBufferStr(q,
- "WHERE a.attnum > 0::pg_catalog.int2\n"
+ "WHERE a.attnum > 0::pg_catalog.int2\n");
+
+ /*
+ * For binary upgrades from versions older than v12, also fetch the OID
+ * system column (attnum -2) of pg_largeobject_metadata: the catalog was
+ * created WITH OIDS there, so its OID column is hidden and excluded by
+ * the attnum > 0 condition above.
+ */
+ if (fout->dopt->binary_upgrade && fout->remoteVersion < 120000)
+ appendPQExpBufferStr(q,
+ "OR (a.attnum = -2::pg_catalog.int2 AND src.tbloid = "
+ CppAsString2(LargeObjectMetadataRelationId) ")\n");
+
+ appendPQExpBufferStr(q,
"ORDER BY a.attrelid, a.attnum");
res = ExecuteSqlQuery(fout, q->data, PGRES_TUPLES_OK);
@@ -9523,7 +9521,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
/* cross-check that we only got requested tables */
if (tbinfo->relkind == RELKIND_SEQUENCE ||
(!tbinfo->interesting &&
- !(fout->dopt->binary_upgrade && fout->remoteVersion >= 120000 &&
+ !(fout->dopt->binary_upgrade &&
(tbinfo->dobj.catId.oid == LargeObjectMetadataRelationId ||
tbinfo->dobj.catId.oid == SharedDependRelationId))))
pg_fatal("unexpected column data for table \"%s\"",
@@ -9531,33 +9529,35 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
/* Save data for this table */
tbinfo->numatts = numatts;
- tbinfo->attnames = (char **) pg_malloc(numatts * sizeof(char *));
- tbinfo->atttypnames = (char **) pg_malloc(numatts * sizeof(char *));
- tbinfo->attstattarget = (int *) pg_malloc(numatts * sizeof(int));
- tbinfo->attstorage = (char *) pg_malloc(numatts * sizeof(char));
- tbinfo->typstorage = (char *) pg_malloc(numatts * sizeof(char));
- tbinfo->attidentity = (char *) pg_malloc(numatts * sizeof(char));
- tbinfo->attgenerated = (char *) pg_malloc(numatts * sizeof(char));
- tbinfo->attisdropped = (bool *) pg_malloc(numatts * sizeof(bool));
- tbinfo->attlen = (int *) pg_malloc(numatts * sizeof(int));
- tbinfo->attalign = (char *) pg_malloc(numatts * sizeof(char));
- tbinfo->attislocal = (bool *) pg_malloc(numatts * sizeof(bool));
- tbinfo->attoptions = (char **) pg_malloc(numatts * sizeof(char *));
- tbinfo->attcollation = (Oid *) pg_malloc(numatts * sizeof(Oid));
- tbinfo->attcompression = (char *) pg_malloc(numatts * sizeof(char));
- tbinfo->attfdwoptions = (char **) pg_malloc(numatts * sizeof(char *));
- tbinfo->attmissingval = (char **) pg_malloc(numatts * sizeof(char *));
- tbinfo->notnull_constrs = (char **) pg_malloc(numatts * sizeof(char *));
- tbinfo->notnull_comment = (char **) pg_malloc(numatts * sizeof(char *));
- tbinfo->notnull_invalid = (bool *) pg_malloc(numatts * sizeof(bool));
- tbinfo->notnull_noinh = (bool *) pg_malloc(numatts * sizeof(bool));
- tbinfo->notnull_islocal = (bool *) pg_malloc(numatts * sizeof(bool));
- tbinfo->attrdefs = (AttrDefInfo **) pg_malloc(numatts * sizeof(AttrDefInfo *));
+ tbinfo->attnames = pg_malloc_array(char *, numatts);
+ tbinfo->atttypnames = pg_malloc_array(char *, numatts);
+ tbinfo->attstattarget = pg_malloc_array(int, numatts);
+ tbinfo->attstorage = pg_malloc_array(char, numatts);
+ tbinfo->typstorage = pg_malloc_array(char, numatts);
+ tbinfo->attidentity = pg_malloc_array(char, numatts);
+ tbinfo->attgenerated = pg_malloc_array(char, numatts);
+ tbinfo->attisdropped = pg_malloc_array(bool, numatts);
+ tbinfo->attlen = pg_malloc_array(int, numatts);
+ tbinfo->attalign = pg_malloc_array(char, numatts);
+ tbinfo->attislocal = pg_malloc_array(bool, numatts);
+ tbinfo->attoptions = pg_malloc_array(char *, numatts);
+ tbinfo->attcollation = pg_malloc_array(Oid, numatts);
+ tbinfo->attcompression = pg_malloc_array(char, numatts);
+ tbinfo->attfdwoptions = pg_malloc_array(char *, numatts);
+ tbinfo->attmissingval = pg_malloc_array(char *, numatts);
+ tbinfo->notnull_constrs = pg_malloc_array(char *, numatts);
+ tbinfo->notnull_comment = pg_malloc_array(char *, numatts);
+ tbinfo->notnull_invalid = pg_malloc_array(bool, numatts);
+ tbinfo->notnull_noinh = pg_malloc_array(bool, numatts);
+ tbinfo->notnull_islocal = pg_malloc_array(bool, numatts);
+ tbinfo->attrdefs = pg_malloc_array(AttrDefInfo *, numatts);
hasdefaults = false;
for (int j = 0; j < numatts; j++, r++)
{
- if (j + 1 != atoi(PQgetvalue(res, r, i_attnum)))
+ if (j + 1 != atoi(PQgetvalue(res, r, i_attnum)) &&
+ !(fout->dopt->binary_upgrade && fout->remoteVersion < 120000 &&
+ tbinfo->dobj.catId.oid == LargeObjectMetadataRelationId))
pg_fatal("invalid column numbering in table \"%s\"",
tbinfo->dobj.name);
tbinfo->attnames[j] = pg_strdup(PQgetvalue(res, r, i_attname));
@@ -9637,7 +9637,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
res = ExecuteSqlQuery(fout, q->data, PGRES_TUPLES_OK);
numDefaults = PQntuples(res);
- attrdefs = (AttrDefInfo *) pg_malloc(numDefaults * sizeof(AttrDefInfo));
+ attrdefs = pg_malloc_array(AttrDefInfo, numDefaults);
curtblindx = -1;
for (int j = 0; j < numDefaults; j++)
@@ -9773,7 +9773,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
res = ExecuteSqlQuery(fout, q->data, PGRES_TUPLES_OK);
numConstrs = PQntuples(res);
- constrs = (ConstraintInfo *) pg_malloc(numConstrs * sizeof(ConstraintInfo));
+ constrs = pg_malloc_array(ConstraintInfo, numConstrs);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -9872,7 +9872,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
res = ExecuteSqlQuery(fout, q->data, PGRES_TUPLES_OK);
numConstrs = PQntuples(res);
- constrs = (ConstraintInfo *) pg_malloc(numConstrs * sizeof(ConstraintInfo));
+ constrs = pg_malloc_array(ConstraintInfo, numConstrs);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -10200,7 +10200,7 @@ getTSParsers(Archive *fout)
ntups = PQntuples(res);
- prsinfo = (TSParserInfo *) pg_malloc(ntups * sizeof(TSParserInfo));
+ prsinfo = pg_malloc_array(TSParserInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -10267,7 +10267,7 @@ getTSDictionaries(Archive *fout)
ntups = PQntuples(res);
- dictinfo = (TSDictInfo *) pg_malloc(ntups * sizeof(TSDictInfo));
+ dictinfo = pg_malloc_array(TSDictInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -10331,7 +10331,7 @@ getTSTemplates(Archive *fout)
ntups = PQntuples(res);
- tmplinfo = (TSTemplateInfo *) pg_malloc(ntups * sizeof(TSTemplateInfo));
+ tmplinfo = pg_malloc_array(TSTemplateInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -10390,7 +10390,7 @@ getTSConfigurations(Archive *fout)
ntups = PQntuples(res);
- cfginfo = (TSConfigInfo *) pg_malloc(ntups * sizeof(TSConfigInfo));
+ cfginfo = pg_malloc_array(TSConfigInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -10462,7 +10462,7 @@ getForeignDataWrappers(Archive *fout)
ntups = PQntuples(res);
- fdwinfo = (FdwInfo *) pg_malloc(ntups * sizeof(FdwInfo));
+ fdwinfo = pg_malloc_array(FdwInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -10545,7 +10545,7 @@ getForeignServers(Archive *fout)
ntups = PQntuples(res);
- srvinfo = (ForeignServerInfo *) pg_malloc(ntups * sizeof(ForeignServerInfo));
+ srvinfo = pg_malloc_array(ForeignServerInfo, ntups);
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
@@ -10643,7 +10643,7 @@ getDefaultACLs(Archive *fout)
ntups = PQntuples(res);
- daclinfo = (DefaultACLInfo *) pg_malloc(ntups * sizeof(DefaultACLInfo));
+ daclinfo = pg_malloc_array(DefaultACLInfo, ntups);
i_oid = PQfnumber(res, "oid");
i_tableoid = PQfnumber(res, "tableoid");
@@ -10742,7 +10742,7 @@ collectRoleNames(Archive *fout)
nrolenames = PQntuples(res);
- rolenames = (RoleNameItem *) pg_malloc(nrolenames * sizeof(RoleNameItem));
+ rolenames = pg_malloc_array(RoleNameItem, nrolenames);
for (i = 0; i < nrolenames; i++)
{
@@ -11619,7 +11619,7 @@ collectComments(Archive *fout)
ntups = PQntuples(res);
- comments = (CommentItem *) pg_malloc(ntups * sizeof(CommentItem));
+ comments = pg_malloc_array(CommentItem, ntups);
ncomments = 0;
dobj = NULL;
@@ -13696,7 +13696,7 @@ dumpFunc(Archive *fout, const FuncInfo *finfo)
if (*protrftypes)
{
- Oid *typeids = pg_malloc(FUNC_MAX_ARGS * sizeof(Oid));
+ Oid *typeids = pg_malloc_array(Oid, FUNC_MAX_ARGS);
int i;
appendPQExpBufferStr(q, " TRANSFORM ");
@@ -16823,7 +16823,7 @@ collectSecLabels(Archive *fout)
ntups = PQntuples(res);
- seclabels = (SecLabelItem *) pg_malloc(ntups * sizeof(SecLabelItem));
+ seclabels = pg_malloc_array(SecLabelItem, ntups);
nseclabels = 0;
dobj = NULL;
@@ -19191,7 +19191,7 @@ collectSequences(Archive *fout)
res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK);
nsequences = PQntuples(res);
- sequences = (SequenceItem *) pg_malloc(nsequences * sizeof(SequenceItem));
+ sequences = pg_malloc_array(SequenceItem, nsequences);
for (int i = 0; i < nsequences; i++)
{
@@ -19269,7 +19269,7 @@ dumpSequence(Archive *fout, const TableInfo *tbinfo)
PQntuples(res)),
tbinfo->dobj.name, PQntuples(res));
- seq = pg_malloc0(sizeof(SequenceItem));
+ seq = pg_malloc0_object(SequenceItem);
seq->seqtype = parse_sequence_type(PQgetvalue(res, 0, 0));
seq->startv = strtoi64(PQgetvalue(res, 0, 1), NULL, 10);
seq->incby = strtoi64(PQgetvalue(res, 0, 2), NULL, 10);
@@ -20373,7 +20373,7 @@ createBoundaryObjects(void)
{
DumpableObject *dobjs;
- dobjs = (DumpableObject *) pg_malloc(2 * sizeof(DumpableObject));
+ dobjs = pg_malloc_array(DumpableObject, 2);
dobjs[0].objType = DO_PRE_DATA_BOUNDARY;
dobjs[0].catId = nilCatalogId;
@@ -20547,7 +20547,7 @@ BuildArchiveDependencies(Archive *fout)
continue;
/* Set up work array */
allocDeps = 64;
- dependencies = (DumpId *) pg_malloc(allocDeps * sizeof(DumpId));
+ dependencies = pg_malloc_array(DumpId, allocDeps);
nDeps = 0;
/* Recursively find all dumpable dependencies */
findDumpableDependencies(AH, dobj,
@@ -20555,8 +20555,7 @@ BuildArchiveDependencies(Archive *fout)
/* And save 'em ... */
if (nDeps > 0)
{
- dependencies = (DumpId *) pg_realloc(dependencies,
- nDeps * sizeof(DumpId));
+ dependencies = pg_realloc_array(dependencies, DumpId, nDeps);
te->dependencies = dependencies;
te->nDeps = nDeps;
}
@@ -20590,8 +20589,7 @@ findDumpableDependencies(ArchiveHandle *AH, const DumpableObject *dobj,
if (*nDeps >= *allocDeps)
{
*allocDeps *= 2;
- *dependencies = (DumpId *) pg_realloc(*dependencies,
- *allocDeps * sizeof(DumpId));
+ *dependencies = pg_realloc_array(*dependencies, DumpId, *allocDeps);
}
(*dependencies)[*nDeps] = depid;
(*nDeps)++;
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 4c4b14e5fc7..6deceef23f3 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -722,6 +722,7 @@ typedef struct _SubscriptionInfo
char *subconninfo;
char *subslotname;
char *subsynccommit;
+ char *subwalrcvtimeout;
char *subpublications;
char *suborigin;
char *suboriginremotelsn;
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index 24bed6681de..03e5c1c1116 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -572,7 +572,7 @@ sortDumpableObjects(DumpableObject **objs, int numObjs,
preDataBoundId = preBoundaryId;
postDataBoundId = postBoundaryId;
- ordering = (DumpableObject **) pg_malloc(numObjs * sizeof(DumpableObject *));
+ ordering = pg_malloc_array(DumpableObject *, numObjs);
while (!TopoSort(objs, numObjs, ordering, &nOrdering))
findDependencyLoops(ordering, nOrdering, numObjs);
@@ -651,8 +651,8 @@ TopoSort(DumpableObject **objs,
* We also make a map showing the input-order index of the item with
* dumpId j.
*/
- beforeConstraints = (int *) pg_malloc0((maxDumpId + 1) * sizeof(int));
- idMap = (int *) pg_malloc((maxDumpId + 1) * sizeof(int));
+ beforeConstraints = pg_malloc0_array(int, (maxDumpId + 1));
+ idMap = pg_malloc_array(int, (maxDumpId + 1));
for (i = 0; i < numObjs; i++)
{
obj = objs[i];
@@ -787,9 +787,9 @@ findDependencyLoops(DumpableObject **objs, int nObjs, int totObjs)
bool fixedloop;
int i;
- processed = (bool *) pg_malloc0((getMaxDumpId() + 1) * sizeof(bool));
- searchFailed = (DumpId *) pg_malloc0((getMaxDumpId() + 1) * sizeof(DumpId));
- workspace = (DumpableObject **) pg_malloc(totObjs * sizeof(DumpableObject *));
+ processed = pg_malloc0_array(bool, (getMaxDumpId() + 1));
+ searchFailed = pg_malloc0_array(DumpId, (getMaxDumpId() + 1));
+ workspace = pg_malloc_array(DumpableObject *, totObjs);
fixedloop = false;
for (i = 0; i < nObjs; i++)
diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
index 30fecd0c252..98389d2034c 100644
--- a/src/bin/pg_dump/pg_dumpall.c
+++ b/src/bin/pg_dump/pg_dumpall.c
@@ -1140,7 +1140,7 @@ dumpRoleMembership(PGconn *conn)
}
remaining = end - start;
- done = pg_malloc0(remaining * sizeof(bool));
+ done = pg_malloc0_array(bool, remaining);
ht = rolename_create(remaining, NULL);
/*
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index b2c4b9db395..85dc43d4cdb 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -913,10 +913,10 @@ RewriteControlFile(void)
ControlFile.state = DB_SHUTDOWNED;
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
- ControlFile.minRecoveryPoint = 0;
+ ControlFile.minRecoveryPoint = InvalidXLogRecPtr;
ControlFile.minRecoveryPointTLI = 0;
- ControlFile.backupStartPoint = 0;
- ControlFile.backupEndPoint = 0;
+ ControlFile.backupStartPoint = InvalidXLogRecPtr;
+ ControlFile.backupEndPoint = InvalidXLogRecPtr;
ControlFile.backupEndRequired = false;
/*
@@ -1077,6 +1077,8 @@ KillExistingArchiveStatus(void)
if (closedir(xldir))
pg_fatal("could not close directory \"%s\": %m", ARCHSTATDIR);
+
+#undef ARCHSTATDIR
}
/*
@@ -1111,7 +1113,10 @@ KillExistingWALSummaries(void)
pg_fatal("could not read directory \"%s\": %m", WALSUMMARYDIR);
if (closedir(xldir))
- pg_fatal("could not close directory \"%s\": %m", ARCHSTATDIR);
+ pg_fatal("could not close directory \"%s\": %m", WALSUMMARYDIR);
+
+#undef WALSUMMARY_NHEXCHARS
+#undef WALSUMMARYDIR
}
/*
@@ -1147,7 +1152,7 @@ WriteEmptyXLOG(void)
/* Insert the initial checkpoint record */
recptr = (char *) page + SizeOfXLogLongPHD;
record = (XLogRecord *) recptr;
- record->xl_prev = 0;
+ record->xl_prev = InvalidXLogRecPtr;
record->xl_xid = InvalidTransactionId;
record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
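
The zero literals replaced above are purely a readability change; the named constant is the same value (from access/xlogdefs.h):

    typedef uint64 XLogRecPtr;

    #define InvalidXLogRecPtr   0
    #define XLogRecPtrIsInvalid(r)  ((r) == InvalidXLogRecPtr)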
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 356e23a3080..5cfb676f41f 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -327,7 +327,7 @@ slurpFile(const char *datadir, const char *path, size_t *filesize)
fullpath);
if (fstat(fd, &statbuf) < 0)
- pg_fatal("could not open file \"%s\" for reading: %m",
+ pg_fatal("could not stat file \"%s\": %m",
fullpath);
len = statbuf.st_size;
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 31693843b3c..d0aafd7e7a6 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -377,7 +377,7 @@ main(int argc, char **argv)
{
pg_log_info("source and target cluster are on the same timeline");
rewind_needed = false;
- target_wal_endrec = 0;
+ target_wal_endrec = InvalidXLogRecPtr;
}
else
{
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index a8d20a92a98..5c73773bf0e 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -622,7 +622,7 @@ check_and_dump_old_cluster(void)
{
/*
* Logical replication slots can be migrated since PG17. See comments
- * atop get_old_cluster_logical_slot_infos().
+ * in get_db_rel_and_slot_infos().
*/
check_old_cluster_for_valid_slots();
diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index 47e8d1039a2..ad4b1530e6d 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -29,7 +29,7 @@ static void free_rel_infos(RelInfoArr *rel_arr);
static void print_db_infos(DbInfoArr *db_arr);
static void print_rel_infos(RelInfoArr *rel_arr);
static void print_slot_infos(LogicalSlotInfoArr *slot_arr);
-static char *get_old_cluster_logical_slot_infos_query(void);
+static const char *get_old_cluster_logical_slot_infos_query(ClusterInfo *cluster);
static void process_old_cluster_logical_slot_infos(DbInfo *dbinfo, PGresult *res, void *arg);
@@ -281,7 +281,6 @@ get_db_rel_and_slot_infos(ClusterInfo *cluster)
{
UpgradeTask *task = upgrade_task_create();
char *rel_infos_query = NULL;
- char *logical_slot_infos_query = NULL;
if (cluster->dbarr.dbs != NULL)
free_db_and_rel_infos(&cluster->dbarr);
@@ -306,20 +305,15 @@ get_db_rel_and_slot_infos(ClusterInfo *cluster)
*/
if (cluster == &old_cluster &&
GET_MAJOR_VERSION(cluster->major_version) > 1600)
- {
- logical_slot_infos_query = get_old_cluster_logical_slot_infos_query();
upgrade_task_add_step(task,
- logical_slot_infos_query,
+ get_old_cluster_logical_slot_infos_query(cluster),
process_old_cluster_logical_slot_infos,
true, NULL);
- }
upgrade_task_run(task, cluster);
upgrade_task_free(task);
pg_free(rel_infos_query);
- if (logical_slot_infos_query)
- pg_free(logical_slot_infos_query);
if (cluster == &old_cluster)
pg_log(PG_VERBOSE, "\nsource databases:");
@@ -681,17 +675,15 @@ process_rel_infos(DbInfo *dbinfo, PGresult *res, void *arg)
* get_db_rel_and_slot_infos()'s UpgradeTask. The status of each logical slot
* is checked in check_old_cluster_for_valid_slots().
*/
-static char *
-get_old_cluster_logical_slot_infos_query(void)
+static const char *
+get_old_cluster_logical_slot_infos_query(ClusterInfo *cluster)
{
/*
* Fetch the logical replication slot information. The check whether the
* slot is considered caught up is done by an upgrade function. This
* regards the slot as caught up if we don't find any decodable changes.
- * See binary_upgrade_logical_slot_has_caught_up().
- *
- * Note that we can't ensure whether the slot is caught up during
- * live_check as the new WAL records could be generated.
+ * The implementation of this check varies depending on the server
+ * version.
*
* We intentionally skip checking the WALs for invalidated slots as the
* corresponding WALs could have been removed for such slots.
@@ -701,21 +693,80 @@ get_old_cluster_logical_slot_infos_query(void)
* started and stopped several times causing any temporary slots to be
* removed.
*/
- return psprintf("SELECT slot_name, plugin, two_phase, failover, "
- "%s as caught_up, invalidation_reason IS NOT NULL as invalid "
- "FROM pg_catalog.pg_replication_slots "
- "WHERE slot_type = 'logical' AND "
- "database = current_database() AND "
- "temporary IS FALSE;",
- user_opts.live_check ? "FALSE" :
- "(CASE WHEN invalidation_reason IS NOT NULL THEN FALSE "
- "ELSE (SELECT pg_catalog.binary_upgrade_logical_slot_has_caught_up(slot_name)) "
- "END)");
+
+ if (user_opts.live_check)
+ {
+ /*
+ * We skip the caught-up check during live_check. We cannot verify
+ * whether the slot is caught up in this mode, as new WAL records
+ * could be generated concurrently.
+ */
+ return "SELECT slot_name, plugin, two_phase, failover, "
+ "FALSE as caught_up, "
+ "invalidation_reason IS NOT NULL as invalid "
+ "FROM pg_catalog.pg_replication_slots "
+ "WHERE slot_type = 'logical' AND "
+ "database = current_database() AND "
+ "temporary IS FALSE";
+ }
+ else if (GET_MAJOR_VERSION(cluster->major_version) >= 1900)
+ {
+ /*
+ * For PG19 and later, we optimize the slot caught-up check to avoid
+ * reading the same WAL stream multiple times: execute the caught-up
+ * check only for the slot with the minimum confirmed_flush_lsn, and
+ * apply the same result to all other slots in the same database. This
+ * limits the check to at most one logical slot per database. We also
+ * use the maximum confirmed_flush_lsn among all logical slots on the
+ * database as an early scan cutoff; finding a decodable WAL record
+ * beyond this point implies that no slot has caught up.
+ *
+ * Note that we don't distinguish slots based on their output plugin.
+ * If a plugin applies replication origin filters, we might get a
+ * false positive (i.e., erroneously considering a slot caught up).
+	 * However, such cases are very rare, and the impact of a false
+	 * positive is minimal.
+	 *
+	 * Join check_caught_up with a LEFT JOIN so that invalidated slots are
+	 * still reported even when no valid slot exists to run the check.
+	 */
+ return "WITH check_caught_up AS ( "
+ " SELECT pg_catalog.binary_upgrade_check_logical_slot_pending_wal(slot_name, "
+ " MAX(confirmed_flush_lsn) OVER ()) as last_pending_wal "
+		   " FROM pg_catalog.pg_replication_slots "
+ " WHERE slot_type = 'logical' AND "
+ " database = current_database() AND "
+ " temporary IS FALSE AND "
+ " invalidation_reason IS NULL "
+ " ORDER BY confirmed_flush_lsn ASC "
+ " LIMIT 1 "
+ ") "
+ "SELECT slot_name, plugin, two_phase, failover, "
+ "CASE WHEN invalidation_reason IS NOT NULL THEN FALSE "
+ "ELSE last_pending_wal IS NULL OR "
+ " confirmed_flush_lsn > last_pending_wal "
+ "END as caught_up, "
+ "invalidation_reason IS NOT NULL as invalid "
+		   "FROM pg_catalog.pg_replication_slots LEFT JOIN check_caught_up ON true "
+ "WHERE slot_type = 'logical' AND "
+ "database = current_database() AND "
+ "temporary IS FALSE ";
+ }
+
+ /*
+ * For PG18 and earlier, we call
+ * binary_upgrade_logical_slot_has_caught_up() for each logical slot.
+ */
+ return "SELECT slot_name, plugin, two_phase, failover, "
+ "CASE WHEN invalidation_reason IS NOT NULL THEN FALSE "
+ "ELSE (SELECT pg_catalog.binary_upgrade_logical_slot_has_caught_up(slot_name)) "
+ "END as caught_up, "
+ "invalidation_reason IS NOT NULL as invalid "
+ "FROM pg_catalog.pg_replication_slots "
+ "WHERE slot_type = 'logical' AND "
+ "database = current_database() AND "
+ "temporary IS FALSE ";
}
/*
- * Callback function for processing results of the query returned by
- * get_old_cluster_logical_slot_infos_query(), which is used for
+ * Callback function for processing results of the logical slot query run by
* get_db_rel_and_slot_infos()'s UpgradeTask. This function stores the logical
* slot information for later use.
*/
@@ -768,7 +819,7 @@ process_old_cluster_logical_slot_infos(DbInfo *dbinfo, PGresult *res, void *arg)
*
* Note: this function always returns 0 if the old_cluster is PG16 and prior
* because we gather slot information only for cluster versions greater than or
- * equal to PG17. See get_old_cluster_logical_slot_infos().
+ * equal to PG17. See get_db_rel_and_slot_infos().
*/
int
count_old_cluster_logical_slots(void)
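The new version dispatch relies on pg_upgrade's GET_MAJOR_VERSION convention,
which strips the minor part by dividing the numeric server version by 100:

    #define GET_MAJOR_VERSION(v)	((v) / 100)

    /* a PG19 server (190000) takes the optimized single-check query;
     * a PG17 server (170004) keeps the per-slot function call */
    StaticAssertDecl(GET_MAJOR_VERSION(190000) >= 1900, "PG19 path");
    StaticAssertDecl(GET_MAJOR_VERSION(170004) < 1900, "per-slot path");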
diff --git a/src/bin/pg_upgrade/t/003_logical_slots.pl b/src/bin/pg_upgrade/t/003_logical_slots.pl
index b9abc3a2e21..15e6d267f2f 100644
--- a/src/bin/pg_upgrade/t/003_logical_slots.pl
+++ b/src/bin/pg_upgrade/t/003_logical_slots.pl
@@ -64,6 +64,7 @@
'postgres', qq[
SELECT pg_create_logical_replication_slot('test_slot1', 'test_decoding');
SELECT pg_create_logical_replication_slot('test_slot2', 'test_decoding');
+ SELECT pg_create_logical_replication_slot('test_slot3', 'test_decoding');
]);
$oldpub->stop();
@@ -77,7 +78,7 @@
[@pg_upgrade_cmd],
1,
[
- qr/"max_replication_slots" \(1\) must be greater than or equal to the number of logical replication slots \(2\) on the old cluster/
+ qr/"max_replication_slots" \(1\) must be greater than or equal to the number of logical replication slots \(3\) on the old cluster/
],
[qr//],
'run of pg_upgrade where the new cluster has insufficient "max_replication_slots"'
@@ -85,29 +86,31 @@
ok(-d $newpub->data_dir . "/pg_upgrade_output.d",
"pg_upgrade_output.d/ not removed after pg_upgrade failure");
-# Set 'max_replication_slots' to match the number of slots (2) present on the
+# Set 'max_replication_slots' to match the number of slots (3) present on the
-# old cluster. Both slots will be used for subsequent tests.
+# old cluster. All three slots will be used for subsequent tests.
-$newpub->append_conf('postgresql.conf', "max_replication_slots = 2");
+$newpub->append_conf('postgresql.conf', "max_replication_slots = 3");
# ------------------------------
# TEST: Confirm pg_upgrade fails when the slot still has unconsumed WAL records
# Preparations for the subsequent test:
-# 1. Generate extra WAL records. At this point neither test_slot1 nor
-# test_slot2 has consumed them.
+# 1. Generate extra WAL records. At this point none of the slots has
+#    consumed them.
#
# 2. Advance the slot test_slot2 up to the current WAL location, but test_slot1
# still has unconsumed WAL records.
#
# 3. Emit a non-transactional message. This will cause test_slot2 to detect the
# unconsumed WAL record.
+#
+# 4. Advance the slot test_slot3 up to the current WAL location.
$oldpub->start;
$oldpub->safe_psql(
'postgres', qq[
CREATE TABLE tbl AS SELECT generate_series(1, 10) AS a;
SELECT pg_replication_slot_advance('test_slot2', pg_current_wal_lsn());
- SELECT count(*) FROM pg_logical_emit_message('false', 'prefix', 'This is a non-transactional message');
+ SELECT count(*) FROM pg_logical_emit_message('false', 'prefix', 'This is a non-transactional message', true);
+ SELECT pg_replication_slot_advance('test_slot3', pg_current_wal_lsn());
]);
$oldpub->stop;
@@ -138,8 +141,9 @@
},
$newpub->data_dir . "/pg_upgrade_output.d");
-# Check the file content. Both slots should be reporting that they have
-# unconsumed WAL records.
+# Check the file content. test_slot1 and test_slot2 should both be reported
+# as having unconsumed WAL records, but test_slot3 should not be reported
+# because it has caught up.
like(
slurp_file($slots_filename),
qr/The slot \"test_slot1\" has not consumed the WAL yet/m,
@@ -148,6 +152,10 @@
slurp_file($slots_filename),
qr/The slot \"test_slot2\" has not consumed the WAL yet/m,
'the previous test failed due to unconsumed WALs');
+unlike(
+ slurp_file($slots_filename),
+ qr/test_slot3/m,
+ 'caught-up slot is not reported');
# ------------------------------
@@ -162,6 +170,7 @@
'postgres', qq[
SELECT * FROM pg_drop_replication_slot('test_slot1');
SELECT * FROM pg_drop_replication_slot('test_slot2');
+ SELECT * FROM pg_drop_replication_slot('test_slot3');
CREATE PUBLICATION regress_pub FOR ALL TABLES;
]);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 58735871c17..cb4e986092e 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -3394,7 +3394,7 @@ readCommandResponse(CState *st, MetaCommand meta, char *varprefix)
commandError(st, PQresultErrorMessage(res));
goto error;
}
- /* fall through */
+ pg_fallthrough;
default:
/* anything else is unexpected */
@@ -3607,7 +3607,7 @@ getTransactionStatus(PGconn *con)
/* PQTRANS_UNKNOWN is expected given a broken connection */
if (PQstatus(con) == CONNECTION_BAD)
return TSTATUS_CONN_ERROR;
- /* fall through */
+ pg_fallthrough;
case PQTRANS_ACTIVE:
default:
@@ -5720,7 +5720,7 @@ postprocess_sql_command(Command *my_command)
break;
case QUERY_PREPARED:
my_command->prepname = psprintf("P_%d", prepnum++);
- /* fall through */
+ pg_fallthrough;
case QUERY_EXTENDED:
if (!parseQuery(my_command))
exit(1);
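These pgbench hunks swap the comment-based fall-through convention for the
pg_fallthrough macro that this patch adds to c.h. A self-contained sketch of
the usage pattern (names invented for illustration):

    static int
    classify(int c)
    {
        int     score = 0;

        switch (c)
        {
            case 2:
                score += 10;
                pg_fallthrough;     /* case 2 also accumulates case 1 */
            case 1:
                score += 1;
                break;
            default:
                break;
        }
        return score;
    }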
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 3584c4e1428..571a6a003d5 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -6806,7 +6806,7 @@ describeSubscriptions(const char *pattern, bool verbose)
printQueryOpt myopt = pset.popt;
static const bool translate_columns[] = {false, false, false, false,
false, false, false, false, false, false, false, false, false, false,
- false, false, false, false};
+ false, false, false, false, false};
if (pset.sversion < 100000)
{
@@ -6895,6 +6895,11 @@ describeSubscriptions(const char *pattern, bool verbose)
gettext_noop("Synchronous commit"),
gettext_noop("Conninfo"));
+ if (pset.sversion >= 190000)
+ appendPQExpBuffer(&buf,
+ ", subwalrcvtimeout AS \"%s\"\n",
+ gettext_noop("Receiver timeout"));
+
/* Skip LSN is only supported in v15 and higher */
if (pset.sversion >= 150000)
appendPQExpBuffer(&buf,
diff --git a/src/bin/psql/prompt.c b/src/bin/psql/prompt.c
index 891cd6374f0..9725d53dfe7 100644
--- a/src/bin/psql/prompt.c
+++ b/src/bin/psql/prompt.c
@@ -44,6 +44,8 @@
* or a ! if session is not connected to a database;
* in prompt2 -, *, ', or ";
* in prompt3 nothing
+ * %i - "standby" or "primary" depending on the server's in_hot_standby
+ *		status, "?" if the server does not report it, or empty if not connected
* %x - transaction status: empty, *, !, ? (unknown or no connection)
* %l - The line number inside the current statement, starting from 1.
* %? - the error code of the last query (not yet implemented)
@@ -258,7 +260,23 @@ get_prompt(promptStatus_t status, ConditionalStack cstack)
break;
}
break;
+ case 'i':
+ if (pset.db)
+ {
+				const char *hs = PQparameterStatus(pset.db, "in_hot_standby");
+
+ if (hs)
+ {
+ if (strcmp(hs, "on") == 0)
+ strlcpy(buf, "standby", sizeof(buf));
+ else
+ strlcpy(buf, "primary", sizeof(buf));
+ }
+ /* Use ? for versions that don't report in_hot_standby */
+ else
+ buf[0] = '?';
+ }
+ break;
case 'x':
if (!pset.db)
buf[0] = '?';
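The new %i escape works without any extra server round trip because
in_hot_standby is a reported GUC: libpq caches it and PQparameterStatus()
answers locally. A standalone sketch of the same probe:

    #include <string.h>
    #include <libpq-fe.h>

    static const char *
    server_role(PGconn *conn)
    {
        const char *hs = PQparameterStatus(conn, "in_hot_standby");

        if (hs == NULL)
            return "?";             /* server too old to report it */
        return (strcmp(hs, "on") == 0) ? "standby" : "primary";
    }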
diff --git a/src/bin/psql/t/010_tab_completion.pl b/src/bin/psql/t/010_tab_completion.pl
index 7104aba2394..1d2e5f5b92a 100644
--- a/src/bin/psql/t/010_tab_completion.pl
+++ b/src/bin/psql/t/010_tab_completion.pl
@@ -77,8 +77,10 @@
# for possible debugging purposes.
my $historyfile = "${PostgreSQL::Test::Utils::log_path}/010_psql_history.txt";
-# fire up an interactive psql session
+# fire up an interactive psql session and configure it such that each query
+# restarts the timer
my $h = $node->interactive_psql('postgres', history_file => $historyfile);
+$h->set_query_timer_restart();
# Simple test case: type something and see if psql responds as expected
sub check_completion
@@ -88,9 +90,6 @@ sub check_completion
# report test failures from caller location
local $Test::Builder::Level = $Test::Builder::Level + 1;
- # restart per-command timer
- $h->{timeout}->start($PostgreSQL::Test::Utils::timeout_default);
-
# send the data to be sent and wait for its result
my $out = $h->query_until($pattern, $send);
my $okay = ($out =~ $pattern && !$h->{timeout}->is_expired);
diff --git a/src/bin/psql/t/030_pager.pl b/src/bin/psql/t/030_pager.pl
index cf81fb1603c..d3f964639d3 100644
--- a/src/bin/psql/t/030_pager.pl
+++ b/src/bin/psql/t/030_pager.pl
@@ -40,8 +40,40 @@
$node->init;
$node->start;
-# fire up an interactive psql session
+# create a view we'll use below
+$node->safe_psql(
+ 'postgres', 'create view public.view_030_pager as select
+1 as a,
+2 as b,
+3 as c,
+4 as d,
+5 as e,
+6 as f,
+7 as g,
+8 as h,
+9 as i,
+10 as j,
+11 as k,
+12 as l,
+13 as m,
+14 as n,
+15 as o,
+16 as p,
+17 as q,
+18 as r,
+19 as s,
+20 as t,
+21 as u,
+22 as v,
+23 as w,
+24 as x,
+25 as y,
+26 as z');
+
+# fire up an interactive psql session and configure it such that each query
+# restarts the timer
my $h = $node->interactive_psql('postgres');
+$h->set_query_timer_restart();
# set the pty's window size to known values
# (requires undesirable chumminess with the innards of IPC::Run)
@@ -58,9 +90,6 @@ sub do_command
# report test failures from caller location
local $Test::Builder::Level = $Test::Builder::Level + 1;
- # restart per-command timer
- $h->{timeout}->start($PostgreSQL::Test::Utils::timeout_default);
-
# send the data to be sent and wait for its result
my $out = $h->query_until($pattern, $send);
my $okay = ($out =~ $pattern && !$h->{timeout}->is_expired);
@@ -77,25 +106,28 @@ sub do_command
#
# Note that interactive_psql starts psql with --no-align --tuples-only,
# and that the output string will include psql's prompts and command echo.
+# So we have to test for patterns that can't match the command itself,
+# and we can't assume the match will extend across a whole line (there
+# might be a prompt ahead of it in the output).
do_command(
"SELECT 'test' AS t FROM generate_series(1,23);\n",
- qr/^test\r?$/m,
+ qr/test\r?$/m,
"execute SELECT query that needs no pagination");
do_command(
"SELECT 'test' AS t FROM generate_series(1,24);\n",
- qr/^ *24\r?$/m,
+ qr/24\r?$/m,
"execute SELECT query that needs pagination");
do_command(
"\\pset expanded\nSELECT generate_series(1,20) as g;\n",
- qr/^ *39\r?$/m,
+ qr/39\r?$/m,
"execute SELECT query that needs pagination in expanded mode");
do_command(
- "\\pset tuples_only off\n\\d+ information_schema.referential_constraints\n",
- qr/^ *\d+\r?$/m,
+ "\\pset tuples_only off\n\\d+ public.view_030_pager\n",
+ qr/55\r?$/m,
"execute command with footer that needs pagination");
# send psql an explicit \q to shut it down, else pty won't close properly
diff --git a/src/common/hashfn.c b/src/common/hashfn.c
index 0efe95568c0..c7a0626f96f 100644
--- a/src/common/hashfn.c
+++ b/src/common/hashfn.c
@@ -178,13 +178,13 @@ hash_bytes(const unsigned char *k, int keylen)
{
case 11:
c += ((uint32) k[10] << 8);
- /* fall through */
+ pg_fallthrough;
case 10:
c += ((uint32) k[9] << 16);
- /* fall through */
+ pg_fallthrough;
case 9:
c += ((uint32) k[8] << 24);
- /* fall through */
+ pg_fallthrough;
case 8:
/* the lowest byte of c is reserved for the length */
b += ka[1];
@@ -192,22 +192,22 @@ hash_bytes(const unsigned char *k, int keylen)
break;
case 7:
b += ((uint32) k[6] << 8);
- /* fall through */
+ pg_fallthrough;
case 6:
b += ((uint32) k[5] << 16);
- /* fall through */
+ pg_fallthrough;
case 5:
b += ((uint32) k[4] << 24);
- /* fall through */
+ pg_fallthrough;
case 4:
a += ka[0];
break;
case 3:
a += ((uint32) k[2] << 8);
- /* fall through */
+ pg_fallthrough;
case 2:
a += ((uint32) k[1] << 16);
- /* fall through */
+ pg_fallthrough;
case 1:
a += ((uint32) k[0] << 24);
/* case 0: nothing left to add */
@@ -217,13 +217,13 @@ hash_bytes(const unsigned char *k, int keylen)
{
case 11:
c += ((uint32) k[10] << 24);
- /* fall through */
+ pg_fallthrough;
case 10:
c += ((uint32) k[9] << 16);
- /* fall through */
+ pg_fallthrough;
case 9:
c += ((uint32) k[8] << 8);
- /* fall through */
+ pg_fallthrough;
case 8:
/* the lowest byte of c is reserved for the length */
b += ka[1];
@@ -231,22 +231,22 @@ hash_bytes(const unsigned char *k, int keylen)
break;
case 7:
b += ((uint32) k[6] << 16);
- /* fall through */
+ pg_fallthrough;
case 6:
b += ((uint32) k[5] << 8);
- /* fall through */
+ pg_fallthrough;
case 5:
b += k[4];
- /* fall through */
+ pg_fallthrough;
case 4:
a += ka[0];
break;
case 3:
a += ((uint32) k[2] << 16);
- /* fall through */
+ pg_fallthrough;
case 2:
a += ((uint32) k[1] << 8);
- /* fall through */
+ pg_fallthrough;
case 1:
a += k[0];
/* case 0: nothing left to add */
@@ -280,35 +280,35 @@ hash_bytes(const unsigned char *k, int keylen)
{
case 11:
c += ((uint32) k[10] << 8);
- /* fall through */
+ pg_fallthrough;
case 10:
c += ((uint32) k[9] << 16);
- /* fall through */
+ pg_fallthrough;
case 9:
c += ((uint32) k[8] << 24);
- /* fall through */
+ pg_fallthrough;
case 8:
/* the lowest byte of c is reserved for the length */
b += k[7];
- /* fall through */
+ pg_fallthrough;
case 7:
b += ((uint32) k[6] << 8);
- /* fall through */
+ pg_fallthrough;
case 6:
b += ((uint32) k[5] << 16);
- /* fall through */
+ pg_fallthrough;
case 5:
b += ((uint32) k[4] << 24);
- /* fall through */
+ pg_fallthrough;
case 4:
a += k[3];
- /* fall through */
+ pg_fallthrough;
case 3:
a += ((uint32) k[2] << 8);
- /* fall through */
+ pg_fallthrough;
case 2:
a += ((uint32) k[1] << 16);
- /* fall through */
+ pg_fallthrough;
case 1:
a += ((uint32) k[0] << 24);
/* case 0: nothing left to add */
@@ -318,35 +318,35 @@ hash_bytes(const unsigned char *k, int keylen)
{
case 11:
c += ((uint32) k[10] << 24);
- /* fall through */
+ pg_fallthrough;
case 10:
c += ((uint32) k[9] << 16);
- /* fall through */
+ pg_fallthrough;
case 9:
c += ((uint32) k[8] << 8);
- /* fall through */
+ pg_fallthrough;
case 8:
/* the lowest byte of c is reserved for the length */
b += ((uint32) k[7] << 24);
- /* fall through */
+ pg_fallthrough;
case 7:
b += ((uint32) k[6] << 16);
- /* fall through */
+ pg_fallthrough;
case 6:
b += ((uint32) k[5] << 8);
- /* fall through */
+ pg_fallthrough;
case 5:
b += k[4];
- /* fall through */
+ pg_fallthrough;
case 4:
a += ((uint32) k[3] << 24);
- /* fall through */
+ pg_fallthrough;
case 3:
a += ((uint32) k[2] << 16);
- /* fall through */
+ pg_fallthrough;
case 2:
a += ((uint32) k[1] << 8);
- /* fall through */
+ pg_fallthrough;
case 1:
a += k[0];
/* case 0: nothing left to add */
@@ -417,13 +417,13 @@ hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
{
case 11:
c += ((uint32) k[10] << 8);
- /* fall through */
+ pg_fallthrough;
case 10:
c += ((uint32) k[9] << 16);
- /* fall through */
+ pg_fallthrough;
case 9:
c += ((uint32) k[8] << 24);
- /* fall through */
+ pg_fallthrough;
case 8:
/* the lowest byte of c is reserved for the length */
b += ka[1];
@@ -431,22 +431,22 @@ hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
break;
case 7:
b += ((uint32) k[6] << 8);
- /* fall through */
+ pg_fallthrough;
case 6:
b += ((uint32) k[5] << 16);
- /* fall through */
+ pg_fallthrough;
case 5:
b += ((uint32) k[4] << 24);
- /* fall through */
+ pg_fallthrough;
case 4:
a += ka[0];
break;
case 3:
a += ((uint32) k[2] << 8);
- /* fall through */
+ pg_fallthrough;
case 2:
a += ((uint32) k[1] << 16);
- /* fall through */
+ pg_fallthrough;
case 1:
a += ((uint32) k[0] << 24);
/* case 0: nothing left to add */
@@ -456,13 +456,13 @@ hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
{
case 11:
c += ((uint32) k[10] << 24);
- /* fall through */
+ pg_fallthrough;
case 10:
c += ((uint32) k[9] << 16);
- /* fall through */
+ pg_fallthrough;
case 9:
c += ((uint32) k[8] << 8);
- /* fall through */
+ pg_fallthrough;
case 8:
/* the lowest byte of c is reserved for the length */
b += ka[1];
@@ -470,22 +470,22 @@ hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
break;
case 7:
b += ((uint32) k[6] << 16);
- /* fall through */
+ pg_fallthrough;
case 6:
b += ((uint32) k[5] << 8);
- /* fall through */
+ pg_fallthrough;
case 5:
b += k[4];
- /* fall through */
+ pg_fallthrough;
case 4:
a += ka[0];
break;
case 3:
a += ((uint32) k[2] << 16);
- /* fall through */
+ pg_fallthrough;
case 2:
a += ((uint32) k[1] << 8);
- /* fall through */
+ pg_fallthrough;
case 1:
a += k[0];
/* case 0: nothing left to add */
@@ -519,35 +519,35 @@ hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
{
case 11:
c += ((uint32) k[10] << 8);
- /* fall through */
+ pg_fallthrough;
case 10:
c += ((uint32) k[9] << 16);
- /* fall through */
+ pg_fallthrough;
case 9:
c += ((uint32) k[8] << 24);
- /* fall through */
+ pg_fallthrough;
case 8:
/* the lowest byte of c is reserved for the length */
b += k[7];
- /* fall through */
+ pg_fallthrough;
case 7:
b += ((uint32) k[6] << 8);
- /* fall through */
+ pg_fallthrough;
case 6:
b += ((uint32) k[5] << 16);
- /* fall through */
+ pg_fallthrough;
case 5:
b += ((uint32) k[4] << 24);
- /* fall through */
+ pg_fallthrough;
case 4:
a += k[3];
- /* fall through */
+ pg_fallthrough;
case 3:
a += ((uint32) k[2] << 8);
- /* fall through */
+ pg_fallthrough;
case 2:
a += ((uint32) k[1] << 16);
- /* fall through */
+ pg_fallthrough;
case 1:
a += ((uint32) k[0] << 24);
/* case 0: nothing left to add */
@@ -557,35 +557,35 @@ hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
{
case 11:
c += ((uint32) k[10] << 24);
- /* fall through */
+ pg_fallthrough;
case 10:
c += ((uint32) k[9] << 16);
- /* fall through */
+ pg_fallthrough;
case 9:
c += ((uint32) k[8] << 8);
- /* fall through */
+ pg_fallthrough;
case 8:
/* the lowest byte of c is reserved for the length */
b += ((uint32) k[7] << 24);
- /* fall through */
+ pg_fallthrough;
case 7:
b += ((uint32) k[6] << 16);
- /* fall through */
+ pg_fallthrough;
case 6:
b += ((uint32) k[5] << 8);
- /* fall through */
+ pg_fallthrough;
case 5:
b += k[4];
- /* fall through */
+ pg_fallthrough;
case 4:
a += ((uint32) k[3] << 24);
- /* fall through */
+ pg_fallthrough;
case 3:
a += ((uint32) k[2] << 16);
- /* fall through */
+ pg_fallthrough;
case 2:
a += ((uint32) k[1] << 8);
- /* fall through */
+ pg_fallthrough;
case 1:
a += k[0];
/* case 0: nothing left to add */
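All of these ladders are one idiom: the Jenkins hash folds up to eleven
trailing input bytes into its three mixing words, and each case deliberately
continues into the next smaller count. A reduced sketch with the new macro:

    static uint32
    fold_tail(const unsigned char *k, int len)
    {
        uint32      a = 0;

        switch (len)
        {
            case 3:
                a += ((uint32) k[2] << 16);
                pg_fallthrough;
            case 2:
                a += ((uint32) k[1] << 8);
                pg_fallthrough;
            case 1:
                a += k[0];
                break;
                /* case 0: nothing left to add */
        }
        return a;
    }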
diff --git a/src/common/wchar.c b/src/common/wchar.c
index 5631e2c9363..e7b6595b042 100644
--- a/src/common/wchar.c
+++ b/src/common/wchar.c
@@ -63,6 +63,9 @@
* subset to the ASCII routines to ensure consistency.
*/
+/*
+ * There is no error-reporting facility here, so we silently drop an
+ * incomplete trailing byte sequence.  Note that this must be used directly
+ * inside the conversion loop: the "break" is meant to terminate that loop.
+ */
+#define MB2CHAR_NEED_AT_LEAST(len, need) if ((len) < (need)) break
+
/*
* SQL/ASCII
*/
@@ -108,22 +111,24 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
while (len > 0 && *from)
{
- if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte
- * KANA") */
+ if (*from == SS2) /* JIS X 0201 (so called "1 byte KANA") */
{
+ MB2CHAR_NEED_AT_LEAST(len, 2);
from++;
*to = (SS2 << 8) | *from++;
len -= 2;
}
- else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
+ else if (*from == SS3) /* JIS X 0212 KANJI */
{
+ MB2CHAR_NEED_AT_LEAST(len, 3);
from++;
*to = (SS3 << 16) | (*from++ << 8);
*to |= *from++;
len -= 3;
}
- else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */
+ else if (IS_HIGHBIT_SET(*from)) /* JIS X 0208 KANJI */
{
+ MB2CHAR_NEED_AT_LEAST(len, 2);
*to = *from++ << 8;
*to |= *from++;
len -= 2;
@@ -235,22 +240,25 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
while (len > 0 && *from)
{
- if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
+ if (*from == SS2) /* code set 2 (unused?) */
{
+ MB2CHAR_NEED_AT_LEAST(len, 3);
from++;
*to = (SS2 << 16) | (*from++ << 8);
*to |= *from++;
len -= 3;
}
- else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */
+ else if (*from == SS3) /* code set 3 (unused ?) */
{
+ MB2CHAR_NEED_AT_LEAST(len, 3);
from++;
*to = (SS3 << 16) | (*from++ << 8);
*to |= *from++;
len -= 3;
}
- else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */
+ else if (IS_HIGHBIT_SET(*from)) /* code set 1 */
{
+ MB2CHAR_NEED_AT_LEAST(len, 2);
*to = *from++ << 8;
*to |= *from++;
len -= 2;
@@ -267,12 +275,22 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
return cnt;
}
+/*
+ * mbverifychar does not accept SS2 or SS3 (CS2 and CS3 are not defined for
+ * EUC_CN), but mb2wchar_with_len does. Return lengths that agree with
+ * mb2wchar_with_len, for the benefit of code that relies on agreement
+ * between mb2wchar_with_len and mblen: invalid text datums (e.g., read
+ * from shared catalogs) can reach this code.
+ */
static int
pg_euccn_mblen(const unsigned char *s)
{
int len;
- if (IS_HIGHBIT_SET(*s))
+	if (*s == SS2 || *s == SS3)
+		len = 3;
+ else if (IS_HIGHBIT_SET(*s))
len = 2;
else
len = 1;
@@ -302,23 +320,26 @@ pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
while (len > 0 && *from)
{
- if (*from == SS2 && len >= 4) /* code set 2 */
+ if (*from == SS2) /* code set 2 */
{
+ MB2CHAR_NEED_AT_LEAST(len, 4);
from++;
*to = (((uint32) SS2) << 24) | (*from++ << 16);
*to |= *from++ << 8;
*to |= *from++;
len -= 4;
}
- else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
+ else if (*from == SS3) /* code set 3 (unused?) */
{
+ MB2CHAR_NEED_AT_LEAST(len, 3);
from++;
*to = (SS3 << 16) | (*from++ << 8);
*to |= *from++;
len -= 3;
}
- else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */
+	else if (IS_HIGHBIT_SET(*from))	/* code set 1 */
{
+ MB2CHAR_NEED_AT_LEAST(len, 2);
*to = *from++ << 8;
*to |= *from++;
len -= 2;
@@ -455,8 +476,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
}
else if ((*from & 0xe0) == 0xc0)
{
- if (len < 2)
- break; /* drop trailing incomplete char */
+ MB2CHAR_NEED_AT_LEAST(len, 2);
c1 = *from++ & 0x1f;
c2 = *from++ & 0x3f;
*to = (c1 << 6) | c2;
@@ -464,8 +484,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
}
else if ((*from & 0xf0) == 0xe0)
{
- if (len < 3)
- break; /* drop trailing incomplete char */
+ MB2CHAR_NEED_AT_LEAST(len, 3);
c1 = *from++ & 0x0f;
c2 = *from++ & 0x3f;
c3 = *from++ & 0x3f;
@@ -474,8 +493,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
}
else if ((*from & 0xf8) == 0xf0)
{
- if (len < 4)
- break; /* drop trailing incomplete char */
+ MB2CHAR_NEED_AT_LEAST(len, 4);
c1 = *from++ & 0x07;
c2 = *from++ & 0x3f;
c3 = *from++ & 0x3f;
@@ -677,28 +695,32 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
while (len > 0 && *from)
{
- if (IS_LC1(*from) && len >= 2)
+ if (IS_LC1(*from))
{
+ MB2CHAR_NEED_AT_LEAST(len, 2);
*to = *from++ << 16;
*to |= *from++;
len -= 2;
}
- else if (IS_LCPRV1(*from) && len >= 3)
+ else if (IS_LCPRV1(*from))
{
+ MB2CHAR_NEED_AT_LEAST(len, 3);
from++;
*to = *from++ << 16;
*to |= *from++;
len -= 3;
}
- else if (IS_LC2(*from) && len >= 3)
+ else if (IS_LC2(*from))
{
+ MB2CHAR_NEED_AT_LEAST(len, 3);
*to = *from++ << 16;
*to |= *from++ << 8;
*to |= *from++;
len -= 3;
}
- else if (IS_LCPRV2(*from) && len >= 4)
+ else if (IS_LCPRV2(*from))
{
+ MB2CHAR_NEED_AT_LEAST(len, 4);
from++;
*to = *from++ << 16;
*to |= *from++ << 8;
@@ -1999,12 +2021,12 @@ pg_utf8_islegal(const unsigned char *source, int length)
a = source[3];
if (a < 0x80 || a > 0xBF)
return false;
- /* FALL THRU */
+ pg_fallthrough;
case 3:
a = source[2];
if (a < 0x80 || a > 0xBF)
return false;
- /* FALL THRU */
+ pg_fallthrough;
case 2:
a = source[1];
switch (*source)
@@ -2030,7 +2052,7 @@ pg_utf8_islegal(const unsigned char *source, int length)
return false;
break;
}
- /* FALL THRU */
+ pg_fallthrough;
case 1:
a = *source;
if (a >= 0x80 && a < 0xC2)
@@ -2064,7 +2086,7 @@ pg_encoding_set_invalid(int encoding, char *dst)
const pg_wchar_tbl pg_wchar_table[] = {
[PG_SQL_ASCII] = {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1},
[PG_EUC_JP] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},
- [PG_EUC_CN] = {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2},
+ [PG_EUC_CN] = {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 3},
[PG_EUC_KR] = {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3},
[PG_EUC_TW] = {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4},
[PG_EUC_JIS_2004] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},
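The guard macro works only because its "break" targets the enclosing
conversion loop; it cannot be wrapped in the usual do-while hygiene. A
self-contained sketch of the shape, using a toy two-byte encoding and
invented names:

    #define NEED_AT_LEAST(len, need) if ((len) < (need)) break

    static int
    count_units(const unsigned char *from, int len)
    {
        int         cnt = 0;

        while (len > 0 && *from)
        {
            if (*from & 0x80)           /* two-byte unit in this toy scheme */
            {
                NEED_AT_LEAST(len, 2);  /* incomplete tail ends the loop */
                from += 2;
                len -= 2;
            }
            else
            {
                from++;
                len--;
            }
            cnt++;
        }
        return cnt;
    }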
diff --git a/src/include/Makefile b/src/include/Makefile
index 4ef060e9050..ac673f4cf17 100644
--- a/src/include/Makefile
+++ b/src/include/Makefile
@@ -105,6 +105,7 @@ uninstall:
clean:
rm -f utils/fmgroids.h utils/fmgrprotos.h utils/guc_tables.inc.c utils/errcodes.h utils/header-stamp
+ rm -f utils/pgstat_wait_event.c utils/wait_event_funcs_data.c
rm -f storage/lwlocknames.h utils/probes.h utils/wait_event_types.h
rm -f nodes/nodetags.h nodes/header-stamp
$(MAKE) -C catalog clean
diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h
index 6db3a29191e..fbd98181a3a 100644
--- a/src/include/access/detoast.h
+++ b/src/include/access/detoast.h
@@ -14,7 +14,7 @@
/*
* Macro to fetch the possibly-unaligned contents of an EXTERNAL datum
- * into a local "struct varatt_external" toast pointer. This should be
+ * into a local "varatt_external" toast pointer. This should be
* just a memcpy, but some versions of gcc seem to produce broken code
* that assumes the datum contents are aligned. Introducing an explicit
* intermediate "varattrib_1b_e *" variable seems to fix it.
@@ -41,7 +41,7 @@ do { \
* in compressed format.
* ----------
*/
-extern struct varlena *detoast_external_attr(struct varlena *attr);
+extern varlena *detoast_external_attr(varlena *attr);
/* ----------
* detoast_attr() -
@@ -50,7 +50,7 @@ extern struct varlena *detoast_external_attr(struct varlena *attr);
* it as needed.
* ----------
*/
-extern struct varlena *detoast_attr(struct varlena *attr);
+extern varlena *detoast_attr(varlena *attr);
/* ----------
* detoast_attr_slice() -
@@ -59,9 +59,9 @@ extern struct varlena *detoast_attr(struct varlena *attr);
* (Handles all cases for attribute storage)
* ----------
*/
-extern struct varlena *detoast_attr_slice(struct varlena *attr,
- int32 sliceoffset,
- int32 slicelength);
+extern varlena *detoast_attr_slice(varlena *attr,
+ int32 sliceoffset,
+ int32 slicelength);
/* ----------
* toast_raw_datum_size -
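Dropping the "struct" keyword throughout these headers depends on the c.h
hunk below, which keeps the struct tag and adds a typedef of the same name,
so both spellings denote the same type:

    typedef struct varlena
    {
        char        vl_len_[4];
        char        vl_dat[FLEXIBLE_ARRAY_MEMBER];
    } varlena;

    varlena        *a;
    struct varlena *b = a;      /* old spelling still compiles */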
diff --git a/src/include/access/heaptoast.h b/src/include/access/heaptoast.h
index 21baa0834b7..725c0ce7554 100644
--- a/src/include/access/heaptoast.h
+++ b/src/include/access/heaptoast.h
@@ -144,6 +144,6 @@ extern HeapTuple toast_build_flattened_tuple(TupleDesc tupleDesc,
*/
extern void heap_fetch_toast_slice(Relation toastrel, Oid valueid,
int32 attrsize, int32 sliceoffset,
- int32 slicelength, struct varlena *result);
+ int32 slicelength, varlena *result);
#endif /* HEAPTOAST_H */
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index d406825ff22..75f8b159b8a 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -357,20 +357,6 @@ HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup)
return (tup->t_infomask & HEAP_XMIN_FROZEN) == HEAP_XMIN_FROZEN;
}
-static inline void
-HeapTupleHeaderSetXminCommitted(HeapTupleHeaderData *tup)
-{
- Assert(!HeapTupleHeaderXminInvalid(tup));
- tup->t_infomask |= HEAP_XMIN_COMMITTED;
-}
-
-static inline void
-HeapTupleHeaderSetXminInvalid(HeapTupleHeaderData *tup)
-{
- Assert(!HeapTupleHeaderXminCommitted(tup));
- tup->t_infomask |= HEAP_XMIN_INVALID;
-}
-
static inline void
HeapTupleHeaderSetXminFrozen(HeapTupleHeaderData *tup)
{
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index e2ec5289d4d..119593b7b46 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -750,7 +750,7 @@ typedef struct TableAmRoutine
int32 attrsize,
int32 sliceoffset,
int32 slicelength,
- struct varlena *result);
+ varlena *result);
/* ------------------------------------------------------------------------
@@ -868,6 +868,27 @@ extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);
* ----------------------------------------------------------------------------
*/
+/*
+ * A wrapper around the Table Access Method scan_begin callback, to centralize
+ * error checking. All calls to ->scan_begin() should go through this
+ * function.
+ */
+static inline TableScanDesc
+table_beginscan_common(Relation rel, Snapshot snapshot, int nkeys,
+ ScanKeyData *key, ParallelTableScanDesc pscan,
+ uint32 flags)
+{
+ /*
+ * We don't allow scans to be started while CheckXidAlive is set, except
+ * via systable_beginscan() et al. See detailed comments in xact.c where
+ * these variables are declared.
+ */
+ if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
+ elog(ERROR, "scan started during logical decoding");
+
+ return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, pscan, flags);
+}
+
/*
* Start a scan of `rel`. Returned tuples pass a visibility test of
* `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
@@ -879,7 +900,7 @@ table_beginscan(Relation rel, Snapshot snapshot,
uint32 flags = SO_TYPE_SEQSCAN |
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
- return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+ return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
}
/*
@@ -908,7 +929,7 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
if (allow_sync)
flags |= SO_ALLOW_SYNC;
- return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+ return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
}
/*
@@ -923,8 +944,7 @@ table_beginscan_bm(Relation rel, Snapshot snapshot,
{
uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
- return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
- NULL, flags);
+ return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
}
/*
@@ -949,7 +969,7 @@ table_beginscan_sampling(Relation rel, Snapshot snapshot,
if (allow_pagemode)
flags |= SO_ALLOW_PAGEMODE;
- return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+ return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
}
/*
@@ -962,7 +982,7 @@ table_beginscan_tid(Relation rel, Snapshot snapshot)
{
uint32 flags = SO_TYPE_TIDSCAN;
- return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
+ return table_beginscan_common(rel, snapshot, 0, NULL, NULL, flags);
}
/*
@@ -975,7 +995,7 @@ table_beginscan_analyze(Relation rel)
{
uint32 flags = SO_TYPE_ANALYZE;
- return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
+ return table_beginscan_common(rel, NULL, 0, NULL, NULL, flags);
}
/*
@@ -1025,14 +1045,6 @@ table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableS
Assert(direction == ForwardScanDirection ||
direction == BackwardScanDirection);
- /*
- * We don't expect direct calls to table_scan_getnextslot with valid
- * CheckXidAlive for catalog or regular tables. See detailed comments in
- * xact.c where these variables are declared.
- */
- if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
- elog(ERROR, "unexpected table_scan_getnextslot call during logical decoding");
-
return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
}
@@ -1053,7 +1065,7 @@ table_beginscan_tidrange(Relation rel, Snapshot snapshot,
TableScanDesc sscan;
uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;
- sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
+ sscan = table_beginscan_common(rel, snapshot, 0, NULL, NULL, flags);
/* Set the range of TIDs to scan */
sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
@@ -1166,6 +1178,14 @@ table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
static inline IndexFetchTableData *
table_index_fetch_begin(Relation rel)
{
+ /*
+ * We don't allow scans to be started while CheckXidAlive is set, except
+ * via systable_beginscan() et al. See detailed comments in xact.c where
+ * these variables are declared.
+ */
+ if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
+ elog(ERROR, "scan started during logical decoding");
+
return rel->rd_tableam->index_fetch_begin(rel);
}
@@ -1219,14 +1239,6 @@ table_index_fetch_tuple(struct IndexFetchTableData *scan,
TupleTableSlot *slot,
bool *call_again, bool *all_dead)
{
- /*
- * We don't expect direct calls to table_index_fetch_tuple with valid
- * CheckXidAlive for catalog or regular tables. See detailed comments in
- * xact.c where these variables are declared.
- */
- if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
- elog(ERROR, "unexpected table_index_fetch_tuple call during logical decoding");
-
return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
slot, call_again,
all_dead);
@@ -1491,8 +1503,8 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
* slot - newly constructed tuple data to store
* tmfd - filled in failure cases (see below)
* lockmode - filled with lock mode acquired on tuple
- * update_indexes - in success cases this is set to true if new index entries
- * are required for this tuple
+ * update_indexes - in success cases this is set if new index entries
+ * are required for this tuple; see TU_UpdateIndexes
*
* Normal, successful return value is TM_Ok, which means we did actually
* update it. Failure return codes are TM_SelfModified, TM_Updated, and
@@ -1894,7 +1906,7 @@ table_relation_toast_am(Relation rel)
static inline void
table_relation_fetch_toast_slice(Relation toastrel, Oid valueid,
int32 attrsize, int32 sliceoffset,
- int32 slicelength, struct varlena *result)
+ int32 slicelength, varlena *result)
{
toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
attrsize,
@@ -1947,14 +1959,6 @@ table_scan_bitmap_next_tuple(TableScanDesc scan,
uint64 *lossy_pages,
uint64 *exact_pages)
{
- /*
- * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
- * CheckXidAlive for catalog or regular tables. See detailed comments in
- * xact.c where these variables are declared.
- */
- if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
- elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
-
return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
slot,
recheck,
@@ -1975,13 +1979,6 @@ static inline bool
table_scan_sample_next_block(TableScanDesc scan,
SampleScanState *scanstate)
{
- /*
- * We don't expect direct calls to table_scan_sample_next_block with valid
- * CheckXidAlive for catalog or regular tables. See detailed comments in
- * xact.c where these variables are declared.
- */
- if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
- elog(ERROR, "unexpected table_scan_sample_next_block call during logical decoding");
return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
}
@@ -1998,13 +1995,6 @@ table_scan_sample_next_tuple(TableScanDesc scan,
SampleScanState *scanstate,
TupleTableSlot *slot)
{
- /*
- * We don't expect direct calls to table_scan_sample_next_tuple with valid
- * CheckXidAlive for catalog or regular tables. See detailed comments in
- * xact.c where these variables are declared.
- */
- if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
- elog(ERROR, "unexpected table_scan_sample_next_tuple call during logical decoding");
return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
slot);
}
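The net effect in tableam.h is that the CheckXidAlive test now runs once,
when a scan or index fetch is created, rather than on every tuple; that is
sufficient because every row-returning path must first obtain a descriptor
from one of the begin functions. A sketch of the call shape:

    static void
    scan_all(Relation rel, Snapshot snapshot, TupleTableSlot *slot)
    {
        /* the check fires here, inside table_beginscan_common */
        TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);

        /* ...and is no longer repeated for every tuple */
        while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
            ;                   /* process the tuple */

        table_endscan(scan);
    }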
diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h
index 4b42f7a047f..5f3ffa9ab2d 100644
--- a/src/include/access/toast_compression.h
+++ b/src/include/access/toast_compression.h
@@ -54,19 +54,19 @@ typedef enum ToastCompressionId
/* pglz compression/decompression routines */
-extern struct varlena *pglz_compress_datum(const struct varlena *value);
-extern struct varlena *pglz_decompress_datum(const struct varlena *value);
-extern struct varlena *pglz_decompress_datum_slice(const struct varlena *value,
- int32 slicelength);
+extern varlena *pglz_compress_datum(const varlena *value);
+extern varlena *pglz_decompress_datum(const varlena *value);
+extern varlena *pglz_decompress_datum_slice(const varlena *value,
+ int32 slicelength);
/* lz4 compression/decompression routines */
-extern struct varlena *lz4_compress_datum(const struct varlena *value);
-extern struct varlena *lz4_decompress_datum(const struct varlena *value);
-extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value,
- int32 slicelength);
+extern varlena *lz4_compress_datum(const varlena *value);
+extern varlena *lz4_decompress_datum(const varlena *value);
+extern varlena *lz4_decompress_datum_slice(const varlena *value,
+ int32 slicelength);
/* other stuff */
-extern ToastCompressionId toast_get_compression_id(struct varlena *attr);
+extern ToastCompressionId toast_get_compression_id(varlena *attr);
extern char CompressionNameToMethod(const char *compression);
extern const char *GetCompressionMethodName(char method);
diff --git a/src/include/access/toast_helper.h b/src/include/access/toast_helper.h
index 9bd6bfaffe5..e8ecb995cb3 100644
--- a/src/include/access/toast_helper.h
+++ b/src/include/access/toast_helper.h
@@ -29,7 +29,7 @@
*/
typedef struct
{
- struct varlena *tai_oldexternal;
+ varlena *tai_oldexternal;
int32 tai_size;
uint8 tai_colflags;
char tai_compression;
diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h
index 75690e0bc82..d382db34262 100644
--- a/src/include/access/toast_internals.h
+++ b/src/include/access/toast_internals.h
@@ -50,7 +50,7 @@ extern Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock);
extern void toast_delete_datum(Relation rel, Datum value, bool is_speculative);
extern Datum toast_save_datum(Relation rel, Datum value,
- struct varlena *oldexternal, int options);
+ varlena *oldexternal, int options);
extern int toast_open_indexes(Relation toastrel,
LOCKMODE lock,
diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h
index 3e5530658c9..d64c18b950b 100644
--- a/src/include/access/tupmacs.h
+++ b/src/include/access/tupmacs.h
@@ -71,6 +71,43 @@ fetch_att(const void *T, bool attbyval, int attlen)
}
#endif /* FRONTEND */
+/*
+ * typalign_to_alignby: map a TYPALIGN_xxx value to the numeric alignment
+ * value it represents. (We store TYPALIGN_xxx codes not the real alignment
+ * values mainly so that initial catalog contents can be machine-independent.)
+ */
+static inline uint8
+typalign_to_alignby(char typalign)
+{
+ uint8 alignby;
+
+ switch (typalign)
+ {
+ case TYPALIGN_CHAR:
+ alignby = sizeof(char);
+ break;
+ case TYPALIGN_SHORT:
+ alignby = ALIGNOF_SHORT;
+ break;
+ case TYPALIGN_INT:
+ alignby = ALIGNOF_INT;
+ break;
+ case TYPALIGN_DOUBLE:
+ alignby = ALIGNOF_DOUBLE;
+ break;
+ default:
+#ifndef FRONTEND
+ elog(ERROR, "invalid typalign value: %c", typalign);
+#else
+ fprintf(stderr, "invalid typalign value: %c\n", typalign);
+ exit(1);
+#endif
+			alignby = 0;		/* keep compiler quiet */
+ break;
+ }
+ return alignby;
+}
+
/*
* att_align_datum aligns the given offset as needed for a datum of alignment
* requirement attalign and typlen attlen. attdatum is the Datum variable
@@ -139,19 +176,11 @@ fetch_att(const void *T, bool attbyval, int attlen)
* * within arrays and multiranges, we unconditionally align varlenas (XXX this
* should be revisited, probably).
*
- * The attalign cases are tested in what is hopefully something like their
- * frequency of occurrence.
+ * In performance-critical loops, avoid using this macro; instead use
+ * att_nominal_alignby with a pre-computed alignby value.
*/
#define att_align_nominal(cur_offset, attalign) \
-( \
- ((attalign) == TYPALIGN_INT) ? INTALIGN(cur_offset) : \
- (((attalign) == TYPALIGN_CHAR) ? (uintptr_t) (cur_offset) : \
- (((attalign) == TYPALIGN_DOUBLE) ? DOUBLEALIGN(cur_offset) : \
- ( \
- AssertMacro((attalign) == TYPALIGN_SHORT), \
- SHORTALIGN(cur_offset) \
- ))) \
-)
+ att_nominal_alignby(cur_offset, typalign_to_alignby(attalign))
/*
* Similar to att_align_nominal, but accepts a number of bytes, typically from
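The point of the tupmacs.h rework is to let hot loops do the typalign
mapping once. A sketch, assuming att_nominal_alignby (introduced elsewhere
in this series) rounds an offset up TYPEALIGN-style to a power-of-two
alignment:

    #define att_nominal_alignby(cur_offset, alignby) \
        (((uintptr_t) (cur_offset) + (alignby) - 1) & \
         ~((uintptr_t) ((alignby) - 1)))

    static uintptr_t
    align_run(uintptr_t off, int natts, char attalign)
    {
        uint8       alignby = typalign_to_alignby(attalign);    /* once */

        for (int i = 0; i < natts; i++)
            off = att_nominal_alignby(off, alignby);    /* cheap per element */
        return off;
    }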
diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h
index f896dbe149f..d77b894cb65 100644
--- a/src/include/access/xlogdefs.h
+++ b/src/include/access/xlogdefs.h
@@ -44,7 +44,7 @@ typedef uint64 XLogRecPtr;
* To avoid breaking translatable messages, we're directly applying the
* LSN format instead of using a macro.
*/
-#define LSN_FORMAT_ARGS(lsn) (AssertVariableIsOfTypeMacro((lsn), XLogRecPtr), (uint32) ((lsn) >> 32)), ((uint32) (lsn))
+#define LSN_FORMAT_ARGS(lsn) (StaticAssertVariableIsOfTypeMacro((lsn), XLogRecPtr), (uint32) ((lsn) >> 32)), ((uint32) (lsn))
/*
* XLogSegNo - physical log file sequence number.
diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h
index 51680522afc..21447a3d661 100644
--- a/src/include/bootstrap/bootstrap.h
+++ b/src/include/bootstrap/bootstrap.h
@@ -53,7 +53,8 @@ extern void boot_get_type_io_data(Oid typid,
char *typdelim,
Oid *typioparam,
Oid *typinput,
- Oid *typoutput);
+ Oid *typoutput,
+ Oid *typcollation);
union YYSTYPE;
typedef void *yyscan_t;
diff --git a/src/include/c.h b/src/include/c.h
index 48e4087c09c..7ee4751992f 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -132,6 +132,18 @@
#define pg_attribute_unused()
#endif
+/*
+ * pg_fallthrough indicates that the fall through from the previous case is
+ * intentional.
+ */
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || (defined(__cplusplus) && __cplusplus >= 201703L)
+#define pg_fallthrough [[fallthrough]]
+#elif __has_attribute(fallthrough)
+#define pg_fallthrough __attribute__((fallthrough))
+#else
+#define pg_fallthrough ((void) 0)
+#endif
+
/*
* pg_nodiscard means the compiler should warn if the result of a function
* call is ignored. The name "nodiscard" is chosen in alignment with the C23
@@ -689,7 +701,7 @@ typedef uint64 Oid8;
#define OID8_MAX UINT64_MAX
/* ----------------
- * Variable-length datatypes all share the 'struct varlena' header.
+ * Variable-length datatypes all share the 'varlena' header.
*
* NOTE: for TOASTable types, this is an oversimplification, since the value
* may be compressed or moved out-of-line. However datatype-specific routines
@@ -702,11 +714,11 @@ typedef uint64 Oid8;
* See varatt.h for details of the TOASTed form.
* ----------------
*/
-struct varlena
+typedef struct varlena
{
char vl_len_[4]; /* Do not touch this field directly! */
char vl_dat[FLEXIBLE_ARRAY_MEMBER]; /* Data content is here */
-};
+} varlena;
#define VARHDRSZ ((int32) sizeof(int32))
@@ -715,10 +727,10 @@ struct varlena
* There is no terminating null or anything like that --- the data length is
* always VARSIZE_ANY_EXHDR(ptr).
*/
-typedef struct varlena bytea;
-typedef struct varlena text;
-typedef struct varlena BpChar; /* blank-padded char, ie SQL char(n) */
-typedef struct varlena VarChar; /* var-length char, ie SQL varchar(n) */
+typedef varlena bytea;
+typedef varlena text;
+typedef varlena BpChar; /* blank-padded char, ie SQL char(n) */
+typedef varlena VarChar; /* var-length char, ie SQL varchar(n) */
/*
* Specialized array types. These are physically laid out just the same
@@ -924,25 +936,35 @@ pg_noreturn extern void ExceptionalCondition(const char *conditionName,
*
* If the "condition" (a compile-time-constant expression) evaluates to false,
* throw a compile error using the "errmessage" (a string literal).
- *
+ */
+
+/*
* We require C11 and C++11, so static_assert() is expected to be there.
* StaticAssertDecl() was previously used for portability, but it's now just a
* plain wrapper and doesn't need to be used in new code. static_assert() is
* a "declaration", and so it must be placed where for example a variable
* declaration would be valid. As long as we compile with
* -Wno-declaration-after-statement, that also means it cannot be placed after
- * statements in a function. Macros StaticAssertStmt() and StaticAssertExpr()
- * make it safe to use as a statement or in an expression, respectively.
+ * statements in a function.
+ */
+#define StaticAssertDecl(condition, errmessage) \
+ static_assert(condition, errmessage)
+
+/*
+ * StaticAssertStmt() was previously used to make static assertions work as a
+ * statement, but its use is now deprecated.
+ */
+#define StaticAssertStmt(condition, errmessage) \
+ do { static_assert(condition, errmessage); } while(0)
+
+/*
+ * StaticAssertExpr() is for use in an expression.
*
* For compilers without GCC statement expressions, we fall back on a kluge
* that assumes the compiler will complain about a negative width for a struct
* bit-field. This will not include a helpful error message, but it beats not
* getting an error at all.
*/
-#define StaticAssertDecl(condition, errmessage) \
- static_assert(condition, errmessage)
-#define StaticAssertStmt(condition, errmessage) \
- do { static_assert(condition, errmessage); } while(0)
#ifdef HAVE_STATEMENT_EXPRESSIONS
#define StaticAssertExpr(condition, errmessage) \
((void) ({ static_assert(condition, errmessage); true; }))
@@ -955,26 +977,26 @@ pg_noreturn extern void ExceptionalCondition(const char *conditionName,
/*
* Compile-time checks that a variable (or expression) has the specified type.
*
- * AssertVariableIsOfType() can be used as a statement.
- * AssertVariableIsOfTypeMacro() is intended for use in macros, eg
- * #define foo(x) (AssertVariableIsOfTypeMacro(x, int), bar(x))
+ * StaticAssertVariableIsOfType() can be used as a declaration.
+ * StaticAssertVariableIsOfTypeMacro() is intended for use in macros, eg
+ * #define foo(x) (StaticAssertVariableIsOfTypeMacro(x, int), bar(x))
*
* If we don't have __builtin_types_compatible_p, we can still assert that
* the types have the same size. This is far from ideal (especially on 32-bit
* platforms) but it provides at least some coverage.
*/
#ifdef HAVE__BUILTIN_TYPES_COMPATIBLE_P
-#define AssertVariableIsOfType(varname, typename) \
- StaticAssertStmt(__builtin_types_compatible_p(__typeof__(varname), typename), \
+#define StaticAssertVariableIsOfType(varname, typename) \
+ StaticAssertDecl(__builtin_types_compatible_p(__typeof__(varname), typename), \
CppAsString(varname) " does not have type " CppAsString(typename))
-#define AssertVariableIsOfTypeMacro(varname, typename) \
+#define StaticAssertVariableIsOfTypeMacro(varname, typename) \
(StaticAssertExpr(__builtin_types_compatible_p(__typeof__(varname), typename), \
CppAsString(varname) " does not have type " CppAsString(typename)))
#else /* !HAVE__BUILTIN_TYPES_COMPATIBLE_P */
-#define AssertVariableIsOfType(varname, typename) \
- StaticAssertStmt(sizeof(varname) == sizeof(typename), \
+#define StaticAssertVariableIsOfType(varname, typename) \
+ StaticAssertDecl(sizeof(varname) == sizeof(typename), \
CppAsString(varname) " does not have type " CppAsString(typename))
-#define AssertVariableIsOfTypeMacro(varname, typename) \
+#define StaticAssertVariableIsOfTypeMacro(varname, typename) \
(StaticAssertExpr(sizeof(varname) == sizeof(typename), \
CppAsString(varname) " does not have type " CppAsString(typename)))
#endif /* HAVE__BUILTIN_TYPES_COMPATIBLE_P */
@@ -1140,6 +1162,12 @@ typedef struct PGAlignedXLogBlock
alignas(PG_IO_ALIGN_SIZE) char data[XLOG_BLCKSZ];
} PGAlignedXLogBlock;
+#else /* (g++ < 9) */
+
+/* Allow these types to be used as abstract types when using old g++ */
+typedef struct PGIOAlignedBlock PGIOAlignedBlock;
+typedef struct PGAlignedXLogBlock PGAlignedXLogBlock;
+
#endif /* !(g++ < 9) */
/* msb for char */
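Usage of the renamed type-check macros, mirroring the xlogdefs.h hunk above:
the plain form is a declaration, the Macro form slots into comma expressions
(LSN_HI_PART below is an invented example):

    extern XLogRecPtr flush_position;

    StaticAssertVariableIsOfType(flush_position, XLogRecPtr);

    #define LSN_HI_PART(lsn) \
        (StaticAssertVariableIsOfTypeMacro((lsn), XLogRecPtr), \
         (uint32) ((lsn) >> 32))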
diff --git a/src/include/catalog/Makefile b/src/include/catalog/Makefile
index c90022f7c57..24b527230d4 100644
--- a/src/include/catalog/Makefile
+++ b/src/include/catalog/Makefile
@@ -149,6 +149,7 @@ install: all installdirs
ifeq ($(vpath_build),yes)
$(INSTALL_DATA) schemapg.h '$(DESTDIR)$(includedir_server)'/catalog/schemapg.h
$(INSTALL_DATA) syscache_ids.h '$(DESTDIR)$(includedir_server)'/catalog/syscache_ids.h
+ $(INSTALL_DATA) syscache_info.h '$(DESTDIR)$(includedir_server)'/catalog/syscache_info.h
$(INSTALL_DATA) system_fk_info.h '$(DESTDIR)$(includedir_server)'/catalog/system_fk_info.h
for file in $(GENERATED_HEADERS); do \
$(INSTALL_DATA) $$file '$(DESTDIR)$(includedir_server)'/catalog/$$file || exit; \
@@ -160,7 +161,7 @@ installdirs:
uninstall:
rm -f $(addprefix '$(DESTDIR)$(datadir)'/, postgres.bki system_constraints.sql)
- rm -f $(addprefix '$(DESTDIR)$(includedir_server)'/catalog/, schemapg.h syscache_ids.h system_fk_info.h $(GENERATED_HEADERS))
+ rm -f $(addprefix '$(DESTDIR)$(includedir_server)'/catalog/, schemapg.h syscache_ids.h syscache_info.h system_fk_info.h $(GENERATED_HEADERS))
clean:
rm -f bki-stamp $(GENBKI_OUTPUT_FILES)
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index fb577026666..7670eb226f0 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202601261
+#define CATALOG_VERSION_NO 202602201
#endif
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index 969fd8b23f9..2f3c1eae3c7 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -186,6 +186,8 @@ extern long changeDependenciesOn(Oid refClassId, Oid oldRefObjectId,
extern Oid getExtensionOfObject(Oid classId, Oid objectId);
extern List *getAutoExtensionsOfObject(Oid classId, Oid objectId);
+extern Oid getExtensionType(Oid extensionOid, const char *typname);
+
extern bool sequenceIsOwned(Oid seqId, char deptype, Oid *tableId, int32 *colId);
extern List *getOwnedSequences(Oid relid);
extern Oid getIdentitySequence(Relation rel, AttrNumber attnum, bool missing_ok);
diff --git a/src/include/catalog/meson.build b/src/include/catalog/meson.build
index b63cd584068..433bcc908ad 100644
--- a/src/include/catalog/meson.build
+++ b/src/include/catalog/meson.build
@@ -115,7 +115,7 @@ output_install = [
dir_data,
dir_include_server / 'catalog',
dir_include_server / 'catalog',
- false,
+ dir_include_server / 'catalog',
dir_include_server / 'catalog',
]
diff --git a/src/include/catalog/objectaddress.h b/src/include/catalog/objectaddress.h
index e2fe9db1161..b549be2d523 100644
--- a/src/include/catalog/objectaddress.h
+++ b/src/include/catalog/objectaddress.h
@@ -17,6 +17,7 @@
#include "nodes/parsenodes.h"
#include "storage/lockdefs.h"
#include "utils/relcache.h"
+#include "utils/syscache.h"
/*
* An ObjectAddress represents a database object of any type.
@@ -57,8 +58,8 @@ extern Oid get_object_namespace(const ObjectAddress *address);
extern bool is_objectclass_supported(Oid class_id);
extern const char *get_object_class_descr(Oid class_id);
extern Oid get_object_oid_index(Oid class_id);
-extern int get_object_catcache_oid(Oid class_id);
-extern int get_object_catcache_name(Oid class_id);
+extern SysCacheIdentifier get_object_catcache_oid(Oid class_id);
+extern SysCacheIdentifier get_object_catcache_name(Oid class_id);
extern AttrNumber get_object_attnum_oid(Oid class_id);
extern AttrNumber get_object_attnum_name(Oid class_id);
extern AttrNumber get_object_attnum_namespace(Oid class_id);
diff --git a/src/include/catalog/pg_constraint.h b/src/include/catalog/pg_constraint.h
index 05933cd9741..d5661b5bdff 100644
--- a/src/include/catalog/pg_constraint.h
+++ b/src/include/catalog/pg_constraint.h
@@ -263,7 +263,7 @@ extern HeapTuple findNotNullConstraintAttnum(Oid relid, AttrNumber attnum);
extern HeapTuple findNotNullConstraint(Oid relid, const char *colname);
extern HeapTuple findDomainNotNullConstraint(Oid typid);
extern AttrNumber extractNotNullColumn(HeapTuple constrTup);
-extern bool AdjustNotNullInheritance(Oid relid, AttrNumber attnum,
+extern bool AdjustNotNullInheritance(Oid relid, AttrNumber attnum, const char *new_conname,
bool is_local, bool is_no_inherit, bool is_notvalid);
extern List *RelationGetNotNullConstraints(Oid relid, bool cooked,
bool include_noinh);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 5e5e33f64fc..dac40992cbc 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3499,6 +3499,7 @@
{ oid => '6212', descr => 'random value from normal distribution',
proname => 'random_normal', provolatile => 'v', proparallel => 'r',
prorettype => 'float8', proargtypes => 'float8 float8',
+ proargnames => '{mean,stddev}', proargdefaults => '{0,1}',
prosrc => 'drandom_normal' },
{ oid => '6339', descr => 'random integer in range',
proname => 'random', provolatile => 'v', proparallel => 'r',
@@ -6174,6 +6175,7 @@
descr => 'statistics: reset collected statistics shared across the cluster',
proname => 'pg_stat_reset_shared', proisstrict => 'f', provolatile => 'v',
prorettype => 'void', proargtypes => 'text',
+ proargnames => '{target}', proargdefaults => '{NULL}',
prosrc => 'pg_stat_reset_shared' },
{ oid => '3776',
descr => 'statistics: reset collected statistics for a single table or index in the current database or shared across all databases in the cluster',
@@ -6193,6 +6195,7 @@
descr => 'statistics: reset collected statistics for a single SLRU',
proname => 'pg_stat_reset_slru', proisstrict => 'f', provolatile => 'v',
prorettype => 'void', proargtypes => 'text', proargnames => '{target}',
+ proargdefaults => '{NULL}',
prosrc => 'pg_stat_reset_slru' },
{ oid => '6170',
descr => 'statistics: reset collected statistics for a single replication slot',
@@ -6728,20 +6731,24 @@
{ oid => '2096', descr => 'terminate a server process',
proname => 'pg_terminate_backend', provolatile => 'v', prorettype => 'bool',
proargtypes => 'int4 int8', proargnames => '{pid,timeout}',
+ proargdefaults => '{0}',
prosrc => 'pg_terminate_backend' },
{ oid => '2172', descr => 'prepare for taking an online backup',
proname => 'pg_backup_start', provolatile => 'v', proparallel => 'r',
prorettype => 'pg_lsn', proargtypes => 'text bool',
+ proargnames => '{label,fast}', proargdefaults => '{false}',
prosrc => 'pg_backup_start' },
{ oid => '2739', descr => 'finish taking an online backup',
proname => 'pg_backup_stop', provolatile => 'v', proparallel => 'r',
prorettype => 'record', proargtypes => 'bool',
proallargtypes => '{bool,pg_lsn,text,text}', proargmodes => '{i,o,o,o}',
proargnames => '{wait_for_archive,lsn,labelfile,spcmapfile}',
+ proargdefaults => '{true}',
prosrc => 'pg_backup_stop' },
{ oid => '3436', descr => 'promote standby server',
proname => 'pg_promote', provolatile => 'v', prorettype => 'bool',
proargtypes => 'bool int4', proargnames => '{wait,wait_seconds}',
+ proargdefaults => '{true,60}',
prosrc => 'pg_promote' },
{ oid => '2848', descr => 'switch to new wal file',
proname => 'pg_switch_wal', provolatile => 'v', prorettype => 'pg_lsn',
@@ -7517,7 +7524,8 @@
{ oid => '1268',
descr => 'parse qualified identifier to array of identifiers',
proname => 'parse_ident', prorettype => '_text', proargtypes => 'text bool',
- proargnames => '{str,strict}', prosrc => 'parse_ident' },
+ proargnames => '{str,strict}', proargdefaults => '{true}',
+ prosrc => 'parse_ident' },
{ oid => '2246', descr => '(internal)',
proname => 'fmgr_internal_validator', provolatile => 's',
@@ -9423,7 +9431,9 @@
proargtypes => 'anyelement', prosrc => 'to_json' },
{ oid => '3261', descr => 'remove object fields with null values from json',
proname => 'json_strip_nulls', prorettype => 'json',
- proargtypes => 'json bool', prosrc => 'json_strip_nulls' },
+ proargtypes => 'json bool',
+ proargnames => '{target,strip_in_arrays}', proargdefaults => '{false}',
+ prosrc => 'json_strip_nulls' },
{ oid => '3947',
proname => 'json_object_field', prorettype => 'json',
@@ -9480,12 +9490,17 @@
{ oid => '3960', descr => 'get record fields from a json object',
proname => 'json_populate_record', proisstrict => 'f', provolatile => 's',
prorettype => 'anyelement', proargtypes => 'anyelement json bool',
+ proargnames => '{base,from_json,use_json_as_text}',
+ proargdefaults => '{false}',
prosrc => 'json_populate_record' },
{ oid => '3961',
descr => 'get set of records with fields from a json array of objects',
proname => 'json_populate_recordset', prorows => '100', proisstrict => 'f',
proretset => 't', provolatile => 's', prorettype => 'anyelement',
- proargtypes => 'anyelement json bool', prosrc => 'json_populate_recordset' },
+ proargtypes => 'anyelement json bool',
+ proargnames => '{base,from_json,use_json_as_text}',
+ proargdefaults => '{false}',
+ prosrc => 'json_populate_recordset' },
{ oid => '3204', descr => 'get record fields from a json object',
proname => 'json_to_record', provolatile => 's', prorettype => 'record',
proargtypes => 'json', prosrc => 'json_to_record' },
@@ -10364,7 +10379,9 @@
prosrc => 'jsonb_build_object_noargs' },
{ oid => '3262', descr => 'remove object fields with null values from jsonb',
proname => 'jsonb_strip_nulls', prorettype => 'jsonb',
- proargtypes => 'jsonb bool', prosrc => 'jsonb_strip_nulls' },
+ proargtypes => 'jsonb bool',
+ proargnames => '{target,strip_in_arrays}', proargdefaults => '{false}',
+ prosrc => 'jsonb_strip_nulls' },
{ oid => '3478',
proname => 'jsonb_object_field', prorettype => 'jsonb',
@@ -10538,16 +10555,25 @@
proargtypes => 'jsonb _text', prosrc => 'jsonb_delete_path' },
{ oid => '5054', descr => 'Set part of a jsonb, handle NULL value',
proname => 'jsonb_set_lax', proisstrict => 'f', prorettype => 'jsonb',
- proargtypes => 'jsonb _text jsonb bool text', prosrc => 'jsonb_set_lax' },
+ proargtypes => 'jsonb _text jsonb bool text',
+ proargnames => '{jsonb_in,path,replacement,create_if_missing,null_value_treatment}',
+ proargdefaults => '{true,use_json_null}',
+ prosrc => 'jsonb_set_lax' },
{ oid => '3305', descr => 'Set part of a jsonb',
proname => 'jsonb_set', prorettype => 'jsonb',
- proargtypes => 'jsonb _text jsonb bool', prosrc => 'jsonb_set' },
+ proargtypes => 'jsonb _text jsonb bool',
+ proargnames => '{jsonb_in,path,replacement,create_if_missing}',
+ proargdefaults => '{true}',
+ prosrc => 'jsonb_set' },
{ oid => '3306', descr => 'Indented text from jsonb',
proname => 'jsonb_pretty', prorettype => 'text', proargtypes => 'jsonb',
prosrc => 'jsonb_pretty' },
{ oid => '3579', descr => 'Insert value into a jsonb',
proname => 'jsonb_insert', prorettype => 'jsonb',
- proargtypes => 'jsonb _text jsonb bool', prosrc => 'jsonb_insert' },
+ proargtypes => 'jsonb _text jsonb bool',
+ proargnames => '{jsonb_in,path,replacement,insert_after}',
+ proargdefaults => '{false}',
+ prosrc => 'jsonb_insert' },
# jsonpath
{ oid => '4001', descr => 'I/O',
@@ -10565,42 +10591,66 @@
{ oid => '4005', descr => 'jsonpath exists test',
proname => 'jsonb_path_exists', prorettype => 'bool',
- proargtypes => 'jsonb jsonpath jsonb bool', prosrc => 'jsonb_path_exists' },
+ proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
+ prosrc => 'jsonb_path_exists' },
{ oid => '4006', descr => 'jsonpath query',
proname => 'jsonb_path_query', prorows => '1000', proretset => 't',
prorettype => 'jsonb', proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
prosrc => 'jsonb_path_query' },
{ oid => '4007', descr => 'jsonpath query wrapped into array',
proname => 'jsonb_path_query_array', prorettype => 'jsonb',
proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
prosrc => 'jsonb_path_query_array' },
{ oid => '4008', descr => 'jsonpath query first item',
proname => 'jsonb_path_query_first', prorettype => 'jsonb',
proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
prosrc => 'jsonb_path_query_first' },
{ oid => '4009', descr => 'jsonpath match',
proname => 'jsonb_path_match', prorettype => 'bool',
- proargtypes => 'jsonb jsonpath jsonb bool', prosrc => 'jsonb_path_match' },
+ proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
+ prosrc => 'jsonb_path_match' },
{ oid => '1177', descr => 'jsonpath exists test with timezone',
proname => 'jsonb_path_exists_tz', provolatile => 's', prorettype => 'bool',
proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
prosrc => 'jsonb_path_exists_tz' },
{ oid => '1179', descr => 'jsonpath query with timezone',
proname => 'jsonb_path_query_tz', prorows => '1000', proretset => 't',
provolatile => 's', prorettype => 'jsonb',
- proargtypes => 'jsonb jsonpath jsonb bool', prosrc => 'jsonb_path_query_tz' },
+ proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
+ prosrc => 'jsonb_path_query_tz' },
{ oid => '1180', descr => 'jsonpath query wrapped into array with timezone',
proname => 'jsonb_path_query_array_tz', provolatile => 's',
prorettype => 'jsonb', proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
prosrc => 'jsonb_path_query_array_tz' },
{ oid => '2023', descr => 'jsonpath query first item with timezone',
proname => 'jsonb_path_query_first_tz', provolatile => 's',
prorettype => 'jsonb', proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
prosrc => 'jsonb_path_query_first_tz' },
{ oid => '2030', descr => 'jsonpath match with timezone',
proname => 'jsonb_path_match_tz', provolatile => 's', prorettype => 'bool',
- proargtypes => 'jsonb jsonpath jsonb bool', prosrc => 'jsonb_path_match_tz' },
+ proargtypes => 'jsonb jsonpath jsonb bool',
+ proargnames => '{target,path,vars,silent}',
+ proargdefaults => '{"{}",false}',
+ prosrc => 'jsonb_path_match_tz' },
{ oid => '4010', descr => 'implementation of @? operator',
proname => 'jsonb_path_exists_opr', prorettype => 'bool',
@@ -11411,6 +11461,7 @@
proname => 'make_interval', prorettype => 'interval',
proargtypes => 'int4 int4 int4 int4 int4 int4 float8',
proargnames => '{years,months,weeks,days,hours,mins,secs}',
+ proargdefaults => '{0,0,0,0,0,0,0.0}',
prosrc => 'make_interval' },
# spgist opclasses
@@ -11511,6 +11562,7 @@
proallargtypes => '{name,bool,bool,name,pg_lsn}',
proargmodes => '{i,i,i,o,o}',
proargnames => '{slot_name,immediately_reserve,temporary,slot_name,lsn}',
+ proargdefaults => '{false,false}',
prosrc => 'pg_create_physical_replication_slot' },
{ oid => '4220',
descr => 'copy a physical replication slot, changing temporality',
@@ -11546,6 +11598,7 @@
proallargtypes => '{name,name,bool,bool,bool,name,pg_lsn}',
proargmodes => '{i,i,i,i,i,o,o}',
proargnames => '{slot_name,plugin,temporary,twophase,failover,slot_name,lsn}',
+ proargdefaults => '{false,false,false}',
prosrc => 'pg_create_logical_replication_slot' },
{ oid => '4222',
descr => 'copy a logical replication slot, changing temporality and plugin',
@@ -11578,6 +11631,7 @@
proallargtypes => '{name,pg_lsn,int4,_text,pg_lsn,xid,text}',
proargmodes => '{i,i,i,v,o,o,o}',
proargnames => '{slot_name,upto_lsn,upto_nchanges,options,lsn,xid,data}',
+ proargdefaults => '{"{}"}',
prosrc => 'pg_logical_slot_get_changes' },
{ oid => '3783', descr => 'get binary changes from replication slot',
proname => 'pg_logical_slot_get_binary_changes', procost => '1000',
@@ -11587,6 +11641,7 @@
proallargtypes => '{name,pg_lsn,int4,_text,pg_lsn,xid,bytea}',
proargmodes => '{i,i,i,v,o,o,o}',
proargnames => '{slot_name,upto_lsn,upto_nchanges,options,lsn,xid,data}',
+ proargdefaults => '{"{}"}',
prosrc => 'pg_logical_slot_get_binary_changes' },
{ oid => '3784', descr => 'peek at changes from replication slot',
proname => 'pg_logical_slot_peek_changes', procost => '1000',
@@ -11596,6 +11651,7 @@
proallargtypes => '{name,pg_lsn,int4,_text,pg_lsn,xid,text}',
proargmodes => '{i,i,i,v,o,o,o}',
proargnames => '{slot_name,upto_lsn,upto_nchanges,options,lsn,xid,data}',
+ proargdefaults => '{"{}"}',
prosrc => 'pg_logical_slot_peek_changes' },
{ oid => '3785', descr => 'peek at binary changes from replication slot',
proname => 'pg_logical_slot_peek_binary_changes', procost => '1000',
@@ -11605,6 +11661,7 @@
proallargtypes => '{name,pg_lsn,int4,_text,pg_lsn,xid,bytea}',
proargmodes => '{i,i,i,v,o,o,o}',
proargnames => '{slot_name,upto_lsn,upto_nchanges,options,lsn,xid,data}',
+ proargdefaults => '{"{}"}',
prosrc => 'pg_logical_slot_peek_binary_changes' },
{ oid => '3878', descr => 'advance logical replication slot',
proname => 'pg_replication_slot_advance', provolatile => 'v',
@@ -11615,10 +11672,14 @@
{ oid => '3577', descr => 'emit a textual logical decoding message',
proname => 'pg_logical_emit_message', provolatile => 'v', proparallel => 'u',
prorettype => 'pg_lsn', proargtypes => 'bool text text bool',
+ proargnames => '{transactional,prefix,message,flush}',
+ proargdefaults => '{false}',
prosrc => 'pg_logical_emit_message_text' },
{ oid => '3578', descr => 'emit a binary logical decoding message',
proname => 'pg_logical_emit_message', provolatile => 'v', proparallel => 'u',
prorettype => 'pg_lsn', proargtypes => 'bool text bytea bool',
+ proargnames => '{transactional,prefix,message,flush}',
+ proargdefaults => '{false}',
prosrc => 'pg_logical_emit_message_bytea' },
{ oid => '6344',
descr => 'sync replication slots from the primary to the standby',
@@ -11832,9 +11893,9 @@
proparallel => 'u', prorettype => 'void', proargtypes => 'oid',
prosrc => 'binary_upgrade_set_next_pg_tablespace_oid' },
{ oid => '6312', descr => 'for use by pg_upgrade',
- proname => 'binary_upgrade_logical_slot_has_caught_up', provolatile => 'v',
- proparallel => 'u', prorettype => 'bool', proargtypes => 'name',
- prosrc => 'binary_upgrade_logical_slot_has_caught_up' },
+ proname => 'binary_upgrade_check_logical_slot_pending_wal', provolatile => 'v',
+ proparallel => 'u', prorettype => 'pg_lsn', proargtypes => 'name pg_lsn',
+ prosrc => 'binary_upgrade_check_logical_slot_pending_wal' },
{ oid => '6319',
descr => 'for use by pg_upgrade (relation for pg_subscription_rel)',
proname => 'binary_upgrade_add_sub_rel_state', proisstrict => 'f',
@@ -12268,6 +12329,7 @@
descr => 'configure session to maintain replication progress tracking for the passed in origin',
proname => 'pg_replication_origin_session_setup', provolatile => 'v',
proparallel => 'u', prorettype => 'void', proargtypes => 'text int4',
+ proargnames => '{node_name,pid}', proargdefaults => '{0}',
prosrc => 'pg_replication_origin_session_setup' },
{ oid => '6007', descr => 'teardown configured replication progress tracking',
@@ -12518,10 +12580,12 @@
{ oid => '4350', descr => 'Unicode normalization',
proname => 'normalize', prorettype => 'text', proargtypes => 'text text',
+ proargdefaults => '{NFC}',
prosrc => 'unicode_normalize_func' },
{ oid => '4351', descr => 'check Unicode normalization',
proname => 'is_normalized', prorettype => 'bool', proargtypes => 'text text',
+ proargdefaults => '{NFC}',
prosrc => 'unicode_is_normalized' },
{ oid => '6198', descr => 'unescape Unicode characters',
diff --git a/src/include/catalog/pg_subscription.h b/src/include/catalog/pg_subscription.h
index f3571d2bfcf..805493d85c5 100644
--- a/src/include/catalog/pg_subscription.h
+++ b/src/include/catalog/pg_subscription.h
@@ -100,6 +100,9 @@ CATALOG(pg_subscription,6100,SubscriptionRelationId) BKI_SHARED_RELATION BKI_ROW
/* Synchronous commit setting for worker */
text subsynccommit BKI_FORCE_NOT_NULL;
+ /* wal_receiver_timeout setting for worker */
+ text subwalrcvtimeout BKI_FORCE_NOT_NULL;
+
/* List of publications subscribed to */
text subpublications[1] BKI_FORCE_NOT_NULL;
@@ -155,6 +158,7 @@ typedef struct Subscription
char *conninfo; /* Connection string to the publisher */
char *slotname; /* Name of the replication slot */
char *synccommit; /* Synchronous commit setting for worker */
+ char *walrcvtimeout; /* wal_receiver_timeout setting for worker */
List *publications; /* List of publication names to subscribe to */
char *origin; /* Only publish data originating from the
* specified origin */
diff --git a/src/include/commands/extension.h b/src/include/commands/extension.h
index 4ebc2bac223..7a76bdebcfa 100644
--- a/src/include/commands/extension.h
+++ b/src/include/commands/extension.h
@@ -52,6 +52,8 @@ extern char *get_extension_name(Oid ext_oid);
extern Oid get_extension_schema(Oid ext_oid);
extern bool extension_file_exists(const char *extensionName);
+extern Oid get_function_sibling_type(Oid funcoid, const char *typname);
+
extern ObjectAddress AlterExtensionNamespace(const char *extensionName, const char *newschema,
Oid *oldschema);
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
index 6966daa2b09..06bdf6d5866 100644
--- a/src/include/common/hashfn_unstable.h
+++ b/src/include/common/hashfn_unstable.h
@@ -158,23 +158,23 @@ fasthash_accum(fasthash_state *hs, const char *k, size_t len)
break;
case 7:
hs->accum |= (uint64) k[6] << 8;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 6:
hs->accum |= (uint64) k[5] << 16;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 5:
hs->accum |= (uint64) k[4] << 24;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 4:
memcpy(&lower_four, k, sizeof(lower_four));
hs->accum |= (uint64) lower_four << 32;
break;
case 3:
hs->accum |= (uint64) k[2] << 40;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 2:
hs->accum |= (uint64) k[1] << 48;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 1:
hs->accum |= (uint64) k[0] << 56;
break;
@@ -189,23 +189,23 @@ fasthash_accum(fasthash_state *hs, const char *k, size_t len)
break;
case 7:
hs->accum |= (uint64) k[6] << 48;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 6:
hs->accum |= (uint64) k[5] << 40;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 5:
hs->accum |= (uint64) k[4] << 32;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 4:
memcpy(&lower_four, k, sizeof(lower_four));
hs->accum |= lower_four;
break;
case 3:
hs->accum |= (uint64) k[2] << 16;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 2:
hs->accum |= (uint64) k[1] << 8;
- /* FALLTHROUGH */
+ pg_fallthrough;
case 1:
hs->accum |= (uint64) k[0];
break;
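
Note on pg_fallthrough: it replaces the comment-based FALLTHROUGH annotations
above, and its definition is not part of this hunk, so the following is an
assumption about its shape. At GCC's strictest warning level
(-Wimplicit-fallthrough=5), only an attribute -- never a comment -- marks an
intentional fall-through, so the macro presumably expands to one:

    /* hypothetical definition; the real one lives elsewhere in the tree */
    #ifdef __has_attribute
    #if __has_attribute(fallthrough)
    #define pg_fallthrough __attribute__((fallthrough))
    #endif
    #endif
    #ifndef pg_fallthrough
    #define pg_fallthrough ((void) 0)
    #endif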
diff --git a/src/include/executor/execdebug.h b/src/include/executor/execdebug.h
index 20ac9be0b92..3e110551914 100644
--- a/src/include/executor/execdebug.h
+++ b/src/include/executor/execdebug.h
@@ -34,22 +34,22 @@
* EXEC_NESTLOOPDEBUG is a flag which turns on debugging of the
* nest loop node by NL_printf() and ENL_printf() in nodeNestloop.c
* ----------------
-#undef EXEC_NESTLOOPDEBUG
*/
+/* #define EXEC_NESTLOOPDEBUG */
/* ----------------
* EXEC_SORTDEBUG is a flag which turns on debugging of
* the ExecSort() stuff by SO_printf() in nodeSort.c
* ----------------
-#undef EXEC_SORTDEBUG
*/
+/* #define EXEC_SORTDEBUG */
/* ----------------
* EXEC_MERGEJOINDEBUG is a flag which turns on debugging of
* the ExecMergeJoin() stuff by MJ_printf() in nodeMergejoin.c
* ----------------
-#undef EXEC_MERGEJOINDEBUG
*/
+/* #define EXEC_MERGEJOINDEBUG */
/* ----------------------------------------------------------------
* #defines controlled by above definitions
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 55a7d930d26..d46ba59895d 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -739,12 +739,15 @@ extern Bitmapset *ExecGetAllUpdatedCols(ResultRelInfo *relinfo, EState *estate);
*/
extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
-extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
- TupleTableSlot *slot, EState *estate,
- bool update,
- bool noDupErr,
- bool *specConflict, List *arbiterIndexes,
- bool onlySummarizing);
+
+/* flags for ExecInsertIndexTuples */
+#define EIIT_IS_UPDATE (1<<0)
+#define EIIT_NO_DUPE_ERROR (1<<1)
+#define EIIT_ONLY_SUMMARIZING (1<<2)
+extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, EState *estate,
+ bits32 options, TupleTableSlot *slot,
+ List *arbiterIndexes,
+ bool *specConflict);
extern bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot,
EState *estate, ItemPointer conflictTid,
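
The three boolean parameters of ExecInsertIndexTuples are folded into one
bits32 options word above, which also reorders the arguments. A minimal
sketch of a call-site conversion under the new signature (rri, slot, and the
surrounding variables are hypothetical locals):

    /* before: ExecInsertIndexTuples(rri, slot, estate, true, false,
     *                               NULL, NIL, false); */
    List       *recheckIndexes;

    recheckIndexes = ExecInsertIndexTuples(rri, estate,
                                           EIIT_IS_UPDATE,  /* was update=true */
                                           slot,
                                           NIL,             /* arbiterIndexes */
                                           NULL);           /* specConflict */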
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index eabbc78b280..10d02bdb79f 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -231,22 +231,22 @@ extern void fmgr_symbol(Oid functionId, char **mod, char **fn);
* Note: it'd be nice if these could be macros, but I see no way to do that
* without evaluating the arguments multiple times, which is NOT acceptable.
*/
-extern struct varlena *pg_detoast_datum(struct varlena *datum);
-extern struct varlena *pg_detoast_datum_copy(struct varlena *datum);
-extern struct varlena *pg_detoast_datum_slice(struct varlena *datum,
- int32 first, int32 count);
-extern struct varlena *pg_detoast_datum_packed(struct varlena *datum);
+extern varlena *pg_detoast_datum(varlena *datum);
+extern varlena *pg_detoast_datum_copy(varlena *datum);
+extern varlena *pg_detoast_datum_slice(varlena *datum,
+ int32 first, int32 count);
+extern varlena *pg_detoast_datum_packed(varlena *datum);
#define PG_DETOAST_DATUM(datum) \
- pg_detoast_datum((struct varlena *) DatumGetPointer(datum))
+ pg_detoast_datum((varlena *) DatumGetPointer(datum))
#define PG_DETOAST_DATUM_COPY(datum) \
- pg_detoast_datum_copy((struct varlena *) DatumGetPointer(datum))
+ pg_detoast_datum_copy((varlena *) DatumGetPointer(datum))
#define PG_DETOAST_DATUM_SLICE(datum,f,c) \
- pg_detoast_datum_slice((struct varlena *) DatumGetPointer(datum), \
+ pg_detoast_datum_slice((varlena *) DatumGetPointer(datum), \
(int32) (f), (int32) (c))
/* WARNING -- unaligned pointer */
#define PG_DETOAST_DATUM_PACKED(datum) \
- pg_detoast_datum_packed((struct varlena *) DatumGetPointer(datum))
+ pg_detoast_datum_packed((varlena *) DatumGetPointer(datum))
/*
* Support for cleaning up detoasted copies of inputs. This must only
@@ -283,7 +283,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum);
#define PG_GETARG_FLOAT8(n) DatumGetFloat8(PG_GETARG_DATUM(n))
#define PG_GETARG_INT64(n) DatumGetInt64(PG_GETARG_DATUM(n))
/* use this if you want the raw, possibly-toasted input datum: */
-#define PG_GETARG_RAW_VARLENA_P(n) ((struct varlena *) PG_GETARG_POINTER(n))
+#define PG_GETARG_RAW_VARLENA_P(n) ((varlena *) PG_GETARG_POINTER(n))
/* use this if you want the input datum de-toasted: */
#define PG_GETARG_VARLENA_P(n) PG_DETOAST_DATUM(PG_GETARG_DATUM(n))
/* and this if you can handle 1-byte-header datums: */
diff --git a/src/include/lib/ilist.h b/src/include/lib/ilist.h
index d49ec0ffbc5..fc298a6c1d7 100644
--- a/src/include/lib/ilist.h
+++ b/src/include/lib/ilist.h
@@ -591,8 +591,8 @@ dlist_tail_node(dlist_head *head)
* This is used to convert a dlist_node * back to its containing struct.
*/
#define dlist_container(type, membername, ptr) \
- (AssertVariableIsOfTypeMacro(ptr, dlist_node *), \
- AssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
+ (StaticAssertVariableIsOfTypeMacro(ptr, dlist_node *), \
+ StaticAssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
((type *) ((char *) (ptr) - offsetof(type, membername))))
/*
@@ -601,7 +601,7 @@ dlist_tail_node(dlist_head *head)
* The list must not be empty.
*/
#define dlist_head_element(type, membername, lhead) \
- (AssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
+ (StaticAssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
(type *) dlist_head_element_off(lhead, offsetof(type, membername)))
/*
@@ -610,7 +610,7 @@ dlist_tail_node(dlist_head *head)
* The list must not be empty.
*/
#define dlist_tail_element(type, membername, lhead) \
- (AssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
+ (StaticAssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
((type *) dlist_tail_element_off(lhead, offsetof(type, membername))))
/*
@@ -621,8 +621,8 @@ dlist_tail_node(dlist_head *head)
* It is *not* allowed to manipulate the list during iteration.
*/
#define dlist_foreach(iter, lhead) \
- for (AssertVariableIsOfTypeMacro(iter, dlist_iter), \
- AssertVariableIsOfTypeMacro(lhead, dlist_head *), \
+ for (StaticAssertVariableIsOfTypeMacro(iter, dlist_iter), \
+ StaticAssertVariableIsOfTypeMacro(lhead, dlist_head *), \
(iter).end = &(lhead)->head, \
(iter).cur = (iter).end->next ? (iter).end->next : (iter).end; \
(iter).cur != (iter).end; \
@@ -638,8 +638,8 @@ dlist_tail_node(dlist_head *head)
* fine to insert or delete adjacent nodes.
*/
#define dlist_foreach_modify(iter, lhead) \
- for (AssertVariableIsOfTypeMacro(iter, dlist_mutable_iter), \
- AssertVariableIsOfTypeMacro(lhead, dlist_head *), \
+ for (StaticAssertVariableIsOfTypeMacro(iter, dlist_mutable_iter), \
+ StaticAssertVariableIsOfTypeMacro(lhead, dlist_head *), \
(iter).end = &(lhead)->head, \
(iter).cur = (iter).end->next ? (iter).end->next : (iter).end, \
(iter).next = (iter).cur->next; \
@@ -652,8 +652,8 @@ dlist_tail_node(dlist_head *head)
* It is *not* allowed to manipulate the list during iteration.
*/
#define dlist_reverse_foreach(iter, lhead) \
- for (AssertVariableIsOfTypeMacro(iter, dlist_iter), \
- AssertVariableIsOfTypeMacro(lhead, dlist_head *), \
+ for (StaticAssertVariableIsOfTypeMacro(iter, dlist_iter), \
+ StaticAssertVariableIsOfTypeMacro(lhead, dlist_head *), \
(iter).end = &(lhead)->head, \
(iter).cur = (iter).end->prev ? (iter).end->prev : (iter).end; \
(iter).cur != (iter).end; \
@@ -953,7 +953,7 @@ dclist_count(const dclist_head *head)
* The list must not be empty.
*/
#define dclist_head_element(type, membername, lhead) \
- (AssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
+ (StaticAssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
(type *) dclist_head_element_off(lhead, offsetof(type, membername)))
/*
@@ -962,7 +962,7 @@ dclist_count(const dclist_head *head)
* The list must not be empty.
*/
#define dclist_tail_element(type, membername, lhead) \
- (AssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
+ (StaticAssertVariableIsOfTypeMacro(((type *) NULL)->membername, dlist_node), \
((type *) dclist_tail_element_off(lhead, offsetof(type, membername))))
@@ -1104,8 +1104,8 @@ slist_delete_current(slist_mutable_iter *iter)
* This is used to convert a slist_node * back to its containing struct.
*/
#define slist_container(type, membername, ptr) \
- (AssertVariableIsOfTypeMacro(ptr, slist_node *), \
- AssertVariableIsOfTypeMacro(((type *) NULL)->membername, slist_node), \
+ (StaticAssertVariableIsOfTypeMacro(ptr, slist_node *), \
+ StaticAssertVariableIsOfTypeMacro(((type *) NULL)->membername, slist_node), \
((type *) ((char *) (ptr) - offsetof(type, membername))))
/*
@@ -1114,7 +1114,7 @@ slist_delete_current(slist_mutable_iter *iter)
* The list must not be empty.
*/
#define slist_head_element(type, membername, lhead) \
- (AssertVariableIsOfTypeMacro(((type *) NULL)->membername, slist_node), \
+ (StaticAssertVariableIsOfTypeMacro(((type *) NULL)->membername, slist_node), \
(type *) slist_head_element_off(lhead, offsetof(type, membername)))
/*
@@ -1130,8 +1130,8 @@ slist_delete_current(slist_mutable_iter *iter)
* not safe.)
*/
#define slist_foreach(iter, lhead) \
- for (AssertVariableIsOfTypeMacro(iter, slist_iter), \
- AssertVariableIsOfTypeMacro(lhead, slist_head *), \
+ for (StaticAssertVariableIsOfTypeMacro(iter, slist_iter), \
+ StaticAssertVariableIsOfTypeMacro(lhead, slist_head *), \
(iter).cur = (lhead)->head.next; \
(iter).cur != NULL; \
(iter).cur = (iter).cur->next)
@@ -1146,8 +1146,8 @@ slist_delete_current(slist_mutable_iter *iter)
* deletion of nodes adjacent to the current node would misbehave.
*/
#define slist_foreach_modify(iter, lhead) \
- for (AssertVariableIsOfTypeMacro(iter, slist_mutable_iter), \
- AssertVariableIsOfTypeMacro(lhead, slist_head *), \
+ for (StaticAssertVariableIsOfTypeMacro(iter, slist_mutable_iter), \
+ StaticAssertVariableIsOfTypeMacro(lhead, slist_head *), \
(iter).prev = &(lhead)->head, \
(iter).cur = (iter).prev->next, \
(iter).next = (iter).cur ? (iter).cur->next : NULL; \
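
The renames above are mechanical (AssertVariableIsOfTypeMacro becomes
StaticAssertVariableIsOfTypeMacro); the checks remain compile-time only, so
the container macros keep their zero runtime cost. For reference, a small
sketch of the intrusive-list idiom these assertions guard (MyItem, my_list,
and do_something are illustrative; assumes lib/ilist.h):

    typedef struct MyItem
    {
        int         value;
        dlist_node  node;           /* embedded list link */
    } MyItem;

    dlist_iter  iter;

    dlist_foreach(iter, &my_list)
    {
        /* recover the containing struct from the embedded node; a
         * wrongly-typed argument now fails with a static assertion */
        MyItem     *item = dlist_container(MyItem, node, iter.cur);

        do_something(item->value);
    }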
diff --git a/src/include/lib/pairingheap.h b/src/include/lib/pairingheap.h
index b93ea5b638d..f1582c98626 100644
--- a/src/include/lib/pairingheap.h
+++ b/src/include/lib/pairingheap.h
@@ -41,16 +41,16 @@ typedef struct pairingheap_node
* This is used to convert a pairingheap_node * back to its containing struct.
*/
#define pairingheap_container(type, membername, ptr) \
- (AssertVariableIsOfTypeMacro(ptr, pairingheap_node *), \
- AssertVariableIsOfTypeMacro(((type *) NULL)->membername, pairingheap_node), \
+ (StaticAssertVariableIsOfTypeMacro(ptr, pairingheap_node *), \
+ StaticAssertVariableIsOfTypeMacro(((type *) NULL)->membername, pairingheap_node), \
((type *) ((char *) (ptr) - offsetof(type, membername))))
/*
* Like pairingheap_container, but used when the pointer is 'const ptr'
*/
#define pairingheap_const_container(type, membername, ptr) \
- (AssertVariableIsOfTypeMacro(ptr, const pairingheap_node *), \
- AssertVariableIsOfTypeMacro(((type *) NULL)->membername, pairingheap_node), \
+ (StaticAssertVariableIsOfTypeMacro(ptr, const pairingheap_node *), \
+ StaticAssertVariableIsOfTypeMacro(((type *) NULL)->membername, pairingheap_node), \
((const type *) ((const char *) (ptr) - offsetof(type, membername))))
/*
diff --git a/src/include/lib/sort_template.h b/src/include/lib/sort_template.h
index e02aa73cd4d..22b2092d03b 100644
--- a/src/include/lib/sort_template.h
+++ b/src/include/lib/sort_template.h
@@ -311,6 +311,14 @@ ST_SORT(ST_ELEMENT_TYPE * data, size_t n
DO_CHECK_FOR_INTERRUPTS();
if (n < 7)
{
+ /*
+ * Not strictly necessary, but a caller may pass a NULL pointer input
+ * and zero length, and this silences warnings about applying offsets
+ * to NULL pointers.
+ */
+ if (n < 2)
+ return;
+
for (pm = a + ST_POINTER_STEP; pm < a + n * ST_POINTER_STEP;
pm += ST_POINTER_STEP)
for (pl = pm; pl > a && DO_COMPARE(pl - ST_POINTER_STEP, pl) > 0;
@@ -387,29 +395,23 @@ ST_SORT(ST_ELEMENT_TYPE * data, size_t n
if (d1 <= d2)
{
/* Recurse on left partition, then iterate on right partition */
- if (d1 > ST_POINTER_STEP)
- DO_SORT(a, d1 / ST_POINTER_STEP);
- if (d2 > ST_POINTER_STEP)
- {
- /* Iterate rather than recurse to save stack space */
- /* DO_SORT(pn - d2, d2 / ST_POINTER_STEP) */
- a = pn - d2;
- n = d2 / ST_POINTER_STEP;
- goto loop;
- }
+ DO_SORT(a, d1 / ST_POINTER_STEP);
+
+ /* Iterate rather than recurse to save stack space */
+ /* DO_SORT(pn - d2, d2 / ST_POINTER_STEP) */
+ a = pn - d2;
+ n = d2 / ST_POINTER_STEP;
+ goto loop;
}
else
{
/* Recurse on right partition, then iterate on left partition */
- if (d2 > ST_POINTER_STEP)
- DO_SORT(pn - d2, d2 / ST_POINTER_STEP);
- if (d1 > ST_POINTER_STEP)
- {
- /* Iterate rather than recurse to save stack space */
- /* DO_SORT(a, d1 / ST_POINTER_STEP) */
- n = d1 / ST_POINTER_STEP;
- goto loop;
- }
+ DO_SORT(pn - d2, d2 / ST_POINTER_STEP);
+
+ /* Iterate rather than recurse to save stack space */
+ /* DO_SORT(a, d1 / ST_POINTER_STEP) */
+ n = d1 / ST_POINTER_STEP;
+ goto loop;
}
}
#endif
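
On the n < 2 early return: with a == NULL and n == 0, the insertion-sort
setup would otherwise still evaluate pointer arithmetic on a, even though
the loop body could never run. Roughly the expressions in question:

    /* both bounds apply an offset to a null pointer, which is
     * undefined behavior that sanitizers such as UBSan report */
    for (pm = a + ST_POINTER_STEP;
         pm < a + n * ST_POINTER_STEP;
         pm += ST_POINTER_STEP)
        /* insertion-sort body */ ;

The same guard is what lets the recursive partition calls above drop their
per-call size checks: a partition of fewer than two elements now returns
immediately at the top of the function instead.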
diff --git a/src/include/libpq/crypt.h b/src/include/libpq/crypt.h
index f01886e1098..ebef0d0f78c 100644
--- a/src/include/libpq/crypt.h
+++ b/src/include/libpq/crypt.h
@@ -25,6 +25,9 @@
*/
#define MAX_ENCRYPTED_PASSWORD_LEN (512)
+/* Threshold for password expiration warnings. */
+extern PGDLLIMPORT int password_expiration_warning_threshold;
+
/* Enables deprecation warnings for MD5 passwords. */
extern PGDLLIMPORT bool md5_password_warnings;
diff --git a/src/include/libpq/pqcomm.h b/src/include/libpq/pqcomm.h
index 1bbe5b9ee45..a29c9c94d79 100644
--- a/src/include/libpq/pqcomm.h
+++ b/src/include/libpq/pqcomm.h
@@ -104,6 +104,16 @@ is_unixsock_path(const char *path)
*/
#define PG_PROTOCOL_RESERVED_31 PG_PROTOCOL(3,1)
+/*
+ * PG_PROTOCOL_GREASE is an intentionally unsupported protocol version used
+ * for "greasing" (the practice of sending valid, but extraneous or otherwise
+ * unusual, messages to keep peer implementations honest). This helps ensure
+ * that servers properly implement protocol version negotiation. Version 3.9999
+ * was chosen since it is safely within the valid range, it is representable
+ * via PQfullProtocolVersion, and it is unlikely to ever be needed in practice.
+ */
+#define PG_PROTOCOL_GREASE PG_PROTOCOL(3,9999)
+
/*
* A client can send a cancel-current-operation request to the postmaster.
* This is uglier than sending it directly to the client's backend, but it
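
For concreteness, PG_PROTOCOL packs major and minor into a single integer
(pqcomm.h defines it as (((m) << 16) | (n))), so 3.9999 sorts above every
real 3.x minor version while staying below a hypothetical 4.0:

    /* compile-time sanity check, using the existing macros */
    StaticAssertDecl(PG_PROTOCOL(3, 9999) > PG_PROTOCOL_RESERVED_31 &&
                     PG_PROTOCOL(3, 9999) < PG_PROTOCOL(4, 0),
                     "grease version must stay within the 3.x range");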
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index a5b7b49e4b5..e1655fe61d6 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -695,7 +695,14 @@ extern int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
extern size_t pg_wchar_strlen(const pg_wchar *str);
+extern int pg_mblen_cstr(const char *mbstr);
+extern int pg_mblen_range(const char *mbstr, const char *end);
+extern int pg_mblen_with_len(const char *mbstr, int limit);
+extern int pg_mblen_unbounded(const char *mbstr);
+
+/* deprecated */
extern int pg_mblen(const char *mbstr);
+
extern int pg_dsplen(const char *mbstr);
extern int pg_mbstrlen(const char *mbstr);
extern int pg_mbstrlen_with_len(const char *mbstr, int limit);
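
The new pg_mblen variants make the buffer-boundary contract explicit at each
call site. A sketch of walking a length-bounded buffer with the _with_len
flavor, on the assumption suggested by the name that it never reads past
mbstr + limit (the exact semantics live with the implementations):

    const char *p = buf;
    const char *end = buf + buflen;

    while (p < end)
    {
        int         clen = pg_mblen_with_len(p, (int) (end - p));

        /* process one multibyte character of clen bytes starting at p */
        p += clen;
    }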
diff --git a/src/include/meson.build b/src/include/meson.build
index b940c5cd3d6..7d734d92dab 100644
--- a/src/include/meson.build
+++ b/src/include/meson.build
@@ -173,6 +173,7 @@ install_subdir('catalog',
exclude_files: [
'.gitignore',
'Makefile',
+ 'README',
'duplicate_oids',
'meson.build',
'reformat_dat_file.pl',
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index db559b39c4d..f16f35659b9 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -507,6 +507,7 @@ extern void InitPostgres(const char *in_dbname, Oid dboid,
bits32 flags,
char *out_dbname);
extern void BaseInit(void);
+extern void StoreConnectionWarning(char *msg, char *detail);
/* in utils/init/miscinit.c */
extern PGDLLIMPORT bool IgnoreSystemIndexes;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index f8053d9e572..63c067d5aae 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -424,19 +424,20 @@ typedef struct JunkFilter
} JunkFilter;
/*
- * OnConflictSetState
+ * OnConflictActionState
*
- * Executor state of an ON CONFLICT DO UPDATE operation.
+ * Executor state of an ON CONFLICT DO SELECT/UPDATE operation.
*/
-typedef struct OnConflictSetState
+typedef struct OnConflictActionState
{
NodeTag type;
TupleTableSlot *oc_Existing; /* slot to store existing target tuple in */
TupleTableSlot *oc_ProjSlot; /* CONFLICT ... SET ... projection target */
ProjectionInfo *oc_ProjInfo; /* for ON CONFLICT DO UPDATE SET */
+ LockClauseStrength oc_LockStrength; /* lock strength for DO SELECT */
ExprState *oc_WhereClause; /* state for the WHERE clause */
-} OnConflictSetState;
+} OnConflictActionState;
/* ----------------
* MergeActionState information
@@ -581,8 +582,8 @@ typedef struct ResultRelInfo
/* list of arbiter indexes to use to check conflicts */
List *ri_onConflictArbiterIndexes;
- /* ON CONFLICT evaluation state */
- OnConflictSetState *ri_onConflict;
+ /* ON CONFLICT evaluation state for DO SELECT/UPDATE */
+ OnConflictActionState *ri_onConflict;
/* for MERGE, lists of MergeActionState (one per MergeMatchKind) */
List *ri_MergeActions[NUM_MERGE_MATCH_KINDS];
diff --git a/src/include/nodes/lockoptions.h b/src/include/nodes/lockoptions.h
index 22864454c3e..7961444eed1 100644
--- a/src/include/nodes/lockoptions.h
+++ b/src/include/nodes/lockoptions.h
@@ -20,7 +20,8 @@
*/
typedef enum LockClauseStrength
{
- LCS_NONE, /* no such clause - only used in PlanRowMark */
+ LCS_NONE, /* no such clause - only used in PlanRowMark
+ * and ON CONFLICT DO SELECT */
LCS_FORKEYSHARE, /* FOR KEY SHARE */
LCS_FORSHARE, /* FOR SHARE */
LCS_FORNOKEYUPDATE, /* FOR NO KEY UPDATE */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index b6ad28618ab..59a7df31aba 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -428,6 +428,7 @@ typedef enum OnConflictAction
ONCONFLICT_NONE, /* No "ON CONFLICT" clause */
ONCONFLICT_NOTHING, /* ON CONFLICT ... DO NOTHING */
ONCONFLICT_UPDATE, /* ON CONFLICT ... DO UPDATE */
+ ONCONFLICT_SELECT, /* ON CONFLICT ... DO SELECT */
} OnConflictAction;
/*
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 646d6ced763..0aec49bdd22 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -200,7 +200,7 @@ typedef struct Query
/* OVERRIDING clause */
OverridingKind override pg_node_attr(query_jumble_ignore);
- OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */
+ OnConflictExpr *onConflict; /* ON CONFLICT DO NOTHING/SELECT/UPDATE */
/*
* The following three fields describe the contents of the RETURNING list
@@ -1417,7 +1417,8 @@ typedef enum WCOKind
WCO_VIEW_CHECK, /* WCO on an auto-updatable view */
WCO_RLS_INSERT_CHECK, /* RLS INSERT WITH CHECK policy */
WCO_RLS_UPDATE_CHECK, /* RLS UPDATE WITH CHECK policy */
- WCO_RLS_CONFLICT_CHECK, /* RLS ON CONFLICT DO UPDATE USING policy */
+ WCO_RLS_CONFLICT_CHECK, /* RLS ON CONFLICT DO SELECT/UPDATE USING
+ * policy */
WCO_RLS_MERGE_UPDATE_CHECK, /* RLS MERGE UPDATE USING policy */
WCO_RLS_MERGE_DELETE_CHECK, /* RLS MERGE DELETE USING policy */
} WCOKind;
@@ -1679,9 +1680,10 @@ typedef struct InferClause
typedef struct OnConflictClause
{
NodeTag type;
- OnConflictAction action; /* DO NOTHING or UPDATE? */
+ OnConflictAction action; /* DO NOTHING, SELECT, or UPDATE */
InferClause *infer; /* Optional index inference clause */
- List *targetList; /* the target list (of ResTarget) */
+ LockClauseStrength lockStrength; /* lock strength for DO SELECT */
+ List *targetList; /* target list (of ResTarget) for DO UPDATE */
Node *whereClause; /* qualifications */
ParseLoc location; /* token location, or -1 if unknown */
} OnConflictClause;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index fb808823acf..27758ec16fe 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -208,6 +208,9 @@ typedef struct PlannerGlobal
/* "flat" list of RTEPermissionInfos */
List *finalrteperminfos;
+ /* list of SubPlanRTInfo nodes */
+ List *subrtinfos;
+
/* "flat" list of PlanRowMarks */
List *finalrowmarks;
@@ -229,6 +232,9 @@ typedef struct PlannerGlobal
/* type OIDs for PARAM_EXEC Params */
List *paramExecTypes;
+ /* info about nodes elided from the plan during setrefs processing */
+ List *elidedNodes;
+
/* highest PlaceHolderVar ID assigned */
Index lastPHId;
@@ -1406,6 +1412,8 @@ typedef struct IndexOptInfo
bool nullsnotdistinct;
/* is uniqueness enforced immediately? */
bool immediate;
+ /* true if paths using this index should be marked disabled */
+ bool disabled;
/* true if index doesn't really exist */
bool hypothetical;
@@ -2244,6 +2252,12 @@ typedef struct CustomPath
* For partial Append, 'subpaths' contains non-partial subpaths followed by
* partial subpaths.
*
+ * Whenever accumulate_append_subpath() allows us to consolidate multiple
+ * levels of Append paths down to one, we store the RTI sets for the omitted
+ * paths in child_append_relid_sets. This is not necessary for planning or
+ * execution; we do it for the benefit of code that wants to inspect the
+ * final plan and understand how it came to be.
+ *
* Note: it is possible for "subpaths" to contain only one, or even no,
* elements. These cases are optimized during create_append_plan.
* In particular, an AppendPath with no subpaths is a "dummy" path that
@@ -2259,6 +2273,7 @@ typedef struct AppendPath
/* Index of first partial path in subpaths; list_length(subpaths) if none */
int first_partial_path;
Cardinality limit_tuples; /* hard limit on output tuples, or -1 */
+ List *child_append_relid_sets;
} AppendPath;
#define IS_DUMMY_APPEND(p) \
@@ -2275,12 +2290,15 @@ extern bool is_dummy_rel(RelOptInfo *rel);
/*
* MergeAppendPath represents a MergeAppend plan, ie, the merging of sorted
* results from several member plans to produce similarly-sorted output.
+ *
+ * child_append_relid_sets has the same meaning here as for AppendPath.
*/
typedef struct MergeAppendPath
{
Path path;
List *subpaths; /* list of component Paths */
Cardinality limit_tuples; /* hard limit on output tuples, or -1 */
+ List *child_append_relid_sets;
} MergeAppendPath;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 4bc6fb5670e..8c9321aab8c 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -131,6 +131,9 @@ typedef struct PlannedStmt
*/
List *subplans;
+ /* a list of SubPlanRTInfo objects */
+ List *subrtinfos;
+
/* indices of subplans that require REWIND */
Bitmapset *rewindPlanIDs;
@@ -149,6 +152,9 @@ typedef struct PlannedStmt
/* non-null if this is utility stmt */
Node *utilityStmt;
+ /* info about nodes elided from the plan during setrefs processing */
+ List *elidedNodes;
+
/*
* DefElem objects added by extensions, e.g. using planner_shutdown_hook
*
@@ -362,11 +368,13 @@ typedef struct ModifyTable
OnConflictAction onConflictAction;
/* List of ON CONFLICT arbiter index OIDs */
List *arbiterIndexes;
+ /* lock strength for ON CONFLICT DO SELECT */
+ LockClauseStrength onConflictLockStrength;
/* INSERT ON CONFLICT DO UPDATE targetlist */
List *onConflictSet;
/* target column numbers for onConflictSet */
List *onConflictCols;
- /* WHERE for ON CONFLICT UPDATE */
+ /* WHERE for ON CONFLICT DO SELECT/UPDATE */
Node *onConflictWhere;
/* RTI of the EXCLUDED pseudo relation */
Index exclRelRTI;
@@ -388,9 +396,16 @@ struct PartitionPruneInfo; /* forward reference to struct below */
typedef struct Append
{
Plan plan;
+
/* RTIs of appendrel(s) formed by this node */
Bitmapset *apprelids;
+
+ /* sets of RTIs of appendrels consolidated into this node */
+ List *child_append_relid_sets;
+
+ /* plans to run */
List *appendplans;
+
/* # of asynchronous plans */
int nasyncplans;
@@ -420,6 +435,10 @@ typedef struct MergeAppend
/* RTIs of appendrel(s) formed by this node */
Bitmapset *apprelids;
+ /* sets of RTIs of appendrels consolidated into this node */
+ List *child_append_relid_sets;
+
+ /* plans to run */
List *mergeplans;
/* these fields are just like the sort-key info in struct Sort: */
@@ -1821,4 +1840,35 @@ typedef enum MonotonicFunction
MONOTONICFUNC_BOTH = MONOTONICFUNC_INCREASING | MONOTONICFUNC_DECREASING,
} MonotonicFunction;
+/*
+ * SubPlanRTInfo
+ *
+ * Information about which range table entries came from which subquery
+ * planning cycles.
+ */
+typedef struct SubPlanRTInfo
+{
+ NodeTag type;
+ char *plan_name;
+ Index rtoffset;
+ bool dummy;
+} SubPlanRTInfo;
+
+/*
+ * ElidedNode
+ *
+ * Information about nodes elided from the final plan tree: trivial subquery
+ * scans, and single-child Append and MergeAppend nodes.
+ *
+ * plan_node_id is that of the surviving plan node, the sole child of the
+ * one which was elided.
+ */
+typedef struct ElidedNode
+{
+ NodeTag type;
+ int plan_node_id;
+ NodeTag elided_type;
+ Bitmapset *relids;
+} ElidedNode;
+
#endif /* PLANNODES_H */
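
Given the ElidedNode definition above, plan-inspection code can report what
was removed by walking the PlannedStmt's list. A hedged sketch using the
standard pg_list idioms (stmt is a PlannedStmt *; bmsToString is the
existing nodes/ debugging helper):

    ListCell   *lc;

    foreach(lc, stmt->elidedNodes)
    {
        ElidedNode *en = lfirst_node(ElidedNode, lc);

        elog(DEBUG1, "plan node %d absorbed an elided node of tag %d covering relids %s",
             en->plan_node_id,
             (int) en->elided_type,
             bmsToString(en->relids));
    }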
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 5211cadc258..384df50c80a 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -20,6 +20,7 @@
#include "access/attnum.h"
#include "access/cmptype.h"
#include "nodes/bitmapset.h"
+#include "nodes/lockoptions.h"
#include "nodes/pg_list.h"
@@ -2370,7 +2371,7 @@ typedef struct FromExpr
typedef struct OnConflictExpr
{
NodeTag type;
- OnConflictAction action; /* DO NOTHING or UPDATE? */
+ OnConflictAction action; /* DO NOTHING, SELECT, or UPDATE */
/* Arbiter */
List *arbiterElems; /* unique index arbiter list (of
@@ -2378,9 +2379,14 @@ typedef struct OnConflictExpr
Node *arbiterWhere; /* unique index arbiter WHERE clause */
Oid constraint; /* pg_constraint OID for arbiter */
- /* ON CONFLICT UPDATE */
+ /* ON CONFLICT DO SELECT */
+ LockClauseStrength lockStrength; /* strength of lock for DO SELECT */
+
+ /* ON CONFLICT DO UPDATE */
List *onConflictSet; /* List of ON CONFLICT SET TargetEntrys */
- Node *onConflictWhere; /* qualifiers to restrict UPDATE to */
+
+ /* both ON CONFLICT DO SELECT and UPDATE */
+ Node *onConflictWhere; /* qualifiers to restrict SELECT/UPDATE */
int exclRelIndex; /* RT index of 'excluded' relation */
List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */
} OnConflictExpr;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 224750859c3..b8b2204eeb5 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -17,6 +17,26 @@
#include "nodes/bitmapset.h"
#include "nodes/pathnodes.h"
+/* Hook for plugins to get control in build_simple_rel() */
+typedef void (*build_simple_rel_hook_type) (PlannerInfo *root,
+ RelOptInfo *rel,
+ RangeTblEntry *rte);
+extern PGDLLIMPORT build_simple_rel_hook_type build_simple_rel_hook;
+
+/*
+ * Everything in subpaths or partial_subpaths will become part of the
+ * Append node's subpaths list. Partial and non-partial subpaths can be
+ * mixed in the same Append node only if it is parallel-aware.
+ *
+ * See the comments for AppendPath for the meaning and purpose of the
+ * child_append_relid_sets field.
+ */
+typedef struct AppendPathInput
+{
+ List *subpaths;
+ List *partial_subpaths;
+ List *child_append_relid_sets;
+} AppendPathInput;
/* Hook for plugins to get control during joinrel setup */
typedef void (*joinrel_setup_hook_type) (PlannerInfo *root,
@@ -41,7 +61,7 @@ extern bool add_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
List *pathkeys, Relids required_outer);
extern void add_partial_path(RelOptInfo *parent_rel, Path *new_path);
extern bool add_partial_path_precheck(RelOptInfo *parent_rel,
- int disabled_nodes,
+ int disabled_nodes, Cost startup_cost,
Cost total_cost, List *pathkeys);
extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
@@ -78,14 +98,16 @@ extern TidRangePath *create_tidrangescan_path(PlannerInfo *root,
List *tidrangequals,
Relids required_outer,
int parallel_workers);
+
extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
- List *subpaths, List *partial_subpaths,
+ AppendPathInput input,
List *pathkeys, Relids required_outer,
int parallel_workers, bool parallel_aware,
double rows);
extern MergeAppendPath *create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
+ List *child_append_relid_sets,
List *pathkeys,
Relids required_outer);
extern GroupResultPath *create_group_result_path(PlannerInfo *root,
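
With the new signature, create_append_path callers bundle the three lists
into an AppendPathInput rather than passing them positionally; designated
initializers keep omitted fields NIL. A minimal conversion sketch (subpaths,
rel, and apath are hypothetical locals):

    AppendPathInput input = {
        .subpaths = subpaths,           /* non-partial subpaths */
        .partial_subpaths = NIL,
        .child_append_relid_sets = NIL, /* nothing consolidated */
    };

    apath = create_append_path(root, rel, input,
                               NIL,     /* pathkeys */
                               NULL,    /* required_outer */
                               0,       /* parallel_workers */
                               false,   /* parallel_aware */
                               -1);     /* rows: let costing decide */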
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index 8d7cc6d9886..09baf1a6916 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -17,14 +17,6 @@
#include "nodes/pathnodes.h"
#include "utils/relcache.h"
-/* Hook for plugins to get control in get_relation_info() */
-typedef void (*get_relation_info_hook_type) (PlannerInfo *root,
- Oid relationObjectId,
- bool inhparent,
- RelOptInfo *rel);
-extern PGDLLIMPORT get_relation_info_hook_type get_relation_info_hook;
-
-
extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
bool inhparent, RelOptInfo *rel);
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h
index ae3f7f2edb6..80509773c01 100644
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -35,6 +35,7 @@ extern PGDLLIMPORT planner_hook_type planner_hook;
/* Hook for plugins to get control after PlannerGlobal is initialized */
typedef void (*planner_setup_hook_type) (PlannerGlobal *glob, Query *parse,
const char *query_string,
+ int cursorOptions,
double *tuple_fraction,
ExplainState *es);
extern PGDLLIMPORT planner_setup_hook_type planner_setup_hook;
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index a9bffb8a78f..f23e21f318b 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -153,10 +153,6 @@ typedef Node *(*CoerceParamHook) (ParseState *pstate, Param *param,
*
* p_grouping_nsitem: the ParseNamespaceItem that represents the grouping step.
*
- * p_is_insert: true to process assignment expressions like INSERT, false
- * to process them like UPDATE. (Note this can change intra-statement, for
- * cases like INSERT ON CONFLICT UPDATE.)
- *
* p_windowdefs: list of WindowDefs representing WINDOW and OVER clauses.
* We collect these while transforming expressions and then transform them
* afterwards (so that any resjunk tlist items needed for the sort/group
@@ -209,7 +205,6 @@ struct ParseState
Relation p_target_relation; /* INSERT/UPDATE/DELETE/MERGE target rel */
ParseNamespaceItem *p_target_nsitem; /* target rel's NSItem, or NULL */
ParseNamespaceItem *p_grouping_nsitem; /* NSItem for grouping, or NULL */
- bool p_is_insert; /* process assignment like INSERT not UPDATE */
List *p_windowdefs; /* raw representations of window clauses */
ParseExprKind p_expr_kind; /* what kind of expression we're parsing */
int p_next_resno; /* next targetlist resno to assign */
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 35761f509ec..789663edd93 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -276,46 +276,73 @@ pg_ceil_log2_64(uint64 num)
return pg_leftmost_one_pos64(num - 1) + 1;
}
-extern int pg_popcount32_portable(uint32 word);
-extern int pg_popcount64_portable(uint64 word);
extern uint64 pg_popcount_portable(const char *buf, int bytes);
extern uint64 pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask);
-#ifdef HAVE_X86_64_POPCNTQ
+#if defined(HAVE_X86_64_POPCNTQ) || defined(USE_SVE_POPCNT_WITH_RUNTIME_CHECK)
/*
- * Attempt to use SSE4.2 or AVX-512 instructions, but perform a runtime check
+ * Attempt to use specialized CPU instructions, but perform a runtime check
* first.
*/
-extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
-extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
-#elif defined(USE_NEON)
-/* Use the Neon version of pg_popcount{32,64} without function pointer. */
-extern int pg_popcount32(uint32 word);
-extern int pg_popcount64(uint64 word);
-
-/*
- * We can try to use an SVE-optimized pg_popcount() on some systems. For that,
- * we do use a function pointer.
- */
-#ifdef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
-extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
-extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
#else
+/* Use a portable implementation -- no need for a function pointer. */
extern uint64 pg_popcount_optimized(const char *buf, int bytes);
extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
+
#endif
-#else
-/* Use a portable implementation -- no need for a function pointer. */
-extern int pg_popcount32(uint32 word);
-extern int pg_popcount64(uint64 word);
-extern uint64 pg_popcount_optimized(const char *buf, int bytes);
-extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
+/*
+ * pg_popcount32
+ * Return the number of 1 bits set in word
+ */
+static inline int
+pg_popcount32(uint32 word)
+{
+#ifdef HAVE__BUILTIN_POPCOUNT
+ return __builtin_popcount(word);
+#else /* !HAVE__BUILTIN_POPCOUNT */
+ int result = 0;
+
+ while (word != 0)
+ {
+ result += pg_number_of_ones[word & 255];
+ word >>= 8;
+ }
+
+ return result;
+#endif /* HAVE__BUILTIN_POPCOUNT */
+}
+/*
+ * pg_popcount64
+ * Return the number of 1 bits set in word
+ */
+static inline int
+pg_popcount64(uint64 word)
+{
+#ifdef HAVE__BUILTIN_POPCOUNT
+#if SIZEOF_LONG == 8
+ return __builtin_popcountl(word);
+#elif SIZEOF_LONG_LONG == 8
+ return __builtin_popcountll(word);
+#else
+#error "cannot find integer of the same size as uint64_t"
#endif
+#else /* !HAVE__BUILTIN_POPCOUNT */
+ int result = 0;
+
+ while (word != 0)
+ {
+ result += pg_number_of_ones[word & 255];
+ word >>= 8;
+ }
+
+ return result;
+#endif /* HAVE__BUILTIN_POPCOUNT */
+}
/*
* Returns the number of 1-bits in buf.
@@ -333,13 +360,7 @@ pg_popcount(const char *buf, int bytes)
* We set the threshold to the point at which we'll first use special
* instructions in the optimized version.
*/
-#if SIZEOF_VOID_P >= 8
- int threshold = 8;
-#else
- int threshold = 4;
-#endif
-
- if (bytes < threshold)
+ if (bytes < 8)
{
uint64 popcnt = 0;
@@ -364,13 +385,7 @@ pg_popcount_masked(const char *buf, int bytes, bits8 mask)
* We set the threshold to the point at which we'll first use special
* instructions in the optimized version.
*/
-#if SIZEOF_VOID_P >= 8
- int threshold = 8;
-#else
- int threshold = 4;
-#endif
-
- if (bytes < threshold)
+ if (bytes < 8)
{
uint64 popcnt = 0;
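
Since pg_popcount32 and pg_popcount64 are now header inlines, small
fixed-width callers compile down to a single POPCNT where the builtin is
available, with no function-pointer indirection. A quick self-check of the
semantics:

    uint64      w = UINT64CONST(0xF0F0F0F0F0F0F0F0);

    Assert(pg_popcount64(w) == 32);             /* 8 nibbles of 4 set bits */
    Assert(pg_popcount32((uint32) w) == 16);    /* low half only */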
diff --git a/src/include/postgres.h b/src/include/postgres.h
index 8b92f453e7a..a7a6584e762 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -533,9 +533,9 @@ Float8GetDatum(float8 X)
*/
#define Int64GetDatumFast(X) \
- (AssertVariableIsOfTypeMacro(X, int64), Int64GetDatum(X))
+ (StaticAssertVariableIsOfTypeMacro(X, int64), Int64GetDatum(X))
#define Float8GetDatumFast(X) \
- (AssertVariableIsOfTypeMacro(X, double), Float8GetDatum(X))
+ (StaticAssertVariableIsOfTypeMacro(X, double), Float8GetDatum(X))
/* ----------------------------------------------------------------
diff --git a/src/include/postmaster/proctypelist.h b/src/include/postmaster/proctypelist.h
index 0b99eaabfd0..feac19ba207 100644
--- a/src/include/postmaster/proctypelist.h
+++ b/src/include/postmaster/proctypelist.h
@@ -25,27 +25,27 @@
*/
/*
- * List of process types (symbol, description, Main function, shmem_attach)
- * entries.
+ * List of process types (symbol, category, description, Main function,
+ * shmem_attach) entries.
*/
-/* bktype, description, main_func, shmem_attach */
-PG_PROCTYPE(B_ARCHIVER, gettext_noop("archiver"), PgArchiverMain, true)
-PG_PROCTYPE(B_AUTOVAC_LAUNCHER, gettext_noop("autovacuum launcher"), AutoVacLauncherMain, true)
-PG_PROCTYPE(B_AUTOVAC_WORKER, gettext_noop("autovacuum worker"), AutoVacWorkerMain, true)
-PG_PROCTYPE(B_BACKEND, gettext_noop("client backend"), BackendMain, true)
-PG_PROCTYPE(B_BG_WORKER, gettext_noop("background worker"), BackgroundWorkerMain, true)
-PG_PROCTYPE(B_BG_WRITER, gettext_noop("background writer"), BackgroundWriterMain, true)
-PG_PROCTYPE(B_CHECKPOINTER, gettext_noop("checkpointer"), CheckpointerMain, true)
-PG_PROCTYPE(B_DEAD_END_BACKEND, gettext_noop("dead-end client backend"), BackendMain, true)
-PG_PROCTYPE(B_INVALID, gettext_noop("unrecognized"), NULL, false)
-PG_PROCTYPE(B_IO_WORKER, gettext_noop("io worker"), IoWorkerMain, true)
-PG_PROCTYPE(B_LOGGER, gettext_noop("syslogger"), SysLoggerMain, false)
-PG_PROCTYPE(B_SLOTSYNC_WORKER, gettext_noop("slotsync worker"), ReplSlotSyncWorkerMain, true)
-PG_PROCTYPE(B_STANDALONE_BACKEND, gettext_noop("standalone backend"), NULL, false)
-PG_PROCTYPE(B_STARTUP, gettext_noop("startup"), StartupProcessMain, true)
-PG_PROCTYPE(B_WAL_RECEIVER, gettext_noop("walreceiver"), WalReceiverMain, true)
-PG_PROCTYPE(B_WAL_SENDER, gettext_noop("walsender"), NULL, true)
-PG_PROCTYPE(B_WAL_SUMMARIZER, gettext_noop("walsummarizer"), WalSummarizerMain, true)
-PG_PROCTYPE(B_WAL_WRITER, gettext_noop("walwriter"), WalWriterMain, true)
+/* bktype, bkcategory, description, main_func, shmem_attach */
+PG_PROCTYPE(B_ARCHIVER, "archiver", gettext_noop("archiver"), PgArchiverMain, true)
+PG_PROCTYPE(B_AUTOVAC_LAUNCHER, "autovacuum", gettext_noop("autovacuum launcher"), AutoVacLauncherMain, true)
+PG_PROCTYPE(B_AUTOVAC_WORKER, "autovacuum", gettext_noop("autovacuum worker"), AutoVacWorkerMain, true)
+PG_PROCTYPE(B_BACKEND, "backend", gettext_noop("client backend"), BackendMain, true)
+PG_PROCTYPE(B_BG_WORKER, "bgworker", gettext_noop("background worker"), BackgroundWorkerMain, true)
+PG_PROCTYPE(B_BG_WRITER, "bgwriter", gettext_noop("background writer"), BackgroundWriterMain, true)
+PG_PROCTYPE(B_CHECKPOINTER, "checkpointer", gettext_noop("checkpointer"), CheckpointerMain, true)
+PG_PROCTYPE(B_DEAD_END_BACKEND, "backend", gettext_noop("dead-end client backend"), BackendMain, true)
+PG_PROCTYPE(B_INVALID, "postmaster", gettext_noop("unrecognized"), NULL, false)
+PG_PROCTYPE(B_IO_WORKER, "ioworker", gettext_noop("io worker"), IoWorkerMain, true)
+PG_PROCTYPE(B_LOGGER, "syslogger", gettext_noop("syslogger"), SysLoggerMain, false)
+PG_PROCTYPE(B_SLOTSYNC_WORKER, "slotsyncworker", gettext_noop("slotsync worker"), ReplSlotSyncWorkerMain, true)
+PG_PROCTYPE(B_STANDALONE_BACKEND, "backend", gettext_noop("standalone backend"), NULL, false)
+PG_PROCTYPE(B_STARTUP, "startup", gettext_noop("startup"), StartupProcessMain, true)
+PG_PROCTYPE(B_WAL_RECEIVER, "walreceiver", gettext_noop("walreceiver"), WalReceiverMain, true)
+PG_PROCTYPE(B_WAL_SENDER, "walsender", gettext_noop("walsender"), NULL, true)
+PG_PROCTYPE(B_WAL_SUMMARIZER, "walsummarizer", gettext_noop("walsummarizer"), WalSummarizerMain, true)
+PG_PROCTYPE(B_WAL_WRITER, "walwriter", gettext_noop("walwriter"), WalWriterMain, true)
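
For context on how this list header is consumed: each translation unit defines PG_PROCTYPE to expand the entries into whatever table it needs, then includes the list. A minimal sketch of a consumer, assuming the header is meant for repeated inclusion as X-macro lists usually are (ProcTypeInfo and ProcTypeInfos are illustrative names, not part of this patch):

    typedef struct ProcTypeInfo
    {
        const char *category;       /* the new bkcategory column */
        const char *description;    /* translatable description */
        bool        shmem_attach;   /* does this process attach to shmem? */
    } ProcTypeInfo;

    /* Expand each list entry into a designated array initializer. */
    #define PG_PROCTYPE(bktype, bkcategory, desc, main_func, shmem_attach) \
        [bktype] = {bkcategory, desc, shmem_attach},
    static const ProcTypeInfo ProcTypeInfos[] = {
    #include "postmaster/proctypelist.h"
    };
    #undef PG_PROCTYPE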
diff --git a/src/include/replication/logical.h b/src/include/replication/logical.h
index 7f03537bda7..bc9d4ece672 100644
--- a/src/include/replication/logical.h
+++ b/src/include/replication/logical.h
@@ -148,7 +148,8 @@ extern bool filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, ReplOriginI
extern void ResetLogicalStreamingState(void);
extern void UpdateDecodingStats(LogicalDecodingContext *ctx);
-extern bool LogicalReplicationSlotHasPendingWal(XLogRecPtr end_of_wal);
+extern XLogRecPtr LogicalReplicationSlotCheckPendingWal(XLogRecPtr end_of_wal,
+ XLogRecPtr scan_cutoff_lsn);
extern XLogRecPtr LogicalSlotAdvanceAndCheckSnapState(XLogRecPtr moveto,
bool *found_consistent_snapshot);
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index f465e430cc6..4b4709f6e2c 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -185,8 +185,11 @@ typedef struct ReplicationSlot
/* is this slot defined */
bool in_use;
- /* Who is streaming out changes for this slot? 0 in unused slots. */
- pid_t active_pid;
+ /*
+ * Who is streaming out changes for this slot? INVALID_PROC_NUMBER in
+ * unused slots.
+ */
+ ProcNumber active_proc;
/* any outstanding modifications? */
bool just_dirtied;
@@ -212,7 +215,7 @@ typedef struct ReplicationSlot
/* is somebody performing io on this slot? */
LWLock io_in_progress_lock;
- /* Condition variable signaled when active_pid changes */
+ /* Condition variable signaled when active_proc changes */
ConditionVariable active_cv;
/* all the remaining data is only used for logical slots */
diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h
index c1285fdd1bc..33fb7f552b4 100644
--- a/src/include/replication/worker_internal.h
+++ b/src/include/replication/worker_internal.h
@@ -289,7 +289,8 @@ extern void ProcessSyncingTablesForApply(XLogRecPtr current_lsn);
extern void ProcessSequencesForSync(void);
pg_noreturn extern void FinishSyncWorker(void);
-extern void InvalidateSyncingRelStates(Datum arg, int cacheid, uint32 hashvalue);
+extern void InvalidateSyncingRelStates(Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
extern void launch_sync_worker(LogicalRepWorkerType wtype, int nsyncworkers,
Oid relid, TimestampTz *last_start_time);
extern void ProcessSyncingRelations(XLogRecPtr current_lsn);
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 413233bcd39..8ac466fd346 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -55,12 +55,23 @@ typedef int File;
#define IO_DIRECT_WAL 0x02
#define IO_DIRECT_WAL_INIT 0x04
+enum FileExtendMethod
+{
+#ifdef HAVE_POSIX_FALLOCATE
+ FILE_EXTEND_METHOD_POSIX_FALLOCATE,
+#endif
+ FILE_EXTEND_METHOD_WRITE_ZEROS,
+};
+
+/* Default to the first available file_extend_method. */
+#define DEFAULT_FILE_EXTEND_METHOD 0
/* GUC parameter */
extern PGDLLIMPORT int max_files_per_process;
extern PGDLLIMPORT bool data_sync_retry;
extern PGDLLIMPORT int recovery_init_sync_method;
extern PGDLLIMPORT int io_direct_flags;
+extern PGDLLIMPORT int file_extend_method;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()
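
The new GUC selects between two preallocation strategies: posix_fallocate() where available, or explicitly writing zero-filled blocks. A rough sketch of the shape this logic takes, under the assumption it lives in fd.c (extend_file is a hypothetical helper, not the patch's actual function):

    /* Sketch: grow a file by add_size bytes starting at offset. */
    static int
    extend_file(int fd, off_t offset, off_t add_size)
    {
    #ifdef HAVE_POSIX_FALLOCATE
        if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE)
            return posix_fallocate(fd, offset, add_size);   /* returns errno code */
    #endif
        /* FILE_EXTEND_METHOD_WRITE_ZEROS: write zeroes until done */
        static const char zbuf[8192];   /* zero-initialized */

        while (add_size > 0)
        {
            ssize_t nwritten = pwrite(fd, zbuf,
                                      Min((size_t) add_size, sizeof(zbuf)),
                                      offset);

            if (nwritten < 0)
                return errno;
            offset += nwritten;
            add_size -= nwritten;
        }
        return 0;
    }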
diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h
index 3aeada554b2..10c7b065861 100644
--- a/src/include/storage/pg_shmem.h
+++ b/src/include/storage/pg_shmem.h
@@ -32,9 +32,9 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */
#define PGShmemMagic 679834894
pid_t creatorPID; /* PID of creating process (set but unread) */
Size totalsize; /* total size of segment */
- Size freeoffset; /* offset to first free space */
+ Size content_offset; /* offset to the data, i.e. size of this
+ * header */
dsm_handle dsm_control; /* ID of dynamic shared memory control seg */
- void *index; /* pointer to ShmemIndex table */
#ifndef WIN32 /* Windows doesn't have useful inode#s */
dev_t device; /* device data directory is on */
ino_t inode; /* inode number of data directory */
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 039bc8353be..23e5cd98161 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -17,6 +17,7 @@
#include "access/clog.h"
#include "access/xlogdefs.h"
#include "lib/ilist.h"
+#include "miscadmin.h"
#include "storage/latch.h"
#include "storage/lock.h"
#include "storage/pg_sema.h"
@@ -166,7 +167,7 @@ typedef enum
* but its myProcLocks[] lists are valid.
*
* We allow many fields of this struct to be accessed without locks, such as
- * delayChkptFlags and isRegularBackend. However, keep in mind that writing
+ * delayChkptFlags and backendType. However, keep in mind that writing
* mirrored ones (see below) requires holding ProcArrayLock or XidGenLock in
* at least shared mode, so that pgxactoff does not change concurrently.
*
@@ -233,14 +234,17 @@ struct PGPROC
Oid tempNamespaceId; /* OID of temp schema this backend is
* using */
- bool isRegularBackend; /* true if it's a regular backend. */
+ BackendType backendType; /* what kind of process is this? */
/*
* While in hot standby mode, shows that a conflict signal has been sent
* for the current transaction. Set/cleared while holding ProcArrayLock,
* though not required. Accessed without lock, if needed.
+ *
+ * This is a bitmask; each bit corresponds to a RecoveryConflictReason
+ * enum value.
*/
- bool recoveryConflictPending;
+ pg_atomic_uint32 pendingRecoveryConflicts;
/*
* Info about LWLock the process is currently waiting for, if any.
@@ -416,6 +420,16 @@ typedef struct PROC_HDR
/* Length of allProcs array */
uint32 allProcCount;
+
+ /*
+ * This spinlock protects the below freelists of PGPROC structures. We
+ * cannot use an LWLock because the LWLock manager depends on already
+ * having a PGPROC and a wait semaphore! But these structures are touched
+ * relatively infrequently (only at backend startup or shutdown) and not
+ * for very long, so a spinlock is okay.
+ */
+ slock_t freeProcsLock;
+
/* Head of list of free PGPROC structures */
dlist_head freeProcs;
/* Head of list of autovacuum & special worker free PGPROC structures */
@@ -424,6 +438,7 @@ typedef struct PROC_HDR
dlist_head bgworkerFreeProcs;
/* Head of list of walsender free PGPROC structures */
dlist_head walsenderFreeProcs;
+
/* First pgproc waiting for group XID clear */
pg_atomic_uint32 procArrayGroupFirst;
/* First pgproc waiting for group transaction status update */
@@ -485,7 +500,6 @@ extern PGDLLIMPORT int IdleSessionTimeout;
extern PGDLLIMPORT bool log_lock_waits;
#ifdef EXEC_BACKEND
-extern PGDLLIMPORT slock_t *ProcStructLock;
extern PGDLLIMPORT PGPROC *AuxiliaryProcs;
#endif
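
Dropping the global ProcStructLock export goes with the new freeProcsLock member above: freelist manipulation now follows the standard spinlock discipline on PROC_HDR itself. A sketch of the acquire side, under the assumption that InitProcess-style code pops from freeProcs this way (PGPROC's dlist link member is named links in upstream proc.h):

    PGPROC     *proc = NULL;

    SpinLockAcquire(&ProcGlobal->freeProcsLock);
    if (!dlist_is_empty(&ProcGlobal->freeProcs))
        proc = dlist_container(PGPROC, links,
                               dlist_pop_head_node(&ProcGlobal->freeProcs));
    SpinLockRelease(&ProcGlobal->freeProcsLock);

    if (proc == NULL)
        ereport(FATAL,
                (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
                 errmsg("sorry, too many clients already")));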
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index da7b5e78d30..c5ab1574fe3 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -77,14 +77,15 @@ extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin,
bool excludeXmin0, bool allDbs, int excludeVacuum,
int *nvxids);
extern VirtualTransactionId *GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid);
-extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode);
-extern pid_t SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
- bool conflictPending);
+
+extern bool SignalRecoveryConflict(PGPROC *proc, pid_t pid, RecoveryConflictReason reason);
+extern bool SignalRecoveryConflictWithVirtualXID(VirtualTransactionId vxid, RecoveryConflictReason reason);
+extern void SignalRecoveryConflictWithDatabase(Oid databaseid, RecoveryConflictReason reason);
+
extern bool MinimumActiveBackends(int min);
extern int CountDBBackends(Oid databaseid);
extern int CountDBConnections(Oid databaseid);
-extern void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending);
extern int CountUserBackends(Oid roleid);
extern bool CountOtherDBBackends(Oid databaseId,
int *nbackends, int *nprepared);
diff --git a/src/include/storage/proclist.h b/src/include/storage/proclist.h
index 965609145e4..9caf109a845 100644
--- a/src/include/storage/proclist.h
+++ b/src/include/storage/proclist.h
@@ -204,8 +204,8 @@ proclist_pop_head_node_offset(proclist_head *list, size_t node_offset)
* node with proclist_delete(list, iter.cur, node_offset).
*/
#define proclist_foreach_modify(iter, lhead, link_member) \
- for (AssertVariableIsOfTypeMacro(iter, proclist_mutable_iter), \
- AssertVariableIsOfTypeMacro(lhead, proclist_head *), \
+ for (StaticAssertVariableIsOfTypeMacro(iter, proclist_mutable_iter), \
+ StaticAssertVariableIsOfTypeMacro(lhead, proclist_head *), \
(iter).cur = (lhead)->head, \
(iter).next = (iter).cur == INVALID_PROC_NUMBER ? INVALID_PROC_NUMBER : \
proclist_node_get((iter).cur, \
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index e52b8eb7697..348fba53a93 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -36,20 +36,12 @@ typedef enum
PROCSIG_BARRIER, /* global barrier interrupt */
PROCSIG_LOG_MEMORY_CONTEXT, /* ask backend to log the memory contexts */
PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */
-
- /* Recovery conflict reasons */
- PROCSIG_RECOVERY_CONFLICT_FIRST,
- PROCSIG_RECOVERY_CONFLICT_DATABASE = PROCSIG_RECOVERY_CONFLICT_FIRST,
- PROCSIG_RECOVERY_CONFLICT_TABLESPACE,
- PROCSIG_RECOVERY_CONFLICT_LOCK,
- PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
- PROCSIG_RECOVERY_CONFLICT_LOGICALSLOT,
- PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
- PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
- PROCSIG_RECOVERY_CONFLICT_LAST = PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+ PROCSIG_RECOVERY_CONFLICT, /* backend is blocking recovery, check
+ * PGPROC->pendingRecoveryConflicts for the
+ * reason */
} ProcSignalReason;
-#define NUM_PROCSIGNALS (PROCSIG_RECOVERY_CONFLICT_LAST + 1)
+#define NUM_PROCSIGNALS (PROCSIG_RECOVERY_CONFLICT + 1)
typedef enum
{
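
Collapsing the per-reason signals into one PROCSIG_RECOVERY_CONFLICT means the reason itself must travel out of band, via the pendingRecoveryConflicts bitmask added to PGPROC earlier in this patch. The expected pattern, sketched under that assumption (proc, pid, and procNumber identify the target backend):

    /* Sender (startup process): record the reason bit, then signal. */
    pg_atomic_fetch_or_u32(&proc->pendingRecoveryConflicts,
                           1 << RECOVERY_CONFLICT_SNAPSHOT);
    SendProcSignal(pid, PROCSIG_RECOVERY_CONFLICT, procNumber);

    /* Receiver: atomically drain the mask, then handle each reason. */
    uint32  reasons = pg_atomic_exchange_u32(&MyProc->pendingRecoveryConflicts, 0);

    for (int reason = 0; reason < NUM_RECOVERY_CONFLICT_REASONS; reason++)
    {
        if (reasons & (1 << reason))
        {
            /* ... handle this RecoveryConflictReason ... */
        }
    }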
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 2522cae0c31..3d9070e79d4 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -119,6 +119,10 @@
* gcc from thinking it can cache the values of shared-memory fields
* across the asm code. Add "cc" if your asm code changes the condition
* code register, and also list any temp registers the code uses.
+ *
+ * If you need branch target labels within the asm block, include "%="
+ * in the label names to make them distinct across multiple asm blocks
+ * within a source file.
*----------
*/
@@ -147,11 +151,11 @@ tas(volatile slock_t *lock)
* leave it alone.
*/
__asm__ __volatile__(
- " cmpb $0,%1 \n"
- " jne 1f \n"
- " lock \n"
- " xchgb %0,%1 \n"
- "1: \n"
+ " cmpb $0,%1 \n"
+ " jne TAS%=_out \n"
+ " lock \n"
+ " xchgb %0,%1 \n"
+ "TAS%=_out: \n"
: "+q"(_res), "+m"(*lock)
: /* no inputs */
: "memory", "cc");
@@ -421,17 +425,17 @@ tas(volatile slock_t *lock)
__asm__ __volatile__(
" lwarx %0,0,%3,1 \n"
" cmpwi %0,0 \n"
-" bne 1f \n"
+" bne TAS%=_fail \n"
" addi %0,%0,1 \n"
" stwcx. %0,0,%3 \n"
-" beq 2f \n"
-"1: \n"
+" beq TAS%=_ok \n"
+"TAS%=_fail: \n"
" li %1,1 \n"
-" b 3f \n"
-"2: \n"
+" b TAS%=_out \n"
+"TAS%=_ok: \n"
" lwsync \n"
" li %1,0 \n"
-"3: \n"
+"TAS%=_out: \n"
: "=&b"(_t), "=r"(_res), "+m"(*lock)
: "r"(lock)
: "memory", "cc");
diff --git a/src/include/storage/shmem.h b/src/include/storage/shmem.h
index e71a51dfe84..89d45287c17 100644
--- a/src/include/storage/shmem.h
+++ b/src/include/storage/shmem.h
@@ -29,8 +29,7 @@
extern PGDLLIMPORT slock_t *ShmemLock;
typedef struct PGShmemHeader PGShmemHeader; /* avoid including
* storage/pg_shmem.h here */
-extern void InitShmemAccess(PGShmemHeader *seghdr);
-extern void InitShmemAllocation(void);
+extern void InitShmemAllocator(PGShmemHeader *seghdr);
extern void *ShmemAlloc(Size size);
extern void *ShmemAllocNoError(Size size);
extern bool ShmemAddrIsValid(const void *addr);
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index 7b10932635a..c63a4f2cc6a 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -16,7 +16,6 @@
#include "datatype/timestamp.h"
#include "storage/lock.h"
-#include "storage/procsignal.h"
#include "storage/relfilelocator.h"
#include "storage/standbydefs.h"
@@ -25,6 +24,45 @@ extern PGDLLIMPORT int max_standby_archive_delay;
extern PGDLLIMPORT int max_standby_streaming_delay;
extern PGDLLIMPORT bool log_recovery_conflict_waits;
+/* Recovery conflict reasons */
+typedef enum
+{
+ /* Backend is connected to a database that is being dropped */
+ RECOVERY_CONFLICT_DATABASE,
+
+ /* Backend is using a tablespace that is being dropped */
+ RECOVERY_CONFLICT_TABLESPACE,
+
+ /* Backend is holding a lock that is blocking recovery */
+ RECOVERY_CONFLICT_LOCK,
+
+ /* Backend is holding a snapshot that is blocking recovery */
+ RECOVERY_CONFLICT_SNAPSHOT,
+
+ /* Backend is using a logical replication slot that must be invalidated */
+ RECOVERY_CONFLICT_LOGICALSLOT,
+
+ /* Backend is holding a pin on a buffer that is blocking recovery */
+ RECOVERY_CONFLICT_BUFFERPIN,
+
+ /*
+ * The backend is requested to check for deadlocks. The startup process
+ * doesn't check for deadlock directly, because we want to kill one of the
+ * other backends instead of the startup process.
+ */
+ RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+
+ /*
+	 * Like RECOVERY_CONFLICT_STARTUP_DEADLOCK, but the suspected deadlock
+	 * involves a buffer pin that some other backend is holding.  That needs
+	 * special checking because the normal deadlock detector doesn't track
+	 * buffer pins.

+ */
+ RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK,
+} RecoveryConflictReason;
+
+#define NUM_RECOVERY_CONFLICT_REASONS (RECOVERY_CONFLICT_BUFFERPIN_DEADLOCK + 1)
+
extern void InitRecoveryTransactionEnvironment(void);
extern void ShutdownRecoveryTransactionEnvironment(void);
@@ -43,7 +81,7 @@ extern void CheckRecoveryConflictDeadlock(void);
extern void StandbyDeadLockHandler(void);
extern void StandbyTimeoutHandler(void);
extern void StandbyLockTimeoutHandler(void);
-extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
+extern void LogRecoveryConflict(RecoveryConflictReason reason, TimestampTz wait_start,
TimestampTz now, VirtualTransactionId *wait_list,
bool still_waiting);
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index 54ddee875ed..5bc5bcfb20d 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -74,7 +74,7 @@ extern void die(SIGNAL_ARGS);
pg_noreturn extern void quickdie(SIGNAL_ARGS);
extern void StatementCancelHandler(SIGNAL_ARGS);
pg_noreturn extern void FloatExceptionHandler(SIGNAL_ARGS);
-extern void HandleRecoveryConflictInterrupt(ProcSignalReason reason);
+extern void HandleRecoveryConflictInterrupt(void);
extern void ProcessClientReadInterrupt(bool blocked);
extern void ProcessClientWriteInterrupt(bool blocked);
diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h
index cea417a91b5..6e2d67ee4a5 100644
--- a/src/include/tsearch/ts_locale.h
+++ b/src/include/tsearch/ts_locale.h
@@ -37,10 +37,34 @@ typedef struct
/* The second argument of t_iseq() must be a plain ASCII character */
#define t_iseq(x,c) (TOUCHAR(x) == (unsigned char) (c))
-#define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s))
+/* Copy multibyte character of known byte length, return byte length. */
+static inline int
+ts_copychar_with_len(void *dest, const void *src, int length)
+{
+ memcpy(dest, src, length);
+ return length;
+}
+
+/* Copy multibyte character from null-terminated string, return byte length. */
+static inline int
+ts_copychar_cstr(void *dest, const void *src)
+{
+ return ts_copychar_with_len(dest, src, pg_mblen_cstr((const char *) src));
+}
+
+/* Historical macro for the above. */
+#define COPYCHAR ts_copychar_cstr
+
+#define GENERATE_T_ISCLASS_DECL(character_class) \
+extern int t_is##character_class##_with_len(const char *ptr, int len); \
+extern int t_is##character_class##_cstr(const char *ptr); \
+extern int t_is##character_class##_unbounded(const char *ptr); \
+\
+/* deprecated */ \
+extern int t_is##character_class(const char *ptr);
-extern int t_isalpha(const char *ptr);
-extern int t_isalnum(const char *ptr);
+GENERATE_T_ISCLASS_DECL(alnum);
+GENERATE_T_ISCLASS_DECL(alpha);
extern bool tsearch_readline_begin(tsearch_readline_state *stp,
const char *filename);
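
The _with_len variant lets callers that already computed the character's length skip a second pg_mblen_cstr() call. A typical copy loop under the new API might look like this (input and output are assumed caller-provided buffers, output at least as large as input):

    const char *src = input;
    char       *dst = output;

    while (*src)
    {
        int     len = pg_mblen_cstr(src);   /* bytes in this character */

        dst += ts_copychar_with_len(dst, src, len);
        src += len;
    }
    *dst = '\0';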
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index b0d1dbab6da..3eb0770f9c2 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -40,14 +40,12 @@ extern bool gettoken_tsvector(TSVectorParseState state,
extern void close_tsvector_parser(TSVectorParseState state);
/* phrase operator begins with '<' */
-#define ISOPERATOR(x) \
- ( pg_mblen(x) == 1 && ( *(x) == '!' || \
- *(x) == '&' || \
- *(x) == '|' || \
- *(x) == '(' || \
- *(x) == ')' || \
- *(x) == '<' \
- ) )
+#define ISOPERATOR(x) (*(x) == '!' || \
+ *(x) == '&' || \
+ *(x) == '|' || \
+ *(x) == '(' || \
+ *(x) == ')' || \
+ *(x) == '<')
/* parse_tsquery */
diff --git a/src/include/utils/.gitignore b/src/include/utils/.gitignore
index 30f921429c6..ff6f61cd7ee 100644
--- a/src/include/utils/.gitignore
+++ b/src/include/utils/.gitignore
@@ -4,4 +4,6 @@
/probes.h
/errcodes.h
/header-stamp
+/pgstat_wait_event.c
+/wait_event_funcs_data.c
/wait_event_types.h
diff --git a/src/include/utils/arrayaccess.h b/src/include/utils/arrayaccess.h
index abb8659de02..a325ae52574 100644
--- a/src/include/utils/arrayaccess.h
+++ b/src/include/utils/arrayaccess.h
@@ -22,8 +22,8 @@
* Functions for iterating through elements of a flat or expanded array.
* These require a state struct "array_iter iter".
*
- * Use "array_iter_setup(&iter, arrayptr);" to prepare to iterate, and
- * "datumvar = array_iter_next(&iter, &isnullvar, index, ...);" to fetch
+ * Use "array_iter_setup(&iter, arrayptr, ...);" to prepare to iterate,
+ * and "datumvar = array_iter_next(&iter, &isnullvar, index);" to fetch
* the next element into datumvar/isnullvar.
* "index" must be the zero-origin element number; we make caller provide
* this since caller is generally counting the elements anyway. Despite
@@ -42,11 +42,17 @@ typedef struct array_iter
char *dataptr; /* Current spot in the data area */
bits8 *bitmapptr; /* Current byte of the nulls bitmap, or NULL */
int bitmask; /* mask for current bit in nulls bitmap */
+
+ /* Fields used in both cases: data about array's element type */
+ int elmlen;
+ bool elmbyval;
+ uint8 elmalignby;
} array_iter;
static inline void
-array_iter_setup(array_iter *it, AnyArrayType *a)
+array_iter_setup(array_iter *it, AnyArrayType *a,
+ int elmlen, bool elmbyval, char elmalign)
{
if (VARATT_IS_EXPANDED_HEADER(a))
{
@@ -75,11 +81,13 @@ array_iter_setup(array_iter *it, AnyArrayType *a)
it->bitmapptr = ARR_NULLBITMAP((ArrayType *) a);
}
it->bitmask = 1;
+ it->elmlen = elmlen;
+ it->elmbyval = elmbyval;
+ it->elmalignby = typalign_to_alignby(elmalign);
}
static inline Datum
-array_iter_next(array_iter *it, bool *isnull, int i,
- int elmlen, bool elmbyval, char elmalign)
+array_iter_next(array_iter *it, bool *isnull, int i)
{
Datum ret;
@@ -98,10 +106,11 @@ array_iter_next(array_iter *it, bool *isnull, int i,
else
{
*isnull = false;
- ret = fetch_att(it->dataptr, elmbyval, elmlen);
- it->dataptr = att_addlength_pointer(it->dataptr, elmlen,
+ ret = fetch_att(it->dataptr, it->elmbyval, it->elmlen);
+ it->dataptr = att_addlength_pointer(it->dataptr, it->elmlen,
it->dataptr);
- it->dataptr = (char *) att_align_nominal(it->dataptr, elmalign);
+ it->dataptr = (char *) att_nominal_alignby(it->dataptr,
+ it->elmalignby);
}
it->bitmask <<= 1;
if (it->bitmask == 0x100)
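
Since the element-type details now live in the iterator, call sites pass them once at setup instead of on every fetch. A sketch of the new calling convention, assuming typlen/typbyval/typalign came from the element type's typcache entry and arr is an AnyArrayType *:

    /* Sum the non-null elements of an int4 array. */
    array_iter  iter;
    int         nelems = ArrayGetNItems(AARR_NDIM(arr), AARR_DIMS(arr));
    int64       sum = 0;

    array_iter_setup(&iter, arr, typlen, typbyval, typalign);
    for (int i = 0; i < nelems; i++)
    {
        bool    isnull;
        Datum   d = array_iter_next(&iter, &isnull, i);

        if (!isnull)
            sum += DatumGetInt32(d);
    }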
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index 781e48c0c10..ddd06304e97 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -331,7 +331,6 @@ extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
int buflen);
extern int64 pgstat_get_my_query_id(void);
extern int64 pgstat_get_my_plan_id(void);
-extern BackendType pgstat_get_backend_type_by_proc_number(ProcNumber procNumber);
/* ----------
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index cf57819ebdc..5dcd788ff80 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -68,6 +68,7 @@ extern char *pg_ultostr(char *str, uint32 value);
/* oid.c */
extern oidvector *buildoidvector(const Oid *oids, int n);
+extern void check_valid_oidvector(const oidvector *oidArray);
extern Oid oidparse(Node *node);
extern int oid_cmp(const void *p1, const void *p2);
diff --git a/src/include/utils/freepage.h b/src/include/utils/freepage.h
index 8c0e0edd791..2681fd6d5ea 100644
--- a/src/include/utils/freepage.h
+++ b/src/include/utils/freepage.h
@@ -65,7 +65,7 @@ struct FreePageManager
/* Macros to convert between page numbers (expressed as Size) and pointers. */
#define fpm_page_to_pointer(base, page) \
- (AssertVariableIsOfTypeMacro(page, Size), \
+ (StaticAssertVariableIsOfTypeMacro(page, Size), \
(base) + FPM_PAGE_SIZE * (page))
#define fpm_pointer_to_page(base, ptr) \
(((Size) (((char *) (ptr)) - (base))) / FPM_PAGE_SIZE)
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index bf39878c43e..c46203fabfe 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -295,7 +295,7 @@ extern PGDLLIMPORT bool log_duration;
extern PGDLLIMPORT int log_parameter_max_length;
extern PGDLLIMPORT int log_parameter_max_length_on_error;
extern PGDLLIMPORT int log_min_error_statement;
-extern PGDLLIMPORT int log_min_messages;
+extern PGDLLIMPORT int log_min_messages[];
extern PGDLLIMPORT int client_min_messages;
extern PGDLLIMPORT int log_min_duration_sample;
extern PGDLLIMPORT int log_min_duration_statement;
@@ -344,6 +344,7 @@ extern PGDLLIMPORT const struct config_enum_entry archive_mode_options[];
extern PGDLLIMPORT const struct config_enum_entry dynamic_shared_memory_options[];
extern PGDLLIMPORT const struct config_enum_entry io_method_options[];
extern PGDLLIMPORT const struct config_enum_entry recovery_target_action_options[];
+extern PGDLLIMPORT const struct config_enum_entry server_message_level_options[];
extern PGDLLIMPORT const struct config_enum_entry wal_level_options[];
extern PGDLLIMPORT const struct config_enum_entry wal_sync_method_options[];
diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h
index b6ecb0e769f..9c90670d9b8 100644
--- a/src/include/utils/guc_hooks.h
+++ b/src/include/utils/guc_hooks.h
@@ -177,5 +177,7 @@ extern void assign_wal_sync_method(int new_wal_sync_method, void *extra);
extern bool check_synchronized_standby_slots(char **newval, void **extra,
GucSource source);
extern void assign_synchronized_standby_slots(const char *newval, void *extra);
+extern bool check_log_min_messages(char **newval, void **extra, GucSource source);
+extern void assign_log_min_messages(const char *newval, void *extra);
#endif /* GUC_HOOKS_H */
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 0e937fec9e9..5f64fb20477 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -17,6 +17,7 @@
#include "access/htup.h"
#include "storage/relfilelocator.h"
#include "utils/relcache.h"
+#include "utils/syscache.h"
extern PGDLLIMPORT int debug_discard_caches;
@@ -38,7 +39,8 @@ extern PGDLLIMPORT int debug_discard_caches;
#endif /* not DISCARD_CACHES_ENABLED */
-typedef void (*SyscacheCallbackFunction) (Datum arg, int cacheid, uint32 hashvalue);
+typedef void (*SyscacheCallbackFunction) (Datum arg, SysCacheIdentifier cacheid,
+ uint32 hashvalue);
typedef void (*RelcacheCallbackFunction) (Datum arg, Oid relid);
typedef void (*RelSyncCallbackFunction) (Datum arg, Oid relid);
@@ -81,7 +83,7 @@ extern void CacheInvalidateSmgr(RelFileLocatorBackend rlocator);
extern void CacheInvalidateRelmap(Oid databaseId);
-extern void CacheRegisterSyscacheCallback(int cacheid,
+extern void CacheRegisterSyscacheCallback(SysCacheIdentifier cacheid,
SyscacheCallbackFunction func,
Datum arg);
@@ -91,7 +93,7 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
extern void CacheRegisterRelSyncCallback(RelSyncCallbackFunction func,
Datum arg);
-extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
+extern void CallSyscacheCallbacks(SysCacheIdentifier cacheid, uint32 hashvalue);
extern void CallRelSyncCallbacks(Oid relid);
diff --git a/src/include/utils/meson.build b/src/include/utils/meson.build
index 318a6aec0d0..fd3a2352df5 100644
--- a/src/include/utils/meson.build
+++ b/src/include/utils/meson.build
@@ -79,8 +79,6 @@ generated_backend_headers += fmgrtab_target[1]
# autoconf generates the file there, ensure we get a conflict
generated_sources_ac += {
- 'src/backend/utils': fmgrtab_output + ['errcodes.h', 'probes.h', 'fmgr-stamp'],
+ 'src/backend/utils': fmgrtab_output + ['errcodes.h', 'wait_event_types.h', 'probes.h', 'fmgr-stamp'],
'src/include/utils': ['header-stamp'],
}
-
-generated_sources_ac += {'src/backend/utils/activity': ['wait_event_types.h']}
diff --git a/src/include/utils/relptr.h b/src/include/utils/relptr.h
index aeb17fa24a5..94975f2f237 100644
--- a/src/include/utils/relptr.h
+++ b/src/include/utils/relptr.h
@@ -40,12 +40,12 @@
#ifdef HAVE_TYPEOF
#define relptr_access(base, rp) \
- (AssertVariableIsOfTypeMacro(base, char *), \
+ (StaticAssertVariableIsOfTypeMacro(base, char *), \
(typeof((rp).relptr_type)) ((rp).relptr_off == 0 ? NULL : \
(base) + (rp).relptr_off - 1))
#else
#define relptr_access(base, rp) \
- (AssertVariableIsOfTypeMacro(base, char *), \
+ (StaticAssertVariableIsOfTypeMacro(base, char *), \
(void *) ((rp).relptr_off == 0 ? NULL : (base) + (rp).relptr_off - 1))
#endif
@@ -70,12 +70,12 @@ relptr_store_eval(char *base, char *val)
#ifdef HAVE_TYPEOF
#define relptr_store(base, rp, val) \
- (AssertVariableIsOfTypeMacro(base, char *), \
- AssertVariableIsOfTypeMacro(val, typeof((rp).relptr_type)), \
+ (StaticAssertVariableIsOfTypeMacro(base, char *), \
+ StaticAssertVariableIsOfTypeMacro(val, typeof((rp).relptr_type)), \
(rp).relptr_off = relptr_store_eval((base), (char *) (val)))
#else
#define relptr_store(base, rp, val) \
- (AssertVariableIsOfTypeMacro(base, char *), \
+ (StaticAssertVariableIsOfTypeMacro(base, char *), \
(rp).relptr_off = relptr_store_eval((base), (char *) (val)))
#endif
diff --git a/src/include/utils/sortsupport.h b/src/include/utils/sortsupport.h
index 0083756bbdb..a8f8f9f026a 100644
--- a/src/include/utils/sortsupport.h
+++ b/src/include/utils/sortsupport.h
@@ -229,107 +229,6 @@ ApplySortComparator(Datum datum1, bool isNull1,
return compare;
}
-static inline int
-ApplyUnsignedSortComparator(Datum datum1, bool isNull1,
- Datum datum2, bool isNull2,
- SortSupport ssup)
-{
- int compare;
-
- if (isNull1)
- {
- if (isNull2)
- compare = 0; /* NULL "=" NULL */
- else if (ssup->ssup_nulls_first)
- compare = -1; /* NULL "<" NOT_NULL */
- else
- compare = 1; /* NULL ">" NOT_NULL */
- }
- else if (isNull2)
- {
- if (ssup->ssup_nulls_first)
- compare = 1; /* NOT_NULL ">" NULL */
- else
- compare = -1; /* NOT_NULL "<" NULL */
- }
- else
- {
- compare = datum1 < datum2 ? -1 : datum1 > datum2 ? 1 : 0;
- if (ssup->ssup_reverse)
- INVERT_COMPARE_RESULT(compare);
- }
-
- return compare;
-}
-
-static inline int
-ApplySignedSortComparator(Datum datum1, bool isNull1,
- Datum datum2, bool isNull2,
- SortSupport ssup)
-{
- int compare;
-
- if (isNull1)
- {
- if (isNull2)
- compare = 0; /* NULL "=" NULL */
- else if (ssup->ssup_nulls_first)
- compare = -1; /* NULL "<" NOT_NULL */
- else
- compare = 1; /* NULL ">" NOT_NULL */
- }
- else if (isNull2)
- {
- if (ssup->ssup_nulls_first)
- compare = 1; /* NOT_NULL ">" NULL */
- else
- compare = -1; /* NOT_NULL "<" NULL */
- }
- else
- {
- compare = DatumGetInt64(datum1) < DatumGetInt64(datum2) ? -1 :
- DatumGetInt64(datum1) > DatumGetInt64(datum2) ? 1 : 0;
- if (ssup->ssup_reverse)
- INVERT_COMPARE_RESULT(compare);
- }
-
- return compare;
-}
-
-static inline int
-ApplyInt32SortComparator(Datum datum1, bool isNull1,
- Datum datum2, bool isNull2,
- SortSupport ssup)
-{
- int compare;
-
- if (isNull1)
- {
- if (isNull2)
- compare = 0; /* NULL "=" NULL */
- else if (ssup->ssup_nulls_first)
- compare = -1; /* NULL "<" NOT_NULL */
- else
- compare = 1; /* NULL ">" NOT_NULL */
- }
- else if (isNull2)
- {
- if (ssup->ssup_nulls_first)
- compare = 1; /* NOT_NULL ">" NULL */
- else
- compare = -1; /* NOT_NULL "<" NULL */
- }
- else
- {
- compare = DatumGetInt32(datum1) < DatumGetInt32(datum2) ? -1 :
- DatumGetInt32(datum1) > DatumGetInt32(datum2) ? 1 : 0;
- if (ssup->ssup_reverse)
- INVERT_COMPARE_RESULT(compare);
- }
-
- return compare;
-}
-
/*
* Apply a sort comparator function and return a 3-way comparison using full,
* authoritative comparator. This takes care of handling reverse-sort and
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index 13f49af9ed4..81e5933708e 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -25,35 +25,35 @@
extern void InitCatalogCache(void);
extern void InitCatalogCachePhase2(void);
-extern HeapTuple SearchSysCache(int cacheId,
+extern HeapTuple SearchSysCache(SysCacheIdentifier cacheId,
Datum key1, Datum key2, Datum key3, Datum key4);
/*
 * The use of argument-specific numbers is encouraged. They're faster, and
 * insulate the caller from changes in the maximum number of keys.
*/
-extern HeapTuple SearchSysCache1(int cacheId,
+extern HeapTuple SearchSysCache1(SysCacheIdentifier cacheId,
Datum key1);
-extern HeapTuple SearchSysCache2(int cacheId,
+extern HeapTuple SearchSysCache2(SysCacheIdentifier cacheId,
Datum key1, Datum key2);
-extern HeapTuple SearchSysCache3(int cacheId,
+extern HeapTuple SearchSysCache3(SysCacheIdentifier cacheId,
Datum key1, Datum key2, Datum key3);
-extern HeapTuple SearchSysCache4(int cacheId,
+extern HeapTuple SearchSysCache4(SysCacheIdentifier cacheId,
Datum key1, Datum key2, Datum key3, Datum key4);
extern void ReleaseSysCache(HeapTuple tuple);
-extern HeapTuple SearchSysCacheLocked1(int cacheId,
+extern HeapTuple SearchSysCacheLocked1(SysCacheIdentifier cacheId,
Datum key1);
/* convenience routines */
-extern HeapTuple SearchSysCacheCopy(int cacheId,
+extern HeapTuple SearchSysCacheCopy(SysCacheIdentifier cacheId,
Datum key1, Datum key2, Datum key3, Datum key4);
-extern HeapTuple SearchSysCacheLockedCopy1(int cacheId,
+extern HeapTuple SearchSysCacheLockedCopy1(SysCacheIdentifier cacheId,
Datum key1);
-extern bool SearchSysCacheExists(int cacheId,
+extern bool SearchSysCacheExists(SysCacheIdentifier cacheId,
Datum key1, Datum key2, Datum key3, Datum key4);
-extern Oid GetSysCacheOid(int cacheId, AttrNumber oidcol,
+extern Oid GetSysCacheOid(SysCacheIdentifier cacheId, AttrNumber oidcol,
Datum key1, Datum key2, Datum key3, Datum key4);
extern HeapTuple SearchSysCacheAttName(Oid relid, const char *attname);
@@ -63,21 +63,21 @@ extern bool SearchSysCacheExistsAttName(Oid relid, const char *attname);
extern HeapTuple SearchSysCacheAttNum(Oid relid, int16 attnum);
extern HeapTuple SearchSysCacheCopyAttNum(Oid relid, int16 attnum);
-extern Datum SysCacheGetAttr(int cacheId, HeapTuple tup,
+extern Datum SysCacheGetAttr(SysCacheIdentifier cacheId, HeapTuple tup,
AttrNumber attributeNumber, bool *isNull);
-extern Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup,
+extern Datum SysCacheGetAttrNotNull(SysCacheIdentifier cacheId, HeapTuple tup,
AttrNumber attributeNumber);
-extern uint32 GetSysCacheHashValue(int cacheId,
+extern uint32 GetSysCacheHashValue(SysCacheIdentifier cacheId,
Datum key1, Datum key2, Datum key3, Datum key4);
/* list-search interface. Users of this must import catcache.h too */
struct catclist;
-extern struct catclist *SearchSysCacheList(int cacheId, int nkeys,
+extern struct catclist *SearchSysCacheList(SysCacheIdentifier cacheId, int nkeys,
Datum key1, Datum key2, Datum key3);
-extern void SysCacheInvalidate(int cacheId, uint32 hashValue);
+extern void SysCacheInvalidate(SysCacheIdentifier cacheId, uint32 hashValue);
extern bool RelationInvalidatesSnapshotsOnly(Oid relid);
extern bool RelationHasSysCache(Oid relid);
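
These are type-tightening changes only (int becomes the SysCacheIdentifier enum), so existing call sites keep their shape. For reference, a minimal lookup under the new signatures:

    /* Look up a type by OID; TYPEOID is a SysCacheIdentifier value. */
    HeapTuple   tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));

    if (!HeapTupleIsValid(tup))
        elog(ERROR, "cache lookup failed for type %u", typid);
    /* ... examine GETSTRUCT(tup) ... */
    ReleaseSysCache(tup);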
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 5fe229e211b..da68f45acf2 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -116,6 +116,7 @@ typedef struct
void *tuple; /* the tuple itself */
Datum datum1; /* value of first key column */
bool isnull1; /* is first key column NULL? */
+ uint8 curbyte; /* chunk of datum1 for current radix sort pass */
int srctape; /* source tape number */
} SortTuple;
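
The new curbyte field caches the byte of datum1 that the current radix-sort pass keys on, so the inner loop need not re-extract it. Filling it presumably looks something like the following (pass direction and numbering are illustrative, not taken from this patch):

    /* Cache byte "pass" of the 8-byte key, counting from the MSB. */
    stup->curbyte = (uint8) (DatumGetUInt64(stup->datum1) >> (8 * (7 - pass)));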
diff --git a/src/include/utils/varbit.h b/src/include/utils/varbit.h
index 82be976f5c5..20cb14d75b0 100644
--- a/src/include/utils/varbit.h
+++ b/src/include/utils/varbit.h
@@ -20,7 +20,7 @@
#include "fmgr.h"
/*
- * Modeled on struct varlena from c.h, but data type is bits8.
+ * Modeled on varlena from c.h, but data type is bits8.
*
* Caution: if bit_len is not a multiple of BITS_PER_BYTE, the low-order
* bits of the last byte of bit_dat[] are unused and MUST be zeroes.
diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h
index 03acb255449..023fdeb4531 100644
--- a/src/include/utils/xml.h
+++ b/src/include/utils/xml.h
@@ -20,7 +20,7 @@
#include "nodes/execnodes.h"
#include "nodes/primnodes.h"
-typedef struct varlena xmltype;
+typedef varlena xmltype;
typedef enum
{
diff --git a/src/include/varatt.h b/src/include/varatt.h
index fd7d5912f7d..000bdf33b92 100644
--- a/src/include/varatt.h
+++ b/src/include/varatt.h
@@ -16,7 +16,7 @@
#define VARATT_H
/*
- * struct varatt_external is a traditional "TOAST pointer", that is, the
+ * varatt_external is a traditional "TOAST pointer", that is, the
* information needed to fetch a Datum stored out-of-line in a TOAST table.
* The data is compressed if and only if the external size stored in
* va_extinfo is less than va_rawsize - VARHDRSZ.
@@ -36,7 +36,7 @@ typedef struct varatt_external
* compression method */
Oid va_valueid; /* Unique ID of value within TOAST table */
Oid va_toastrelid; /* RelID of TOAST table containing it */
-} varatt_external;
+} varatt_external;
/*
* These macros define the "saved size" portion of va_extinfo. Its remaining
@@ -46,27 +46,27 @@ typedef struct varatt_external
#define VARLENA_EXTSIZE_MASK ((1U << VARLENA_EXTSIZE_BITS) - 1)
/*
- * struct varatt_indirect is a "TOAST pointer" representing an out-of-line
+ * varatt_indirect is a "TOAST pointer" representing an out-of-line
* Datum that's stored in memory, not in an external toast relation.
* The creator of such a Datum is entirely responsible that the referenced
* storage survives for as long as referencing pointer Datums can exist.
*
- * Note that just as for struct varatt_external, this struct is stored
+ * Note that just as for varatt_external, this struct is stored
* unaligned within any containing tuple.
*/
typedef struct varatt_indirect
{
- struct varlena *pointer; /* Pointer to in-memory varlena */
-} varatt_indirect;
+ varlena *pointer; /* Pointer to in-memory varlena */
+} varatt_indirect;
/*
- * struct varatt_expanded is a "TOAST pointer" representing an out-of-line
+ * varatt_expanded is a "TOAST pointer" representing an out-of-line
* Datum that is stored in memory, in some type-specific, not necessarily
* physically contiguous format that is convenient for computation not
* storage. APIs for this, in particular the definition of struct
* ExpandedObjectHeader, are in src/include/utils/expandeddatum.h.
*
- * Note that just as for struct varatt_external, this struct is stored
+ * Note that just as for varatt_external, this struct is stored
* unaligned within any containing tuple.
*/
typedef struct ExpandedObjectHeader ExpandedObjectHeader;
@@ -502,15 +502,15 @@ VARDATA_COMPRESSED_GET_COMPRESS_METHOD(const void *PTR)
return ((const varattrib_4b *) PTR)->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS;
}
-/* Same for external Datums; but note argument is a struct varatt_external */
+/* Same for external Datums; but note argument is a varatt_external */
static inline Size
-VARATT_EXTERNAL_GET_EXTSIZE(struct varatt_external toast_pointer)
+VARATT_EXTERNAL_GET_EXTSIZE(varatt_external toast_pointer)
{
return toast_pointer.va_extinfo & VARLENA_EXTSIZE_MASK;
}
static inline uint32
-VARATT_EXTERNAL_GET_COMPRESS_METHOD(struct varatt_external toast_pointer)
+VARATT_EXTERNAL_GET_COMPRESS_METHOD(varatt_external toast_pointer)
{
return toast_pointer.va_extinfo >> VARLENA_EXTSIZE_BITS;
}
@@ -533,7 +533,7 @@ VARATT_EXTERNAL_GET_COMPRESS_METHOD(struct varatt_external toast_pointer)
* actually saves space, so we expect either equality or less-than.
*/
static inline bool
-VARATT_EXTERNAL_IS_COMPRESSED(struct varatt_external toast_pointer)
+VARATT_EXTERNAL_IS_COMPRESSED(varatt_external toast_pointer)
{
return VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) <
(Size) (toast_pointer.va_rawsize - VARHDRSZ);
diff --git a/src/interfaces/ecpg/pgtypeslib/interval.c b/src/interfaces/ecpg/pgtypeslib/interval.c
index 936a6883816..e452a088f9e 100644
--- a/src/interfaces/ecpg/pgtypeslib/interval.c
+++ b/src/interfaces/ecpg/pgtypeslib/interval.c
@@ -184,7 +184,7 @@ DecodeISO8601Interval(char *str,
continue;
}
/* Else fall through to extended alternative format */
- /* FALLTHROUGH */
+ pg_fallthrough;
case '-': /* ISO 8601 4.4.3.3 Alternative Format,
* Extended */
if (havefield)
@@ -263,7 +263,7 @@ DecodeISO8601Interval(char *str,
return 0;
}
/* Else fall through to extended alternative format */
- /* FALLTHROUGH */
+ pg_fallthrough;
case ':': /* ISO 8601 4.4.3.3 Alternative Format,
* Extended */
if (havefield)
@@ -391,7 +391,7 @@ DecodeInterval(char **field, int *ftype, int nf, /* int range, */
tmask = DTK_M(TZ);
break;
}
- /* FALL THROUGH */
+ pg_fallthrough;
case DTK_DATE:
case DTK_NUMBER:
diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c
index 103428033ef..90bbb2eba1f 100644
--- a/src/interfaces/libpq/fe-protocol3.c
+++ b/src/interfaces/libpq/fe-protocol3.c
@@ -1451,7 +1451,19 @@ pqGetNegotiateProtocolVersion3(PGconn *conn)
if (pqGetInt(&num, 4, conn) != 0)
goto eof;
- /* Check the protocol version */
+ /*
+ * Check the protocol version.
+ *
+ * PG_PROTOCOL_GREASE is intentionally unsupported and reserved. It's
+ * higher than any real version, so check for that first, to get the most
+ * specific error message. Then check the upper and lower bounds.
+ */
+ if (their_version == PG_PROTOCOL_GREASE)
+ {
+ libpq_append_conn_error(conn, "received invalid protocol negotiation message: server requested \"grease\" protocol version 3.9999");
+ goto failure;
+ }
+
if (their_version > conn->pversion)
{
libpq_append_conn_error(conn, "received invalid protocol negotiation message: server requested downgrade to a higher-numbered version");
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index 399fe7adf62..31d5b48d3f9 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -379,7 +379,7 @@ pqsecure_raw_write(PGconn *conn, const void *ptr, size_t len)
/* Set flag for EPIPE */
REMEMBER_EPIPE(spinfo, true);
- /* FALL THRU */
+ pg_fallthrough;
case ECONNRESET:
conn->write_failed = true;
diff --git a/src/interfaces/libpq/libpq_check.pl b/src/interfaces/libpq/libpq_check.pl
index 8a2e29b2d9a..833f5315c3c 100755
--- a/src/interfaces/libpq/libpq_check.pl
+++ b/src/interfaces/libpq/libpq_check.pl
@@ -31,12 +31,9 @@
sub create_stamp_file
{
- if (!(-f $stamp_file))
- {
- open my $fh, '>', $stamp_file
- or die "can't open $stamp_file: $!";
- close $fh;
- }
+ open my $fh, '>', $stamp_file
+ or die "can't open $stamp_file: $!";
+ close $fh;
}
# Skip on Windows and Solaris
diff --git a/src/pl/plpgsql/src/expected/plpgsql_domain.out b/src/pl/plpgsql/src/expected/plpgsql_domain.out
index 516c2b9e08e..11c012ea024 100644
--- a/src/pl/plpgsql/src/expected/plpgsql_domain.out
+++ b/src/pl/plpgsql/src/expected/plpgsql_domain.out
@@ -395,3 +395,16 @@ SELECT * FROM test_assign_ordered_named_pairs(1,2,0); -- should fail someday
{"(1,2)"}
(1 row)
+CREATE FUNCTION test_null_ordered_named_pair()
+ RETURNS ordered_named_pair AS $$
+declare v ordered_named_pair;
+begin
+return v;
+end
+$$ LANGUAGE plpgsql;
+SELECT * FROM test_null_ordered_named_pair();
+ i | j
+---+---
+ |
+(1 row)
+
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 75325117ec9..84552e32c87 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -3230,7 +3230,7 @@ exec_stmt_return(PLpgSQL_execstate *estate, PLpgSQL_stmt_return *stmt)
/* fulfill promise if needed, then handle like regular var */
plpgsql_fulfill_promise(estate, (PLpgSQL_var *) retvar);
- /* FALL THRU */
+ pg_fallthrough;
case PLPGSQL_DTYPE_VAR:
{
@@ -3255,28 +3255,14 @@ exec_stmt_return(PLpgSQL_execstate *estate, PLpgSQL_stmt_return *stmt)
}
break;
- case PLPGSQL_DTYPE_REC:
- {
- PLpgSQL_rec *rec = (PLpgSQL_rec *) retvar;
-
- /* If record is empty, we return NULL not a row of nulls */
- if (rec->erh && !ExpandedRecordIsEmpty(rec->erh))
- {
- estate->retval = ExpandedRecordGetDatum(rec->erh);
- estate->retisnull = false;
- estate->rettype = rec->rectypeid;
- }
- }
- break;
-
case PLPGSQL_DTYPE_ROW:
+ case PLPGSQL_DTYPE_REC:
{
- PLpgSQL_row *row = (PLpgSQL_row *) retvar;
+ /* exec_eval_datum can handle these cases */
int32 rettypmod;
- /* We get here if there are multiple OUT parameters */
exec_eval_datum(estate,
- (PLpgSQL_datum *) row,
+ retvar,
&estate->rettype,
&rettypmod,
&estate->retval,
@@ -3376,7 +3362,7 @@ exec_stmt_return_next(PLpgSQL_execstate *estate,
/* fulfill promise if needed, then handle like regular var */
plpgsql_fulfill_promise(estate, (PLpgSQL_var *) retvar);
- /* FALL THRU */
+ pg_fallthrough;
case PLPGSQL_DTYPE_VAR:
{
@@ -5313,7 +5299,7 @@ exec_eval_datum(PLpgSQL_execstate *estate,
/* fulfill promise if needed, then handle like regular var */
plpgsql_fulfill_promise(estate, (PLpgSQL_var *) datum);
- /* FALL THRU */
+ pg_fallthrough;
case PLPGSQL_DTYPE_VAR:
{
@@ -8818,7 +8804,7 @@ assign_simple_var(PLpgSQL_execstate *estate, PLpgSQL_var *var,
* pain, but there's little choice.
*/
oldcxt = MemoryContextSwitchTo(get_eval_mcontext(estate));
- detoasted = PointerGetDatum(detoast_external_attr((struct varlena *) DatumGetPointer(newvalue)));
+ detoasted = PointerGetDatum(detoast_external_attr((varlena *) DatumGetPointer(newvalue)));
MemoryContextSwitchTo(oldcxt);
/* Now's a good time to not leak the input value if it's freeable */
if (freeable)
diff --git a/src/pl/plpgsql/src/sql/plpgsql_domain.sql b/src/pl/plpgsql/src/sql/plpgsql_domain.sql
index 8f99aae5a9f..4c5dd7dc707 100644
--- a/src/pl/plpgsql/src/sql/plpgsql_domain.sql
+++ b/src/pl/plpgsql/src/sql/plpgsql_domain.sql
@@ -277,3 +277,13 @@ $$ LANGUAGE plpgsql;
SELECT * FROM test_assign_ordered_named_pairs(1,2,3);
SELECT * FROM test_assign_ordered_named_pairs(2,1,3);
SELECT * FROM test_assign_ordered_named_pairs(1,2,0); -- should fail someday
+
+CREATE FUNCTION test_null_ordered_named_pair()
+ RETURNS ordered_named_pair AS $$
+declare v ordered_named_pair;
+begin
+return v;
+end
+$$ LANGUAGE plpgsql;
+
+SELECT * FROM test_null_ordered_named_pair();
diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c
index 1f69109b081..44055de6aeb 100644
--- a/src/pl/plpython/plpy_typeio.c
+++ b/src/pl/plpython/plpy_typeio.c
@@ -735,6 +735,7 @@ PLyList_FromArray_recurse(PLyDatumToOb *elm, int *dims, int ndim, int dim,
char *dataptr = *dataptr_p;
bits8 *bitmap = *bitmap_p;
int bitmask = *bitmask_p;
+ uint8 typalignby = typalign_to_alignby(elm->typalign);
for (i = 0; i < dims[dim]; i++)
{
@@ -751,7 +752,7 @@ PLyList_FromArray_recurse(PLyDatumToOb *elm, int *dims, int ndim, int dim,
itemvalue = fetch_att(dataptr, elm->typbyval, elm->typlen);
PyList_SetItem(list, i, elm->func(elm, itemvalue));
dataptr = att_addlength_pointer(dataptr, elm->typlen, dataptr);
- dataptr = (char *) att_align_nominal(dataptr, elm->typalign);
+ dataptr = (char *) att_nominal_alignby(dataptr, typalignby);
}
/* advance bitmap pointer if any */
diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c
index 187698ccdd2..b7318f7261e 100644
--- a/src/pl/tcl/pltcl.c
+++ b/src/pl/tcl/pltcl.c
@@ -2545,7 +2545,7 @@ pltcl_process_SPI_result(Tcl_Interp *interp,
break;
}
/* fall through for utility returning tuples */
- /* FALLTHROUGH */
+ pg_fallthrough;
case SPI_OK_SELECT:
case SPI_OK_INSERT_RETURNING:
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index ffda75825e5..49b130f1306 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -96,56 +96,6 @@ const uint8 pg_number_of_ones[256] = {
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
-/*
- * pg_popcount32_portable
- * Return the number of 1 bits set in word
- */
-int
-pg_popcount32_portable(uint32 word)
-{
-#ifdef HAVE__BUILTIN_POPCOUNT
- return __builtin_popcount(word);
-#else /* !HAVE__BUILTIN_POPCOUNT */
- int result = 0;
-
- while (word != 0)
- {
- result += pg_number_of_ones[word & 255];
- word >>= 8;
- }
-
- return result;
-#endif /* HAVE__BUILTIN_POPCOUNT */
-}
-
-/*
- * pg_popcount64_portable
- * Return the number of 1 bits set in word
- */
-int
-pg_popcount64_portable(uint64 word)
-{
-#ifdef HAVE__BUILTIN_POPCOUNT
-#if SIZEOF_LONG == 8
- return __builtin_popcountl(word);
-#elif SIZEOF_LONG_LONG == 8
- return __builtin_popcountll(word);
-#else
-#error "cannot find integer of the same size as uint64_t"
-#endif
-#else /* !HAVE__BUILTIN_POPCOUNT */
- int result = 0;
-
- while (word != 0)
- {
- result += pg_number_of_ones[word & 255];
- word >>= 8;
- }
-
- return result;
-#endif /* HAVE__BUILTIN_POPCOUNT */
-}
-
/*
* pg_popcount_portable
* Returns the number of 1-bits in buf
@@ -163,24 +113,10 @@ pg_popcount_portable(const char *buf, int bytes)
while (bytes >= 8)
{
- popcnt += pg_popcount64_portable(*words++);
+ popcnt += pg_popcount64(*words++);
bytes -= 8;
}
- buf = (const char *) words;
- }
-#else
- /* Process in 32-bit chunks if the buffer is aligned. */
- if (buf == (const char *) TYPEALIGN(4, buf))
- {
- const uint32 *words = (const uint32 *) buf;
-
- while (bytes >= 4)
- {
- popcnt += pg_popcount32_portable(*words++);
- bytes -= 4;
- }
-
buf = (const char *) words;
}
#endif
@@ -211,26 +147,10 @@ pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask)
while (bytes >= 8)
{
- popcnt += pg_popcount64_portable(*words++ & maskv);
+ popcnt += pg_popcount64(*words++ & maskv);
bytes -= 8;
}
- buf = (const char *) words;
- }
-#else
- /* Process in 32-bit chunks if the buffer is aligned. */
- uint32 maskv = ~((uint32) 0) / 0xFF * mask;
-
- if (buf == (const char *) TYPEALIGN(4, buf))
- {
- const uint32 *words = (const uint32 *) buf;
-
- while (bytes >= 4)
- {
- popcnt += pg_popcount32_portable(*words++ & maskv);
- bytes -= 4;
- }
-
buf = (const char *) words;
}
#endif
@@ -250,17 +170,6 @@ pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask)
* actual external functions. The compiler should be able to inline the
* portable versions here.
*/
-int
-pg_popcount32(uint32 word)
-{
- return pg_popcount32_portable(word);
-}
-
-int
-pg_popcount64(uint64 word)
-{
- return pg_popcount64_portable(word);
-}
/*
* pg_popcount_optimized
diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c
index d574a686b42..8954669273a 100644
--- a/src/port/pg_numa.c
+++ b/src/port/pg_numa.c
@@ -87,7 +87,9 @@ pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
unsigned long count_chunk = Min(count - next,
NUMA_QUERY_CHUNK_SIZE);
+#ifndef FRONTEND
CHECK_FOR_INTERRUPTS();
+#endif
/*
* Bail out if any of the chunks errors out (ret<0). We ignore (ret>0)
diff --git a/src/port/pg_popcount_aarch64.c b/src/port/pg_popcount_aarch64.c
index ba57f2cd4bd..f474ef45510 100644
--- a/src/port/pg_popcount_aarch64.c
+++ b/src/port/pg_popcount_aarch64.c
@@ -292,21 +292,11 @@ pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
#endif /* ! USE_SVE_POPCNT_WITH_RUNTIME_CHECK */
/*
- * pg_popcount32
+ * pg_popcount64_neon
* Return number of 1 bits in word
*/
-int
-pg_popcount32(uint32 word)
-{
- return pg_popcount64((uint64) word);
-}
-
-/*
- * pg_popcount64
- * Return number of 1 bits in word
- */
-int
-pg_popcount64(uint64 word)
+static inline int
+pg_popcount64_neon(uint64 word)
{
/*
* For some compilers, __builtin_popcountl() already emits Neon
@@ -383,7 +373,7 @@ pg_popcount_neon(const char *buf, int bytes)
*/
for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64))
{
- popcnt += pg_popcount64(*((const uint64 *) buf));
+ popcnt += pg_popcount64_neon(*((const uint64 *) buf));
buf += sizeof(uint64);
}
@@ -465,7 +455,7 @@ pg_popcount_masked_neon(const char *buf, int bytes, bits8 mask)
*/
for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64))
{
- popcnt += pg_popcount64(*((const uint64 *) buf) & mask64);
+ popcnt += pg_popcount64_neon(*((const uint64 *) buf) & mask64);
buf += sizeof(uint64);
}
diff --git a/src/port/pg_popcount_x86.c b/src/port/pg_popcount_x86.c
index 245f0167d00..6bce089432f 100644
--- a/src/port/pg_popcount_x86.c
+++ b/src/port/pg_popcount_x86.c
@@ -36,8 +36,6 @@
* operation, but in practice this is close enough, and "sse42" seems easier to
* follow than "popcnt" for these names.
*/
-static inline int pg_popcount32_sse42(uint32 word);
-static inline int pg_popcount64_sse42(uint64 word);
static uint64 pg_popcount_sse42(const char *buf, int bytes);
static uint64 pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask);
@@ -55,12 +53,8 @@ static uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask);
* what the current CPU supports) and then will call the pointer to fulfill the
* caller's request.
*/
-static int pg_popcount32_choose(uint32 word);
-static int pg_popcount64_choose(uint64 word);
static uint64 pg_popcount_choose(const char *buf, int bytes);
static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
-int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
-int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
@@ -157,7 +151,7 @@ pg_popcount_avx512_available(void)
#endif /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
/*
- * These functions get called on the first call to pg_popcount32 etc.
+ * These functions get called on the first call to pg_popcount(), etc.
* They detect whether we can use the asm implementations, and replace
* the function pointers so that subsequent calls are routed directly to
* the chosen implementation.
@@ -167,15 +161,11 @@ choose_popcount_functions(void)
{
if (pg_popcount_sse42_available())
{
- pg_popcount32 = pg_popcount32_sse42;
- pg_popcount64 = pg_popcount64_sse42;
pg_popcount_optimized = pg_popcount_sse42;
pg_popcount_masked_optimized = pg_popcount_masked_sse42;
}
else
{
- pg_popcount32 = pg_popcount32_portable;
- pg_popcount64 = pg_popcount64_portable;
pg_popcount_optimized = pg_popcount_portable;
pg_popcount_masked_optimized = pg_popcount_masked_portable;
}
@@ -189,20 +179,6 @@ choose_popcount_functions(void)
#endif
}
-static int
-pg_popcount32_choose(uint32 word)
-{
- choose_popcount_functions();
- return pg_popcount32(word);
-}
-
-static int
-pg_popcount64_choose(uint64 word)
-{
- choose_popcount_functions();
- return pg_popcount64(word);
-}
-
static uint64
pg_popcount_choose(const char *buf, int bytes)
{
@@ -338,23 +314,6 @@ pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask)
#endif /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
-/*
- * pg_popcount32_sse42
- * Return the number of 1 bits set in word
- */
-static inline int
-pg_popcount32_sse42(uint32 word)
-{
-#ifdef _MSC_VER
- return __popcnt(word);
-#else
- uint32 res;
-
-__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
- return (int) res;
-#endif
-}
-
/*
* pg_popcount64_sse42
* Return the number of 1 bits set in word
@@ -376,40 +335,20 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
* pg_popcount_sse42
* Returns the number of 1-bits in buf
*/
+pg_attribute_no_sanitize_alignment()
static uint64
pg_popcount_sse42(const char *buf, int bytes)
{
uint64 popcnt = 0;
+ const uint64 *words = (const uint64 *) buf;
-#if SIZEOF_VOID_P >= 8
- /* Process in 64-bit chunks if the buffer is aligned. */
- if (buf == (const char *) TYPEALIGN(8, buf))
+ while (bytes >= 8)
{
- const uint64 *words = (const uint64 *) buf;
-
- while (bytes >= 8)
- {
- popcnt += pg_popcount64_sse42(*words++);
- bytes -= 8;
- }
-
- buf = (const char *) words;
+ popcnt += pg_popcount64_sse42(*words++);
+ bytes -= 8;
}
-#else
- /* Process in 32-bit chunks if the buffer is aligned. */
- if (buf == (const char *) TYPEALIGN(4, buf))
- {
- const uint32 *words = (const uint32 *) buf;
- while (bytes >= 4)
- {
- popcnt += pg_popcount32_sse42(*words++);
- bytes -= 4;
- }
-
- buf = (const char *) words;
- }
-#endif
+ buf = (const char *) words;
/* Process any remaining bytes */
while (bytes--)
@@ -422,44 +361,21 @@ pg_popcount_sse42(const char *buf, int bytes)
* pg_popcount_masked_sse42
* Returns the number of 1-bits in buf after applying the mask to each byte
*/
+pg_attribute_no_sanitize_alignment()
static uint64
pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask)
{
uint64 popcnt = 0;
-
-#if SIZEOF_VOID_P >= 8
- /* Process in 64-bit chunks if the buffer is aligned */
uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
+ const uint64 *words = (const uint64 *) buf;
- if (buf == (const char *) TYPEALIGN(8, buf))
+ while (bytes >= 8)
{
- const uint64 *words = (const uint64 *) buf;
-
- while (bytes >= 8)
- {
- popcnt += pg_popcount64_sse42(*words++ & maskv);
- bytes -= 8;
- }
-
- buf = (const char *) words;
+ popcnt += pg_popcount64_sse42(*words++ & maskv);
+ bytes -= 8;
}
-#else
- /* Process in 32-bit chunks if the buffer is aligned. */
- uint32 maskv = ~((uint32) 0) / 0xFF * mask;
-
- if (buf == (const char *) TYPEALIGN(4, buf))
- {
- const uint32 *words = (const uint32 *) buf;
- while (bytes >= 4)
- {
- popcnt += pg_popcount32_sse42(*words++ & maskv);
- bytes -= 4;
- }
-
- buf = (const char *) words;
- }
-#endif
+ buf = (const char *) words;
/* Process any remaining bytes */
while (bytes--)
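
What remains in this file is the lazy-binding dispatch that the deleted pg_popcount32/64 wrappers also used: a global function pointer starts out aiming at a *_choose shim, which probes the CPU once, rebinds the pointer, and then satisfies the first call itself. The pattern in miniature (all names here are illustrative):

    static uint64 popcount_choose(const char *buf, int bytes);
    uint64      (*popcount_impl) (const char *buf, int bytes) = popcount_choose;

    static uint64
    popcount_choose(const char *buf, int bytes)
    {
        /* First call only: pick an implementation, rebind, delegate. */
        popcount_impl = cpu_has_popcnt() ? popcount_fast : popcount_portable;
        return popcount_impl(buf, bytes);
    }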
diff --git a/src/port/snprintf.c b/src/port/snprintf.c
index 56c7036753c..5deee44d3a2 100644
--- a/src/port/snprintf.c
+++ b/src/port/snprintf.c
@@ -462,7 +462,7 @@ dopr(PrintfTarget *target, const char *format, va_list args)
/* set zero padding if no nonzero digits yet */
if (accum == 0 && !pointflag)
zpad = '0';
- /* FALL THRU */
+ pg_fallthrough;
case '1':
case '2':
case '3':
diff --git a/src/test/authentication/t/001_password.pl b/src/test/authentication/t/001_password.pl
index f4d65ba7bae..0ec9aa9f4e8 100644
--- a/src/test/authentication/t/001_password.pl
+++ b/src/test/authentication/t/001_password.pl
@@ -68,8 +68,24 @@ sub test_conn
$node->append_conf('postgresql.conf', "log_connections = on\n");
# Needed to allow connect_fails to inspect postmaster log:
$node->append_conf('postgresql.conf', "log_min_messages = debug2");
+$node->append_conf('postgresql.conf', "password_expiration_warning_threshold = '1100d'");
$node->start;
+# Set up roles for password_expiration_warning_threshold test
+my $current_year = 1900 + (localtime(time))[5];
+my $expire_year = $current_year - 1;
+$node->safe_psql(
+ 'postgres',
+ "CREATE ROLE expired LOGIN VALID UNTIL '$expire_year-01-01' PASSWORD 'pass'");
+$expire_year = $current_year + 2;
+$node->safe_psql(
+ 'postgres',
+ "CREATE ROLE expiration_warnings LOGIN VALID UNTIL '$expire_year-01-01' PASSWORD 'pass'");
+$expire_year = $current_year + 5;
+$node->safe_psql(
+ 'postgres',
+ "CREATE ROLE no_warnings LOGIN VALID UNTIL '$expire_year-01-01' PASSWORD 'pass'");
+
# Test behavior of log_connections GUC
#
# There wasn't another test file where these tests obviously fit, and we don't
@@ -531,6 +547,24 @@ sub test_conn
qr/authentication method requirement "!password,!md5,!scram-sha-256" failed: server requested SCRAM-SHA-256 authentication/
);
+# Test password_expiration_warning_threshold
+$node->connect_fails(
+ "user=expired dbname=postgres",
+ "connection fails due to expired password",
+ expected_stderr =>
+ qr/password authentication failed for user "expired"/
+);
+$node->connect_ok(
+ "user=expiration_warnings dbname=postgres",
+ "connection succeeds with password expiration warning",
+ expected_stderr =>
+ qr/role password will expire soon/
+);
+$node->connect_ok(
+ "user=no_warnings dbname=postgres",
+ "connection succeeds with no password expiration warning"
+);
+
# Test SYSTEM_USER <> NULL with parallel workers.
$node->safe_psql(
'postgres',
diff --git a/src/test/isolation/expected/insert-conflict-do-select.out b/src/test/isolation/expected/insert-conflict-do-select.out
new file mode 100644
index 00000000000..bccfd47dcfb
--- /dev/null
+++ b/src/test/isolation/expected/insert-conflict-do-select.out
@@ -0,0 +1,138 @@
+Parsed test spec with 2 sessions
+
+starting permutation: insert1 insert2 c1 select2 c2
+step insert1: INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT RETURNING *;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step insert2: INSERT INTO doselect(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO SELECT RETURNING *;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step c1: COMMIT;
+step select2: SELECT * FROM doselect;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step c2: COMMIT;
+
+starting permutation: insert1_update insert2_update c1 select2 c2
+step insert1_update: INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT FOR UPDATE RETURNING *;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step insert2_update: INSERT INTO doselect(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO SELECT FOR UPDATE RETURNING *;
+step c1: COMMIT;
+step insert2_update: <... completed>
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step select2: SELECT * FROM doselect;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step c2: COMMIT;
+
+starting permutation: insert1_update insert2_update a1 select2 c2
+step insert1_update: INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT FOR UPDATE RETURNING *;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step insert2_update: INSERT INTO doselect(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO SELECT FOR UPDATE RETURNING *;
+step a1: ABORT;
+step insert2_update: <... completed>
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step select2: SELECT * FROM doselect;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step c2: COMMIT;
+
+starting permutation: insert1_keyshare insert2_update c1 select2 c2
+step insert1_keyshare: INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT FOR KEY SHARE RETURNING *;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step insert2_update: INSERT INTO doselect(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO SELECT FOR UPDATE RETURNING *;
+step c1: COMMIT;
+step insert2_update: <... completed>
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step select2: SELECT * FROM doselect;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step c2: COMMIT;
+
+starting permutation: insert1_share insert2_update c1 select2 c2
+step insert1_share: INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT FOR SHARE RETURNING *;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step insert2_update: INSERT INTO doselect(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO SELECT FOR UPDATE RETURNING *;
+step c1: COMMIT;
+step insert2_update: <... completed>
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step select2: SELECT * FROM doselect;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step c2: COMMIT;
+
+starting permutation: insert1_nokeyupd insert2_update c1 select2 c2
+step insert1_nokeyupd: INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT FOR NO KEY UPDATE RETURNING *;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step insert2_update: INSERT INTO doselect(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO SELECT FOR UPDATE RETURNING *;
+step c1: COMMIT;
+step insert2_update: <... completed>
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step select2: SELECT * FROM doselect;
+key|val
+---+--------
+ 1|original
+(1 row)
+
+step c2: COMMIT;
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 6a4d3532e03..4e466580cd4 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -54,6 +54,7 @@ test: insert-conflict-do-update-2
test: insert-conflict-do-update-3
test: insert-conflict-do-update-4
test: insert-conflict-specconflict
+test: insert-conflict-do-select
test: merge-insert-update
test: merge-delete
test: merge-update
diff --git a/src/test/isolation/specs/insert-conflict-do-select.spec b/src/test/isolation/specs/insert-conflict-do-select.spec
new file mode 100644
index 00000000000..dcfd9f8cb53
--- /dev/null
+++ b/src/test/isolation/specs/insert-conflict-do-select.spec
@@ -0,0 +1,53 @@
+# INSERT...ON CONFLICT DO SELECT test
+#
+# This test verifies locking behavior of ON CONFLICT DO SELECT with different
+# lock strengths: no lock, FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, and
+# FOR UPDATE.
+
+setup
+{
+ CREATE TABLE doselect (key int primary key, val text);
+ INSERT INTO doselect VALUES (1, 'original');
+}
+
+teardown
+{
+ DROP TABLE doselect;
+}
+
+session s1
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step insert1 { INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT RETURNING *; }
+step insert1_keyshare { INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT FOR KEY SHARE RETURNING *; }
+step insert1_share { INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT FOR SHARE RETURNING *; }
+step insert1_nokeyupd { INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT FOR NO KEY UPDATE RETURNING *; }
+step insert1_update { INSERT INTO doselect(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO SELECT FOR UPDATE RETURNING *; }
+step c1 { COMMIT; }
+step a1 { ABORT; }
+
+session s2
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step insert2 { INSERT INTO doselect(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO SELECT RETURNING *; }
+step insert2_update { INSERT INTO doselect(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO SELECT FOR UPDATE RETURNING *; }
+step select2 { SELECT * FROM doselect; }
+step c2 { COMMIT; }
+
+# Test 1: DO SELECT without locking - should not block
+permutation insert1 insert2 c1 select2 c2
+
+# Test 2: DO SELECT FOR UPDATE - should block until first transaction commits
+permutation insert1_update insert2_update c1 select2 c2
+
+# Test 3: DO SELECT FOR UPDATE - should unblock when first transaction aborts
+permutation insert1_update insert2_update a1 select2 c2
+
+# Test 4: Different lock strengths all properly acquire locks
+permutation insert1_keyshare insert2_update c1 select2 c2
+permutation insert1_share insert2_update c1 select2 c2
+permutation insert1_nokeyupd insert2_update c1 select2 c2
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 44c7163c1cd..e8c31ec8e74 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -38,6 +38,7 @@ SUBDIRS = \
test_oat_hooks \
test_parser \
test_pg_dump \
+ test_plan_advice \
test_predtest \
test_radixtree \
test_rbtree \
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index 2634a519935..6998a226fa7 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -39,6 +39,7 @@ subdir('test_misc')
subdir('test_oat_hooks')
subdir('test_parser')
subdir('test_pg_dump')
+subdir('test_plan_advice')
subdir('test_predtest')
subdir('test_radixtree')
subdir('test_rbtree')
diff --git a/src/test/modules/test_cplusplusext/test_cplusplusext.cpp b/src/test/modules/test_cplusplusext/test_cplusplusext.cpp
index 435937c00d2..8c2eabcca43 100644
--- a/src/test/modules/test_cplusplusext/test_cplusplusext.cpp
+++ b/src/test/modules/test_cplusplusext/test_cplusplusext.cpp
@@ -17,12 +17,16 @@
extern "C" {
#include "postgres.h"
#include "fmgr.h"
+#include "nodes/pg_list.h"
+#include "nodes/primnodes.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(test_cplusplus_add);
}
+StaticAssertDecl(sizeof(int32) == 4, "int32 should be 4 bytes");
+
/*
* Simple function that returns the sum of two integers. This verifies that
* C++ extension modules can be loaded and called correctly at runtime.
@@ -32,6 +36,24 @@ test_cplusplus_add(PG_FUNCTION_ARGS)
{
int32 a = PG_GETARG_INT32(0);
int32 b = PG_GETARG_INT32(1);
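+ /* Check that List macros and static assertions also work from C++. */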
+ RangeTblRef *node = makeNode(RangeTblRef);
+ List *list = list_make1(node);
+
+ foreach_ptr(RangeTblRef, rtr, list)
+ {
+ (void) rtr;
+ }
+
+ foreach_node(RangeTblRef, rtr, list)
+ {
+ (void) rtr;
+ }
+
+ StaticAssertStmt(sizeof(int32) == 4, "int32 should be 4 bytes");
+ (void) StaticAssertExpr(sizeof(int64) == 8, "int64 should be 8 bytes");
+
+ list_free(list);
+ pfree(node);
PG_RETURN_INT32(a + b);
}
diff --git a/src/test/modules/test_json_parser/test_json_parser_incremental.c b/src/test/modules/test_json_parser/test_json_parser_incremental.c
index 6bc559f7bf8..8fbd180c861 100644
--- a/src/test/modules/test_json_parser/test_json_parser_incremental.c
+++ b/src/test/modules/test_json_parser/test_json_parser_incremental.c
@@ -113,7 +113,7 @@ main(int argc, char **argv)
{
case 'r': /* chunk range */
run_chunk_ranges = true;
- /* fall through */
+ pg_fallthrough;
case 'c': /* chunk size */
chunk_size = strtou64(optarg, NULL, 10);
if (chunk_size > BUFSIZE)
diff --git a/src/test/modules/test_plan_advice/Makefile b/src/test/modules/test_plan_advice/Makefile
new file mode 100644
index 00000000000..be026ce34bf
--- /dev/null
+++ b/src/test/modules/test_plan_advice/Makefile
@@ -0,0 +1,28 @@
+# src/test/modules/test_plan_advice/Makefile
+
+PGFILEDESC = "test_plan_advice - test whether generated plan advice works"
+
+MODULE_big = test_plan_advice
+OBJS = \
+ $(WIN32RES) \
+ test_plan_advice.o
+
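+# The tests exercise pg_plan_advice, so install that module as well.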
+EXTRA_INSTALL = contrib/pg_plan_advice
+
+TAP_TESTS = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/test_plan_advice
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
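+# pg_plan_advice.h lives in the contrib module's source directory.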
+override CPPFLAGS += -I$(top_srcdir)/contrib/pg_plan_advice
+
+REGRESS_SHLIB=$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)
+export REGRESS_SHLIB
diff --git a/src/test/modules/test_plan_advice/meson.build b/src/test/modules/test_plan_advice/meson.build
new file mode 100644
index 00000000000..afde420baed
--- /dev/null
+++ b/src/test/modules/test_plan_advice/meson.build
@@ -0,0 +1,29 @@
+# Copyright (c) 2022-2026, PostgreSQL Global Development Group
+
+test_plan_advice_sources = files(
+ 'test_plan_advice.c',
+)
+
+if host_system == 'windows'
+ test_plan_advice_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'test_plan_advice',
+ '--FILEDESC', 'test_plan_advice - test whether generated plan advice works',])
+endif
+
+test_plan_advice = shared_module('test_plan_advice',
+ test_plan_advice_sources,
+ include_directories: pg_plan_advice_inc,
+ kwargs: pg_test_mod_args,
+)
+test_install_libs += test_plan_advice
+
+tests += {
+ 'name': 'test_plan_advice',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'tap': {
+ 'tests': [
+ 't/001_replan_regress.pl',
+ ],
+ },
+}
diff --git a/src/test/modules/test_plan_advice/t/001_replan_regress.pl b/src/test/modules/test_plan_advice/t/001_replan_regress.pl
new file mode 100644
index 00000000000..303210f13b8
--- /dev/null
+++ b/src/test/modules/test_plan_advice/t/001_replan_regress.pl
@@ -0,0 +1,65 @@
+# Copyright (c) 2021-2025, PostgreSQL Global Development Group
+
+# Run the core regression tests under pg_plan_advice to check for problems.
+use strict;
+use warnings FATAL => 'all';
+
+use Cwd qw(abs_path);
+use File::Basename qw(dirname);
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Initialize the primary node
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init();
+
+# Set up our desired configuration.
+$node->append_conf('postgresql.conf', <<EOF);
+shared_preload_libraries = 'pg_plan_advice, test_plan_advice'
+EOF
+
+$node->start;
+
+my $srcdir = abs_path("../../../..");
+
+# --dlpath is needed to be able to find the location of regress.so
+# and any libraries the regression tests require.
+my $dlpath = dirname($ENV{REGRESS_SHLIB});
+
+# --outputdir points to the path where to place the output files.
+my $outputdir = $PostgreSQL::Test::Utils::tmp_check;
+
+# --inputdir points to the path of the input files.
+my $inputdir = "$srcdir/src/test/regress";
+
+# Run the tests.
+my $rc =
+ system($ENV{PG_REGRESS} . " "
+ . "--bindir= "
+ . "--dlpath=\"$dlpath\" "
+ . "--host=" . $node->host . " "
+ . "--port=" . $node->port . " "
+ . "--schedule=$srcdir/src/test/regress/parallel_schedule "
+ . "--max-concurrent-tests=20 "
+ . "--inputdir=\"$inputdir\" "
+ . "--outputdir=\"$outputdir\"");
+
+# Dump out the regression diffs file, if there is one
+if ($rc != 0)
+{
+ my $diffs = "$outputdir/regression.diffs";
+ if (-e $diffs)
+ {
+ print "=== dumping $diffs ===\n";
+ print slurp_file($diffs);
+ print "=== EOF ===\n";
+ }
+}
+
+# Report results
+is($rc, 0, 'regression tests pass');
+
+done_testing();
diff --git a/src/test/modules/test_plan_advice/test_plan_advice.c b/src/test/modules/test_plan_advice/test_plan_advice.c
new file mode 100644
index 00000000000..996675dc386
--- /dev/null
+++ b/src/test/modules/test_plan_advice/test_plan_advice.c
@@ -0,0 +1,143 @@
+/*-------------------------------------------------------------------------
+ *
+ * test_plan_advice.c
+ * Test pg_plan_advice by planning every query with generated advice.
+ *
+ * With this module loaded, every time a query is executed, we end up
+ * planning it twice. The first time we plan it, we generate plan advice,
+ * which we then feed back to pg_plan_advice as the supplied plan advice.
+ * It is then planned a second time using that advice. This hopefully
+ * allows us to detect cases where the advice is incorrect or causes
+ * failures or plan changes for some reason.
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ * src/test/modules/test_plan_advice/test_plan_advice.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/xact.h"
+#include "fmgr.h"
+#include "optimizer/optimizer.h"
+#include "pg_plan_advice.h"
+#include "utils/guc.h"
+
+PG_MODULE_MAGIC;
+
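+/* Recursion guard, set while this module itself is running the planner. */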
+static bool in_recursion = false;
+
+static char *test_plan_advice_advisor(PlannerGlobal *glob,
+ Query *parse,
+ const char *query_string,
+ int cursorOptions,
+ ExplainState *es);
+static DefElem *find_defelem_by_defname(List *deflist, char *defname);
+
+/*
+ * Initialize this module.
+ */
+void
+_PG_init(void)
+{
+ void *(*add_advisor_fn) (pg_plan_advice_advisor_hook hook);
+
+ /*
+ * Ask pg_plan_advice to get advice strings from test_plan_advice_advisor
+ */
+ add_advisor_fn =
+ load_external_function("pg_plan_advice", "pg_plan_advice_add_advisor",
+ true, NULL);
+
+ (*add_advisor_fn) (test_plan_advice_advisor);
+}
+
+/*
+ * Re-plan the given query and return the generated advice string as the
+ * supplied advice.
+ */
+static char *
+test_plan_advice_advisor(PlannerGlobal *glob, Query *parse,
+ const char *query_string, int cursorOptions,
+ ExplainState *es)
+{
+ PlannedStmt *pstmt;
+ int save_nestlevel = 0;
+ DefElem *pgpa_item;
+ DefElem *advice_string_item;
+
+ /*
+ * Since this function is called from the planner and triggers planning,
+ * we need a recursion guard.
+ */
+ if (in_recursion)
+ return NULL;
+
+ PG_TRY();
+ {
+ in_recursion = true;
+
+ /*
+ * Planning can trigger expression evaluation, which can result in
+ * sending NOTICE messages or other output to the client. To avoid
+ * that, we set client_min_messages = ERROR in the hopes of getting
+ * the same output with and without this module.
+ *
+ * We also need to set pg_plan_advice.always_store_advice_details so
+ * that pg_plan_advice will generate an advice string, since the whole
+ * point of this function is to get access to that.
+ */
+ save_nestlevel = NewGUCNestLevel();
+ set_config_option("client_min_messages", "error",
+ PGC_SUSET, PGC_S_SESSION,
+ GUC_ACTION_SAVE, true, 0, false);
+ set_config_option("pg_plan_advice.always_store_advice_details", "true",
+ PGC_SUSET, PGC_S_SESSION,
+ GUC_ACTION_SAVE, true, 0, false);
+
+ /*
+ * Replan. We must copy the Query, because the planner modifies it.
+ * (As noted elsewhere, that's unfortunate; perhaps it will be fixed
+ * some day.)
+ */
+ pstmt = planner(copyObject(parse), query_string, cursorOptions,
+ glob->boundParams, es);
+ }
+ PG_FINALLY();
+ {
+ in_recursion = false;
+ }
+ PG_END_TRY();
+
+ /* Roll back any GUC changes */
+ if (save_nestlevel > 0)
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Extract and return the advice string */
+ pgpa_item = find_defelem_by_defname(pstmt->extension_state,
+ "pg_plan_advice");
+ if (pgpa_item == NULL)
+ elog(ERROR, "extension state for pg_plan_advice not found");
+ advice_string_item = find_defelem_by_defname((List *) pgpa_item->arg,
+ "advice_string");
+ if (advice_string_item == NULL)
+ elog(ERROR,
+ "advice string for pg_plan_advice not found in extension state");
+ return strVal(advice_string_item->arg);
+}
+
+/*
+ * Search a list of DefElem objects for a given defname.
+ */
+static DefElem *
+find_defelem_by_defname(List *deflist, char *defname)
+{
+ foreach_node(DefElem, item, deflist)
+ {
+ if (strcmp(item->defname, defname) == 0)
+ return item;
+ }
+
+ return NULL;
+}
diff --git a/src/test/modules/test_regex/test_regex.c b/src/test/modules/test_regex/test_regex.c
index 070464a341e..4e97cde65a6 100644
--- a/src/test/modules/test_regex/test_regex.c
+++ b/src/test/modules/test_regex/test_regex.c
@@ -411,7 +411,8 @@ parse_test_flags(test_re_flags *flags, text *opts)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid regular expression test option: \"%.*s\"",
- pg_mblen(opt_p + i), opt_p + i)));
+ pg_mblen_range(opt_p + i, opt_p + opt_len),
+ opt_p + i)));
break;
}
}
diff --git a/src/test/modules/test_shm_mq/setup.c b/src/test/modules/test_shm_mq/setup.c
index ba2fd746d73..579e5933d28 100644
--- a/src/test/modules/test_shm_mq/setup.c
+++ b/src/test/modules/test_shm_mq/setup.c
@@ -228,6 +228,7 @@ setup_background_workers(int nworkers, dsm_segment *seg)
/* Register the workers. */
for (i = 0; i < nworkers; ++i)
{
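+ /* Give each worker a distinct name to use in log messages. */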
+ snprintf(worker.bgw_name, BGW_MAXLEN, "test_shm_mq worker %d", i + 1);
if (!RegisterDynamicBackgroundWorker(&worker, &wstate->handle[i]))
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
diff --git a/src/test/modules/test_shm_mq/worker.c b/src/test/modules/test_shm_mq/worker.c
index 368e4f3f234..6a4147554bb 100644
--- a/src/test/modules/test_shm_mq/worker.c
+++ b/src/test/modules/test_shm_mq/worker.c
@@ -54,13 +54,7 @@ test_shm_mq_main(Datum main_arg)
int myworkernumber;
PGPROC *registrant;
- /*
- * Establish signal handlers.
- *
- * We want CHECK_FOR_INTERRUPTS() to kill off this worker process just as
- * it would a normal user backend. To make that happen, we use die().
- */
- pqsignal(SIGTERM, die);
+ /* Unblock signals. The standard signal handlers are OK for us. */
BackgroundWorkerUnblockSignals();
/*
diff --git a/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm b/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm
index 5bd41a278dd..c6ff2dbde4c 100644
--- a/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm
+++ b/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm
@@ -155,11 +155,11 @@ sub wait_connect
#
# See query() for details about why/how the banner is used.
my $banner = "background_psql: ready";
- my $banner_match = qr/(^|\n)$banner\r?\n/;
- $self->{stdin} .= "\\echo $banner\n\\warn $banner\n";
+ my $banner_match = qr/$banner\r?\n/;
+ $self->{stdin} .= "\\echo '$banner'\n\\warn '$banner'\n";
$self->{run}->pump()
until ($self->{stdout} =~ /$banner_match/
- && $self->{stderr} =~ /$banner\r?\n/)
+ && $self->{stderr} =~ /$banner_match/)
|| $self->{timeout}->is_expired;
note "connect output:\n",
@@ -264,22 +264,17 @@ sub query
# stderr (or vice versa), even if psql printed them in the opposite
# order. We therefore wait on both.
#
- # We need to match for the newline, because we try to remove it below, and
- # it's possible to consume just the input *without* the newline. In
- # interactive psql we emit \r\n, so we need to allow for that. Also need
- # to be careful that we don't e.g. match the echoed \echo command, rather
- # than its output.
+ # In interactive psql we emit \r\n, so we need to allow for that.
+ # Also, include quotes around the banner string in the \echo and \warn
+ # commands, not because the string needs quoting but so that $banner_match
+ # can't match readline's echoing of these commands.
my $banner = "background_psql: QUERY_SEPARATOR $query_cnt:";
- my $banner_match = qr/(^|\n)$banner\r?\n/;
- $self->{stdin} .= "$query\n;\n\\echo $banner\n\\warn $banner\n";
- pump_until(
- $self->{run}, $self->{timeout},
- \$self->{stdout}, qr/$banner_match/);
- pump_until(
- $self->{run}, $self->{timeout},
- \$self->{stderr}, qr/$banner_match/);
-
- die "psql query timed out" if $self->{timeout}->is_expired;
+ my $banner_match = qr/$banner\r?\n/;
+ $self->{stdin} .= "$query\n;\n\\echo '$banner'\n\\warn '$banner'\n";
+ $self->{run}->pump()
+ until ($self->{stdout} =~ /$banner_match/
+ && $self->{stderr} =~ /$banner_match/)
+ || $self->{timeout}->is_expired;
note "results query $query_cnt:\n",
explain {
@@ -287,9 +282,12 @@ sub query
stderr => $self->{stderr},
} unless !$params{verbose};
- # Remove banner from stdout and stderr, our caller doesn't care. The
- # first newline is optional, as there would not be one if consuming an
- # empty query result.
+ die "psql query timed out" if $self->{timeout}->is_expired;
+
+ # Remove banner from stdout and stderr, our caller doesn't want it.
+ # Also remove the query output's trailing newline, if present (there
+ # would not be one if consuming an empty query result).
+ $banner_match = qr/\r?\n?$banner\r?\n/;
$output = $self->{stdout};
$output =~ s/$banner_match//;
$self->{stderr} =~ s/$banner_match//;
diff --git a/src/test/recovery/t/002_archiving.pl b/src/test/recovery/t/002_archiving.pl
index 883ba75b313..aa40f58e6d6 100644
--- a/src/test/recovery/t/002_archiving.pl
+++ b/src/test/recovery/t/002_archiving.pl
@@ -115,6 +115,17 @@
recovery_end_command = 'echo recovery_end_failed > missing_dir/xyz.file'
));
+# Create recovery.signal and confirm that both signal files exist.
+# This is necessary to test how recovery behaves when both files are present,
+# i.e., standby.signal should take precedence and both files should be
+# removed at the end of recovery.
+$node_standby2->set_recovery_mode();
+my $node_standby2_data = $node_standby2->data_dir;
+ok(-f "$node_standby2_data/recovery.signal",
+ "recovery.signal is present at the beginning of recovery");
+ok(-f "$node_standby2_data/standby.signal",
+ "standby.signal is present at the beginning of recovery");
+
$node_standby2->start;
# Save the log location, to see the failure of recovery_end_command.
@@ -126,7 +137,6 @@
# Check the logs of the standby to see that the commands have failed.
my $log_contents = slurp_file($node_standby2->logfile, $log_location);
-my $node_standby2_data = $node_standby2->data_dir;
like(
$log_contents,
@@ -141,4 +151,10 @@
qr/WARNING:.*recovery_end_command/s,
"recovery_end_command failure detected in logs after promotion");
+# Check that no signal files are present after promotion.
+ok( !-f "$node_standby2_data/recovery.signal",
+ "recovery.signal was left behind after promotion");
+ok( !-f "$node_standby2_data/standby.signal",
+ "standby.signal was left behind after promotion");
+
done_testing();
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index e1ab6dc278a..66439d427a3 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -1737,6 +1737,11 @@ select '[-2147483648:-2147483647]={1,2}'::int[];
(1 row)
-- all of the above should be accepted
+-- some day we might allow these cases, but for now they're errors:
+select array[]::oidvector;
+ERROR: array is not a valid oidvector
+select array[]::int2vector;
+ERROR: array is not a valid int2vector
-- tests for array aggregates
CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]);
INSERT INTO arraggtest (f1, f2, f3) VALUES
diff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out
index 1bbf59cca02..a6fa9cacb72 100644
--- a/src/test/regress/expected/constraints.out
+++ b/src/test/regress/expected/constraints.out
@@ -780,6 +780,10 @@ INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
ON CONFLICT ON CONSTRAINT circles_c1_c2_excl DO UPDATE SET c2 = EXCLUDED.c2;
ERROR: ON CONFLICT DO UPDATE not supported with exclusion constraints
+-- fail, because DO SELECT variant requires unique index
+INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
+ ON CONFLICT ON CONSTRAINT circles_c1_c2_excl DO SELECT RETURNING *;
+ERROR: ON CONFLICT DO SELECT not supported with exclusion constraints
-- succeed because c1 doesn't overlap
INSERT INTO circles VALUES('<(20,20), 1>', '<(0,0), 5>');
-- succeed because c2 doesn't overlap
@@ -846,8 +850,12 @@ CREATE TABLE notnull_tbl1 (a INTEGER NOT NULL NOT NULL);
Not-null constraints:
"notnull_tbl1_a_not_null" NOT NULL "a"
--- no-op
+-- specifying an existing constraint is a no-op
+ALTER TABLE notnull_tbl1 ADD CONSTRAINT notnull_tbl1_a_not_null NOT NULL a;
+-- but using a different constraint name is not allowed
ALTER TABLE notnull_tbl1 ADD CONSTRAINT nn NOT NULL a;
+ERROR: cannot create not-null constraint "nn" on column "a" of table "notnull_tbl1"
+DETAIL: A not-null constraint named "notnull_tbl1_a_not_null" already exists for this column.
\d+ notnull_tbl1
Table "public.notnull_tbl1"
Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
diff --git a/src/test/regress/expected/copyencoding.out b/src/test/regress/expected/copyencoding.out
index cfa2ed6df00..76ea0e7cf04 100644
--- a/src/test/regress/expected/copyencoding.out
+++ b/src/test/regress/expected/copyencoding.out
@@ -17,6 +17,13 @@ CREATE TABLE copy_encoding_tab (t text);
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
-- Read UTF8 data as LATIN1: no error
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1');
+-- Non-server encodings have distinct code paths.
+\set fname :abs_builddir '/results/copyencoding_gb18030.csv'
+COPY (SELECT E'\u3042,') TO :'fname' WITH (FORMAT csv, ENCODING 'GB18030');
+COPY copy_encoding_tab FROM :'fname' WITH (FORMAT csv, ENCODING 'GB18030');
+\set fname :abs_builddir '/results/copyencoding_gb18030.data'
+COPY (SELECT E'\u3042,') TO :'fname' WITH (FORMAT text, ENCODING 'GB18030');
+COPY copy_encoding_tab FROM :'fname' WITH (FORMAT text, ENCODING 'GB18030');
-- Use client_encoding
SET client_encoding TO UTF8;
-- U+3042 HIRAGANA LETTER A
diff --git a/src/test/regress/expected/encoding.out b/src/test/regress/expected/encoding.out
new file mode 100644
index 00000000000..b3655527b0a
--- /dev/null
+++ b/src/test/regress/expected/encoding.out
@@ -0,0 +1,445 @@
+/* skip test if not UTF8 server encoding */
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+\getenv libdir PG_LIBDIR
+\getenv dlsuffix PG_DLSUFFIX
+\set regresslib :libdir '/regress' :dlsuffix
+CREATE FUNCTION test_bytea_to_text(bytea) RETURNS text
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_text_to_bytea(text) RETURNS bytea
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_mblen_func(text, text, text, int) RETURNS int
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_text_to_wchars(text, text) RETURNS int[]
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_wchars_to_text(text, int[]) RETURNS text
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_valid_server_encoding(text) RETURNS boolean
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE TABLE regress_encoding(good text, truncated text, with_nul text, truncated_with_nul text);
+INSERT INTO regress_encoding
+VALUES ('café',
+ 'caf' || test_bytea_to_text('\xc3'),
+ 'café' || test_bytea_to_text('\x00') || 'dcba',
+ 'caf' || test_bytea_to_text('\xc300') || 'dcba');
+SELECT good, truncated, with_nul FROM regress_encoding;
+ good | truncated | with_nul
+------+-----------+----------
+ café | caf | café
+(1 row)
+
+SELECT length(good) FROM regress_encoding;
+ length
+--------
+ 4
+(1 row)
+
+SELECT substring(good, 3, 1) FROM regress_encoding;
+ substring
+-----------
+ f
+(1 row)
+
+SELECT substring(good, 4, 1) FROM regress_encoding;
+ substring
+-----------
+ é
+(1 row)
+
+SELECT regexp_replace(good, '^caf(.)$', '\1') FROM regress_encoding;
+ regexp_replace
+----------------
+ é
+(1 row)
+
+SELECT reverse(good) FROM regress_encoding;
+ reverse
+---------
+ éfac
+(1 row)
+
+-- invalid short mb character = error
+SELECT length(truncated) FROM regress_encoding;
+ERROR: invalid byte sequence for encoding "UTF8": 0xc3
+SELECT substring(truncated, 1, 3) FROM regress_encoding;
+ substring
+-----------
+ caf
+(1 row)
+
+SELECT substring(truncated, 1, 4) FROM regress_encoding;
+ERROR: invalid byte sequence for encoding "UTF8": 0xc3
+SELECT reverse(truncated) FROM regress_encoding;
+ERROR: invalid byte sequence for encoding "UTF8": 0xc3
+-- invalid short mb character = silently dropped
+SELECT regexp_replace(truncated, '^caf(.)$', '\1') FROM regress_encoding;
+ regexp_replace
+----------------
+ caf
+(1 row)
+
+-- PostgreSQL doesn't allow strings to contain NUL. If a corrupted string
+-- contains NUL at a character boundary position, some functions treat it as a
+-- character while others treat it as a terminator, as implementation details.
+-- NUL = terminator
+SELECT length(with_nul) FROM regress_encoding;
+ length
+--------
+ 4
+(1 row)
+
+SELECT substring(with_nul, 3, 1) FROM regress_encoding;
+ substring
+-----------
+ f
+(1 row)
+
+SELECT substring(with_nul, 4, 1) FROM regress_encoding;
+ substring
+-----------
+ é
+(1 row)
+
+SELECT substring(with_nul, 5, 1) FROM regress_encoding;
+ substring
+-----------
+
+(1 row)
+
+SELECT convert_to(substring(with_nul, 5, 1), 'UTF8') FROM regress_encoding;
+ convert_to
+------------
+ \x
+(1 row)
+
+SELECT regexp_replace(with_nul, '^caf(.)$', '\1') FROM regress_encoding;
+ regexp_replace
+----------------
+ é
+(1 row)
+
+-- NUL = character
+SELECT with_nul, reverse(with_nul), reverse(reverse(with_nul)) FROM regress_encoding;
+ with_nul | reverse | reverse
+----------+---------+---------
+ café | abcd | café
+(1 row)
+
+-- If a corrupted string contains NUL in the tail bytes of a multibyte
+-- character (invalid in all encodings), it is considered part of the
+-- character for length purposes. An error will only be raised in code paths
+-- that convert or verify encodings.
+SELECT length(truncated_with_nul) FROM regress_encoding;
+ length
+--------
+ 8
+(1 row)
+
+SELECT substring(truncated_with_nul, 3, 1) FROM regress_encoding;
+ substring
+-----------
+ f
+(1 row)
+
+SELECT substring(truncated_with_nul, 4, 1) FROM regress_encoding;
+ substring
+-----------
+
+(1 row)
+
+SELECT convert_to(substring(truncated_with_nul, 4, 1), 'UTF8') FROM regress_encoding;
+ERROR: invalid byte sequence for encoding "UTF8": 0xc3 0x00
+SELECT substring(truncated_with_nul, 5, 1) FROM regress_encoding;
+ substring
+-----------
+ d
+(1 row)
+
+SELECT regexp_replace(truncated_with_nul, '^caf(.)dcba$', '\1') = test_bytea_to_text('\xc300') FROM regress_encoding;
+ ?column?
+----------
+ t
+(1 row)
+
+SELECT reverse(truncated_with_nul) FROM regress_encoding;
+ reverse
+---------
+ abcd
+(1 row)
+
+-- unbounded: sequence would overrun the string!
+SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated, 3)
+FROM regress_encoding;
+ test_mblen_func
+-----------------
+ 2
+(1 row)
+
+-- condition detected when using the length/range variants
+SELECT test_mblen_func('pg_mblen_with_len', 'UTF8', truncated, 3)
+FROM regress_encoding;
+ERROR: invalid byte sequence for encoding "UTF8": 0xc3
+SELECT test_mblen_func('pg_mblen_range', 'UTF8', truncated, 3)
+FROM regress_encoding;
+ERROR: invalid byte sequence for encoding "UTF8": 0xc3
+-- unbounded: sequence would overrun the string, if the terminator were really
+-- the end of it
+SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated_with_nul, 3)
+FROM regress_encoding;
+ test_mblen_func
+-----------------
+ 2
+(1 row)
+
+SELECT test_mblen_func('pg_encoding_mblen', 'GB18030', truncated_with_nul, 3)
+FROM regress_encoding;
+ test_mblen_func
+-----------------
+ 2
+(1 row)
+
+-- condition detected when using the cstr variants
+SELECT test_mblen_func('pg_mblen_cstr', 'UTF8', truncated_with_nul, 3)
+FROM regress_encoding;
+ERROR: invalid byte sequence for encoding "UTF8": 0xc3
+DROP TABLE regress_encoding;
+-- mb<->wchar conversions
+CREATE FUNCTION test_encoding(encoding text, description text, input bytea)
+RETURNS VOID LANGUAGE plpgsql AS
+$$
+DECLARE
+ prefix text;
+ len int;
+ wchars int[];
+ round_trip bytea;
+ result text;
+BEGIN
+ prefix := rpad(encoding || ' ' || description || ':', 28);
+
+ -- XXX could also test validation, length functions and include client
+ -- only encodings with these test cases
+
+ IF test_valid_server_encoding(encoding) THEN
+ wchars := test_text_to_wchars(encoding, test_bytea_to_text(input));
+ round_trip = test_text_to_bytea(test_wchars_to_text(encoding, wchars));
+ if input = round_trip then
+ result := 'OK';
+ elsif length(input) > length(round_trip) and round_trip = substr(input, 1, length(round_trip)) then
+ result := 'truncated';
+ else
+ result := 'failed';
+ end if;
+ RAISE NOTICE '% % -> % -> % = %', prefix, input, wchars, round_trip, result;
+ END IF;
+END;
+$$;
+-- No validation is done on the encoding itself, just the length to avoid
+-- overruns, so some of the byte sequences below are bogus. They cover
+-- all code branches, server encodings only for now.
+CREATE TABLE encoding_tests (encoding text, description text, input bytea);
+INSERT INTO encoding_tests VALUES
+ -- LATIN1, other single-byte encodings
+ ('LATIN1', 'ASCII', 'a'),
+ ('LATIN1', 'extended', '\xe9'),
+ -- EUC_JP, EUC_JIS_2004, EUC_KR (for the purposes of wchar conversion):
+ -- 2 8e (CS2, not used by EUC_KR but arbitrarily considered to have EUC_JP length)
+ -- 3 8f (CS3, not used by EUC_KR but arbitrarily considered to have EUC_JP length)
+ -- 2 80..ff (CS1)
+ ('EUC_JP', 'ASCII', 'a'),
+ ('EUC_JP', 'CS1, short', '\x80'),
+ ('EUC_JP', 'CS1', '\x8002'),
+ ('EUC_JP', 'CS2, short', '\x8e'),
+ ('EUC_JP', 'CS2', '\x8e02'),
+ ('EUC_JP', 'CS3, short', '\x8f'),
+ ('EUC_JP', 'CS3, short', '\x8f02'),
+ ('EUC_JP', 'CS3', '\x8f0203'),
+ -- EUC_CN
+ -- 3 8e (CS2, not used but arbitrarily considered to have length 3)
+ -- 3 8f (CS3, not used but arbitrarily considered to have length 3)
+ -- 2 80..ff (CS1)
+ ('EUC_CN', 'ASCII', 'a'),
+ ('EUC_CN', 'CS1, short', '\x80'),
+ ('EUC_CN', 'CS1', '\x8002'),
+ ('EUC_CN', 'CS2, short', '\x8e'),
+ ('EUC_CN', 'CS2, short', '\x8e02'),
+ ('EUC_CN', 'CS2', '\x8e0203'),
+ ('EUC_CN', 'CS3, short', '\x8f'),
+ ('EUC_CN', 'CS3, short', '\x8f02'),
+ ('EUC_CN', 'CS3', '\x8f0203'),
+ -- EUC_TW:
+ -- 4 8e (CS2)
+ -- 3 8f (CS3, not used but arbitrarily considered to have length 3)
+ -- 2 80..ff (CS1)
+ ('EUC_TW', 'ASCII', 'a'),
+ ('EUC_TW', 'CS1, short', '\x80'),
+ ('EUC_TW', 'CS1', '\x8002'),
+ ('EUC_TW', 'CS2, short', '\x8e'),
+ ('EUC_TW', 'CS2, short', '\x8e02'),
+ ('EUC_TW', 'CS2, short', '\x8e0203'),
+ ('EUC_TW', 'CS2', '\x8e020304'),
+ ('EUC_TW', 'CS3, short', '\x8f'),
+ ('EUC_TW', 'CS3, short', '\x8f02'),
+ ('EUC_TW', 'CS3', '\x8f0203'),
+ -- UTF8
+ -- 2 c0..df
+ -- 3 e0..ef
+ -- 4 f0..f7 (but maximum real codepoint U+10ffff has f4)
+ -- 5 f8..fb (not supported)
+ -- 6 fc..fd (not supported)
+ ('UTF8', 'ASCII', 'a'),
+ ('UTF8', '2 byte, short', '\xdf'),
+ ('UTF8', '2 byte', '\xdf82'),
+ ('UTF8', '3 byte, short', '\xef'),
+ ('UTF8', '3 byte, short', '\xef82'),
+ ('UTF8', '3 byte', '\xef8283'),
+ ('UTF8', '4 byte, short', '\xf7'),
+ ('UTF8', '4 byte, short', '\xf782'),
+ ('UTF8', '4 byte, short', '\xf78283'),
+ ('UTF8', '4 byte', '\xf7828384'),
+ ('UTF8', '5 byte, unsupported', '\xfb'),
+ ('UTF8', '5 byte, unsupported', '\xfb82'),
+ ('UTF8', '5 byte, unsupported', '\xfb8283'),
+ ('UTF8', '5 byte, unsupported', '\xfb828384'),
+ ('UTF8', '5 byte, unsupported', '\xfb82838485'),
+ ('UTF8', '6 byte, unsupported', '\xfd'),
+ ('UTF8', '6 byte, unsupported', '\xfd82'),
+ ('UTF8', '6 byte, unsupported', '\xfd8283'),
+ ('UTF8', '6 byte, unsupported', '\xfd828384'),
+ ('UTF8', '6 byte, unsupported', '\xfd82838485'),
+ ('UTF8', '6 byte, unsupported', '\xfd8283848586'),
+ -- MULE_INTERNAL
+ -- 2 81..8d LC1
+ -- 3 90..99 LC2
+ ('MULE_INTERNAL', 'ASCII', 'a'),
+ ('MULE_INTERNAL', 'LC1, short', '\x81'),
+ ('MULE_INTERNAL', 'LC1', '\x8182'),
+ ('MULE_INTERNAL', 'LC2, short', '\x90'),
+ ('MULE_INTERNAL', 'LC2, short', '\x9082'),
+ ('MULE_INTERNAL', 'LC2', '\x908283');
+SELECT COUNT(test_encoding(encoding, description, input)) > 0
+FROM encoding_tests;
+NOTICE: LATIN1 ASCII: \x61 -> {97} -> \x61 = OK
+NOTICE: LATIN1 extended: \xe9 -> {233} -> \xe9 = OK
+NOTICE: EUC_JP ASCII: \x61 -> {97} -> \x61 = OK
+NOTICE: EUC_JP CS1, short: \x80 -> {} -> \x = truncated
+NOTICE: EUC_JP CS1: \x8002 -> {32770} -> \x8002 = OK
+NOTICE: EUC_JP CS2, short: \x8e -> {} -> \x = truncated
+NOTICE: EUC_JP CS2: \x8e02 -> {36354} -> \x8e02 = OK
+NOTICE: EUC_JP CS3, short: \x8f -> {} -> \x = truncated
+NOTICE: EUC_JP CS3, short: \x8f02 -> {} -> \x = truncated
+NOTICE: EUC_JP CS3: \x8f0203 -> {9372163} -> \x8f0203 = OK
+NOTICE: EUC_CN ASCII: \x61 -> {97} -> \x61 = OK
+NOTICE: EUC_CN CS1, short: \x80 -> {} -> \x = truncated
+NOTICE: EUC_CN CS1: \x8002 -> {32770} -> \x8002 = OK
+NOTICE: EUC_CN CS2, short: \x8e -> {} -> \x = truncated
+NOTICE: EUC_CN CS2, short: \x8e02 -> {} -> \x = truncated
+NOTICE: EUC_CN CS2: \x8e0203 -> {9306627} -> \x8e0203 = OK
+NOTICE: EUC_CN CS3, short: \x8f -> {} -> \x = truncated
+NOTICE: EUC_CN CS3, short: \x8f02 -> {} -> \x = truncated
+NOTICE: EUC_CN CS3: \x8f0203 -> {9372163} -> \x8f0203 = OK
+NOTICE: EUC_TW ASCII: \x61 -> {97} -> \x61 = OK
+NOTICE: EUC_TW CS1, short: \x80 -> {} -> \x = truncated
+NOTICE: EUC_TW CS1: \x8002 -> {32770} -> \x8002 = OK
+NOTICE: EUC_TW CS2, short: \x8e -> {} -> \x = truncated
+NOTICE: EUC_TW CS2, short: \x8e02 -> {} -> \x = truncated
+NOTICE: EUC_TW CS2, short: \x8e0203 -> {} -> \x = truncated
+NOTICE: EUC_TW CS2: \x8e020304 -> {-1912470780} -> \x8e020304 = OK
+NOTICE: EUC_TW CS3, short: \x8f -> {} -> \x = truncated
+NOTICE: EUC_TW CS3, short: \x8f02 -> {} -> \x = truncated
+NOTICE: EUC_TW CS3: \x8f0203 -> {9372163} -> \x8f0203 = OK
+NOTICE: UTF8 ASCII: \x61 -> {97} -> \x61 = OK
+NOTICE: UTF8 2 byte, short: \xdf -> {} -> \x = truncated
+NOTICE: UTF8 2 byte: \xdf82 -> {1986} -> \xdf82 = OK
+NOTICE: UTF8 3 byte, short: \xef -> {} -> \x = truncated
+NOTICE: UTF8 3 byte, short: \xef82 -> {} -> \x = truncated
+NOTICE: UTF8 3 byte: \xef8283 -> {61571} -> \xef8283 = OK
+NOTICE: UTF8 4 byte, short: \xf7 -> {} -> \x = truncated
+NOTICE: UTF8 4 byte, short: \xf782 -> {} -> \x = truncated
+NOTICE: UTF8 4 byte, short: \xf78283 -> {} -> \x = truncated
+NOTICE: UTF8 4 byte: \xf7828384 -> {1843396} -> \xf7828384 = OK
+NOTICE: UTF8 5 byte, unsupported: \xfb -> {251} -> \xc3bb = failed
+NOTICE: UTF8 5 byte, unsupported: \xfb82 -> {251,130} -> \xc3bbc282 = failed
+NOTICE: UTF8 5 byte, unsupported: \xfb8283 -> {251,130,131} -> \xc3bbc282c283 = failed
+NOTICE: UTF8 5 byte, unsupported: \xfb828384 -> {251,130,131,132} -> \xc3bbc282c283c284 = failed
+NOTICE: UTF8 5 byte, unsupported: \xfb82838485 -> {251,130,131,132,133} -> \xc3bbc282c283c284c285 = failed
+NOTICE: UTF8 6 byte, unsupported: \xfd -> {253} -> \xc3bd = failed
+NOTICE: UTF8 6 byte, unsupported: \xfd82 -> {253,130} -> \xc3bdc282 = failed
+NOTICE: UTF8 6 byte, unsupported: \xfd8283 -> {253,130,131} -> \xc3bdc282c283 = failed
+NOTICE: UTF8 6 byte, unsupported: \xfd828384 -> {253,130,131,132} -> \xc3bdc282c283c284 = failed
+NOTICE: UTF8 6 byte, unsupported: \xfd82838485 -> {253,130,131,132,133} -> \xc3bdc282c283c284c285 = failed
+NOTICE: UTF8 6 byte, unsupported: \xfd8283848586 -> {253,130,131,132,133,134} -> \xc3bdc282c283c284c285c286 = failed
+NOTICE: MULE_INTERNAL ASCII: \x61 -> {97} -> \x61 = OK
+NOTICE: MULE_INTERNAL LC1, short: \x81 -> {} -> \x = truncated
+NOTICE: MULE_INTERNAL LC1: \x8182 -> {8454274} -> \x8182 = OK
+NOTICE: MULE_INTERNAL LC2, short: \x90 -> {} -> \x = truncated
+NOTICE: MULE_INTERNAL LC2, short: \x9082 -> {} -> \x = truncated
+NOTICE: MULE_INTERNAL LC2: \x908283 -> {9470595} -> \x908283 = OK
+ ?column?
+----------
+ t
+(1 row)
+
+-- substring fetches a slice of a toasted value; unused tail of that slice is
+-- an incomplete char (bug #19406)
+CREATE TABLE toast_3b_utf8 (c text);
+INSERT INTO toast_3b_utf8 VALUES (repeat(U&'\2026', 4000));
+SELECT SUBSTRING(c FROM 1 FOR 1) FROM toast_3b_utf8;
+ substring
+-----------
+ …
+(1 row)
+
+SELECT SUBSTRING(c FROM 4001 FOR 1) FROM toast_3b_utf8;
+ substring
+-----------
+
+(1 row)
+
+-- diagnose incomplete char iff within the substring
+UPDATE toast_3b_utf8 SET c = c || test_bytea_to_text('\xe280');
+SELECT SUBSTRING(c FROM 4000 FOR 1) FROM toast_3b_utf8;
+ substring
+-----------
+ …
+(1 row)
+
+SELECT SUBSTRING(c FROM 4001 FOR 1) FROM toast_3b_utf8;
+ERROR: invalid byte sequence for encoding "UTF8": 0xe2 0x80
+-- substring needing last byte of its slice_size
+ALTER TABLE toast_3b_utf8 RENAME TO toast_4b_utf8;
+UPDATE toast_4b_utf8 SET c = repeat(U&'\+01F680', 3000);
+SELECT SUBSTRING(c FROM 3000 FOR 1) FROM toast_4b_utf8;
+ substring
+-----------
+ 🚀
+(1 row)
+
+DROP TABLE encoding_tests;
+DROP TABLE toast_4b_utf8;
+DROP FUNCTION test_encoding;
+DROP FUNCTION test_wchars_to_text;
+DROP FUNCTION test_text_to_wchars;
+DROP FUNCTION test_valid_server_encoding;
+DROP FUNCTION test_mblen_func;
+DROP FUNCTION test_bytea_to_text;
+DROP FUNCTION test_text_to_bytea;
+-- substring slow path: multi-byte escape char vs. multi-byte pattern char.
+SELECT SUBSTRING('a' SIMILAR U&'\00AC' ESCAPE U&'\00A7');
+ substring
+-----------
+
+(1 row)
+
+-- Levenshtein distance metric: exercise character length cache.
+SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name);
+ERROR: column "real§_name" does not exist
+LINE 1: SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name);
+ ^
+HINT: Perhaps you meant to reference the column "x.real_name".
+-- JSON errcontext: truncate long data.
+SELECT repeat(U&'\00A7', 30)::json;
+ERROR: invalid input syntax for type json
+DETAIL: Token "§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§" is invalid.
+CONTEXT: JSON data, line 1: ...§§§§§§§§§§§§§§§§§§§§§§§§
diff --git a/src/test/regress/expected/encoding_1.out b/src/test/regress/expected/encoding_1.out
new file mode 100644
index 00000000000..a5b02090901
--- /dev/null
+++ b/src/test/regress/expected/encoding_1.out
@@ -0,0 +1,4 @@
+/* skip test if not UTF8 server encoding */
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/src/test/regress/expected/euc_kr.out b/src/test/regress/expected/euc_kr.out
new file mode 100644
index 00000000000..7a61c89a43a
--- /dev/null
+++ b/src/test/regress/expected/euc_kr.out
@@ -0,0 +1,16 @@
+-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent
+-- non-UTF8, multibyte encoding as of 2026-01. Since UTF8 can represent all
+-- of EUC_KR, also run the test in UTF8.
+SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+-- Exercise is_multibyte_char_in_char (non-UTF8) slow path.
+SELECT POSITION(
+ convert_from('\xbcf6c7d0', 'EUC_KR') IN
+ convert_from('\xb0fac7d02c20bcf6c7d02c20b1e2bcfa2c20bbee', 'EUC_KR'));
+ position
+----------
+ 5
+(1 row)
+
diff --git a/src/test/regress/expected/euc_kr_1.out b/src/test/regress/expected/euc_kr_1.out
new file mode 100644
index 00000000000..faaac5d6355
--- /dev/null
+++ b/src/test/regress/expected/euc_kr_1.out
@@ -0,0 +1,6 @@
+-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent
+-- non-UTF8, multibyte encoding as of 2026-01. Since UTF8 can represent all
+-- of EUC_KR, also run the test in UTF8.
+SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out
index d6fb879f500..3fa2562f231 100644
--- a/src/test/regress/expected/guc.out
+++ b/src/test/regress/expected/guc.out
@@ -711,6 +711,63 @@ select current_schemas(false);
reset search_path;
--
+-- Test parsing of log_min_messages
+--
+SET log_min_messages TO foo; -- fail
+ERROR: invalid value for parameter "log_min_messages": "foo"
+DETAIL: Unrecognized log level: "foo".
+SET log_min_messages TO fatal;
+SHOW log_min_messages;
+ log_min_messages
+------------------
+ fatal
+(1 row)
+
+SET log_min_messages TO 'fatal';
+SHOW log_min_messages;
+ log_min_messages
+------------------
+ fatal
+(1 row)
+
+SET log_min_messages TO 'checkpointer:debug2, autovacuum:debug1'; -- fail
+ERROR: invalid value for parameter "log_min_messages": "checkpointer:debug2, autovacuum:debug1"
+DETAIL: Default log level was not defined.
+SET log_min_messages TO 'debug1, backend:error, fatal'; -- fail
+ERROR: invalid value for parameter "log_min_messages": "debug1, backend:error, fatal"
+DETAIL: Redundant specification of default log level.
+SET log_min_messages TO 'backend:error, debug1, backend:warning'; -- fail
+ERROR: invalid value for parameter "log_min_messages": "backend:error, debug1, backend:warning"
+DETAIL: Redundant log level specification for process type "backend".
+SET log_min_messages TO 'backend:error, foo:fatal, archiver:debug1'; -- fail
+ERROR: invalid value for parameter "log_min_messages": "backend:error, foo:fatal, archiver:debug1"
+DETAIL: Unrecognized process type "foo".
+SET log_min_messages TO 'backend:error, checkpointer:bar, archiver:debug1'; -- fail
+ERROR: invalid value for parameter "log_min_messages": "backend:error, checkpointer:bar, archiver:debug1"
+DETAIL: Unrecognized log level for process type "checkpointer": "bar".
+SET log_min_messages TO 'backend:error, checkpointer:debug3, fatal, archiver:debug2, autovacuum:debug1, walsender:debug3';
+SHOW log_min_messages;
+ log_min_messages
+-------------------------------------------------------------------------------------------------
+ fatal, archiver:debug2, autovacuum:debug1, backend:error, checkpointer:debug3, walsender:debug3
+(1 row)
+
+SET log_min_messages TO 'warning, autovacuum:debug1';
+SHOW log_min_messages;
+ log_min_messages
+----------------------------
+ warning, autovacuum:debug1
+(1 row)
+
+SET log_min_messages TO 'autovacuum:debug1, warning';
+SHOW log_min_messages;
+ log_min_messages
+----------------------------
+ warning, autovacuum:debug1
+(1 row)
+
+RESET log_min_messages;
+--
-- Tests for function-local GUC settings
--
set work_mem = '3MB';
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index fdec5b9ba52..29090dca1ba 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1450,21 +1450,23 @@ explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1
set enable_incremental_sort = on;
explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
- QUERY PLAN
-----------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------
Limit
-> Incremental Sort
Sort Key: a, b, (sum(c))
Presorted Key: a, b
- -> GroupAggregate
+ -> Finalize GroupAggregate
Group Key: a, b
-> Gather Merge
Workers Planned: 2
- -> Incremental Sort
- Sort Key: a, b
- Presorted Key: a
- -> Parallel Index Scan using t_a_idx on t
-(12 rows)
+ -> Partial GroupAggregate
+ Group Key: a, b
+ -> Incremental Sort
+ Sort Key: a, b
+ Presorted Key: a
+ -> Parallel Index Scan using t_a_idx on t
+(14 rows)
-- Incremental sort vs. set operations with varno 0
set enable_hashagg to off;
diff --git a/src/test/regress/expected/insert_conflict.out b/src/test/regress/expected/insert_conflict.out
index b0e12962088..34e2e7ee355 100644
--- a/src/test/regress/expected/insert_conflict.out
+++ b/src/test/regress/expected/insert_conflict.out
@@ -249,6 +249,25 @@ explain (costs off, format json) insert into insertconflicttest values (0, 'Bilb
]
(1 row)
+-- Should display lock strength, if specified
+explain (costs off) insert into insertconflicttest values (1, 'Apple') on conflict (key) do select returning *;
+ QUERY PLAN
+---------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: SELECT
+ Conflict Arbiter Indexes: key_index
+ -> Result
+(4 rows)
+
+explain (costs off) insert into insertconflicttest values (1, 'Apple') on conflict (key) do select for key share returning *;
+ QUERY PLAN
+---------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: SELECT FOR KEY SHARE
+ Conflict Arbiter Indexes: key_index
+ -> Result
+(4 rows)
+
-- Fails (no unique index inference specification, required for do update variant):
insert into insertconflicttest values (1, 'Apple') on conflict do update set fruit = excluded.fruit;
ERROR: ON CONFLICT DO UPDATE requires inference specification or constraint name
@@ -304,6 +323,48 @@ ERROR: column "insertconflicttest" of relation "insertconflicttest" does not ex
LINE 1: ...3, 'Kiwi') on conflict (key, fruit) do update set insertconf...
^
HINT: SET target columns cannot be qualified with the relation name.
+--
+-- DO SELECT tests
+--
+delete from insertconflicttest where fruit = 'Apple';
+insert into insertconflicttest values (1, 'Apple') on conflict (key) do select; -- fails
+ERROR: ON CONFLICT DO SELECT requires a RETURNING clause
+LINE 1: ...nsert into insertconflicttest values (1, 'Apple') on conflic...
+ ^
+insert into insertconflicttest as i values (1, 'Apple') on conflict (key) do select returning old, new, i;
+ old | new | i
+-----+-----------+-----------
+ | (1,Apple) | (1,Apple)
+(1 row)
+
+insert into insertconflicttest as i values (1, 'Orange') on conflict (key) do select returning old, new, i;
+ old | new | i
+-----------+-----------+-----------
+ (1,Apple) | (1,Apple) | (1,Apple)
+(1 row)
+
+insert into insertconflicttest as i values (1, 'Apple') on conflict (key) do select where i.fruit = 'Apple' returning *;
+ key | fruit
+-----+-------
+ 1 | Apple
+(1 row)
+
+insert into insertconflicttest as i values (1, 'Apple') on conflict (key) do select where i.fruit = 'Orange' returning *;
+ key | fruit
+-----+-------
+(0 rows)
+
+insert into insertconflicttest as i values (1, 'Orange') on conflict (key) do select where excluded.fruit = 'Apple' returning *;
+ key | fruit
+-----+-------
+(0 rows)
+
+insert into insertconflicttest as i values (1, 'Orange') on conflict (key) do select where excluded.fruit = 'Orange' returning *;
+ key | fruit
+-----+-------
+ 1 | Apple
+(1 row)
+
drop index key_index;
--
-- Composite key tests
@@ -748,13 +809,58 @@ insert into selfconflict values (6,1), (6,2) on conflict(f1) do update set f2 =
ERROR: ON CONFLICT DO UPDATE command cannot affect row a second time
HINT: Ensure that no rows proposed for insertion within the same command have duplicate constrained values.
commit;
+begin transaction isolation level read committed;
+insert into selfconflict values (7,1), (7,2) on conflict(f1) do select returning *;
+ f1 | f2
+----+----
+ 7 | 1
+ 7 | 1
+(2 rows)
+
+commit;
+begin transaction isolation level repeatable read;
+insert into selfconflict values (8,1), (8,2) on conflict(f1) do select returning *;
+ f1 | f2
+----+----
+ 8 | 1
+ 8 | 1
+(2 rows)
+
+commit;
+begin transaction isolation level serializable;
+insert into selfconflict values (9,1), (9,2) on conflict(f1) do select returning *;
+ f1 | f2
+----+----
+ 9 | 1
+ 9 | 1
+(2 rows)
+
+commit;
+begin transaction isolation level read committed;
+insert into selfconflict values (10,1), (10,2) on conflict(f1) do select for update returning *;
+ERROR: ON CONFLICT DO SELECT command cannot affect row a second time
+HINT: Ensure that no rows proposed for insertion within the same command have duplicate constrained values.
+commit;
+begin transaction isolation level repeatable read;
+insert into selfconflict values (11,1), (11,2) on conflict(f1) do select for update returning *;
+ERROR: ON CONFLICT DO SELECT command cannot affect row a second time
+HINT: Ensure that no rows proposed for insertion within the same command have duplicate constrained values.
+commit;
+begin transaction isolation level serializable;
+insert into selfconflict values (12,1), (12,2) on conflict(f1) do select for update returning *;
+ERROR: ON CONFLICT DO SELECT command cannot affect row a second time
+HINT: Ensure that no rows proposed for insertion within the same command have duplicate constrained values.
+commit;
select * from selfconflict;
f1 | f2
----+----
1 | 1
2 | 1
3 | 1
-(3 rows)
+ 7 | 1
+ 8 | 1
+ 9 | 1
+(6 rows)
drop table selfconflict;
-- check ON CONFLICT handling with partitioned tables
@@ -765,11 +871,31 @@ insert into parted_conflict_test values (1, 'a') on conflict do nothing;
-- index on a required, which does exist in parent
insert into parted_conflict_test values (1, 'a') on conflict (a) do nothing;
insert into parted_conflict_test values (1, 'a') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test values (1, 'a') on conflict (a) do select returning *;
+ a | b
+---+---
+ 1 | a
+(1 row)
+
+insert into parted_conflict_test values (1, 'a') on conflict (a) do select for update returning *;
+ a | b
+---+---
+ 1 | a
+(1 row)
+
-- targeting partition directly will work
insert into parted_conflict_test_1 values (1, 'a') on conflict (a) do nothing;
insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do select returning b;
+ b
+---
+ b
+(1 row)
+
-- index on b required, which doesn't exist in parent
-insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a;
+insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a; -- fail
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into parted_conflict_test values (2, 'b') on conflict (b) do select returning b; -- fail
ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
-- targeting partition directly will work
insert into parted_conflict_test_1 values (2, 'b') on conflict (b) do update set a = excluded.a;
@@ -780,13 +906,31 @@ select * from parted_conflict_test order by a;
2 | b
(1 row)
--- now check that DO UPDATE works correctly for target partition with
--- different attribute numbers
+-- now check that DO UPDATE and DO SELECT work correctly for target partition
+-- with different attribute numbers
create table parted_conflict_test_2 (b char, a int unique);
alter table parted_conflict_test attach partition parted_conflict_test_2 for values in (3);
truncate parted_conflict_test;
insert into parted_conflict_test values (3, 'a') on conflict (a) do update set b = excluded.b;
insert into parted_conflict_test values (3, 'b') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test values (3, 'a') on conflict (a) do select returning b;
+ b
+---
+ b
+(1 row)
+
+insert into parted_conflict_test values (3, 'a') on conflict (a) do select where excluded.b = 'a' returning parted_conflict_test;
+ parted_conflict_test
+----------------------
+ (3,b)
+(1 row)
+
+insert into parted_conflict_test values (3, 'a') on conflict (a) do select where parted_conflict_test.b = 'b' returning b;
+ b
+---
+ b
+(1 row)
+
-- should see (3, 'b')
select * from parted_conflict_test order by a;
a | b
@@ -800,6 +944,12 @@ create table parted_conflict_test_3 partition of parted_conflict_test for values
truncate parted_conflict_test;
insert into parted_conflict_test (a, b) values (4, 'a') on conflict (a) do update set b = excluded.b;
insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a';
+insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do select returning b;
+ b
+---
+ b
+(1 row)
+
-- should see (4, 'b')
select * from parted_conflict_test order by a;
a | b
@@ -813,6 +963,11 @@ create table parted_conflict_test_4_1 partition of parted_conflict_test_4 for va
truncate parted_conflict_test;
insert into parted_conflict_test (a, b) values (5, 'a') on conflict (a) do update set b = excluded.b;
insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a';
+insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do select where parted_conflict_test.b = 'a' returning b;
+ b
+---
+(0 rows)
+
-- should see (5, 'b')
select * from parted_conflict_test order by a;
a | b
@@ -833,6 +988,59 @@ select * from parted_conflict_test order by a;
4 | b
(3 rows)
+-- test DO SELECT with multiple rows hitting different partitions
+truncate parted_conflict_test;
+insert into parted_conflict_test (a, b) values (1, 'a'), (2, 'b'), (4, 'c');
+insert into parted_conflict_test (a, b) values (1, 'x'), (2, 'y'), (4, 'z')
+ on conflict (a) do select returning *, tableoid::regclass;
+ a | b | tableoid
+---+---+------------------------
+ 1 | a | parted_conflict_test_1
+ 2 | b | parted_conflict_test_1
+ 4 | c | parted_conflict_test_3
+(3 rows)
+
+-- should see original values (1, 'a'), (2, 'b'), (4, 'c')
+select * from parted_conflict_test order by a;
+ a | b
+---+---
+ 1 | a
+ 2 | b
+ 4 | c
+(3 rows)
+
+-- test DO SELECT with WHERE filtering across partitions
+insert into parted_conflict_test (a, b) values (1, 'n') on conflict (a) do select where parted_conflict_test.b = 'a' returning *;
+ a | b
+---+---
+ 1 | a
+(1 row)
+
+insert into parted_conflict_test (a, b) values (2, 'n') on conflict (a) do select where parted_conflict_test.b = 'x' returning *;
+ a | b
+---+---
+(0 rows)
+
+-- test DO SELECT with EXCLUDED in WHERE across partitions with different layouts
+insert into parted_conflict_test (a, b) values (3, 't') on conflict (a) do select where excluded.b = 't' returning *;
+ a | b
+---+---
+ 3 | t
+(1 row)
+
+-- test DO SELECT FOR UPDATE across different partition layouts
+insert into parted_conflict_test (a, b) values (1, 'l') on conflict (a) do select for update returning *;
+ a | b
+---+---
+ 1 | a
+(1 row)
+
+insert into parted_conflict_test (a, b) values (3, 'l') on conflict (a) do select for update returning *;
+ a | b
+---+---
+ 3 | t
+(1 row)
+
drop table parted_conflict_test;
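-- For orientation, the core DO SELECT behavior the preceding tests build on
-- can be sketched in a few self-contained lines (assuming only the syntax
-- this patch introduces): on conflict the existing row is returned unchanged
-- and nothing is written; DO SELECT FOR UPDATE additionally locks it.
create table do_select_sketch (k int primary key, v text);
insert into do_select_sketch values (1, 'old');
insert into do_select_sketch values (1, 'new')
  on conflict (k) do select returning *;  -- returns (1, 'old'); table unchanged
drop table do_select_sketch;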
-- test behavior of inserting a conflicting tuple into an intermediate
-- partitioning level
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index d05a0ca0373..63d3c5d3ac8 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3273,6 +3273,68 @@ where (hundred, thousand) in (select twothousand, twothousand from onek);
reset enable_memoize;
--
+-- more antijoin recognition tests using NOT NULL constraints
+--
+begin;
+create temp table tbl_anti(a int not null, b int, c int);
+-- this is an antijoin, as t2.a is non-null for any matching row
+explain (costs off)
+select * from tenk1 t1 left join tbl_anti t2 on t1.unique1 = t2.b
+where t2.a is null;
+ QUERY PLAN
+----------------------------------
+ Hash Right Anti Join
+ Hash Cond: (t2.b = t1.unique1)
+ -> Seq Scan on tbl_anti t2
+ -> Hash
+ -> Seq Scan on tenk1 t1
+(5 rows)
+
+-- this is an antijoin, as t2.a is non-null for any matching row
+explain (costs off)
+select * from tenk1 t1 left join
+ (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b
+where t2.a is null;
+ QUERY PLAN
+-------------------------------------------
+ Hash Right Anti Join
+ Hash Cond: (t2.b = t1.unique1)
+ -> Merge Left Join
+ Merge Cond: (t2.c = t3.c)
+ -> Sort
+ Sort Key: t2.c
+ -> Seq Scan on tbl_anti t2
+ -> Sort
+ Sort Key: t3.c
+ -> Seq Scan on tbl_anti t3
+ -> Hash
+ -> Seq Scan on tenk1 t1
+(12 rows)
+
+-- this is not an antijoin, as t3.a can be nulled by the t2/t3 join
+explain (costs off)
+select * from tenk1 t1 left join
+ (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b
+where t3.a is null;
+ QUERY PLAN
+-------------------------------------------
+ Hash Right Join
+ Hash Cond: (t2.b = t1.unique1)
+ Filter: (t3.a IS NULL)
+ -> Merge Left Join
+ Merge Cond: (t2.c = t3.c)
+ -> Sort
+ Sort Key: t2.c
+ -> Seq Scan on tbl_anti t2
+ -> Sort
+ Sort Key: t3.c
+ -> Seq Scan on tbl_anti t3
+ -> Hash
+ -> Seq Scan on tenk1 t1
+(13 rows)
+
+rollback;
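-- Restating the reasoning behind the tests above: tbl_anti.a is declared
-- NOT NULL, so t2.a can be NULL in the join output only when no t2 row
-- matched, meaning "where t2.a is null" keeps exactly the unmatched rows and
-- the left join can be treated as an anti join. Modulo the null-extended t2
-- columns, the first query is equivalent to this NOT EXISTS spelling (a
-- sketch only; the temp table is gone after the rollback):
--
--   select t1.* from tenk1 t1
--   where not exists (select 1 from tbl_anti t2 where t2.b = t1.unique1);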
+--
-- regression test for bogus RTE_GROUP entries
--
explain (costs off)
diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out
index 4749f6ed70d..bc7cc76467f 100644
--- a/src/test/regress/expected/join_hash.out
+++ b/src/test/regress/expected/join_hash.out
@@ -76,8 +76,8 @@ insert into extremely_skewed
update pg_class
set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
where relname = 'extremely_skewed';
--- Make a relation with a couple of enormous tuples.
-create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
+-- Make a relation with several enormous tuples.
+create table wide as select generate_series(1, 3) as id, rpad('', 320000, 'x') as t;
alter table wide set (parallel_workers = 2);
-- The "optimal" case: the hash table fits in memory; we plan for 1
-- batch, we stick to that number, and peak memory usage stays within
@@ -922,7 +922,7 @@ set work_mem = '128kB';
set hash_mem_multiplier = 1.0;
explain (costs off)
select length(max(s.t))
- from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+ from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
QUERY PLAN
----------------------------------------------------------------
Finalize Aggregate
@@ -934,10 +934,11 @@ explain (costs off)
-> Parallel Seq Scan on wide
-> Parallel Hash
-> Parallel Seq Scan on wide wide_1
-(9 rows)
+ Filter: (id < 3)
+(10 rows)
select length(max(s.t))
-from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
length
--------
320000
@@ -947,7 +948,7 @@ select final > 1 as multibatch
from hash_join_batches(
$$
select length(max(s.t))
- from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+ from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
$$);
multibatch
------------
diff --git a/src/test/regress/expected/predicate.out b/src/test/regress/expected/predicate.out
index 8ff1172008e..feae77cb840 100644
--- a/src/test/regress/expected/predicate.out
+++ b/src/test/regress/expected/predicate.out
@@ -632,3 +632,325 @@ SELECT * FROM pred_tab WHERE (a::oid) IS NULL;
(3 rows)
DROP TABLE pred_tab;
+--
+-- Test optimization of IS [NOT] DISTINCT FROM
+--
+CREATE TYPE dist_row_t AS (a int, b int);
+CREATE TABLE dist_tab (id int, val_nn int NOT NULL, val_null int, row_nn dist_row_t NOT NULL);
+INSERT INTO dist_tab VALUES (1, 10, 10, ROW(1, 1));
+INSERT INTO dist_tab VALUES (2, 20, NULL, ROW(2, 2));
+INSERT INTO dist_tab VALUES (3, 30, 30, ROW(1, NULL));
+CREATE INDEX dist_tab_nn_idx ON dist_tab (val_nn);
+ANALYZE dist_tab;
+-- Ensure that the predicate folds to constant TRUE
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM NULL::INT;
+ QUERY PLAN
+----------------------
+ Seq Scan on dist_tab
+(1 row)
+
+SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM NULL::INT;
+ id
+----
+ 1
+ 2
+ 3
+(3 rows)
+
+-- Ensure that the predicate folds to constant FALSE
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM NULL::INT;
+ QUERY PLAN
+------------------------------
+ Result
+ Replaces: Scan on dist_tab
+ One-Time Filter: false
+(3 rows)
+
+SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM NULL::INT;
+ id
+----
+(0 rows)
+
+-- Ensure that the predicate is converted to an inequality operator
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM 10;
+ QUERY PLAN
+--------------------------
+ Seq Scan on dist_tab
+ Filter: (val_nn <> 10)
+(2 rows)
+
+SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM 10;
+ id
+----
+ 2
+ 3
+(2 rows)
+
+-- Ensure that the predicate is converted to an equality operator, and thus
+-- can use an index scan
+SET enable_seqscan TO off;
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM 10;
+ QUERY PLAN
+----------------------------------------------
+ Index Scan using dist_tab_nn_idx on dist_tab
+ Index Cond: (val_nn = 10)
+(2 rows)
+
+SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM 10;
+ id
+----
+ 1
+(1 row)
+
+RESET enable_seqscan;
+-- Ensure that the predicate is preserved as "IS DISTINCT FROM"
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM 20;
+ QUERY PLAN
+------------------------------------------
+ Seq Scan on dist_tab
+ Filter: (val_null IS DISTINCT FROM 20)
+(2 rows)
+
+SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM 20;
+ id
+----
+ 1
+ 2
+ 3
+(3 rows)
+
+-- Safety check for rowtypes
+-- Ensure that the predicate is converted to an inequality operator
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE row_nn IS DISTINCT FROM ROW(1, 5)::dist_row_t;
+ QUERY PLAN
+-------------------------------------------
+ Seq Scan on dist_tab
+ Filter: (row_nn <> '(1,5)'::dist_row_t)
+(2 rows)
+
+-- ... and that all 3 rows are returned
+SELECT id FROM dist_tab WHERE row_nn IS DISTINCT FROM ROW(1, 5)::dist_row_t;
+ id
+----
+ 1
+ 2
+ 3
+(3 rows)
+
+-- Ensure that the predicate is converted to an equality operator, and is
+-- thus mergejoinable and hashjoinable
+SET enable_nestloop TO off;
+EXPLAIN (COSTS OFF)
+SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2.val_nn;
+ QUERY PLAN
+--------------------------------------
+ Hash Join
+ Hash Cond: (t1.val_nn = t2.val_nn)
+ -> Seq Scan on dist_tab t1
+ -> Hash
+ -> Seq Scan on dist_tab t2
+(5 rows)
+
+SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2.val_nn;
+ id | val_nn | val_null | row_nn | id | val_nn | val_null | row_nn
+----+--------+----------+--------+----+--------+----------+--------
+ 1 | 10 | 10 | (1,1) | 1 | 10 | 10 | (1,1)
+ 2 | 20 | | (2,2) | 2 | 20 | | (2,2)
+ 3 | 30 | 30 | (1,) | 3 | 30 | 30 | (1,)
+(3 rows)
+
+RESET enable_nestloop;
+-- Ensure that the predicate is converted to IS NOT NULL
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM NULL::INT;
+ QUERY PLAN
+----------------------------------
+ Seq Scan on dist_tab
+ Filter: (val_null IS NOT NULL)
+(2 rows)
+
+SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM NULL::INT;
+ id
+----
+ 1
+ 3
+(2 rows)
+
+-- Ensure that the predicate is converted to IS NULL
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_null IS NOT DISTINCT FROM NULL::INT;
+ QUERY PLAN
+------------------------------
+ Seq Scan on dist_tab
+ Filter: (val_null IS NULL)
+(2 rows)
+
+SELECT id FROM dist_tab WHERE val_null IS NOT DISTINCT FROM NULL::INT;
+ id
+----
+ 2
+(1 row)
+
+-- Safety check for rowtypes
+-- The predicate is converted to IS NOT NULL, and get_rule_expr prints it as IS
+-- DISTINCT FROM because argisrow is false, indicating that we're applying a
+-- scalar test
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE (val_null, val_null) IS DISTINCT FROM NULL::RECORD;
+ QUERY PLAN
+-----------------------------------------------------------
+ Seq Scan on dist_tab
+ Filter: (ROW(val_null, val_null) IS DISTINCT FROM NULL)
+(2 rows)
+
+SELECT id FROM dist_tab WHERE (val_null, val_null) IS DISTINCT FROM NULL::RECORD;
+ id
+----
+ 1
+ 2
+ 3
+(3 rows)
+
+-- The predicate is converted to IS NULL, and get_rule_expr prints it as IS NOT
+-- DISTINCT FROM because argisrow is false, indicating that we're applying a
+-- scalar test
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD;
+ QUERY PLAN
+---------------------------------------------------------------
+ Seq Scan on dist_tab
+ Filter: (ROW(val_null, val_null) IS NOT DISTINCT FROM NULL)
+(2 rows)
+
+SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD;
+ id
+----
+(0 rows)
+
+DROP TABLE dist_tab;
+DROP TYPE dist_row_t;
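-- Summary of the transformations exercised above, for a NOT NULL column x
-- and a nullable column y (as observed in the EXPLAIN output; comments only):
--
--   x IS DISTINCT FROM NULL      => true           (scan with no filter)
--   x IS NOT DISTINCT FROM NULL  => false          (one-time filter)
--   x IS DISTINCT FROM 10        => x <> 10
--   x IS NOT DISTINCT FROM 10    => x = 10         (indexable, joinable)
--   y IS DISTINCT FROM NULL      => y IS NOT NULL
--   y IS NOT DISTINCT FROM NULL  => y IS NULL
--   y IS DISTINCT FROM 20        => kept as IS DISTINCT FROM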
+--
+-- Test optimization of BooleanTest (IS [NOT] TRUE/FALSE/UNKNOWN) on
+-- non-nullable input
+--
+CREATE TABLE bool_tab (id int, flag_nn boolean NOT NULL, flag_null boolean);
+INSERT INTO bool_tab VALUES (1, true, true);
+INSERT INTO bool_tab VALUES (2, false, NULL);
+CREATE INDEX bool_tab_nn_idx ON bool_tab (flag_nn);
+ANALYZE bool_tab;
+-- Ensure that the predicate folds to constant FALSE
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS UNKNOWN;
+ QUERY PLAN
+------------------------------
+ Result
+ Replaces: Scan on bool_tab
+ One-Time Filter: false
+(3 rows)
+
+SELECT id FROM bool_tab WHERE flag_nn IS UNKNOWN;
+ id
+----
+(0 rows)
+
+-- Ensure that the predicate folds to constant TRUE
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS NOT UNKNOWN;
+ QUERY PLAN
+----------------------
+ Seq Scan on bool_tab
+(1 row)
+
+SELECT id FROM bool_tab WHERE flag_nn IS NOT UNKNOWN;
+ id
+----
+ 1
+ 2
+(2 rows)
+
+-- Ensure that the predicate folds to flag_nn
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS TRUE;
+ QUERY PLAN
+----------------------
+ Seq Scan on bool_tab
+ Filter: flag_nn
+(2 rows)
+
+SELECT id FROM bool_tab WHERE flag_nn IS TRUE;
+ id
+----
+ 1
+(1 row)
+
+-- Ensure that the predicate folds to flag_nn, and thus can use an index scan
+SET enable_seqscan TO off;
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS NOT FALSE;
+ QUERY PLAN
+----------------------------------------------
+ Index Scan using bool_tab_nn_idx on bool_tab
+ Index Cond: (flag_nn = true)
+(2 rows)
+
+SELECT id FROM bool_tab WHERE flag_nn IS NOT FALSE;
+ id
+----
+ 1
+(1 row)
+
+RESET enable_seqscan;
+-- Ensure that the predicate folds to not flag_nn
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS FALSE;
+ QUERY PLAN
+-------------------------
+ Seq Scan on bool_tab
+ Filter: (NOT flag_nn)
+(2 rows)
+
+SELECT id FROM bool_tab WHERE flag_nn IS FALSE;
+ id
+----
+ 2
+(1 row)
+
+-- Ensure that the predicate folds to not flag_nn, and thus can use an index scan
+SET enable_seqscan TO off;
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS NOT TRUE;
+ QUERY PLAN
+----------------------------------------------
+ Index Scan using bool_tab_nn_idx on bool_tab
+ Index Cond: (flag_nn = false)
+(2 rows)
+
+SELECT id FROM bool_tab WHERE flag_nn IS NOT TRUE;
+ id
+----
+ 2
+(1 row)
+
+RESET enable_seqscan;
+-- Ensure that the predicate is preserved as a BooleanTest
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_null IS UNKNOWN;
+ QUERY PLAN
+----------------------------------
+ Seq Scan on bool_tab
+ Filter: (flag_null IS UNKNOWN)
+(2 rows)
+
+SELECT id FROM bool_tab WHERE flag_null IS UNKNOWN;
+ id
+----
+ 2
+(1 row)
+
+DROP TABLE bool_tab;
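-- Summary of the BooleanTest folding exercised above, for a NOT NULL boolean
-- flag_nn and a nullable flag_null (comments only):
--
--   flag_nn IS UNKNOWN       => false          (one-time filter)
--   flag_nn IS NOT UNKNOWN   => true           (scan with no filter)
--   flag_nn IS TRUE          => flag_nn
--   flag_nn IS NOT FALSE     => flag_nn        (indexable as flag_nn = true)
--   flag_nn IS FALSE         => NOT flag_nn
--   flag_nn IS NOT TRUE      => NOT flag_nn    (indexable as flag_nn = false)
--   flag_null IS UNKNOWN     => kept as a BooleanTest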
diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out
index daafaa94fde..84c1c1ca38d 100644
--- a/src/test/regress/expected/privileges.out
+++ b/src/test/regress/expected/privileges.out
@@ -921,6 +921,32 @@ INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set one = 8; -- f
ERROR: permission denied for table atest5
INSERT INTO atest5(three) VALUES (4) ON CONFLICT (two) DO UPDATE set three = 10; -- fails (due to INSERT)
ERROR: permission denied for table atest5
+-- Check that column-level privileges are enforced for ON CONFLICT ... WHERE
+-- OK: we have SELECT rights on column one
+INSERT INTO atest5(two) VALUES (2) ON CONFLICT (two) DO SELECT WHERE atest5.one = 1 RETURNING atest5.two;
+ two
+-----
+ 2
+(1 row)
+
+-- Error: no SELECT rights on column three
+INSERT INTO atest5(two) VALUES (2) ON CONFLICT (two) DO SELECT WHERE atest5.three = 1 RETURNING atest5.two;
+ERROR: permission denied for table atest5
+-- Check that ON CONFLICT ... DO SELECT FOR UPDATE/SHARE requires UPDATE
+-- rights on at least one column
+SET SESSION AUTHORIZATION regress_priv_user1;
+REVOKE UPDATE (three) ON atest5 FROM regress_priv_user4;
+SET SESSION AUTHORIZATION regress_priv_user4;
+INSERT INTO atest5(two) VALUES (2) ON CONFLICT (two) DO SELECT FOR UPDATE RETURNING atest5.two; -- fails
+ERROR: permission denied for table atest5
+SET SESSION AUTHORIZATION regress_priv_user1;
+GRANT UPDATE (three) ON atest5 TO regress_priv_user4;
+SET SESSION AUTHORIZATION regress_priv_user4;
+INSERT INTO atest5(two) VALUES (2) ON CONFLICT (two) DO SELECT FOR UPDATE RETURNING atest5.two; -- ok
+ two
+-----
+ 2
+(1 row)
+
-- Check that the columns in the inference require select privileges
INSERT INTO atest5(four) VALUES (4); -- fail
ERROR: permission denied for table atest5
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index c958ef4d70a..07d93e7def1 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -170,8 +170,9 @@ NOTICE: SELECT USING on rls_test_tgt.(1,"tgt d","TGT D")
1 | tgt d | TGT D
(1 row)
--- INSERT ... ON CONFLICT DO NOTHING should apply INSERT CHECK and SELECT USING
--- policy clauses (to new value, whether it conflicts or not)
+-- INSERT ... ON CONFLICT DO NOTHING with an arbiter clause should apply
+-- INSERT CHECK and SELECT USING policy clauses (to new value, whether it
+-- conflicts or not)
INSERT INTO rls_test_tgt VALUES (1, 'tgt a') ON CONFLICT (a) DO NOTHING;
NOTICE: INSERT CHECK on rls_test_tgt.(1,"tgt a","TGT A")
NOTICE: SELECT USING on rls_test_tgt.(1,"tgt a","TGT A")
@@ -217,6 +218,50 @@ NOTICE: SELECT USING on rls_test_tgt.(3,"tgt d","TGT D")
3 | tgt d | TGT D
(1 row)
+ROLLBACK;
+-- INSERT ... ON CONFLICT DO SELECT should apply INSERT CHECK and SELECT USING
+-- policy clauses to the values proposed for insertion. On a conflict, it
+-- should also apply SELECT USING policy clauses to the existing values.
+BEGIN;
+INSERT INTO rls_test_tgt VALUES (4, 'tgt a') ON CONFLICT (a) DO SELECT RETURNING *;
+NOTICE: INSERT CHECK on rls_test_tgt.(4,"tgt a","TGT A")
+NOTICE: SELECT USING on rls_test_tgt.(4,"tgt a","TGT A")
+ a | b | c
+---+-------+-------
+ 4 | tgt a | TGT A
+(1 row)
+
+INSERT INTO rls_test_tgt VALUES (4, 'tgt b') ON CONFLICT (a) DO SELECT RETURNING *;
+NOTICE: INSERT CHECK on rls_test_tgt.(4,"tgt b","TGT B")
+NOTICE: SELECT USING on rls_test_tgt.(4,"tgt b","TGT B")
+NOTICE: SELECT USING on rls_test_tgt.(4,"tgt a","TGT A")
+ a | b | c
+---+-------+-------
+ 4 | tgt a | TGT A
+(1 row)
+
+ROLLBACK;
+-- INSERT ... ON CONFLICT DO SELECT FOR UPDATE should also apply UPDATE USING
+-- policy clauses to the existing values, in the event of a conflict.
+BEGIN;
+INSERT INTO rls_test_tgt VALUES (5, 'tgt a') ON CONFLICT (a) DO SELECT FOR UPDATE RETURNING *;
+NOTICE: INSERT CHECK on rls_test_tgt.(5,"tgt a","TGT A")
+NOTICE: SELECT USING on rls_test_tgt.(5,"tgt a","TGT A")
+ a | b | c
+---+-------+-------
+ 5 | tgt a | TGT A
+(1 row)
+
+INSERT INTO rls_test_tgt VALUES (5, 'tgt b') ON CONFLICT (a) DO SELECT FOR UPDATE RETURNING *;
+NOTICE: INSERT CHECK on rls_test_tgt.(5,"tgt b","TGT B")
+NOTICE: SELECT USING on rls_test_tgt.(5,"tgt b","TGT B")
+NOTICE: UPDATE USING on rls_test_tgt.(5,"tgt a","TGT A")
+NOTICE: SELECT USING on rls_test_tgt.(5,"tgt a","TGT A")
+ a | b | c
+---+-------+-------
+ 5 | tgt a | TGT A
+(1 row)
+
ROLLBACK;
-- MERGE should always apply SELECT USING policy clauses to both source and
-- target rows
@@ -2394,10 +2439,58 @@ INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel')
ON CONFLICT (did) DO UPDATE SET dauthor = 'regress_rls_carol';
ERROR: new row violates row-level security policy for table "document"
--
+-- INSERT ... ON CONFLICT DO SELECT and row-level security
+--
+SET SESSION AUTHORIZATION regress_rls_alice;
+DROP POLICY p3_with_all ON document;
+CREATE POLICY p1_select_novels ON document FOR SELECT
+ USING (cid = (SELECT cid from category WHERE cname = 'novel'));
+CREATE POLICY p2_insert_own ON document FOR INSERT
+ WITH CHECK (dauthor = current_user);
+CREATE POLICY p3_update_novels ON document FOR UPDATE
+ USING (cid = (SELECT cid from category WHERE cname = 'novel') AND dlevel = 1)
+ WITH CHECK (dauthor = current_user);
+SET SESSION AUTHORIZATION regress_rls_bob;
+-- DO SELECT requires SELECT rights; should succeed for a novel
+INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'another novel')
+ ON CONFLICT (did) DO SELECT RETURNING did, dauthor, dtitle;
+ did | dauthor | dtitle
+-----+-----------------+----------------
+ 1 | regress_rls_bob | my first novel
+(1 row)
+
+-- DO SELECT requires SELECT rights; should fail for a non-novel
+INSERT INTO document VALUES (33, (SELECT cid from category WHERE cname = 'science fiction'), 1, 'regress_rls_bob', 'another sci-fi')
+ ON CONFLICT (did) DO SELECT RETURNING did, dauthor, dtitle;
+ERROR: new row violates row-level security policy for table "document"
+-- DO SELECT with WHERE and EXCLUDED reference
+INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'another novel')
+ ON CONFLICT (did) DO SELECT WHERE excluded.dlevel = 1 RETURNING did, dauthor, dtitle;
+ did | dauthor | dtitle
+-----+-----------------+----------------
+ 1 | regress_rls_bob | my first novel
+(1 row)
+
+-- DO SELECT FOR UPDATE requires both SELECT and UPDATE rights; should succeed for a novel with dlevel = 1
+INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'another novel')
+ ON CONFLICT (did) DO SELECT FOR UPDATE RETURNING did, dauthor, dtitle;
+ did | dauthor | dtitle
+-----+-----------------+----------------
+ 1 | regress_rls_bob | my first novel
+(1 row)
+
+-- should fail the UPDATE USING policy, since the existing novel has dlevel = 2
+INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'another novel')
+ ON CONFLICT (did) DO SELECT FOR UPDATE RETURNING did, dauthor, dtitle;
+ERROR: new row violates row-level security policy (USING expression) for table "document"
+SET SESSION AUTHORIZATION regress_rls_alice;
+DROP POLICY p1_select_novels ON document;
+DROP POLICY p2_insert_own ON document;
+DROP POLICY p3_update_novels ON document;
+--
-- MERGE
--
RESET SESSION AUTHORIZATION;
-DROP POLICY p3_with_all ON document;
ALTER TABLE document ADD COLUMN dnotes text DEFAULT '';
-- all documents are readable
CREATE POLICY p1 ON document FOR SELECT USING (true);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index f4ee2bd7459..78a37d9fc8f 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2696,7 +2696,31 @@ pg_stats_ext_exprs| SELECT cn.nspname AS schemaname,
WHEN ((stat.a).stakind4 = 5) THEN (stat.a).stanumbers4
WHEN ((stat.a).stakind5 = 5) THEN (stat.a).stanumbers5
ELSE NULL::real[]
- END AS elem_count_histogram
+ END AS elem_count_histogram,
+ CASE
+ WHEN ((stat.a).stakind1 = 6) THEN (stat.a).stavalues1
+ WHEN ((stat.a).stakind2 = 6) THEN (stat.a).stavalues2
+ WHEN ((stat.a).stakind3 = 6) THEN (stat.a).stavalues3
+ WHEN ((stat.a).stakind4 = 6) THEN (stat.a).stavalues4
+ WHEN ((stat.a).stakind5 = 6) THEN (stat.a).stavalues5
+ ELSE NULL::anyarray
+ END AS range_length_histogram,
+ CASE
+ WHEN ((stat.a).stakind1 = 6) THEN (stat.a).stanumbers1[1]
+ WHEN ((stat.a).stakind2 = 6) THEN (stat.a).stanumbers2[1]
+ WHEN ((stat.a).stakind3 = 6) THEN (stat.a).stanumbers3[1]
+ WHEN ((stat.a).stakind4 = 6) THEN (stat.a).stanumbers4[1]
+ WHEN ((stat.a).stakind5 = 6) THEN (stat.a).stanumbers5[1]
+ ELSE NULL::real
+ END AS range_empty_frac,
+ CASE
+ WHEN ((stat.a).stakind1 = 7) THEN (stat.a).stavalues1
+ WHEN ((stat.a).stakind2 = 7) THEN (stat.a).stavalues2
+ WHEN ((stat.a).stakind3 = 7) THEN (stat.a).stavalues3
+ WHEN ((stat.a).stakind4 = 7) THEN (stat.a).stavalues4
+ WHEN ((stat.a).stakind5 = 7) THEN (stat.a).stavalues5
+ ELSE NULL::anyarray
+ END AS range_bounds_histogram
FROM (((((pg_statistic_ext s
JOIN pg_class c ON ((c.oid = s.stxrelid)))
LEFT JOIN pg_statistic_ext_data sd ON ((s.oid = sd.stxoid)))
@@ -3584,6 +3608,61 @@ SELECT * FROM hat_data WHERE hat_name IN ('h8', 'h9', 'h7') ORDER BY hat_name;
(3 rows)
DROP RULE hat_upsert ON hats;
+-- DO SELECT with a WHERE clause
+CREATE RULE hat_confsel AS ON INSERT TO hats
+ DO INSTEAD
+ INSERT INTO hat_data VALUES (
+ NEW.hat_name,
+ NEW.hat_color)
+ ON CONFLICT (hat_name)
+ DO SELECT FOR UPDATE
+ WHERE excluded.hat_color <> 'forbidden' AND hat_data.* != excluded.*
+ RETURNING *;
+SELECT definition FROM pg_rules WHERE tablename = 'hats' ORDER BY rulename;
+ definition
+--------------------------------------------------------------------------------------
+ CREATE RULE hat_confsel AS +
+ ON INSERT TO public.hats DO INSTEAD INSERT INTO hat_data (hat_name, hat_color) +
+ VALUES (new.hat_name, new.hat_color) ON CONFLICT(hat_name) DO SELECT FOR UPDATE +
+ WHERE ((excluded.hat_color <> 'forbidden'::bpchar) AND (hat_data.* <> excluded.*))+
+ RETURNING hat_data.hat_name, +
+ hat_data.hat_color;
+(1 row)
+
+-- fails without RETURNING
+INSERT INTO hats VALUES ('h7', 'blue');
+ERROR: ON CONFLICT DO SELECT requires a RETURNING clause
+DETAIL: A rule action is INSERT ... ON CONFLICT DO SELECT, which requires a RETURNING clause.
+-- works (returns the conflicting row)
+EXPLAIN (costs off)
+INSERT INTO hats VALUES ('h7', 'blue') RETURNING *;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------
+ Insert on hat_data
+ Conflict Resolution: SELECT FOR UPDATE
+ Conflict Arbiter Indexes: hat_data_unique_idx
+ Conflict Filter: ((excluded.hat_color <> 'forbidden'::bpchar) AND (hat_data.* <> excluded.*))
+ -> Result
+(5 rows)
+
+INSERT INTO hats VALUES ('h7', 'blue') RETURNING *;
+ hat_name | hat_color
+------------+------------
+ h7 | black
+(1 row)
+
+-- conflicting rows filtered out by the WHERE clause
+INSERT INTO hats VALUES ('h7', 'forbidden') RETURNING *;
+ hat_name | hat_color
+----------+-----------
+(0 rows)
+
+INSERT INTO hats VALUES ('h7', 'black') RETURNING *;
+ hat_name | hat_color
+----------+-----------
+(0 rows)
+
+DROP RULE hat_confsel ON hats;
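-- The RETURNING failure above reflects a general rule of this patch: DO
-- SELECT produces rows, so the query that ultimately executes it must carry
-- a RETURNING clause to receive them. Sketched outside any rule (assumed
-- from the error text above):
--
--   insert into hat_data values ('h7', 'blue')
--     on conflict (hat_name) do select;              -- fails: no RETURNING
--   insert into hat_data values ('h7', 'blue')
--     on conflict (hat_name) do select returning *;  -- ok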
drop table hats;
drop table hat_data;
-- test for pg_get_functiondef properly regurgitating SET parameters
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index b2a06579135..cb8856ac50f 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -3628,3 +3628,30 @@ SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE numeric_lt(y, 1.0)'
-- Tidy up
DROP TABLE sb_1, sb_2 CASCADE;
+-- Check statistics generated for a range-type column and a range expression.
+CREATE TABLE stats_ext_tbl_range(name text, irange int4range);
+INSERT INTO stats_ext_tbl_range VALUES
+ ('red', '[1,7)'::int4range),
+ ('blue', '[2,8]'::int4range),
+ ('green', '[3,9)'::int4range);
+CREATE STATISTICS stats_ext_range (mcv)
+ ON irange, (irange + '[4,10)'::int4range)
+ FROM stats_ext_tbl_range;
+ANALYZE stats_ext_tbl_range;
+SELECT attnames, most_common_vals
+ FROM pg_stats_ext
+ WHERE statistics_name = 'stats_ext_range';
+ attnames | most_common_vals
+----------+------------------------------------------------------------
+ {irange} | {{"[1,7)","[1,10)"},{"[2,9)","[2,10)"},{"[3,9)","[3,10)"}}
+(1 row)
+
+SELECT range_length_histogram, range_empty_frac, range_bounds_histogram
+ FROM pg_stats_ext_exprs
+ WHERE statistics_name = 'stats_ext_range';
+ range_length_histogram | range_empty_frac | range_bounds_histogram
+------------------------+------------------+------------------------------
+ {7,8,9} | 0 | {"[1,10)","[2,10)","[3,10)"}
+(1 row)
+
+DROP TABLE stats_ext_tbl_range;
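-- The new columns consulted here map onto pg_statistic slot kinds as encoded
-- in the pg_stats_ext_exprs view change earlier in this patch:
--
--   stakind = 6: stavalues     => range_length_histogram
--                stanumbers[1] => range_empty_frac
--   stakind = 7: stavalues     => range_bounds_histogram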
diff --git a/src/test/regress/expected/stats_import.out b/src/test/regress/expected/stats_import.out
index 37131f9ceab..d6cc701500e 100644
--- a/src/test/regress/expected/stats_import.out
+++ b/src/test/regress/expected/stats_import.out
@@ -1481,7 +1481,7 @@ SELECT pg_clear_extended_stats(schemaname => 'stats_import',
statistics_schemaname => 'stats_import',
statistics_name => 'ext_stats_not_exist',
inherited => false);
-WARNING: could not find extended statistics object "stats_import"."ext_stats_not_exist"
+WARNING: could not find extended statistics object "stats_import.ext_stats_not_exist"
pg_clear_extended_stats
-------------------------
@@ -1493,7 +1493,7 @@ SELECT pg_clear_extended_stats(schemaname => 'stats_import',
statistics_schemaname => 'stats_import',
statistics_name => 'test_stat_clone',
inherited => false);
-WARNING: could not clear extended statistics object "stats_import"."test_stat_clone": incorrect relation "stats_import"."test" specified
+WARNING: could not clear extended statistics object "stats_import.test_stat_clone": incorrect relation "stats_import.test" specified
pg_clear_extended_stats
-------------------------
@@ -1678,7 +1678,7 @@ SELECT pg_catalog.pg_restore_extended_stats(
'statistics_schemaname', 'stats_import',
'statistics_name', 'ext_stats_not_exist',
'inherited', false);
-WARNING: could not find extended statistics object "stats_import"."ext_stats_not_exist"
+WARNING: could not find extended statistics object "stats_import.ext_stats_not_exist"
pg_restore_extended_stats
---------------------------
f
@@ -1691,7 +1691,7 @@ SELECT pg_catalog.pg_restore_extended_stats(
'statistics_schemaname', 'stats_import',
'statistics_name', 'test_stat_clone',
'inherited', false);
-WARNING: could not restore extended statistics object "stats_import"."test_stat_clone": incorrect relation "stats_import"."test" specified
+WARNING: could not restore extended statistics object "stats_import.test_stat_clone": incorrect relation "stats_import.test" specified
pg_restore_extended_stats
---------------------------
f
@@ -1762,7 +1762,7 @@ SELECT pg_catalog.pg_restore_extended_stats(
'inherited', false,
'n_distinct', '[{"attributes" : [1,3], "ndistinct" : 4}]'::pg_ndistinct);
WARNING: cannot specify parameter "n_distinct"
-HINT: Extended statistics object "stats_import"."test_stat_dependencies" does not support statistics of this type.
+HINT: Extended statistics object "stats_import.test_stat_dependencies" does not support statistics of this type.
pg_restore_extended_stats
---------------------------
f
@@ -1778,7 +1778,7 @@ SELECT pg_catalog.pg_restore_extended_stats(
'dependencies', '[{"attributes": [2], "dependency": 3, "degree": 1.000000},
{"attributes": [3], "dependency": 2, "degree": 1.000000}]'::pg_dependencies);
WARNING: cannot specify parameter "dependencies"
-HINT: Extended statistics object "stats_import"."test_stat_ndistinct" does not support statistics of this type.
+HINT: Extended statistics object "stats_import.test_stat_ndistinct" does not support statistics of this type.
pg_restore_extended_stats
---------------------------
f
@@ -1966,7 +1966,7 @@ SELECT pg_catalog.pg_restore_extended_stats(
'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[],
'most_common_base_freqs', '{0.0625,0.0625,0.0625,0.0625}'::double precision[]);
WARNING: cannot specify parameters "most_common_vals", "most_common_freqs" or "most_common_base_freqs"
-HINT: Extended statistics object "stats_import"."test_stat_dependencies" does not support statistics of this type.
+HINT: Extended statistics object "stats_import.test_stat_dependencies" does not support statistics of this type.
pg_restore_extended_stats
---------------------------
f
diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out
index b3eccd8afe3..3a0637772c7 100644
--- a/src/test/regress/expected/subscription.out
+++ b/src/test/regress/expected/subscription.out
@@ -116,18 +116,18 @@ CREATE SUBSCRIPTION regress_testsub4 CONNECTION 'dbname=regress_doesnotexist' PU
WARNING: subscription was created, but is not connected
HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and alter the subscription to refresh publications.
\dRs+ regress_testsub4
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
-------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | none | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | none | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub4 SET (origin = any);
\dRs+ regress_testsub4
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
-------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
DROP SUBSCRIPTION regress_testsub3;
@@ -145,10 +145,10 @@ ALTER SUBSCRIPTION regress_testsub CONNECTION 'foobar';
ERROR: invalid connection string syntax: missing "=" after "foobar" in connection info string
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET PUBLICATION testpub2, testpub3 WITH (refresh = false);
@@ -157,10 +157,10 @@ ALTER SUBSCRIPTION regress_testsub SET (slot_name = 'newname');
ALTER SUBSCRIPTION regress_testsub SET (password_required = false);
ALTER SUBSCRIPTION regress_testsub SET (run_as_owner = true);
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+------------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | f | t | f | f | 0 | f | off | dbname=regress_doesnotexist2 | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+------------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | f | t | f | f | 0 | f | off | dbname=regress_doesnotexist2 | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (password_required = true);
@@ -176,10 +176,10 @@ ERROR: unrecognized subscription parameter: "create_slot"
-- ok
ALTER SUBSCRIPTION regress_testsub SKIP (lsn = '0/12345');
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+------------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist2 | 0/00012345
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+------------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist2 | -1 | 0/00012345
(1 row)
-- ok - with lsn = NONE
@@ -188,10 +188,10 @@ ALTER SUBSCRIPTION regress_testsub SKIP (lsn = NONE);
ALTER SUBSCRIPTION regress_testsub SKIP (lsn = '0/0');
ERROR: invalid WAL location (LSN): 0/0
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+------------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist2 | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+------------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist2 | -1 | 0/00000000
(1 row)
BEGIN;
@@ -222,11 +222,15 @@ ALTER SUBSCRIPTION regress_testsub_foo SET (synchronous_commit = local);
ALTER SUBSCRIPTION regress_testsub_foo SET (synchronous_commit = foobar);
ERROR: invalid value for parameter "synchronous_commit": "foobar"
HINT: Available values: local, remote_write, remote_apply, on, off.
+ALTER SUBSCRIPTION regress_testsub_foo SET (wal_receiver_timeout = '-1');
+ALTER SUBSCRIPTION regress_testsub_foo SET (wal_receiver_timeout = '80s');
+ALTER SUBSCRIPTION regress_testsub_foo SET (wal_receiver_timeout = 'foobar');
+ERROR: invalid value for parameter "wal_receiver_timeout": "foobar"
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
----------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+------------------------------+------------
- regress_testsub_foo | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | f | 0 | f | local | dbname=regress_doesnotexist2 | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+---------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+------------------------------+------------------+------------
+ regress_testsub_foo | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | f | 0 | f | local | dbname=regress_doesnotexist2 | 80s | 0/00000000
(1 row)
-- rename back to keep the rest simple
@@ -255,19 +259,19 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB
WARNING: subscription was created, but is not connected
HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and alter the subscription to refresh publications.
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | t | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | t | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (binary = false);
ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE);
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
DROP SUBSCRIPTION regress_testsub;
@@ -279,27 +283,27 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB
WARNING: subscription was created, but is not connected
HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and alter the subscription to refresh publications.
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | on | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | on | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (streaming = parallel);
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (streaming = false);
ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE);
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
-- fail - publication already exists
@@ -314,10 +318,10 @@ ALTER SUBSCRIPTION regress_testsub ADD PUBLICATION testpub1, testpub2 WITH (refr
ALTER SUBSCRIPTION regress_testsub ADD PUBLICATION testpub1, testpub2 WITH (refresh = false);
ERROR: publication "testpub1" is already in subscription "regress_testsub"
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-----------------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub,testpub1,testpub2} | f | off | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-----------------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub,testpub1,testpub2} | f | off | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
-- fail - publication used more than once
@@ -332,10 +336,10 @@ ERROR: publication "testpub3" is not in subscription "regress_testsub"
-- ok - delete publications
ALTER SUBSCRIPTION regress_testsub DROP PUBLICATION testpub1, testpub2 WITH (refresh = false);
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
DROP SUBSCRIPTION regress_testsub;
@@ -371,19 +375,19 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB
WARNING: subscription was created, but is not connected
HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and alter the subscription to refresh publications.
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | p | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | p | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
-- we can alter streaming when two_phase enabled
ALTER SUBSCRIPTION regress_testsub SET (streaming = true);
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE);
@@ -393,10 +397,10 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB
WARNING: subscription was created, but is not connected
HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and alter the subscription to refresh publications.
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE);
@@ -409,18 +413,18 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB
WARNING: subscription was created, but is not connected
HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and alter the subscription to refresh publications.
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (disable_on_error = true);
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | t | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | t | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE);
@@ -433,10 +437,10 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB
WARNING: subscription was created, but is not connected
HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and alter the subscription to refresh publications.
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE);
@@ -450,19 +454,19 @@ NOTICE: max_retention_duration is ineffective when retain_dead_tuples is disabl
WARNING: subscription was created, but is not connected
HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and alter the subscription to refresh publications.
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 1000 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 1000 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
-- ok
ALTER SUBSCRIPTION regress_testsub SET (max_retention_duration = 0);
\dRs+
- List of subscriptions
- Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Skip LSN
------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------
- regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | 0/00000000
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Retain dead tuples | Max retention duration | Retention active | Synchronous commit | Conninfo | Receiver timeout | Skip LSN
+-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------+------------------+--------------------+-----------------------------+------------------+------------
+ regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | f | 0 | f | off | dbname=regress_doesnotexist | -1 | 0/00000000
(1 row)
ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE);
diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out
index 1acdd12d29e..98dee63b50a 100644
--- a/src/test/regress/expected/triggers.out
+++ b/src/test/regress/expected/triggers.out
@@ -1670,7 +1670,7 @@ drop table trigger_ddl_table;
drop function trigger_ddl_func();
--
-- Verify behavior of before and after triggers with INSERT...ON CONFLICT
--- DO UPDATE
+-- DO UPDATE and DO SELECT
--
create table upsert (key int4 primary key, color text);
create function upsert_before_func()
@@ -1745,6 +1745,14 @@ insert into upsert values(8, 'yellow') on conflict (key) do update set color = '
WARNING: before insert (new): (8,yellow)
WARNING: before insert (new, modified): (9,"yellow trig modified")
WARNING: after insert (new): (9,"yellow trig modified")
+insert into upsert values(8, 'blue') on conflict (key) do select for update where upsert.color = 'yellow trig modified' returning old.*, new.*, upsert.*;
+WARNING: before insert (new): (8,blue)
+WARNING: before insert (new, modified): (9,"blue trig modified")
+ key | color | key | color | key | color
+-----+----------------------+-----+----------------------+-----+----------------------
+ 9 | yellow trig modified | 9 | yellow trig modified | 9 | yellow trig modified
+(1 row)
+
select * from upsert;
key | color
-----+-----------------------------
diff --git a/src/test/regress/expected/tuplesort.out b/src/test/regress/expected/tuplesort.out
index 6dd97e7427a..fc1321bf443 100644
--- a/src/test/regress/expected/tuplesort.out
+++ b/src/test/regress/expected/tuplesort.out
@@ -304,9 +304,9 @@ FROM abbrev_abort_uuids
ORDER BY ctid DESC LIMIT 5;
id | abort_increasing | abort_decreasing | noabort_increasing | noabort_decreasing
-------+--------------------------------------+--------------------------------------+--------------------------------------+--------------------------------------
- 0 | | | |
20002 | | | |
20003 | | | |
+ 0 | | | |
10009 | 00000000-0000-0000-0000-000000010008 | 00000000-0000-0000-0000-000000009992 | 00010008-0000-0000-0000-000000010008 | 00009992-0000-0000-0000-000000009992
10008 | 00000000-0000-0000-0000-000000010007 | 00000000-0000-0000-0000-000000009993 | 00010007-0000-0000-0000-000000010007 | 00009993-0000-0000-0000-000000009993
(5 rows)
@@ -335,9 +335,9 @@ FROM abbrev_abort_uuids
ORDER BY ctid DESC LIMIT 5;
id | abort_increasing | abort_decreasing | noabort_increasing | noabort_decreasing
-------+--------------------------------------+--------------------------------------+--------------------------------------+--------------------------------------
- 0 | | | |
- 20003 | | | |
20002 | | | |
+ 20003 | | | |
+ 0 | | | |
9993 | 00000000-0000-0000-0000-000000009992 | 00000000-0000-0000-0000-000000010008 | 00009992-0000-0000-0000-000000009992 | 00010008-0000-0000-0000-000000010008
9994 | 00000000-0000-0000-0000-000000009993 | 00000000-0000-0000-0000-000000010007 | 00009993-0000-0000-0000-000000009993 | 00010007-0000-0000-0000-000000010007
(5 rows)
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 03df7e75b7b..9cea538b8e8 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -316,6 +316,21 @@ SELECT * FROM rw_view15;
3 | UNSPECIFIED
(6 rows)
+INSERT INTO rw_view15 (a) VALUES (3)
+ ON CONFLICT (a) DO UPDATE SET a = excluded.a WHERE excluded.upper = 'UNSPECIFIED'
+ RETURNING old, new;
+ old | new
+-----------------+-----------------
+ (3,UNSPECIFIED) | (3,UNSPECIFIED)
+(1 row)
+
+INSERT INTO rw_view15 (a) VALUES (3)
+ ON CONFLICT (a) DO SELECT WHERE excluded.upper = 'UNSPECIFIED' RETURNING old, new;
+ old | new
+-----------------+-----------------
+ (3,UNSPECIFIED) | (3,UNSPECIFIED)
+(1 row)
+
SELECT * FROM rw_view15;
a | upper
----+-------------
@@ -3646,7 +3661,7 @@ ERROR: new row violates check option for view "wcowrtest_v2"
DETAIL: Failing row contains (2, no such row in sometable).
drop view wcowrtest_v, wcowrtest_v2;
drop table wcowrtest, sometable;
--- Check INSERT .. ON CONFLICT DO UPDATE works correctly when the view's
+-- Check INSERT .. ON CONFLICT DO SELECT/UPDATE works correctly when the view's
-- columns are named and ordered differently than the underlying table's.
create table uv_iocu_tab (a text unique, b float);
insert into uv_iocu_tab values ('xyxyxy', 0);
@@ -3668,6 +3683,13 @@ select * from uv_iocu_tab;
xyxyxy | 1
(1 row)
+insert into uv_iocu_view (a, b) values ('xyxyxy', 1)
+ on conflict (a) do select where uv_iocu_view.c = 2 and excluded.c = 2 returning *;
+ b | c | a | two
+---+---+--------+-----
+ 1 | 2 | xyxyxy | 2.0
+(1 row)
+
-- OK to access view columns that are not present in underlying base
-- relation in the ON CONFLICT portion of the query
insert into uv_iocu_view (a, b) values ('xyxyxy', 3)
@@ -3731,6 +3753,25 @@ select * from uv_iocu_view;
Rejected: (y,1,"(1,y)") | 1 | (1,"Rejected: (y,1,""(1,y)"")")
(1 row)
+explain (costs off)
+insert into uv_iocu_view (aa,bb) values (1,'Rejected: (y,1,"(1,y)")')
+ on conflict (aa) do select where uv_iocu_view.* = excluded.* returning *;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------------------------------
+ Insert on uv_iocu_tab
+ Conflict Resolution: SELECT
+ Conflict Arbiter Indexes: uv_iocu_tab_a_key
+ Conflict Filter: (ROW(uv_iocu_tab.b, uv_iocu_tab.a, (uv_iocu_tab.*)::text) = ROW(excluded.b, excluded.a, (excluded.*)::text))
+ -> Result
+(5 rows)
+
+insert into uv_iocu_view (aa,bb) values (1,'Rejected: (y,1,"(1,y)")')
+ on conflict (aa) do select where uv_iocu_view.* = excluded.* returning *;
+ bb | aa | cc
+-------------------------+----+---------------------------------
+ Rejected: (y,1,"(1,y)") | 1 | (1,"Rejected: (y,1,""(1,y)"")")
+(1 row)
+
-- Test omitting a column of the base relation
delete from uv_iocu_view;
insert into uv_iocu_view (aa,bb) values (1,'x');
@@ -3751,6 +3792,13 @@ select * from uv_iocu_view;
Rejected: ("table default",1,"(1,""table default"")") | 1 | (1,"Rejected: (""table default"",1,""(1,""""table default"""")"")")
(1 row)
+insert into uv_iocu_view (aa) values (1)
+ on conflict (aa) do select returning *;
+ bb | aa | cc
+-------------------------------------------------------+----+---------------------------------------------------------------------
+ Rejected: ("table default",1,"(1,""table default"")") | 1 | (1,"Rejected: (""table default"",1,""(1,""""table default"""")"")")
+(1 row)
+
alter view uv_iocu_view alter column bb set default 'view default';
insert into uv_iocu_view (aa) values (1)
on conflict (aa) do update set bb = 'Rejected: '||excluded.*;
@@ -3760,6 +3808,13 @@ select * from uv_iocu_view;
Rejected: ("view default",1,"(1,""view default"")") | 1 | (1,"Rejected: (""view default"",1,""(1,""""view default"""")"")")
(1 row)
+insert into uv_iocu_view (aa) values (1)
+ on conflict (aa) do select returning *;
+ bb | aa | cc
+-----------------------------------------------------+----+-------------------------------------------------------------------
+ Rejected: ("view default",1,"(1,""view default"")") | 1 | (1,"Rejected: (""view default"",1,""(1,""""view default"""")"")")
+(1 row)
+
-- Should fail to update non-updatable columns
insert into uv_iocu_view (aa) values (1)
on conflict (aa) do update set cc = 'XXX';
@@ -3767,7 +3822,7 @@ ERROR: cannot insert into column "cc" of view "uv_iocu_view"
DETAIL: View columns that are not columns of their base relation are not updatable.
drop view uv_iocu_view;
drop table uv_iocu_tab;
--- ON CONFLICT DO UPDATE permissions checks
+-- ON CONFLICT DO SELECT/UPDATE permissions checks
create user regress_view_user1;
create user regress_view_user2;
set session authorization regress_view_user1;
@@ -3791,6 +3846,16 @@ insert into rw_view1 values ('zzz',2.0,1)
insert into rw_view1 values ('zzz',2.0,1)
on conflict (aa) do update set cc = 3.0; -- Not allowed
ERROR: permission denied for view rw_view1
+insert into rw_view1 values ('yyy',2.0,1)
+ on conflict (aa) do select for update returning cc; -- Not allowed
+ERROR: permission denied for view rw_view1
+insert into rw_view1 values ('yyy',2.0,1)
+ on conflict (aa) do select for update returning aa, bb;
+ aa | bb
+----+--------
+ 1 | yyyxxx
+(1 row)
+
reset session authorization;
select * from base_tbl;
a | b | c
@@ -3807,9 +3872,19 @@ create view rw_view2 as select b as bb, c as cc, a as aa from base_tbl;
insert into rw_view2 (aa,bb) values (1,'xxx')
on conflict (aa) do update set bb = excluded.bb; -- Not allowed
ERROR: permission denied for table base_tbl
+insert into rw_view2 (aa,bb) values (1,'xxx')
+ on conflict (aa) do select returning 1; -- Not allowed
+ERROR: permission denied for table base_tbl
create view rw_view3 as select b as bb, a as aa from base_tbl;
insert into rw_view3 (aa,bb) values (1,'xxx')
on conflict (aa) do update set bb = excluded.bb; -- OK
+insert into rw_view3 (aa,bb) values (1,'xxx')
+ on conflict (aa) do select returning aa, bb; -- OK
+ aa | bb
+----+-----
+ 1 | xxx
+(1 row)
+
reset session authorization;
select * from base_tbl;
a | b | c
@@ -3822,6 +3897,9 @@ create view rw_view4 as select aa, bb, cc FROM rw_view1;
insert into rw_view4 (aa,bb) values (1,'yyy')
on conflict (aa) do update set bb = excluded.bb; -- Not allowed
ERROR: permission denied for view rw_view1
+insert into rw_view4 (aa,bb) values (1,'yyy')
+ on conflict (aa) do select returning 1; -- Not allowed
+ERROR: permission denied for view rw_view1
create view rw_view5 as select aa, bb FROM rw_view1;
insert into rw_view5 (aa,bb) values (1,'yyy')
on conflict (aa) do update set bb = excluded.bb; -- OK
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 021d57f66bb..549e9b2d7be 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -28,7 +28,7 @@ test: strings md5 numerology point lseg line box path polygon circle date time t
# geometry depends on point, lseg, line, box, path, polygon, circle
# horology depends on date, time, timetz, timestamp, timestamptz, interval
# ----------
-test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database stats_import pg_ndistinct pg_dependencies oid8
+test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database stats_import pg_ndistinct pg_dependencies oid8 encoding euc_kr
# ----------
# Load huge amounts of data
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index ce5f5f9eb19..a02f41c9727 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -376,9 +376,9 @@ make_tuple_indirect(PG_FUNCTION_ARGS)
for (i = 0; i < ncolumns; i++)
{
- struct varlena *attr;
- struct varlena *new_attr;
- struct varatt_indirect redirect_pointer;
+ varlena *attr;
+ varlena *new_attr;
+ varatt_indirect redirect_pointer;
/* only work on existing, not-null varlenas */
if (TupleDescAttr(tupdesc, i)->attisdropped ||
@@ -387,7 +387,7 @@ make_tuple_indirect(PG_FUNCTION_ARGS)
TupleDescAttr(tupdesc, i)->attstorage == TYPSTORAGE_PLAIN)
continue;
- attr = (struct varlena *) DatumGetPointer(values[i]);
+ attr = (varlena *) DatumGetPointer(values[i]);
/* don't recursively indirect */
if (VARATT_IS_EXTERNAL_INDIRECT(attr))
@@ -398,14 +398,14 @@ make_tuple_indirect(PG_FUNCTION_ARGS)
attr = detoast_external_attr(attr);
else
{
- struct varlena *oldattr = attr;
+ varlena *oldattr = attr;
attr = palloc0(VARSIZE_ANY(oldattr));
memcpy(attr, oldattr, VARSIZE_ANY(oldattr));
}
/* build indirection Datum */
- new_attr = (struct varlena *) palloc0(INDIRECT_POINTER_SIZE);
+ new_attr = (varlena *) palloc0(INDIRECT_POINTER_SIZE);
redirect_pointer.pointer = attr;
SET_VARTAG_EXTERNAL(new_attr, VARTAG_INDIRECT);
memcpy(VARDATA_EXTERNAL(new_attr), &redirect_pointer,
@@ -1115,6 +1115,148 @@
PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
}
+/* Convert bytea to text without validation, for driving corruption tests from SQL. */
+PG_FUNCTION_INFO_V1(test_bytea_to_text);
+Datum
+test_bytea_to_text(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TEXT_P(PG_GETARG_BYTEA_PP(0));
+}
+
+/* And the reverse. */
+PG_FUNCTION_INFO_V1(test_text_to_bytea);
+Datum
+test_text_to_bytea(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BYTEA_P(PG_GETARG_TEXT_PP(0));
+}
+
+/* Corruption tests in C: exercise the pg_mblen* variants at a given offset. */
+PG_FUNCTION_INFO_V1(test_mblen_func);
+Datum
+test_mblen_func(PG_FUNCTION_ARGS)
+{
+ const char *func = text_to_cstring(PG_GETARG_BYTEA_PP(0));
+ const char *encoding = text_to_cstring(PG_GETARG_BYTEA_PP(1));
+ text *string = PG_GETARG_BYTEA_PP(2);
+ int offset = PG_GETARG_INT32(3);
+ const char *data = VARDATA_ANY(string);
+ size_t size = VARSIZE_ANY_EXHDR(string);
+ int result = 0;
+
+ if (strcmp(func, "pg_mblen_unbounded") == 0)
+ result = pg_mblen_unbounded(data + offset);
+ else if (strcmp(func, "pg_mblen_cstr") == 0)
+ result = pg_mblen_cstr(data + offset);
+ else if (strcmp(func, "pg_mblen_with_len") == 0)
+ result = pg_mblen_with_len(data + offset, size - offset);
+ else if (strcmp(func, "pg_mblen_range") == 0)
+ result = pg_mblen_range(data + offset, data + size);
+ else if (strcmp(func, "pg_encoding_mblen") == 0)
+ result = pg_encoding_mblen(pg_char_to_encoding(encoding), data + offset);
+ else
+ elog(ERROR, "unknown function");
+
+ PG_RETURN_INT32(result);
+}
+
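+/* Convert text into an int4[] of wchar values in the named server encoding. */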
+PG_FUNCTION_INFO_V1(test_text_to_wchars);
+Datum
+test_text_to_wchars(PG_FUNCTION_ARGS)
+{
+ const char *encoding_name = text_to_cstring(PG_GETARG_BYTEA_PP(0));
+ text *string = PG_GETARG_TEXT_PP(1);
+ const char *data = VARDATA_ANY(string);
+ size_t size = VARSIZE_ANY_EXHDR(string);
+ pg_wchar *wchars = palloc(sizeof(pg_wchar) * (size + 1));
+ Datum *datums;
+ int wlen;
+ int encoding;
+
+ encoding = pg_char_to_encoding(encoding_name);
+ if (encoding < 0)
+ elog(ERROR, "unknown encoding name: %s", encoding_name);
+
+ if (size > 0)
+ {
+ datums = palloc(sizeof(Datum) * size);
+ wlen = pg_encoding_mb2wchar_with_len(encoding,
+ data,
+ wchars,
+ size);
+ Assert(wlen >= 0);
+ Assert(wlen <= size);
+ Assert(wchars[wlen] == 0);
+
+ for (int i = 0; i < wlen; ++i)
+ datums[i] = UInt32GetDatum(wchars[i]);
+ }
+ else
+ {
+ datums = NULL;
+ wlen = 0;
+ }
+
+ PG_RETURN_ARRAYTYPE_P(construct_array_builtin(datums, wlen, INT4OID));
+}
+
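+/* The reverse: rebuild text from an int4[] of wchar values. */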
+PG_FUNCTION_INFO_V1(test_wchars_to_text);
+Datum
+test_wchars_to_text(PG_FUNCTION_ARGS)
+{
+ const char *encoding_name = text_to_cstring(PG_GETARG_BYTEA_PP(0));
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P(1);
+ Datum *datums;
+ bool *nulls;
+ char *mb;
+ text *result;
+ int wlen;
+ int bytes;
+ int encoding;
+
+ encoding = pg_char_to_encoding(encoding_name);
+ if (encoding < 0)
+ elog(ERROR, "unknown encoding name: %s", encoding_name);
+
+ deconstruct_array_builtin(array, INT4OID, &datums, &nulls, &wlen);
+
+ if (wlen > 0)
+ {
+ pg_wchar *wchars = palloc(sizeof(pg_wchar) * wlen);
+
+ for (int i = 0; i < wlen; ++i)
+ {
+ if (nulls[i])
+ elog(ERROR, "unexpected NULL in array");
+ wchars[i] = DatumGetInt32(datums[i]);
+ }
+
+ mb = palloc(pg_encoding_max_length(encoding) * wlen + 1);
+ bytes = pg_encoding_wchar2mb_with_len(encoding, wchars, mb, wlen);
+ }
+ else
+ {
+ mb = "";
+ bytes = 0;
+ }
+
+ result = palloc(bytes + VARHDRSZ);
+ SET_VARSIZE(result, bytes + VARHDRSZ);
+ memcpy(VARDATA(result), mb, bytes);
+
+ PG_RETURN_TEXT_P(result);
+}
+
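+/* Report whether the given name is a valid server encoding. */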
+PG_FUNCTION_INFO_V1(test_valid_server_encoding);
+Datum
+test_valid_server_encoding(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(pg_valid_server_encoding(text_to_cstring(PG_GETARG_TEXT_PP(0))) >= 0);
+}
+
/* Provide SQL access to IsBinaryCoercible() */
PG_FUNCTION_INFO_V1(binary_coercible);
Datum
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 450389831a0..82837af7c4a 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -528,6 +528,10 @@ select '[2147483646:2147483646]={1}'::int[];
select '[-2147483648:-2147483647]={1,2}'::int[];
-- all of the above should be accepted
+-- some day we might allow these cases, but for now they're errors:
+select array[]::oidvector;
+select array[]::int2vector;
+
-- tests for array aggregates
CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]);
diff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql
index 733a1dbccfe..b7f6efdd814 100644
--- a/src/test/regress/sql/constraints.sql
+++ b/src/test/regress/sql/constraints.sql
@@ -568,6 +568,9 @@ INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
-- fail, because DO UPDATE variant requires unique index
INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
ON CONFLICT ON CONSTRAINT circles_c1_c2_excl DO UPDATE SET c2 = EXCLUDED.c2;
+-- fail, because DO SELECT variant requires unique index
+INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
+ ON CONFLICT ON CONSTRAINT circles_c1_c2_excl DO SELECT RETURNING *;
-- succeed because c1 doesn't overlap
INSERT INTO circles VALUES('<(20,20), 1>', '<(0,0), 5>');
-- succeed because c2 doesn't overlap
@@ -623,7 +626,9 @@ DROP TABLE deferred_excl;
-- verify constraints created for NOT NULL clauses
CREATE TABLE notnull_tbl1 (a INTEGER NOT NULL NOT NULL);
\d+ notnull_tbl1
--- no-op
+-- specifying an existing constraint is a no-op
+ALTER TABLE notnull_tbl1 ADD CONSTRAINT notnull_tbl1_a_not_null NOT NULL a;
+-- but using a different constraint name is not allowed
ALTER TABLE notnull_tbl1 ADD CONSTRAINT nn NOT NULL a;
\d+ notnull_tbl1
-- duplicate name
diff --git a/src/test/regress/sql/copyencoding.sql b/src/test/regress/sql/copyencoding.sql
index 4e96a4d6505..64718245b94 100644
--- a/src/test/regress/sql/copyencoding.sql
+++ b/src/test/regress/sql/copyencoding.sql
@@ -23,6 +23,13 @@ CREATE TABLE copy_encoding_tab (t text);
COPY (SELECT E'\u3042') TO :'utf8_csv' WITH (FORMAT csv, ENCODING 'UTF8');
-- Read UTF8 data as LATIN1: no error
COPY copy_encoding_tab FROM :'utf8_csv' WITH (FORMAT csv, ENCODING 'LATIN1');
+-- Client-only encodings such as GB18030 have distinct code paths.
+\set fname :abs_builddir '/results/copyencoding_gb18030.csv'
+COPY (SELECT E'\u3042,') TO :'fname' WITH (FORMAT csv, ENCODING 'GB18030');
+COPY copy_encoding_tab FROM :'fname' WITH (FORMAT csv, ENCODING 'GB18030');
+\set fname :abs_builddir '/results/copyencoding_gb18030.data'
+COPY (SELECT E'\u3042,') TO :'fname' WITH (FORMAT text, ENCODING 'GB18030');
+COPY copy_encoding_tab FROM :'fname' WITH (FORMAT text, ENCODING 'GB18030');
-- Use client_encoding
SET client_encoding TO UTF8;
diff --git a/src/test/regress/sql/encoding.sql b/src/test/regress/sql/encoding.sql
new file mode 100644
index 00000000000..d591818c3eb
--- /dev/null
+++ b/src/test/regress/sql/encoding.sql
@@ -0,0 +1,251 @@
+/* skip test if not UTF8 server encoding */
+SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+\getenv libdir PG_LIBDIR
+\getenv dlsuffix PG_DLSUFFIX
+
+\set regresslib :libdir '/regress' :dlsuffix
+
+CREATE FUNCTION test_bytea_to_text(bytea) RETURNS text
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_text_to_bytea(text) RETURNS bytea
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_mblen_func(text, text, text, int) RETURNS int
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_text_to_wchars(text, text) RETURNS int[]
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_wchars_to_text(text, int[]) RETURNS text
+ AS :'regresslib' LANGUAGE C STRICT;
+CREATE FUNCTION test_valid_server_encoding(text) RETURNS boolean
+ AS :'regresslib' LANGUAGE C STRICT;
+
+
+CREATE TABLE regress_encoding(good text, truncated text, with_nul text, truncated_with_nul text);
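+-- Corrupted inputs: \xc3 is a dangling UTF8 first byte, \x00 an embedded
+-- NUL, and \xc300 a multibyte sequence cut short by a NUL.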
+INSERT INTO regress_encoding
+VALUES ('café',
+ 'caf' || test_bytea_to_text('\xc3'),
+ 'café' || test_bytea_to_text('\x00') || 'dcba',
+ 'caf' || test_bytea_to_text('\xc300') || 'dcba');
+
+SELECT good, truncated, with_nul FROM regress_encoding;
+
+SELECT length(good) FROM regress_encoding;
+SELECT substring(good, 3, 1) FROM regress_encoding;
+SELECT substring(good, 4, 1) FROM regress_encoding;
+SELECT regexp_replace(good, '^caf(.)$', '\1') FROM regress_encoding;
+SELECT reverse(good) FROM regress_encoding;
+
+-- invalid short mb character = error
+SELECT length(truncated) FROM regress_encoding;
+SELECT substring(truncated, 1, 3) FROM regress_encoding;
+SELECT substring(truncated, 1, 4) FROM regress_encoding;
+SELECT reverse(truncated) FROM regress_encoding;
+-- invalid short mb character = silently dropped
+SELECT regexp_replace(truncated, '^caf(.)$', '\1') FROM regress_encoding;
+
+-- PostgreSQL doesn't allow strings to contain NUL. If a corrupted string
+-- contains NUL at a character boundary position, some functions treat it as a
+-- character while others treat it as a terminator, as an implementation detail.
+
+-- NUL = terminator
+SELECT length(with_nul) FROM regress_encoding;
+SELECT substring(with_nul, 3, 1) FROM regress_encoding;
+SELECT substring(with_nul, 4, 1) FROM regress_encoding;
+SELECT substring(with_nul, 5, 1) FROM regress_encoding;
+SELECT convert_to(substring(with_nul, 5, 1), 'UTF8') FROM regress_encoding;
+SELECT regexp_replace(with_nul, '^caf(.)$', '\1') FROM regress_encoding;
+-- NUL = character
+SELECT with_nul, reverse(with_nul), reverse(reverse(with_nul)) FROM regress_encoding;
+
+-- If a corrupted string contains NUL in the tail bytes of a multibyte
+-- character (invalid in all encodings), it is considered part of the
+-- character for length purposes. An error will only be raised in code paths
+-- that convert or verify encodings.
+
+SELECT length(truncated_with_nul) FROM regress_encoding;
+SELECT substring(truncated_with_nul, 3, 1) FROM regress_encoding;
+SELECT substring(truncated_with_nul, 4, 1) FROM regress_encoding;
+SELECT convert_to(substring(truncated_with_nul, 4, 1), 'UTF8') FROM regress_encoding;
+SELECT substring(truncated_with_nul, 5, 1) FROM regress_encoding;
+SELECT regexp_replace(truncated_with_nul, '^caf(.)dcba$', '\1') = test_bytea_to_text('\xc300') FROM regress_encoding;
+SELECT reverse(truncated_with_nul) FROM regress_encoding;
+
+-- unbounded: sequence would overrun the string!
+SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated, 3)
+FROM regress_encoding;
+
+-- condition detected when using the length/range variants
+SELECT test_mblen_func('pg_mblen_with_len', 'UTF8', truncated, 3)
+FROM regress_encoding;
+SELECT test_mblen_func('pg_mblen_range', 'UTF8', truncated, 3)
+FROM regress_encoding;
+
+-- unbounded: sequence would overrun the string, if the terminator were really
+-- the end of it
+SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated_with_nul, 3)
+FROM regress_encoding;
+SELECT test_mblen_func('pg_encoding_mblen', 'GB18030', truncated_with_nul, 3)
+FROM regress_encoding;
+
+-- condition detected when using the cstr variants
+SELECT test_mblen_func('pg_mblen_cstr', 'UTF8', truncated_with_nul, 3)
+FROM regress_encoding;
+
+DROP TABLE regress_encoding;
+
+-- mb<->wchar conversions
+CREATE FUNCTION test_encoding(encoding text, description text, input bytea)
+RETURNS VOID LANGUAGE plpgsql AS
+$$
+DECLARE
+ prefix text;
+ len int;
+ wchars int[];
+ round_trip bytea;
+ result text;
+BEGIN
+ prefix := rpad(encoding || ' ' || description || ':', 28);
+
+ -- XXX could also test validation, length functions and include client
+ -- only encodings with these test cases
+
+ IF test_valid_server_encoding(encoding) THEN
+ wchars := test_text_to_wchars(encoding, test_bytea_to_text(input));
+ round_trip = test_text_to_bytea(test_wchars_to_text(encoding, wchars));
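+ -- identical = OK; a strict prefix = conversion stopped early (truncated);
+ -- anything else = failed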
+ if input = round_trip then
+ result := 'OK';
+ elsif length(input) > length(round_trip) and round_trip = substr(input, 1, length(round_trip)) then
+ result := 'truncated';
+ else
+ result := 'failed';
+ end if;
+ RAISE NOTICE '% % -> % -> % = %', prefix, input, wchars, round_trip, result;
+ END IF;
+END;
+$$;
+-- The conversion routines do no validation of the encoding itself, only the
+-- length checks needed to avoid overruns, so some of the byte sequences below
+-- are bogus. They cover all code branches, server encodings only for now.
+CREATE TABLE encoding_tests (encoding text, description text, input bytea);
+INSERT INTO encoding_tests VALUES
+ -- LATIN1, other single-byte encodings
+ ('LATIN1', 'ASCII', 'a'),
+ ('LATIN1', 'extended', '\xe9'),
+ -- EUC_JP, EUC_JIS_2004, EUC_KR (for the purposes of wchar conversion):
+ -- 2 8e (CS2, not used by EUC_KR but arbitrarily considered to have EUC_JP length)
+ -- 3 8f (CS3, not used by EUC_KR but arbitrarily considered to have EUC_JP length)
+ -- 2 80..ff (CS1)
+ ('EUC_JP', 'ASCII', 'a'),
+ ('EUC_JP', 'CS1, short', '\x80'),
+ ('EUC_JP', 'CS1', '\x8002'),
+ ('EUC_JP', 'CS2, short', '\x8e'),
+ ('EUC_JP', 'CS2', '\x8e02'),
+ ('EUC_JP', 'CS3, short', '\x8f'),
+ ('EUC_JP', 'CS3, short', '\x8f02'),
+ ('EUC_JP', 'CS3', '\x8f0203'),
+ -- EUC_CN
+ -- 3 8e (CS2, not used but arbitrarily considered to have length 3)
+ -- 3 8f (CS3, not used but arbitrarily considered to have length 3)
+ -- 2 80..ff (CS1)
+ ('EUC_CN', 'ASCII', 'a'),
+ ('EUC_CN', 'CS1, short', '\x80'),
+ ('EUC_CN', 'CS1', '\x8002'),
+ ('EUC_CN', 'CS2, short', '\x8e'),
+ ('EUC_CN', 'CS2, short', '\x8e02'),
+ ('EUC_CN', 'CS2', '\x8e0203'),
+ ('EUC_CN', 'CS3, short', '\x8f'),
+ ('EUC_CN', 'CS3, short', '\x8f02'),
+ ('EUC_CN', 'CS3', '\x8f0203'),
+ -- EUC_TW:
+ -- 4 8e (CS2)
+ -- 3 8f (CS3, not used but arbitrarily considered to have length 3)
+ -- 2 80..ff (CS1)
+ ('EUC_TW', 'ASCII', 'a'),
+ ('EUC_TW', 'CS1, short', '\x80'),
+ ('EUC_TW', 'CS1', '\x8002'),
+ ('EUC_TW', 'CS2, short', '\x8e'),
+ ('EUC_TW', 'CS2, short', '\x8e02'),
+ ('EUC_TW', 'CS2, short', '\x8e0203'),
+ ('EUC_TW', 'CS2', '\x8e020304'),
+ ('EUC_TW', 'CS3, short', '\x8f'),
+ ('EUC_TW', 'CS3, short', '\x8f02'),
+ ('EUC_TW', 'CS3', '\x8f0203'),
+ -- UTF8
+ -- 2 c0..df
+ -- 3 e0..ef
+ -- 4 f0..f7 (but maximum real codepoint U+10ffff has f4)
+ -- 5 f8..fb (not supported)
+ -- 6 fc..fd (not supported)
+ ('UTF8', 'ASCII', 'a'),
+ ('UTF8', '2 byte, short', '\xdf'),
+ ('UTF8', '2 byte', '\xdf82'),
+ ('UTF8', '3 byte, short', '\xef'),
+ ('UTF8', '3 byte, short', '\xef82'),
+ ('UTF8', '3 byte', '\xef8283'),
+ ('UTF8', '4 byte, short', '\xf7'),
+ ('UTF8', '4 byte, short', '\xf782'),
+ ('UTF8', '4 byte, short', '\xf78283'),
+ ('UTF8', '4 byte', '\xf7828384'),
+ ('UTF8', '5 byte, unsupported', '\xfb'),
+ ('UTF8', '5 byte, unsupported', '\xfb82'),
+ ('UTF8', '5 byte, unsupported', '\xfb8283'),
+ ('UTF8', '5 byte, unsupported', '\xfb828384'),
+ ('UTF8', '5 byte, unsupported', '\xfb82838485'),
+ ('UTF8', '6 byte, unsupported', '\xfd'),
+ ('UTF8', '6 byte, unsupported', '\xfd82'),
+ ('UTF8', '6 byte, unsupported', '\xfd8283'),
+ ('UTF8', '6 byte, unsupported', '\xfd828384'),
+ ('UTF8', '6 byte, unsupported', '\xfd82838485'),
+ ('UTF8', '6 byte, unsupported', '\xfd8283848586'),
+ -- MULE_INTERNAL
+ -- 2 81..8d LC1
+ -- 3 90..99 LC2
+ ('MULE_INTERNAL', 'ASCII', 'a'),
+ ('MULE_INTERNAL', 'LC1, short', '\x81'),
+ ('MULE_INTERNAL', 'LC1', '\x8182'),
+ ('MULE_INTERNAL', 'LC2, short', '\x90'),
+ ('MULE_INTERNAL', 'LC2, short', '\x9082'),
+ ('MULE_INTERNAL', 'LC2', '\x908283');
+
+SELECT COUNT(test_encoding(encoding, description, input)) > 0
+FROM encoding_tests;
+
+-- substring fetches a slice of a toasted value; unused tail of that slice is
+-- an incomplete char (bug #19406)
+CREATE TABLE toast_3b_utf8 (c text);
+INSERT INTO toast_3b_utf8 VALUES (repeat(U&'\2026', 4000));
+SELECT SUBSTRING(c FROM 1 FOR 1) FROM toast_3b_utf8;
+SELECT SUBSTRING(c FROM 4001 FOR 1) FROM toast_3b_utf8;
+-- diagnose incomplete char iff within the substring
+UPDATE toast_3b_utf8 SET c = c || test_bytea_to_text('\xe280');
+SELECT SUBSTRING(c FROM 4000 FOR 1) FROM toast_3b_utf8;
+SELECT SUBSTRING(c FROM 4001 FOR 1) FROM toast_3b_utf8;
+-- substring needing last byte of its slice_size
+ALTER TABLE toast_3b_utf8 RENAME TO toast_4b_utf8;
+UPDATE toast_4b_utf8 SET c = repeat(U&'\+01F680', 3000);
+SELECT SUBSTRING(c FROM 3000 FOR 1) FROM toast_4b_utf8;
+
+DROP TABLE encoding_tests;
+DROP TABLE toast_4b_utf8;
+DROP FUNCTION test_encoding;
+DROP FUNCTION test_wchars_to_text;
+DROP FUNCTION test_text_to_wchars;
+DROP FUNCTION test_valid_server_encoding;
+DROP FUNCTION test_mblen_func;
+DROP FUNCTION test_bytea_to_text;
+DROP FUNCTION test_text_to_bytea;
+
+
+-- substring slow path: multi-byte escape char vs. multi-byte pattern char.
+SELECT SUBSTRING('a' SIMILAR U&'\00AC' ESCAPE U&'\00A7');
+-- Levenshtein distance metric (used for the column-name HINT): exercise character length cache.
+SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name);
+-- JSON errcontext: truncate long data.
+SELECT repeat(U&'\00A7', 30)::json;
diff --git a/src/test/regress/sql/euc_kr.sql b/src/test/regress/sql/euc_kr.sql
new file mode 100644
index 00000000000..1851b2a8c14
--- /dev/null
+++ b/src/test/regress/sql/euc_kr.sql
@@ -0,0 +1,12 @@
+-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent
+-- non-UTF8, multibyte encoding as of 2026-01. Since UTF8 can represent all
+-- of EUC_KR, also run the test in UTF8.
+SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+-- Exercise is_multibyte_char_in_char (non-UTF8) slow path.
+SELECT POSITION(
+ convert_from('\xbcf6c7d0', 'EUC_KR') IN
+ convert_from('\xb0fac7d02c20bcf6c7d02c20b1e2bcfa2c20bbee', 'EUC_KR'));
diff --git a/src/test/regress/sql/guc.sql b/src/test/regress/sql/guc.sql
index bafaf067e82..dfb843fd3ae 100644
--- a/src/test/regress/sql/guc.sql
+++ b/src/test/regress/sql/guc.sql
@@ -232,6 +232,28 @@ drop schema not_there_initially;
select current_schemas(false);
reset search_path;
+--
+-- Test parsing of log_min_messages
+--
+
+SET log_min_messages TO foo; -- fail
+SET log_min_messages TO fatal;
+SHOW log_min_messages;
+SET log_min_messages TO 'fatal';
+SHOW log_min_messages;
+SET log_min_messages TO 'checkpointer:debug2, autovacuum:debug1'; -- fail
+SET log_min_messages TO 'debug1, backend:error, fatal'; -- fail
+SET log_min_messages TO 'backend:error, debug1, backend:warning'; -- fail
+SET log_min_messages TO 'backend:error, foo:fatal, archiver:debug1'; -- fail
+SET log_min_messages TO 'backend:error, checkpointer:bar, archiver:debug1'; -- fail
+SET log_min_messages TO 'backend:error, checkpointer:debug3, fatal, archiver:debug2, autovacuum:debug1, walsender:debug3';
+SHOW log_min_messages;
+SET log_min_messages TO 'warning, autovacuum:debug1';
+SHOW log_min_messages;
+SET log_min_messages TO 'autovacuum:debug1, warning';
+SHOW log_min_messages;
+RESET log_min_messages;
+
--
-- Tests for function-local GUC settings
--
diff --git a/src/test/regress/sql/insert_conflict.sql b/src/test/regress/sql/insert_conflict.sql
index 03b1f0e44b0..a5a84d1d4b8 100644
--- a/src/test/regress/sql/insert_conflict.sql
+++ b/src/test/regress/sql/insert_conflict.sql
@@ -93,6 +93,9 @@ explain (costs off) insert into insertconflicttest values (0, 'Bilberry') on con
explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key) do update set fruit = excluded.fruit where excluded.fruit != 'Elderberry';
-- Does the same, but JSON format shows "Conflict Arbiter Index" as JSON array:
explain (costs off, format json) insert into insertconflicttest values (0, 'Bilberry') on conflict (key) do update set fruit = excluded.fruit where insertconflicttest.fruit != 'Lime' returning *;
+-- Should display lock strength, if specified
+explain (costs off) insert into insertconflicttest values (1, 'Apple') on conflict (key) do select returning *;
+explain (costs off) insert into insertconflicttest values (1, 'Apple') on conflict (key) do select for key share returning *;
-- Fails (no unique index inference specification, required for do update variant):
insert into insertconflicttest values (1, 'Apple') on conflict do update set fruit = excluded.fruit;
@@ -130,6 +133,20 @@ insert into insertconflicttest AS ict values (6, 'Passionfruit') on conflict (ke
-- Check helpful hint when qualifying set column with target table
insert into insertconflicttest values (3, 'Kiwi') on conflict (key, fruit) do update set insertconflicttest.fruit = 'Mango';
+--
+-- DO SELECT tests
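+-- (DO SELECT returns the existing conflicting row via RETURNING, optionally
+-- locking it, and leaves the table unmodified)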
+--
+delete from insertconflicttest where fruit = 'Apple';
+insert into insertconflicttest values (1, 'Apple') on conflict (key) do select; -- fails
+insert into insertconflicttest as i values (1, 'Apple') on conflict (key) do select returning old, new, i;
+insert into insertconflicttest as i values (1, 'Orange') on conflict (key) do select returning old, new, i;
+insert into insertconflicttest as i values (1, 'Apple') on conflict (key) do select where i.fruit = 'Apple' returning *;
+insert into insertconflicttest as i values (1, 'Apple') on conflict (key) do select where i.fruit = 'Orange' returning *;
+insert into insertconflicttest as i values (1, 'Orange') on conflict (key) do select where excluded.fruit = 'Apple' returning *;
+insert into insertconflicttest as i values (1, 'Orange') on conflict (key) do select where excluded.fruit = 'Orange' returning *;
+
drop index key_index;
--
@@ -459,6 +474,30 @@ begin transaction isolation level serializable;
insert into selfconflict values (6,1), (6,2) on conflict(f1) do update set f2 = 0;
commit;
+begin transaction isolation level read committed;
+insert into selfconflict values (7,1), (7,2) on conflict(f1) do select returning *;
+commit;
+
+begin transaction isolation level repeatable read;
+insert into selfconflict values (8,1), (8,2) on conflict(f1) do select returning *;
+commit;
+
+begin transaction isolation level serializable;
+insert into selfconflict values (9,1), (9,2) on conflict(f1) do select returning *;
+commit;
+
+begin transaction isolation level read committed;
+insert into selfconflict values (10,1), (10,2) on conflict(f1) do select for update returning *;
+commit;
+
+begin transaction isolation level repeatable read;
+insert into selfconflict values (11,1), (11,2) on conflict(f1) do select for update returning *;
+commit;
+
+begin transaction isolation level serializable;
+insert into selfconflict values (12,1), (12,2) on conflict(f1) do select for update returning *;
+commit;
+
select * from selfconflict;
drop table selfconflict;
@@ -473,13 +512,17 @@ insert into parted_conflict_test values (1, 'a') on conflict do nothing;
-- index on a required, which does exist in parent
insert into parted_conflict_test values (1, 'a') on conflict (a) do nothing;
insert into parted_conflict_test values (1, 'a') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test values (1, 'a') on conflict (a) do select returning *;
+insert into parted_conflict_test values (1, 'a') on conflict (a) do select for update returning *;
-- targeting partition directly will work
insert into parted_conflict_test_1 values (1, 'a') on conflict (a) do nothing;
insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do select returning b;
-- index on b required, which doesn't exist in parent
-insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a;
+insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a; -- fail
+insert into parted_conflict_test values (2, 'b') on conflict (b) do select returning b; -- fail
-- targeting partition directly will work
insert into parted_conflict_test_1 values (2, 'b') on conflict (b) do update set a = excluded.a;
@@ -487,13 +530,16 @@ insert into parted_conflict_test_1 values (2, 'b') on conflict (b) do update set
-- should see (2, 'b')
select * from parted_conflict_test order by a;
--- now check that DO UPDATE works correctly for target partition with
--- different attribute numbers
+-- now check that DO UPDATE and DO SELECT work correctly for target partition
+-- with different attribute numbers
create table parted_conflict_test_2 (b char, a int unique);
alter table parted_conflict_test attach partition parted_conflict_test_2 for values in (3);
truncate parted_conflict_test;
insert into parted_conflict_test values (3, 'a') on conflict (a) do update set b = excluded.b;
insert into parted_conflict_test values (3, 'b') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test values (3, 'a') on conflict (a) do select returning b;
+insert into parted_conflict_test values (3, 'a') on conflict (a) do select where excluded.b = 'a' returning parted_conflict_test;
+insert into parted_conflict_test values (3, 'a') on conflict (a) do select where parted_conflict_test.b = 'b' returning b;
-- should see (3, 'b')
select * from parted_conflict_test order by a;
@@ -504,6 +550,7 @@ create table parted_conflict_test_3 partition of parted_conflict_test for values
truncate parted_conflict_test;
insert into parted_conflict_test (a, b) values (4, 'a') on conflict (a) do update set b = excluded.b;
insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a';
+insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do select returning b;
-- should see (4, 'b')
select * from parted_conflict_test order by a;
@@ -514,6 +561,7 @@ create table parted_conflict_test_4_1 partition of parted_conflict_test_4 for va
truncate parted_conflict_test;
insert into parted_conflict_test (a, b) values (5, 'a') on conflict (a) do update set b = excluded.b;
insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a';
+insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do select where parted_conflict_test.b = 'a' returning b;
-- should see (5, 'b')
select * from parted_conflict_test order by a;
@@ -526,6 +574,26 @@ insert into parted_conflict_test (a, b) values (1, 'b'), (2, 'c'), (4, 'b') on c
-- should see (1, 'b'), (2, 'a'), (4, 'b')
select * from parted_conflict_test order by a;
+-- test DO SELECT with multiple rows hitting different partitions
+truncate parted_conflict_test;
+insert into parted_conflict_test (a, b) values (1, 'a'), (2, 'b'), (4, 'c');
+insert into parted_conflict_test (a, b) values (1, 'x'), (2, 'y'), (4, 'z')
+ on conflict (a) do select returning *, tableoid::regclass;
+
+-- should see original values (1, 'a'), (2, 'b'), (4, 'c')
+select * from parted_conflict_test order by a;
+
+-- test DO SELECT with WHERE filtering across partitions
+insert into parted_conflict_test (a, b) values (1, 'n') on conflict (a) do select where parted_conflict_test.b = 'a' returning *;
+insert into parted_conflict_test (a, b) values (2, 'n') on conflict (a) do select where parted_conflict_test.b = 'x' returning *;
+
+-- test DO SELECT with EXCLUDED in WHERE across partitions with different layouts
+insert into parted_conflict_test (a, b) values (3, 't') on conflict (a) do select where excluded.b = 't' returning *;
+
+-- test DO SELECT FOR UPDATE across different partition layouts
+insert into parted_conflict_test (a, b) values (1, 'l') on conflict (a) do select for update returning *;
+insert into parted_conflict_test (a, b) values (3, 'l') on conflict (a) do select for update returning *;
+
drop table parted_conflict_test;
-- test behavior of inserting a conflicting tuple into an intermediate
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index b91fb7574df..14cbec28766 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -866,6 +866,35 @@ select 1 from tenk1
where (hundred, thousand) in (select twothousand, twothousand from onek);
reset enable_memoize;
+--
+-- more antijoin recognition tests using NOT NULL constraints
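+-- (a left join whose inner side has a NOT NULL column tested IS NULL above
+-- the join can be implemented as an antijoin)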
+--
+
+begin;
+
+create temp table tbl_anti(a int not null, b int, c int);
+
+-- this is an antijoin, as t2.a is non-null for any matching row
+explain (costs off)
+select * from tenk1 t1 left join tbl_anti t2 on t1.unique1 = t2.b
+where t2.a is null;
+
+-- this is an antijoin, as t2.a is non-null for any matching row
+explain (costs off)
+select * from tenk1 t1 left join
+ (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b
+where t2.a is null;
+
+-- this is not an antijoin, as t3.a can be nulled by t2/t3 join
+explain (costs off)
+select * from tenk1 t1 left join
+ (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b
+where t3.a is null;
+
+rollback;
+
--
-- regression test for bogus RTE_GROUP entries
--
diff --git a/src/test/regress/sql/join_hash.sql b/src/test/regress/sql/join_hash.sql
index 49d3fd61856..53db1754bb2 100644
--- a/src/test/regress/sql/join_hash.sql
+++ b/src/test/regress/sql/join_hash.sql
@@ -83,8 +83,8 @@ update pg_class
set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
where relname = 'extremely_skewed';
--- Make a relation with a couple of enormous tuples.
-create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
+-- Make a relation with several enormous tuples.
+create table wide as select generate_series(1, 3) as id, rpad('', 320000, 'x') as t;
alter table wide set (parallel_workers = 2);
-- The "optimal" case: the hash table fits in memory; we plan for 1
@@ -496,14 +496,14 @@ set work_mem = '128kB';
set hash_mem_multiplier = 1.0;
explain (costs off)
select length(max(s.t))
- from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+ from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
select length(max(s.t))
-from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
select final > 1 as multibatch
from hash_join_batches(
$$
select length(max(s.t))
- from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+ from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
$$);
rollback to settings;
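With three wide rows on the outer side and the inner side filtered to id < 3, the outer relation now has one row (id = 3) with no join partner, so the multi-batch hash left join must emit a null-extended enormous tuple in addition to the matched ones. A hedged way to observe the batch count directly, without the hash_join_batches() helper this file defines:

    -- "Batches: N" with N > 1 under the Hash node indicates a spill
    explain (analyze, costs off, timing off, summary off)
    select length(max(s.t))
    from wide left join (select id, coalesce(t, '') || '' as t
                         from wide where id < 3) s using (id);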
diff --git a/src/test/regress/sql/predicate.sql b/src/test/regress/sql/predicate.sql
index db72b11bb22..0f92bb52435 100644
--- a/src/test/regress/sql/predicate.sql
+++ b/src/test/regress/sql/predicate.sql
@@ -308,3 +308,143 @@ EXPLAIN (COSTS OFF)
SELECT * FROM pred_tab WHERE (a::oid) IS NULL;
DROP TABLE pred_tab;
+
+--
+-- Test optimization of IS [NOT] DISTINCT FROM
+--
+
+CREATE TYPE dist_row_t AS (a int, b int);
+CREATE TABLE dist_tab (id int, val_nn int NOT NULL, val_null int, row_nn dist_row_t NOT NULL);
+
+INSERT INTO dist_tab VALUES (1, 10, 10, ROW(1, 1));
+INSERT INTO dist_tab VALUES (2, 20, NULL, ROW(2, 2));
+INSERT INTO dist_tab VALUES (3, 30, 30, ROW(1, NULL));
+
+CREATE INDEX dist_tab_nn_idx ON dist_tab (val_nn);
+
+ANALYZE dist_tab;
+
+-- Ensure that the predicate folds to constant TRUE
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM NULL::INT;
+SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM NULL::INT;
+
+-- Ensure that the predicate folds to constant FALSE
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM NULL::INT;
+SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM NULL::INT;
+
+-- Ensure that the predicate is converted to an inequality operator
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM 10;
+SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM 10;
+
+-- Ensure that the predicate is converted to an equality operator, and thus can
+-- use index scan
+SET enable_seqscan TO off;
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM 10;
+SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM 10;
+RESET enable_seqscan;
+
+-- Ensure that the predicate is preserved as "IS DISTINCT FROM"
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM 20;
+SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM 20;
+
+-- Safety check for rowtypes
+-- Ensure that the predicate is converted to an inequality operator
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE row_nn IS DISTINCT FROM ROW(1, 5)::dist_row_t;
+-- ... and that all 3 rows are returned
+SELECT id FROM dist_tab WHERE row_nn IS DISTINCT FROM ROW(1, 5)::dist_row_t;
+
+-- Ensure that the predicate is converted to an equality operator, and thus
+-- mergejoinable or hashjoinable
+SET enable_nestloop TO off;
+EXPLAIN (COSTS OFF)
+SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2.val_nn;
+SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2.val_nn;
+RESET enable_nestloop;
+
+-- Ensure that the predicate is converted to IS NOT NULL
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM NULL::INT;
+SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM NULL::INT;
+
+-- Ensure that the predicate is converted to IS NULL
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE val_null IS NOT DISTINCT FROM NULL::INT;
+SELECT id FROM dist_tab WHERE val_null IS NOT DISTINCT FROM NULL::INT;
+
+-- Safety check for rowtypes
+-- The predicate is converted to IS NOT NULL, and get_rule_expr prints it as IS
+-- DISTINCT FROM because argisrow is false, indicating that we're applying a
+-- scalar test
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE (val_null, val_null) IS DISTINCT FROM NULL::RECORD;
+SELECT id FROM dist_tab WHERE (val_null, val_null) IS DISTINCT FROM NULL::RECORD;
+
+-- The predicate is converted to IS NULL, and get_rule_expr prints it as IS NOT
+-- DISTINCT FROM because argisrow is false, indicating that we're applying a
+-- scalar test
+EXPLAIN (COSTS OFF)
+SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD;
+SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD;
+
+DROP TABLE dist_tab;
+DROP TYPE dist_row_t;
+
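The pattern these cases lock in: when the operand is provably non-null, IS [NOT] DISTINCT FROM loses its third logical value, so it can fold to a constant (against NULL) or to plain <>/= (against a non-null constant), the latter being index-, merge-, and hash-friendly; nullable operands compared against NULL become IS [NOT] NULL instead. The underlying three-valued semantics can be checked inline:

    -- quick crib for the null-safe comparison semantics
    select x,
           x is distinct from null   as dist_null,     -- false only for NULL x
           x is not distinct from 10 as eq10_nullsafe  -- true only for x = 10
    from (values (10), (20), (null)) as v(x);
    -- expect: (10, t, t), (20, t, f), (NULL, f, f)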
+--
+-- Test optimization of BooleanTest (IS [NOT] TRUE/FALSE/UNKNOWN) on
+-- non-nullable input
+--
+CREATE TABLE bool_tab (id int, flag_nn boolean NOT NULL, flag_null boolean);
+
+INSERT INTO bool_tab VALUES (1, true, true);
+INSERT INTO bool_tab VALUES (2, false, NULL);
+
+CREATE INDEX bool_tab_nn_idx ON bool_tab (flag_nn);
+
+ANALYZE bool_tab;
+
+-- Ensure that the predicate folds to constant FALSE
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS UNKNOWN;
+SELECT id FROM bool_tab WHERE flag_nn IS UNKNOWN;
+
+-- Ensure that the predicate folds to constant TRUE
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS NOT UNKNOWN;
+SELECT id FROM bool_tab WHERE flag_nn IS NOT UNKNOWN;
+
+-- Ensure that the predicate folds to flag_nn
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS TRUE;
+SELECT id FROM bool_tab WHERE flag_nn IS TRUE;
+
+-- Ensure that the predicate folds to flag_nn, and thus can use index scan
+SET enable_seqscan TO off;
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS NOT FALSE;
+SELECT id FROM bool_tab WHERE flag_nn IS NOT FALSE;
+RESET enable_seqscan;
+
+-- Ensure that the predicate folds to not flag_nn
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS FALSE;
+SELECT id FROM bool_tab WHERE flag_nn IS FALSE;
+
+-- Ensure that the predicate folds to not flag_nn, and thus can use index scan
+SET enable_seqscan TO off;
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_nn IS NOT TRUE;
+SELECT id FROM bool_tab WHERE flag_nn IS NOT TRUE;
+RESET enable_seqscan;
+
+-- Ensure that the predicate is preserved as a BooleanTest
+EXPLAIN (COSTS OFF)
+SELECT id FROM bool_tab WHERE flag_null IS UNKNOWN;
+SELECT id FROM bool_tab WHERE flag_null IS UNKNOWN;
+
+DROP TABLE bool_tab;
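Same idea for BooleanTest: once flag_nn is known non-null, IS [NOT] UNKNOWN folds to a constant and the TRUE/FALSE variants fold to the bare column or its negation, which is what lets the forced index scans above succeed. The semantics, checked inline:

    select b,
           b is true      as is_true,     -- equals b for non-null input
           b is not false as not_false,   -- likewise
           b is unknown   as is_unknown   -- false for any non-null input
    from (values (true), (false)) as v(b);
    -- expect: (t, t, t, f), (f, f, f, f)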
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
index 96eff1104d2..66e06d91a41 100644
--- a/src/test/regress/sql/privileges.sql
+++ b/src/test/regress/sql/privileges.sql
@@ -565,6 +565,24 @@ INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = EXCLU
INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = EXCLUDED.three;
INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set one = 8; -- fails (due to UPDATE)
INSERT INTO atest5(three) VALUES (4) ON CONFLICT (two) DO UPDATE set three = 10; -- fails (due to INSERT)
+-- Check that column level privileges are enforced for ON CONFLICT ... WHERE
+-- OK: we have SELECT rights on column one
+INSERT INTO atest5(two) VALUES (2) ON CONFLICT (two) DO SELECT WHERE atest5.one = 1 RETURNING atest5.two;
+-- Error: no SELECT rights on column three
+INSERT INTO atest5(two) VALUES (2) ON CONFLICT (two) DO SELECT WHERE atest5.three = 1 RETURNING atest5.two;
+
+-- Check that ON CONFLICT ... SELECT FOR UPDATE/SHARE requires an updatable column
+SET SESSION AUTHORIZATION regress_priv_user1;
+REVOKE UPDATE (three) ON atest5 FROM regress_priv_user4;
+SET SESSION AUTHORIZATION regress_priv_user4;
+
+INSERT INTO atest5(two) VALUES (2) ON CONFLICT (two) DO SELECT FOR UPDATE RETURNING atest5.two; -- fails
+
+SET SESSION AUTHORIZATION regress_priv_user1;
+GRANT UPDATE (three) ON atest5 TO regress_priv_user4;
+SET SESSION AUTHORIZATION regress_priv_user4;
+
+INSERT INTO atest5(two) VALUES (2) ON CONFLICT (two) DO SELECT FOR UPDATE RETURNING atest5.two; -- ok
-- Check that the columns in the inference require select privileges
INSERT INTO atest5(four) VALUES (4); -- fail
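The rule these additions encode: columns referenced in a DO SELECT WHERE clause or RETURNING list need column-level SELECT privilege (hence atest5.one passes while atest5.three errors out), and DO SELECT FOR UPDATE further requires UPDATE privilege on at least one column, which is why revoking and re-granting UPDATE (three) flips the FOR UPDATE case between failure and success. A hedged sketch of a minimal grant set (names as above, but the grants are illustrative, not the suite's exact setup):

    grant select (one, two) on atest5 to regress_priv_user4;  -- WHERE/RETURNING
    grant update (three)    on atest5 to regress_priv_user4;  -- enables FOR UPDATE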
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
index 5d923c5ca3b..6b3566271df 100644
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -121,8 +121,9 @@ BEGIN; DELETE FROM rls_test_tgt; ROLLBACK;
BEGIN; DELETE FROM rls_test_tgt WHERE a = 1; ROLLBACK;
DELETE FROM rls_test_tgt RETURNING *;
--- INSERT ... ON CONFLICT DO NOTHING should apply INSERT CHECK and SELECT USING
--- policy clauses (to new value, whether it conflicts or not)
+-- INSERT ... ON CONFLICT DO NOTHING with an arbiter clause should apply
+-- INSERT CHECK and SELECT USING policy clauses (to new value, whether it
+-- conflicts or not)
INSERT INTO rls_test_tgt VALUES (1, 'tgt a') ON CONFLICT (a) DO NOTHING;
INSERT INTO rls_test_tgt VALUES (1, 'tgt b') ON CONFLICT (a) DO NOTHING;
@@ -141,6 +142,21 @@ INSERT INTO rls_test_tgt VALUES (3, 'tgt a') ON CONFLICT (a) DO UPDATE SET b = '
INSERT INTO rls_test_tgt VALUES (3, 'tgt c') ON CONFLICT (a) DO UPDATE SET b = 'tgt d' RETURNING *;
ROLLBACK;
+-- INSERT ... ON CONFLICT DO SELECT should apply INSERT CHECK and SELECT USING
+-- policy clauses to values proposed for insert. In the event of a conflict it
+-- should also apply SELECT USING policy clauses to the existing values.
+BEGIN;
+INSERT INTO rls_test_tgt VALUES (4, 'tgt a') ON CONFLICT (a) DO SELECT RETURNING *;
+INSERT INTO rls_test_tgt VALUES (4, 'tgt b') ON CONFLICT (a) DO SELECT RETURNING *;
+ROLLBACK;
+
+-- INSERT ... ON CONFLICT DO SELECT FOR UPDATE should also apply UPDATE USING
+-- policy clauses to the existing values, in the event of a conflict.
+BEGIN;
+INSERT INTO rls_test_tgt VALUES (5, 'tgt a') ON CONFLICT (a) DO SELECT FOR UPDATE RETURNING *;
+INSERT INTO rls_test_tgt VALUES (5, 'tgt b') ON CONFLICT (a) DO SELECT FOR UPDATE RETURNING *;
+ROLLBACK;
+
-- MERGE should always apply SELECT USING policy clauses to both source and
-- target rows
MERGE INTO rls_test_tgt t USING rls_test_src s ON t.a = s.a
@@ -952,11 +968,51 @@ INSERT INTO document VALUES (4, (SELECT cid from category WHERE cname = 'novel')
INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'my first novel')
ON CONFLICT (did) DO UPDATE SET dauthor = 'regress_rls_carol';
+--
+-- INSERT ... ON CONFLICT DO SELECT and Row-level security
+--
+SET SESSION AUTHORIZATION regress_rls_alice;
+DROP POLICY p3_with_all ON document;
+
+CREATE POLICY p1_select_novels ON document FOR SELECT
+ USING (cid = (SELECT cid from category WHERE cname = 'novel'));
+CREATE POLICY p2_insert_own ON document FOR INSERT
+ WITH CHECK (dauthor = current_user);
+CREATE POLICY p3_update_novels ON document FOR UPDATE
+ USING (cid = (SELECT cid from category WHERE cname = 'novel') AND dlevel = 1)
+ WITH CHECK (dauthor = current_user);
+
+SET SESSION AUTHORIZATION regress_rls_bob;
+
+-- DO SELECT requires SELECT rights, should succeed for novel
+INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'another novel')
+ ON CONFLICT (did) DO SELECT RETURNING did, dauthor, dtitle;
+
+-- DO SELECT requires SELECT rights, should fail for non-novel
+INSERT INTO document VALUES (33, (SELECT cid from category WHERE cname = 'science fiction'), 1, 'regress_rls_bob', 'another sci-fi')
+ ON CONFLICT (did) DO SELECT RETURNING did, dauthor, dtitle;
+
+-- DO SELECT with WHERE and EXCLUDED reference
+INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'another novel')
+ ON CONFLICT (did) DO SELECT WHERE excluded.dlevel = 1 RETURNING did, dauthor, dtitle;
+
+-- DO SELECT FOR UPDATE requires both SELECT and UPDATE rights, should succeed for novel and dlevel = 1
+INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'another novel')
+ ON CONFLICT (did) DO SELECT FOR UPDATE RETURNING did, dauthor, dtitle;
+
+-- should fail UPDATE USING policy for novel with dlevel = 2
+INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'regress_rls_bob', 'another novel')
+ ON CONFLICT (did) DO SELECT FOR UPDATE RETURNING did, dauthor, dtitle;
+
+SET SESSION AUTHORIZATION regress_rls_alice;
+DROP POLICY p1_select_novels ON document;
+DROP POLICY p2_insert_own ON document;
+DROP POLICY p3_update_novels ON document;
+
--
-- MERGE
--
RESET SESSION AUTHORIZATION;
-DROP POLICY p3_with_all ON document;
ALTER TABLE document ADD COLUMN dnotes text DEFAULT '';
-- all documents are readable
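A compact restatement of the RLS surface exercised here, on a hypothetical table: DO SELECT checks INSERT policies against the proposed row and SELECT policies against both the proposed row and, on conflict, the existing one; DO SELECT FOR UPDATE additionally checks UPDATE USING against the existing row:

    create table rls_demo (a int primary key, b text);
    alter table rls_demo enable row level security;
    create policy demo_sel on rls_demo for select using (b <> 'hidden');
    create policy demo_ins on rls_demo for insert with check (b is not null);
    create policy demo_upd on rls_demo for update using (b <> 'frozen');
    -- on conflict: demo_sel must pass for the existing row, and the
    -- FOR UPDATE variant must additionally satisfy demo_upd
    insert into rls_demo values (1, 'x')
      on conflict (a) do select for update returning *;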
diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql
index 3f240bec7b0..40f5c16e540 100644
--- a/src/test/regress/sql/rules.sql
+++ b/src/test/regress/sql/rules.sql
@@ -1205,6 +1205,32 @@ SELECT * FROM hat_data WHERE hat_name IN ('h8', 'h9', 'h7') ORDER BY hat_name;
DROP RULE hat_upsert ON hats;
+-- DO SELECT with a WHERE clause
+CREATE RULE hat_confsel AS ON INSERT TO hats
+ DO INSTEAD
+ INSERT INTO hat_data VALUES (
+ NEW.hat_name,
+ NEW.hat_color)
+ ON CONFLICT (hat_name)
+ DO SELECT FOR UPDATE
+ WHERE excluded.hat_color <> 'forbidden' AND hat_data.* != excluded.*
+ RETURNING *;
+SELECT definition FROM pg_rules WHERE tablename = 'hats' ORDER BY rulename;
+
+-- fails without RETURNING
+INSERT INTO hats VALUES ('h7', 'blue');
+
+-- works (returns conflicts)
+EXPLAIN (costs off)
+INSERT INTO hats VALUES ('h7', 'blue') RETURNING *;
+INSERT INTO hats VALUES ('h7', 'blue') RETURNING *;
+
+-- first conflict is filtered out by the WHERE clause, the second is returned
+INSERT INTO hats VALUES ('h7', 'forbidden') RETURNING *;
+INSERT INTO hats VALUES ('h7', 'black') RETURNING *;
+
+DROP RULE hat_confsel ON hats;
+
drop table hats;
drop table hat_data;
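The rule form also shows why RETURNING is mandatory here: the rewritten INSERT produces rows (the conflicting ones), so a caller that does not ask for RETURNING has no way to receive them and the rewriter raises an error. The same pattern reduced to hypothetical names:

    create table d (k text primary key, v text);
    create table f (k text, v text);
    create rule f_ins as on insert to f
        do instead
        insert into d values (new.k, new.v)
        on conflict (k) do select for update returning *;
    insert into f values ('k1', 'v1') returning *;  -- ok
    insert into f values ('k1', 'v2');              -- expected to fail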
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 76ee9d29c08..9dcce3440c8 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1866,3 +1866,21 @@ SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE numeric_lt(y, 1.0)'
-- Tidy up
DROP TABLE sb_1, sb_2 CASCADE;
+
+-- Check the statistics generated for a range-type column and a range expression.
+CREATE TABLE stats_ext_tbl_range(name text, irange int4range);
+INSERT INTO stats_ext_tbl_range VALUES
+ ('red', '[1,7)'::int4range),
+ ('blue', '[2,8]'::int4range),
+ ('green', '[3,9)'::int4range);
+CREATE STATISTICS stats_ext_range (mcv)
+ ON irange, (irange + '[4,10)'::int4range)
+ FROM stats_ext_tbl_range;
+ANALYZE stats_ext_tbl_range;
+SELECT attnames, most_common_vals
+ FROM pg_stats_ext
+ WHERE statistics_name = 'stats_ext_range';
+SELECT range_length_histogram, range_empty_frac, range_bounds_histogram
+ FROM pg_stats_ext_exprs
+ WHERE statistics_name = 'stats_ext_range';
+DROP TABLE stats_ext_tbl_range;
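Before the final DROP, the new statistics object can also be seen feeding selectivity estimates; a hedged follow-on using the check_estimated_rows() helper this file already relies on:

    SELECT * FROM check_estimated_rows('SELECT * FROM stats_ext_tbl_range WHERE (irange + ''[4,10)''::int4range) = ''[1,10)''::int4range');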
diff --git a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql
index ef0c298d2df..d93cbc279d9 100644
--- a/src/test/regress/sql/subscription.sql
+++ b/src/test/regress/sql/subscription.sql
@@ -139,6 +139,9 @@ RESET ROLE;
ALTER SUBSCRIPTION regress_testsub RENAME TO regress_testsub_foo;
ALTER SUBSCRIPTION regress_testsub_foo SET (synchronous_commit = local);
ALTER SUBSCRIPTION regress_testsub_foo SET (synchronous_commit = foobar);
+ALTER SUBSCRIPTION regress_testsub_foo SET (wal_receiver_timeout = '-1');
+ALTER SUBSCRIPTION regress_testsub_foo SET (wal_receiver_timeout = '80s');
+ALTER SUBSCRIPTION regress_testsub_foo SET (wal_receiver_timeout = 'foobar');
\dRs+
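These three ALTERs exercise validation of the new per-subscription wal_receiver_timeout option: a boundary value, an ordinary duration, and a junk string that should be rejected. Presumably the option is accepted at CREATE SUBSCRIPTION time as well, along these lines (subscription name hypothetical, connection string per this file's convention):

    CREATE SUBSCRIPTION regress_testsub2 CONNECTION 'dbname=regress_doesnotexist'
        PUBLICATION testpub WITH (connect = false, wal_receiver_timeout = '30s');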
diff --git a/src/test/regress/sql/triggers.sql b/src/test/regress/sql/triggers.sql
index cc878455ace..ea39817ee3d 100644
--- a/src/test/regress/sql/triggers.sql
+++ b/src/test/regress/sql/triggers.sql
@@ -1148,7 +1148,7 @@ drop function trigger_ddl_func();
--
-- Verify behavior of before and after triggers with INSERT...ON CONFLICT
--- DO UPDATE
+-- DO UPDATE and DO SELECT
--
create table upsert (key int4 primary key, color text);
@@ -1197,6 +1197,7 @@ insert into upsert values(5, 'purple') on conflict (key) do update set color = '
insert into upsert values(6, 'white') on conflict (key) do update set color = 'updated ' || upsert.color;
insert into upsert values(7, 'pink') on conflict (key) do update set color = 'updated ' || upsert.color;
insert into upsert values(8, 'yellow') on conflict (key) do update set color = 'updated ' || upsert.color;
+insert into upsert values(8, 'blue') on conflict (key) do select for update where upsert.color = 'yellow trig modified' returning old.*, new.*, upsert.*;
select * from upsert;
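The added line probes trigger interaction: DO SELECT FOR UPDATE performs no update, so the conflicting row arrives exactly as earlier triggers left it ('yellow trig modified'), and old.* and new.* in RETURNING presumably both show that same unchanged row. For reference, a hypothetical row-level logging trigger of the kind such tests observe:

    -- names invented here; logs each row-level firing on upsert
    create function upsert_note() returns trigger language plpgsql as $$
    begin
        raise notice 'trigger: % % on %', tg_when, tg_op, tg_table_name;
        return new;
    end $$;
    create trigger upsert_note_trg
        before insert or update on upsert
        for each row execute function upsert_note();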
diff --git a/src/test/regress/sql/updatable_views.sql b/src/test/regress/sql/updatable_views.sql
index c071fffc116..1635adde2d4 100644
--- a/src/test/regress/sql/updatable_views.sql
+++ b/src/test/regress/sql/updatable_views.sql
@@ -106,6 +106,12 @@ INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) DO UPDATE set a = excluded.
SELECT * FROM rw_view15;
INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) DO UPDATE set upper = 'blarg'; -- fails
SELECT * FROM rw_view15;
+INSERT INTO rw_view15 (a) VALUES (3)
+ ON CONFLICT (a) DO UPDATE SET a = excluded.a WHERE excluded.upper = 'UNSPECIFIED'
+ RETURNING old, new;
+INSERT INTO rw_view15 (a) VALUES (3)
+ ON CONFLICT (a) DO SELECT WHERE excluded.upper = 'UNSPECIFIED' RETURNING old, new;
+
SELECT * FROM rw_view15;
ALTER VIEW rw_view15 ALTER COLUMN upper SET DEFAULT 'NOT SET';
INSERT INTO rw_view15 (a) VALUES (4); -- should fail
@@ -1850,7 +1856,7 @@ insert into wcowrtest_v2 values (2, 'no such row in sometable');
drop view wcowrtest_v, wcowrtest_v2;
drop table wcowrtest, sometable;
--- Check INSERT .. ON CONFLICT DO UPDATE works correctly when the view's
+-- Check INSERT .. ON CONFLICT DO SELECT/UPDATE works correctly when the view's
-- columns are named and ordered differently than the underlying table's.
create table uv_iocu_tab (a text unique, b float);
insert into uv_iocu_tab values ('xyxyxy', 0);
@@ -1863,6 +1869,8 @@ select * from uv_iocu_tab;
insert into uv_iocu_view (a, b) values ('xyxyxy', 1)
on conflict (a) do update set b = excluded.b;
select * from uv_iocu_tab;
+insert into uv_iocu_view (a, b) values ('xyxyxy', 1)
+ on conflict (a) do select where uv_iocu_view.c = 2 and excluded.c = 2 returning *;
-- OK to access view columns that are not present in underlying base
-- relation in the ON CONFLICT portion of the query
@@ -1899,6 +1907,11 @@ insert into uv_iocu_view (aa,bb) values (1,'y')
and excluded.bb != ''
and excluded.cc is not null;
select * from uv_iocu_view;
+explain (costs off)
+insert into uv_iocu_view (aa,bb) values (1,'Rejected: (y,1,"(1,y)")')
+ on conflict (aa) do select where uv_iocu_view.* = excluded.* returning *;
+insert into uv_iocu_view (aa,bb) values (1,'Rejected: (y,1,"(1,y)")')
+ on conflict (aa) do select where uv_iocu_view.* = excluded.* returning *;
-- Test omitting a column of the base relation
delete from uv_iocu_view;
@@ -1911,11 +1924,15 @@ alter table uv_iocu_tab alter column b set default 'table default';
insert into uv_iocu_view (aa) values (1)
on conflict (aa) do update set bb = 'Rejected: '||excluded.*;
select * from uv_iocu_view;
+insert into uv_iocu_view (aa) values (1)
+ on conflict (aa) do select returning *;
alter view uv_iocu_view alter column bb set default 'view default';
insert into uv_iocu_view (aa) values (1)
on conflict (aa) do update set bb = 'Rejected: '||excluded.*;
select * from uv_iocu_view;
+insert into uv_iocu_view (aa) values (1)
+ on conflict (aa) do select returning *;
-- Should fail to update non-updatable columns
insert into uv_iocu_view (aa) values (1)
@@ -1924,7 +1941,7 @@ insert into uv_iocu_view (aa) values (1)
drop view uv_iocu_view;
drop table uv_iocu_tab;
--- ON CONFLICT DO UPDATE permissions checks
+-- ON CONFLICT DO SELECT/UPDATE permissions checks
create user regress_view_user1;
create user regress_view_user2;
@@ -1948,6 +1965,10 @@ insert into rw_view1 values ('zzz',2.0,1)
on conflict (aa) do update set bb = rw_view1.bb||'xxx'; -- OK
insert into rw_view1 values ('zzz',2.0,1)
on conflict (aa) do update set cc = 3.0; -- Not allowed
+insert into rw_view1 values ('yyy',2.0,1)
+ on conflict (aa) do select for update returning cc; -- Not allowed
+insert into rw_view1 values ('yyy',2.0,1)
+ on conflict (aa) do select for update returning aa, bb;
reset session authorization;
select * from base_tbl;
@@ -1960,9 +1981,13 @@ set session authorization regress_view_user2;
create view rw_view2 as select b as bb, c as cc, a as aa from base_tbl;
insert into rw_view2 (aa,bb) values (1,'xxx')
on conflict (aa) do update set bb = excluded.bb; -- Not allowed
+insert into rw_view2 (aa,bb) values (1,'xxx')
+ on conflict (aa) do select returning 1; -- Not allowed
create view rw_view3 as select b as bb, a as aa from base_tbl;
insert into rw_view3 (aa,bb) values (1,'xxx')
on conflict (aa) do update set bb = excluded.bb; -- OK
+insert into rw_view3 (aa,bb) values (1,'xxx')
+ on conflict (aa) do select returning aa, bb; -- OK
reset session authorization;
select * from base_tbl;
@@ -1970,6 +1995,8 @@ set session authorization regress_view_user2;
create view rw_view4 as select aa, bb, cc FROM rw_view1;
insert into rw_view4 (aa,bb) values (1,'yyy')
on conflict (aa) do update set bb = excluded.bb; -- Not allowed
+insert into rw_view4 (aa,bb) values (1,'yyy')
+ on conflict (aa) do select returning 1; -- Not allowed
create view rw_view5 as select aa, bb FROM rw_view1;
insert into rw_view5 (aa,bb) values (1,'yyy')
on conflict (aa) do update set bb = excluded.bb; -- OK
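Across the updatable-view cases the invariant is that conflict arbitration happens against the base table, while DO SELECT's WHERE and RETURNING operate in the view's coordinate system (renamed, reordered, or computed columns), with column-level privileges checked per view column. A compact hypothetical restatement:

    create table base_t (a text unique, b float);
    create view v_t (x, y) as select b, a from base_t;  -- renamed, reordered
    insert into v_t (y, x) values ('k', 1.0)
        on conflict (y) do select where v_t.x >= 0 returning *;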
diff --git a/src/test/ssl/t/SSL/Server.pm b/src/test/ssl/t/SSL/Server.pm
index a023fa6bdee..4c101a26503 100644
--- a/src/test/ssl/t/SSL/Server.pm
+++ b/src/test/ssl/t/SSL/Server.pm
@@ -324,7 +324,7 @@ sub switch_server_cert
$node->append_conf('sslconfig.conf', $backend->set_server_cert(\%params));
# use lists of ECDH curves and cipher suites for syntax testing
$node->append_conf('sslconfig.conf',
- 'ssl_groups=X25519:prime256v1:secp521r1');
+ 'ssl_groups=prime256v1:secp521r1');
$node->append_conf('sslconfig.conf',
'ssl_tls13_ciphers=TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256');
diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl
index d7e62e4d488..7d41715ed81 100644
--- a/src/test/subscription/t/001_rep_changes.pl
+++ b/src/test/subscription/t/001_rep_changes.pl
@@ -353,7 +353,8 @@
# Note that the current location of the log file is not grabbed immediately
# after reloading the configuration, but after sending one SQL command to
# the node so as we are sure that the reloading has taken effect.
-my $log_location = -s $node_subscriber->logfile;
+my $log_location_pub = -s $node_publisher->logfile;
+my $log_location_sub = -s $node_subscriber->logfile;
$node_publisher->safe_psql('postgres',
"UPDATE tab_full_pk SET b = 'quux' WHERE a = 1");
@@ -363,7 +364,7 @@
$node_publisher->wait_for_catchup('tap_sub');
-my $logfile = slurp_file($node_subscriber->logfile, $log_location);
+my $logfile = slurp_file($node_subscriber->logfile, $log_location_sub);
like(
$logfile,
qr/conflict detected on relation "public.tab_full_pk": conflict=update_missing.*\n.*DETAIL:.* Could not find the row to be updated: remote row \(1, quux\), replica identity \(a\)=\(1\)/m,
@@ -445,11 +446,12 @@
#
# First, confirm that no such QUERY STATISTICS message appears before enabling
# log_statement_stats.
-$logfile = slurp_file($node_publisher->logfile, $log_location);
+$logfile = slurp_file($node_publisher->logfile, $log_location_pub);
unlike(
$logfile,
qr/QUERY STATISTICS/,
'log_statement_stats has not been enabled yet');
+$log_location_pub = -s $node_publisher->logfile;
# check that change of connection string and/or publication list causes
# restart of subscription workers. We check the state along with
@@ -476,7 +478,7 @@
# Check that the expected QUERY STATISTICS message appears,
# which shows that log_statement_stats=on from the CONNECTION string
# was correctly passed through to and honored by the walsender.
-$logfile = slurp_file($node_publisher->logfile, $log_location);
+$logfile = slurp_file($node_publisher->logfile, $log_location_pub);
like(
$logfile,
qr/QUERY STATISTICS/,
@@ -538,13 +540,13 @@
# Note that the current location of the log file is not grabbed immediately
# after reloading the configuration, but after sending one SQL command to
# the node so that we are sure that the reloading has taken effect.
-$log_location = -s $node_publisher->logfile;
+$log_location_pub = -s $node_publisher->logfile;
$node_publisher->safe_psql('postgres', "INSERT INTO tab_notrep VALUES (11)");
$node_publisher->wait_for_catchup('tap_sub');
-$logfile = slurp_file($node_publisher->logfile, $log_location);
+$logfile = slurp_file($node_publisher->logfile, $log_location_pub);
like(
$logfile,
qr/skipped replication of an empty transaction with XID/,
diff --git a/src/timezone/zic.c b/src/timezone/zic.c
index 8dcc7b337a7..2f36486a350 100644
--- a/src/timezone/zic.c
+++ b/src/timezone/zic.c
@@ -1395,19 +1395,19 @@ gethms(char const *string, char const *errstring)
break;
case 8:
ok = '0' <= xr && xr <= '9';
- /* fallthrough */
+ pg_fallthrough;
case 7:
ok &= ssx == '.';
if (ok && noise)
warning(_("fractional seconds rejected by"
" pre-2018 versions of zic"));
- /* fallthrough */
+ pg_fallthrough;
case 5:
ok &= mmx == ':';
- /* fallthrough */
+ pg_fallthrough;
case 3:
ok &= hhx == ':';
- /* fallthrough */
+ pg_fallthrough;
case 1:
break;
}
diff --git a/src/tools/pg_bsd_indent/indent.c b/src/tools/pg_bsd_indent/indent.c
index 2622cc6227a..6e550ff310f 100644
--- a/src/tools/pg_bsd_indent/indent.c
+++ b/src/tools/pg_bsd_indent/indent.c
@@ -352,7 +352,7 @@ main(int argc, char **argv)
}
goto sw_buffer;
}
- /* FALLTHROUGH */
+ pg_fallthrough;
default: /* it is the start of a normal statement */
{
int remove_newlines;
@@ -922,7 +922,7 @@ main(int argc, char **argv)
case structure:
if (ps.p_l_follow > 0)
goto copy_id;
- /* FALLTHROUGH */
+ pg_fallthrough;
case decl: /* we have a declaration type (int, etc.) */
parse(decl); /* let parser worry about indentation */
if (ps.last_token == rparen && ps.tos <= 1) {
diff --git a/src/tools/pg_bsd_indent/parse.c b/src/tools/pg_bsd_indent/parse.c
index e707da639c7..94cea724393 100644
--- a/src/tools/pg_bsd_indent/parse.c
+++ b/src/tools/pg_bsd_indent/parse.c
@@ -96,7 +96,7 @@ parse(int tk) /* tk: the code for the construct scanned */
*/
ps.i_l_follow = ps.il[ps.tos--];
/* the rest is the same as for dolit and forstmt */
- /* FALLTHROUGH */
+ pg_fallthrough;
case dolit: /* 'do' */
case forstmt: /* for (...) */
ps.p_stack[++ps.tos] = tk;
@@ -303,7 +303,7 @@ reduce(void)
case swstmt:
/* */
case_ind = ps.cstk[ps.tos - 1];
- /* FALLTHROUGH */
+ pg_fallthrough;
case decl: /* finish of a declaration */
case elsehead:
/* < else> */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 34374df0d67..bee95caacaf 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -125,6 +125,7 @@ AnlIndexData
AnyArrayType
Append
AppendPath
+AppendPathInput
AppendRelInfo
AppendState
ApplyErrorCallbackArg
@@ -710,6 +711,7 @@ EachState
Edge
EditableObjectType
ElementsState
+ElidedNode
EnableTimeoutParams
EndDataPtrType
EndDirectModify_function
@@ -803,6 +805,7 @@ ExtensibleNodeMethods
ExtensionControlFile
ExtensionInfo
ExtensionLocation
+ExtensionSiblingCache
ExtensionVersionInfo
FDWCollateState
FD_SET
@@ -1837,9 +1840,9 @@ OldToNewMappingData
OnCommitAction
OnCommitItem
OnConflictAction
+OnConflictActionState
OnConflictClause
OnConflictExpr
-OnConflictSetState
OpClassCacheEnt
OpExpr
OpFamilyMember
@@ -2488,6 +2491,7 @@ RecordCacheArrayEntry
RecordCacheEntry
RecordCompareData
RecordIOData
+RecoveryConflictReason
RecoveryLockEntry
RecoveryLockXidEntry
RecoveryPauseState
@@ -2804,6 +2808,7 @@ SharedTypmodTableEntry
Sharedsort
ShellTypeInfo
ShippableCacheEntry
ShippableCacheKey
+ShmemAllocatorData
ShmemIndexEnt
ShutdownForeignScan_function
@@ -2928,6 +2933,7 @@ SubLink
SubLinkType
SubOpts
SubPlan
+SubPlanRTInfo
SubPlanState
SubRelInfo
SubRemoveRels
@@ -3736,6 +3742,7 @@ gistxlogPageReuse
gistxlogPageSplit
gistxlogPageUpdate
grouping_sets_data
+growable_trgm_array
gseg_picksplit_item
gss_OID_set
gss_buffer_desc
@@ -3974,6 +3981,45 @@ pg_uuid_t
pg_wchar
pg_wchar_tbl
pgp_armor_headers_state
+pgpa_advice_item
+pgpa_advice_tag_type
+pgpa_advice_target
+pgpa_collected_advice
+pgpa_identifier
+pgpa_index_target
+pgpa_index_type
+pgpa_itm_type
+pgpa_jo_outcome
+pgpa_join_class
+pgpa_join_member
+pgpa_join_state
+pgpa_join_strategy
+pgpa_join_unroller
+pgpa_local_advice
+pgpa_local_advice_chunk
+pgpa_output_context
+pgpa_plan_walker_context
+pgpa_planner_state
+pgpa_qf_type
+pgpa_query_feature
+pgpa_ri_checker
+pgpa_ri_checker_key
+pgpa_scan
+pgpa_scan_strategy
+pgpa_shared_advice
+pgpa_shared_advice_chunk
+pgpa_shared_state
+pgpa_sj_unique_rel
+pgpa_target_type
+pgpa_trove
+pgpa_trove_entry
+pgpa_trove_entry_element
+pgpa_trove_entry_hash
+pgpa_trove_entry_key
+pgpa_trove_lookup_type
+pgpa_trove_result
+pgpa_trove_slice
+pgpa_unrolled_join
pgsocket
pgsql_thing_t
pgssEntry
@@ -4057,6 +4103,7 @@ qsort_comparator
query_pathkeys_callback
radius_attribute
radius_packet
+RadixSortInfo
rangeTableEntry_used_context
rank_context
rbt_allocfunc
@@ -4264,9 +4311,12 @@ va_list
vacuumingOptions
validate_string_relopt
varatt_expanded
+varatt_external
+varatt_indirect
varattrib_1b
varattrib_1b_e
varattrib_4b
+varlena
vartag_external
vbits
verifier_context