Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion contrib/pg_stat_statements/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
MODULE_big = pg_stat_statements
OBJS = \
$(WIN32RES) \
pg_stat_statements.o
pg_stat_statements.o \
hll.o

EXTENSION = pg_stat_statements
DATA = pg_stat_statements--1.4.sql \
Expand Down
191 changes: 191 additions & 0 deletions contrib/pg_stat_statements/hll.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/*-------------------------------------------------------------------------
*
* hll.c
* Sliding HyperLogLog cardinality estimator
*
* Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group
*
* Implements https://hal.science/hal-00465313/document
*
* Based on Hideaki Ohno's C++ implementation. This is probably not ideally
* suited to estimating the cardinality of very large sets; in particular, we
* have not attempted to further optimize the implementation as described in
* the Heule, Nunkesser and Hall paper "HyperLogLog in Practice: Algorithmic
* Engineering of a State of The Art Cardinality Estimation Algorithm".
*
* A sparse representation of HyperLogLog state is used, with fixed space
* overhead.
*
* The copyright terms of Ohno's original version (the MIT license) follow.
*
* IDENTIFICATION
* contrib/pg_stat_statements/hll.c
*
*-------------------------------------------------------------------------
*/

/*
* Copyright (c) 2013 Hideaki Ohno <hide.o.j55{at}gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the 'Software'), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <math.h>

#include "postgres.h"
#include "funcapi.h"
#include "port/pg_bitutils.h"
#include "utils/timestamp.h"
#include "hll.h"


/* 2^32 and its negation; used by the large range correction in estimateSHLL() */
#define POW_2_32 (4294967296.0)
#define NEG_POW_2_32 (-4294967296.0)

/*
 * Numerator of the raw estimate: the factor 0.7213 / (1 + 1.079 / m)
 * multiplied by m^2, where m is HLL_N_REGISTERS.  estimateSHLL() divides it
 * by the sum of 2^(-rank) taken over all registers.
 */
#define ALPHA_MM ((0.7213 / (1.0 + 1.079 / HLL_N_REGISTERS)) * HLL_N_REGISTERS * HLL_N_REGISTERS)

/*
 * Helper for addSHLL().
 *
 * Reports the position of the first set bit of x, reading from the most
 * significant bit towards the least significant one and counting from 1.
 * Only the first b bits are of interest: if none of them is set (which
 * includes x == 0), b + 1 is returned instead.
 *
 * Example (when considering the first 10 bits of x):
 *
 * rho(x = 0b1000000000) returns 1
 * rho(x = 0b0010000000) returns 3
 * rho(x = 0b0000000000) returns b + 1
 *
 * The caller uses the result to pick the slot to update within a register.
 */
static inline uint8
rho(uint32 x, uint8 b)
{
	uint8		rank;

	/* No bit set at all: report "not found". */
	if (x == 0)
		return b + 1;

	rank = 32 - pg_leftmost_one_pos32(x);

	/* A set bit past the first b bits counts as "not found" as well. */
	return (rank > b) ? b + 1 : rank;
}

/*
 * Reset the sliding HyperLogLog state: every timestamp slot of every
 * register is cleared.
 */
void
initSHLL(HyperLogLogState *cState)
{
	/* estimateSHLL() treats a zero timestamp as "slot never written". */
	memset(&cState->regs[0], 0, sizeof cState->regs);
}

/*
 * Records one element in the estimator, identified by a caller-supplied hash.
 *
 * The value passed in must be a real hash (typically produced by hash_any()):
 * the algorithm depends on the bits of the hash being uniformly distributed
 * for each distinct original value, both for choosing a register and for the
 * bit pattern observed in the rest of the hash.
 *
 * The slot selected by the hash is stamped with the current time, which is
 * what lets estimateSHLL() restrict the estimate to a recent time window.
 */
void
addSHLL(HyperLogLogState *cState, uint32 hash)
{
	/* The leading bits of the hash select the register (zero based). */
	uint32		reg = hash >> HLL_C_BITS;

	/* rho() counts from 1, slots within a register are numbered from 0. */
	uint8		slot = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS) - 1;

	Assert(slot <= HLL_C_BITS);
	cState->regs[reg][slot] = GetCurrentTimestamp();
}

/*
 * Returns the 1-based index of the highest slot of "reg" whose timestamp is
 * not older than "since", or 0 if no slot qualifies.
 */
static uint8
getMaximum(const TimestampTz *reg, TimestampTz since)
{
	size_t		slot = HLL_C_BITS + 1;

	/* Walk down from the top: the first hit is the highest qualifying slot. */
	while (slot > 0)
	{
		if (reg[slot - 1] >= since)
			return (uint8) slot;
		slot--;
	}

	return 0;
}


/*
 * Estimates the cardinality of the elements added during the last "duration"
 * seconds.  Passing (time_t) -1 as duration takes every recorded element
 * into account.
 */
double
estimateSHLL(HyperLogLogState *cState, time_t duration)
{
	TimestampTz cutoff;
	double		harmonic_sum = 0.0;
	double		estimate;
	int			empty_regs = 0;
	size_t		reg;

	/*
	 * A zero timestamp marks a slot that was never written, so to cover the
	 * whole range the cutoff has to start at 1 rather than 0.
	 */
	if (duration == (time_t) -1)
		cutoff = 1;
	else
		cutoff = GetCurrentTimestamp() - duration * USECS_PER_SEC;

	for (reg = 0; reg < HLL_N_REGISTERS; reg++)
	{
		uint8		rank = getMaximum(cState->regs[reg], cutoff);

		/* Registers without any qualifying slot feed the small range correction. */
		if (rank == 0)
			empty_regs++;

		harmonic_sum += 1.0 / pow(2.0, rank);
	}

	/* "Raw" HyperLogLog estimate (E in the HyperLogLog paper) */
	estimate = ALPHA_MM / harmonic_sum;

	if (estimate <= (5.0 / 2.0) * HLL_N_REGISTERS)
	{
		/* Small range correction; only applicable if some register is empty */
		if (empty_regs != 0)
			estimate = HLL_N_REGISTERS * log((double) HLL_N_REGISTERS /
											 empty_regs);
	}
	else if (estimate > (1.0 / 30.0) * POW_2_32)
	{
		/* Large range correction */
		estimate = NEG_POW_2_32 * log(1.0 - (estimate / POW_2_32));
	}

	return estimate;
}

1 change: 1 addition & 0 deletions contrib/pg_stat_statements/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# hll.c provides the sliding HyperLogLog estimator used by pg_stat_statements.c
pg_stat_statements_sources = files(
  'pg_stat_statements.c',
  'hll.c',
)

if host_system == 'windows'
Expand Down
7 changes: 7 additions & 0 deletions contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,10 @@ CREATE VIEW pg_stat_statements AS
SELECT * FROM pg_stat_statements(true);

GRANT SELECT ON pg_stat_statements TO PUBLIC;

-- Estimated number of distinct buffer pages accessed during the last
-- "duration" seconds.  A duration of -1 covers everything recorded so far.
-- The result is NULL if the tracking state is not available in the backend.
CREATE FUNCTION pg_bufferpool_working_set_size_pages(duration integer)
RETURNS integer
AS 'MODULE_PATHNAME', 'pg_bufferpool_working_set_size_pages'
LANGUAGE C PARALLEL SAFE;

-- Any role may query the estimate.
GRANT EXECUTE ON FUNCTION pg_bufferpool_working_set_size_pages(integer) TO PUBLIC;
54 changes: 54 additions & 0 deletions contrib/pg_stat_statements/pg_stat_statements.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include "common/int.h"
#include "executor/instrument.h"
#include "funcapi.h"
#include "hll.h"
#include "jit/jit.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
Expand All @@ -71,6 +72,8 @@
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
#include "storage/bufmgr.h"
#include "storage/shmem.h"

PG_MODULE_MAGIC_EXT(
.name = "pg_stat_statements",
Expand All @@ -91,6 +94,10 @@ static const uint32 PGSS_FILE_HEADER = 0x20250731;
/* PostgreSQL major version number, changes in which invalidate all entries */
static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;

/*
 * Sliding HyperLogLog state for buffer pool working set size estimation.
 * Stays NULL until pgss_shmem_startup() points it at the shared memory
 * structure.
 */
static HyperLogLogState *BufferPoolWss = NULL;

/* Implementation of WssAddHashHook; installed in pgss_shmem_request() */
static void orion_wss_add_hash(uint32 hash);

/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration) (1.0)
#define USAGE_INIT (1.0) /* including initial planning */
Expand Down Expand Up @@ -505,6 +512,12 @@ pgss_shmem_request(void)

RequestAddinShmemSpace(pgss_memsize());
RequestNamedLWLockTranche("pg_stat_statements", 1);

/* Request shared memory for buffer pool WSS HLL state */
RequestAddinShmemSpace(sizeof(HyperLogLogState));

/* Register the WSS tracking hook */
WssAddHashHook = orion_wss_add_hash;
}

/*
Expand Down Expand Up @@ -564,6 +577,14 @@ pgss_shmem_startup(void)
&info,
HASH_ELEM | HASH_BLOBS);

/* Initialize buffer pool working set size HLL state */
BufferPoolWss = (HyperLogLogState *)
ShmemInitStruct("Orion Buffer Pool WSS",
sizeof(HyperLogLogState),
&found);
if (!found)
initSHLL(BufferPoolWss);

LWLockRelease(AddinShmemInitLock);

/*
Expand Down Expand Up @@ -3076,3 +3097,36 @@ comp_location(const void *a, const void *b)

return pg_cmp_s32(l, r);
}

/*
 * WSS hook function: add buffer tag hash to HLL estimator.
 * Called from BufferAlloc() for every buffer allocation.
 *
 * The hook is installed in pgss_shmem_request(), whereas BufferPoolWss is
 * only assigned later in pgss_shmem_startup().  The pointer can therefore
 * still be NULL when the hook fires; such calls are ignored instead of
 * dereferencing a NULL pointer.
 */
static void
orion_wss_add_hash(uint32 hash)
{
	if (BufferPoolWss == NULL)
		return;

	addSHLL(BufferPoolWss, hash);
}

/*
* SQL function: pg_bufferpool_working_set_size_pages
*
* Returns the estimated number of unique buffer pages accessed
* in the last 'duration' seconds.
*/
PG_FUNCTION_INFO_V1(pg_bufferpool_working_set_size_pages);

Datum
pg_bufferpool_working_set_size_pages(PG_FUNCTION_ARGS)
{
int32 result;
time_t duration;

if (BufferPoolWss == NULL)
PG_RETURN_NULL();

duration = (time_t) PG_GETARG_INT32(0);
result = (int32) estimateSHLL(BufferPoolWss, duration);

PG_RETURN_INT32(result);
}
2 changes: 1 addition & 1 deletion src/backend/lib/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ backend_sources += files(
'integerset.c',
'knapsack.c',
'pairingheap.c',
'rbtree.c',
'rbtree.c'
)
2 changes: 1 addition & 1 deletion src/backend/port/sysv_shmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ round_off_mapping_sizes_for_hugepages(MemoryMappingSizes *mapping, int hugepages
return;

if (mapping->shmem_req_size % hugepagesize != 0)
mapping->shmem_req_size += add_size(mapping->shmem_req_size,
mapping->shmem_req_size = add_size(mapping->shmem_req_size,
hugepagesize - (mapping->shmem_req_size % hugepagesize));

if (mapping->shmem_reserved % hugepagesize != 0)
Expand Down
11 changes: 11 additions & 0 deletions src/backend/storage/buffer/bufmgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,13 @@ typedef struct SMgrSortArray
SMgrRelation srel;
} SMgrSortArray;

/*
 * Hook for working set size tracking.
 * Initially NULL - extensions set this to enable tracking.
 *
 * When set, BufferAlloc() calls it with the hash code of the requested
 * buffer tag, before the block is looked up in the buffer mapping table.
 */
WssAddHashHook_type WssAddHashHook = NULL;


/* GUC variables */
bool zero_damaged_pages = false;
int bgwriter_lru_maxpages = 100;
Expand Down Expand Up @@ -2124,6 +2131,10 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
newHash = BufTableHashCode(&newTag);
newPartitionLock = BufMappingPartitionLock(newHash);

/* Track buffer access for working set size estimation */
if (likely(WssAddHashHook != NULL))
WssAddHashHook(newHash);

/* see if the block is in the buffer pool already */
LWLockAcquire(newPartitionLock, LW_SHARED);
existing_buf_id = BufTableLookup(&newTag, newHash);
Expand Down
Loading