/*-------------------------------------------------------------------------
 *
 * tableam.h
 *    POSTGRES table access method definitions.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/tableam.h
 *
 * NOTES
 *    See tableam.sgml for higher level documentation.
 *
 *-------------------------------------------------------------------------
 */
#ifndef TABLEAM_H
#define TABLEAM_H

#include "access/relscan.h"
#include "access/sdir.h"
#include "utils/guc.h"
#include "utils/rel.h"
#include "utils/snapshot.h"


#define DEFAULT_TABLE_ACCESS_METHOD "heap"

/* GUCs */
extern char *default_table_access_method;
extern bool synchronize_seqscans;


struct BulkInsertStateData;
struct IndexInfo;
struct SampleScanState;
struct TBMIterateResult;
struct VacuumParams;
struct ValidateIndexState;


/*
 * Bitmask values for the flags argument to the scan_begin callback.
 */
typedef enum ScanOptions
{
    /* one of SO_TYPE_* may be specified */
    SO_TYPE_SEQSCAN = 1 << 0,
    SO_TYPE_BITMAPSCAN = 1 << 1,
    SO_TYPE_SAMPLESCAN = 1 << 2,
    SO_TYPE_ANALYZE = 1 << 3,
    SO_TYPE_TIDSCAN = 1 << 8,

    /* several of SO_ALLOW_* may be specified */
    /* allow or disallow use of access strategy */
    SO_ALLOW_STRAT = 1 << 4,
    /* report location to syncscan logic? */
    SO_ALLOW_SYNC = 1 << 5,
    /* verify visibility page-at-a-time? */
    SO_ALLOW_PAGEMODE = 1 << 6,

    /* unregister snapshot at scan end? */
    SO_TEMP_SNAPSHOT = 1 << 7
} ScanOptions;

/*
 * Result codes for table_{update,delete,lock_tuple}, and for visibility
 * routines inside table AMs.
 */
typedef enum TM_Result
{
    /*
     * Signals that the action succeeded (i.e. update/delete performed, lock
     * was acquired)
     */
    TM_Ok,

    /* The affected tuple wasn't visible to the relevant snapshot */
    TM_Invisible,

    /* The affected tuple was already modified by the calling backend */
    TM_SelfModified,

    /*
     * The affected tuple was updated by another transaction. This includes
     * the case where tuple was moved to another partition.
     */
    TM_Updated,

    /* The affected tuple was deleted by another transaction */
    TM_Deleted,

    /*
     * The affected tuple is currently being modified by another session. This
     * will only be returned if table_(update/delete/lock_tuple) are
     * instructed not to wait.
     */
    TM_BeingModified,

    /* lock couldn't be acquired, action skipped. Only used by lock_tuple */
    TM_WouldBlock
} TM_Result;

/*
 * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
 * because the target tuple is already outdated, they fill in this struct to
 * provide information to the caller about what happened.
 *
 * ctid is the target's ctid link: it is the same as the target's TID if the
 * target was deleted, or the location of the replacement tuple if the target
 * was updated.
 *
 * xmax is the outdating transaction's XID. If the caller wants to visit the
 * replacement tuple, it must check that this matches before believing the
 * replacement is really a match.
 *
 * cmax is the outdating command's CID, but only when the failure code is
 * TM_SelfModified (i.e., something in the current transaction outdated the
 * tuple); otherwise cmax is zero. (We make this restriction because
 * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
 * transactions.)
 */
typedef struct TM_FailureData
{
    ItemPointerData ctid;
    TransactionId xmax;
    CommandId   cmax;
    bool        traversed;
} TM_FailureData;

/* "options" flag bits for table_tuple_insert */
#define TABLE_INSERT_SKIP_WAL       0x0001
#define TABLE_INSERT_SKIP_FSM       0x0002
#define TABLE_INSERT_FROZEN         0x0004
#define TABLE_INSERT_NO_LOGICAL     0x0008

/* flag bits for table_tuple_lock */
/* Follow tuples whose update is in progress if lock modes don't conflict */
#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0)
/* Follow update chain and lock latest version of tuple */
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION       (1 << 1)


/* Typedef for callback function for table_index_build_scan */
typedef void (*IndexBuildCallback) (Relation index,
                                    HeapTuple htup,
                                    Datum *values,
                                    bool *isnull,
                                    bool tupleIsAlive,
                                    void *state);
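
/*
 * Illustrative sketch, not part of this header: the shape of an
 * IndexBuildCallback an index build might hand to table_index_build_scan().
 * ExampleBuildState and its counter are hypothetical names used only for
 * this example; the guard keeps the sketch from ever being compiled.
 */
#ifdef TABLEAM_EXAMPLES
typedef struct ExampleBuildState
{
    double      indtuples;      /* # of tuples handed to the callback */
} ExampleBuildState;

static void
example_build_callback(Relation index, HeapTuple htup, Datum *values,
                       bool *isnull, bool tupleIsAlive, void *state)
{
    ExampleBuildState *buildstate = (ExampleBuildState *) state;

    /* a real callback would form an index tuple from values/isnull here */
    buildstate->indtuples += 1;
}
#endif                          /* TABLEAM_EXAMPLES */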
/*
 * API struct for a table AM. Note this must be allocated in a
 * server-lifetime manner, typically as a static const struct, which then gets
 * returned by FormData_pg_am.amhandler.
 *
 * In most cases it's not appropriate to call the callbacks directly, use the
 * table_* wrapper functions instead.
 *
 * GetTableAmRoutine() asserts that required callbacks are filled in, remember
 * to update when adding a callback.
 */
typedef struct TableAmRoutine
{
    /* this must be set to T_TableAmRoutine */
    NodeTag     type;


    /* ------------------------------------------------------------------------
     * Slot related callbacks.
     * ------------------------------------------------------------------------
     */

    /*
     * Return slot implementation suitable for storing a tuple of this AM.
     */
    const TupleTableSlotOps *(*slot_callbacks) (Relation rel);


    /* ------------------------------------------------------------------------
     * Table scan callbacks.
     * ------------------------------------------------------------------------
     */

    /*
     * Start a scan of `rel`. The callback has to return a TableScanDesc,
     * which will typically be embedded in a larger, AM specific, struct.
     *
     * If nkeys != 0, the results need to be filtered by those scan keys.
     *
     * pscan, if not NULL, will have already been initialized with
     * parallelscan_initialize(), and has to be for the same relation. Will
     * only be set coming from table_beginscan_parallel().
     *
     * `flags` is a bitmask indicating the type of scan (ScanOptions's
     * SO_TYPE_*, currently only one may be specified), options controlling
     * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
     * specified, an AM may ignore unsupported ones) and whether the snapshot
     * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
     */
    TableScanDesc (*scan_begin) (Relation rel,
                                 Snapshot snapshot,
                                 int nkeys, struct ScanKeyData *key,
                                 ParallelTableScanDesc pscan,
                                 uint32 flags);

    /*
     * Release resources and deallocate scan. If TableScanDesc.temp_snap,
     * TableScanDesc.rs_snapshot needs to be unregistered.
     */
    void        (*scan_end) (TableScanDesc scan);

    /*
     * Restart relation scan. If set_params is set to true, allow_{strat,
     * sync, pagemode} (see scan_begin) changes should be taken into account.
     */
    void        (*scan_rescan) (TableScanDesc scan, struct ScanKeyData *key,
                                bool set_params, bool allow_strat,
                                bool allow_sync, bool allow_pagemode);

    /*
     * Return next tuple from `scan`, store in slot.
     */
    bool        (*scan_getnextslot) (TableScanDesc scan,
                                     ScanDirection direction,
                                     TupleTableSlot *slot);


    /* ------------------------------------------------------------------------
     * Parallel table scan related functions.
     * ------------------------------------------------------------------------
     */

    /*
     * Estimate the size of shared memory needed for a parallel scan of this
     * relation. The snapshot does not need to be accounted for.
     */
    Size        (*parallelscan_estimate) (Relation rel);

    /*
     * Initialize ParallelTableScanDesc for a parallel scan of this relation.
     * `pscan` will be sized according to parallelscan_estimate() for the same
     * relation.
     */
    Size        (*parallelscan_initialize) (Relation rel,
                                            ParallelTableScanDesc pscan);

    /*
     * Reinitialize `pscan` for a new scan. `rel` will be the same relation as
     * when `pscan` was initialized by parallelscan_initialize.
     */
    void        (*parallelscan_reinitialize) (Relation rel,
                                              ParallelTableScanDesc pscan);


    /* ------------------------------------------------------------------------
     * Index Scan Callbacks
     * ------------------------------------------------------------------------
     */

    /*
     * Prepare to fetch tuples from the relation, as needed when fetching
     * tuples for an index scan. The callback has to return an
     * IndexFetchTableData, which the AM will typically embed in a larger
     * structure with additional information.
     *
     * Tuples for an index scan can then be fetched via index_fetch_tuple.
     */
    struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);

    /*
     * Reset index fetch. Typically this will release cross index fetch
     * resources held in IndexFetchTableData.
     */
    void        (*index_fetch_reset) (struct IndexFetchTableData *data);

    /*
     * Release resources and deallocate index fetch.
     */
    void        (*index_fetch_end) (struct IndexFetchTableData *data);

    /*
     * Fetch tuple at `tid` into `slot`, after doing a visibility test
     * according to `snapshot`. If a tuple was found and passed the visibility
     * test, return true, false otherwise.
     *
     * Note that AMs that do not necessarily update indexes when indexed
     * columns do not change, need to return the current/correct version of
     * the tuple that is visible to the snapshot, even if the tid points to an
     * older version of the tuple.
     *
     * *call_again is false on the first call to index_fetch_tuple for a tid.
     * If there potentially is another tuple matching the tid, *call_again
     * needs to be set to true by index_fetch_tuple, signalling to the caller
     * that index_fetch_tuple should be called again for the same tid.
     *
     * *all_dead, if all_dead is not NULL, should be set to true by
     * index_fetch_tuple iff it is guaranteed that no backend needs to see
     * that tuple. Index AMs can use that to avoid returning that tid in
     * future searches.
     */
    bool        (*index_fetch_tuple) (struct IndexFetchTableData *scan,
                                      ItemPointer tid,
                                      Snapshot snapshot,
                                      TupleTableSlot *slot,
                                      bool *call_again, bool *all_dead);


    /* ------------------------------------------------------------------------
     * Callbacks for non-modifying operations on individual tuples
     * ------------------------------------------------------------------------
     */

    /*
     * Fetch tuple at `tid` into `slot`, after doing a visibility test
     * according to `snapshot`. If a tuple was found and passed the visibility
     * test, returns true, false otherwise.
     */
    bool        (*tuple_fetch_row_version) (Relation rel,
                                            ItemPointer tid,
                                            Snapshot snapshot,
                                            TupleTableSlot *slot);

    /*
     * Is tid valid for a scan of this relation.
     */
    bool        (*tuple_tid_valid) (TableScanDesc scan,
                                    ItemPointer tid);

    /*
     * Return the latest version of the tuple at `tid`, by updating `tid` to
     * point at the newest version.
     */
    void        (*tuple_get_latest_tid) (TableScanDesc scan,
                                         ItemPointer tid);

    /*
     * Does the tuple in `slot` satisfy `snapshot`? The slot needs to be of
     * the appropriate type for the AM.
     */
    bool        (*tuple_satisfies_snapshot) (Relation rel,
                                             TupleTableSlot *slot,
                                             Snapshot snapshot);

    /* see table_compute_xid_horizon_for_tuples() */
    TransactionId (*compute_xid_horizon_for_tuples) (Relation rel,
                                                     ItemPointerData *items,
                                                     int nitems);


    /* ------------------------------------------------------------------------
     * Manipulations of physical tuples.
     * ------------------------------------------------------------------------
     */

    /* see table_tuple_insert() for reference about parameters */
    void        (*tuple_insert) (Relation rel, TupleTableSlot *slot,
                                 CommandId cid, int options,
                                 struct BulkInsertStateData *bistate);

    /* see table_tuple_insert_speculative() for reference about parameters */
    void        (*tuple_insert_speculative) (Relation rel,
                                             TupleTableSlot *slot,
                                             CommandId cid,
                                             int options,
                                             struct BulkInsertStateData *bistate,
                                             uint32 specToken);

    /* see table_tuple_complete_speculative() for reference about parameters */
    void        (*tuple_complete_speculative) (Relation rel,
                                               TupleTableSlot *slot,
                                               uint32 specToken,
                                               bool succeeded);

    /* see table_multi_insert() for reference about parameters */
    void        (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
                                 CommandId cid, int options,
                                 struct BulkInsertStateData *bistate);

    /* see table_tuple_delete() for reference about parameters */
    TM_Result   (*tuple_delete) (Relation rel,
                                 ItemPointer tid,
                                 CommandId cid,
                                 Snapshot snapshot,
                                 Snapshot crosscheck,
                                 bool wait,
                                 TM_FailureData *tmfd,
                                 bool changingPart);

    /* see table_tuple_update() for reference about parameters */
    TM_Result   (*tuple_update) (Relation rel,
                                 ItemPointer otid,
                                 TupleTableSlot *slot,
                                 CommandId cid,
                                 Snapshot snapshot,
                                 Snapshot crosscheck,
                                 bool wait,
                                 TM_FailureData *tmfd,
                                 LockTupleMode *lockmode,
                                 bool *update_indexes);

    /* see table_tuple_lock() for reference about parameters */
    TM_Result   (*tuple_lock) (Relation rel,
                               ItemPointer tid,
                               Snapshot snapshot,
                               TupleTableSlot *slot,
                               CommandId cid,
                               LockTupleMode mode,
                               LockWaitPolicy wait_policy,
                               uint8 flags,
                               TM_FailureData *tmfd);

    /*
     * Perform operations necessary to complete insertions made via
     * tuple_insert and multi_insert with a BulkInsertState specified. This
     * may for example be used to flush the relation, when the
     * TABLE_INSERT_SKIP_WAL option was used.
     *
     * Typically callers of tuple_insert and multi_insert will just pass all
     * the flags that apply to them, and each AM has to decide which of them
     * make sense for it, and then only take actions in finish_bulk_insert for
     * those flags, and ignore others.
     *
     * Optional callback.
     */
    void        (*finish_bulk_insert) (Relation rel, int options);


    /* ------------------------------------------------------------------------
     * DDL related functionality.
     * ------------------------------------------------------------------------
     */

    /*
     * This callback needs to create a new relation filenode for `rel`, with
     * appropriate durability behaviour for `persistence`.
     *
     * Note that only the subset of the relcache filled by
     * RelationBuildLocalRelation() can be relied upon and that the relation's
     * catalog entries will either not yet exist (new relation), or will still
     * reference the old relfilenode.
     *
     * As output *freezeXid, *minmulti must be set to the values appropriate
     * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
     * fields to be filled they can be set to InvalidTransactionId and
     * InvalidMultiXactId, respectively.
     *
     * See also table_relation_set_new_filenode().
     */
    void        (*relation_set_new_filenode) (Relation rel,
                                              const RelFileNode *newrnode,
                                              char persistence,
                                              TransactionId *freezeXid,
                                              MultiXactId *minmulti);

    /*
     * This callback needs to remove all contents from `rel`'s current
     * relfilenode. No provisions for transactional behaviour need to be made.
     * Often this can be implemented by truncating the underlying storage to
     * its minimal size.
     *
     * See also table_relation_nontransactional_truncate().
     */
    void        (*relation_nontransactional_truncate) (Relation rel);

    /*
     * See table_relation_copy_data().
     *
     * This can typically be implemented by directly copying the underlying
     * storage, unless it contains references to the tablespace internally.
     */
    void        (*relation_copy_data) (Relation rel,
                                       const RelFileNode *newrnode);

    /* See table_relation_copy_for_cluster() */
    void        (*relation_copy_for_cluster) (Relation NewTable,
                                              Relation OldTable,
                                              Relation OldIndex,
                                              bool use_sort,
                                              TransactionId OldestXmin,
                                              TransactionId *xid_cutoff,
                                              MultiXactId *multi_cutoff,
                                              double *num_tuples,
                                              double *tups_vacuumed,
                                              double *tups_recently_dead);

    /*
     * React to VACUUM command on the relation. The VACUUM can be
     * triggered by a user or by autovacuum. The specific actions
     * performed by the AM will depend heavily on the individual AM.
     *
     * On entry a transaction is already established, and the relation is
     * locked with a ShareUpdateExclusive lock.
     *
     * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
     * this routine, even if (for ANALYZE) it is part of the same VACUUM
     * command.
     *
     * There probably, in the future, needs to be a separate callback to
     * integrate with autovacuum's scheduling.
     */
    void        (*relation_vacuum) (Relation onerel,
                                    struct VacuumParams *params,
                                    BufferAccessStrategy bstrategy);

    /*
     * Prepare to analyze block `blockno` of `scan`. The scan has been started
     * with table_beginscan_analyze(). See also
     * table_scan_analyze_next_block().
     *
     * The callback may acquire resources like locks that are held until
     * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
     * to hold a lock until all tuples on a block have been analyzed by
     * scan_analyze_next_tuple.
     *
     * The callback can return false if the block is not suitable for
     * sampling, e.g. because it's a metapage that could never contain tuples.
     *
     * XXX: This obviously is primarily suited for block-based AMs. It's not
     * clear what a good interface for non block based AMs would be, so there
     * isn't one yet.
     */
    bool        (*scan_analyze_next_block) (TableScanDesc scan,
                                            BlockNumber blockno,
                                            BufferAccessStrategy bstrategy);

    /*
     * See table_scan_analyze_next_tuple().
     *
     * Not every AM might have a meaningful concept of dead rows, in which
     * case it's OK to not increment *deadrows - but note that that may
     * influence autovacuum scheduling (see comment for relation_vacuum
     * callback).
     */
    bool        (*scan_analyze_next_tuple) (TableScanDesc scan,
                                            TransactionId OldestXmin,
                                            double *liverows,
                                            double *deadrows,
                                            TupleTableSlot *slot);

    /* see table_index_build_range_scan for reference about parameters */
    double      (*index_build_range_scan) (Relation table_rel,
                                           Relation index_rel,
                                           struct IndexInfo *index_info,
                                           bool allow_sync,
                                           bool anyvisible,
                                           bool progress,
                                           BlockNumber start_blockno,
                                           BlockNumber numblocks,
                                           IndexBuildCallback callback,
                                           void *callback_state,
                                           TableScanDesc scan);

    /* see table_index_validate_scan for reference about parameters */
    void        (*index_validate_scan) (Relation table_rel,
                                        Relation index_rel,
                                        struct IndexInfo *index_info,
                                        Snapshot snapshot,
                                        struct ValidateIndexState *state);


    /* ------------------------------------------------------------------------
     * Miscellaneous functions.
     * ------------------------------------------------------------------------
     */

    /*
     * See table_relation_size().
     *
     * Note that currently a few callers use the MAIN_FORKNUM size to figure
     * out the range of potentially interesting blocks (brin, analyze). It's
     * probable that we'll need to revise the interface for those at some
     * point.
     */
    uint64      (*relation_size) (Relation rel, ForkNumber forkNumber);

    /*
     * This callback should return true if the relation requires a TOAST table
     * and false if it does not. It may wish to examine the relation's tuple
     * descriptor before making a decision, but if it uses some other method
     * of storing large values (or if it does not support them) it can simply
     * return false.
     */
    bool        (*relation_needs_toast_table) (Relation rel);


    /* ------------------------------------------------------------------------
     * Planner related functions.
     * ------------------------------------------------------------------------
     */

    /*
     * See table_relation_estimate_size().
     *
     * While block oriented, it shouldn't be too hard for an AM that doesn't
     * internally use blocks to convert into a usable representation.
     *
     * This differs from the relation_size callback by returning size
     * estimates (both relation size and tuple count) for planning purposes,
     * rather than returning a currently correct estimate.
     */
    void        (*relation_estimate_size) (Relation rel, int32 *attr_widths,
                                           BlockNumber *pages, double *tuples,
                                           double *allvisfrac);


    /* ------------------------------------------------------------------------
     * Executor related functions.
     * ------------------------------------------------------------------------
     */

    /*
     * Prepare to fetch / check / return tuples from `tbmres->blockno` as part
     * of a bitmap table scan. `scan` was started via table_beginscan_bm().
     * Return false if there are no tuples to be found on the page, true
     * otherwise.
     *
     * This will typically read and pin the target block, and do the necessary
     * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
     * make sense to perform tuple visibility checks at this time). For some
     * AMs it will make more sense to do all the work referencing `tbmres`
     * contents here, for others it might be better to defer more work to
     * scan_bitmap_next_tuple.
     *
     * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
     * on the page have to be returned, otherwise the tuples at offsets in
     * `tbmres->offsets` need to be returned.
     *
     * XXX: Currently this may only be implemented if the AM uses md.c as its
     * storage manager, and uses ItemPointer->ip_blkid in a manner that maps
     * blockids directly to the underlying storage. nodeBitmapHeapscan.c
     * performs prefetching directly using that interface. This probably
     * needs to be rectified at a later point.
     *
     * XXX: Currently this may only be implemented if the AM uses the
     * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
     * perform prefetching. This probably needs to be rectified at a later
     * point.
     *
     * Optional callback, but either both scan_bitmap_next_block and
     * scan_bitmap_next_tuple need to exist, or neither.
     */
    bool        (*scan_bitmap_next_block) (TableScanDesc scan,
                                           struct TBMIterateResult *tbmres);

    /*
     * Fetch the next tuple of a bitmap table scan into `slot` and return true
     * if a visible tuple was found, false otherwise.
     *
     * For some AMs it will make more sense to do all the work referencing
     * `tbmres` contents in scan_bitmap_next_block, for others it might be
     * better to defer more work to this callback.
     *
     * Optional callback, but either both scan_bitmap_next_block and
     * scan_bitmap_next_tuple need to exist, or neither.
     */
    bool        (*scan_bitmap_next_tuple) (TableScanDesc scan,
                                           struct TBMIterateResult *tbmres,
                                           TupleTableSlot *slot);

    /*
     * Prepare to fetch tuples from the next block in a sample scan. Return
     * false if the sample scan is finished, true otherwise. `scan` was
     * started via table_beginscan_sampling().
     *
     * Typically this will first determine the target block by calling the
     * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively
     * perform a sequential scan over all blocks. The determined block is
     * then typically read and pinned.
     *
     * As the TsmRoutine interface is block based, a block needs to be passed
     * to NextSampleBlock(). If that's not appropriate for an AM, it
     * internally needs to perform mapping between the internal and a block
     * based representation.
     *
     * Note that it's not acceptable to hold deadlock prone resources such as
     * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
     * block - the tuple is likely to be returned to an upper query node, and
     * the next call could be off a long while. Holding buffer pins and such
     * is obviously OK.
     *
     * Currently it is required to implement this interface, as there's no
     * alternative way (contrary e.g. to bitmap scans) to implement sample
     * scans. If infeasible to implement, the AM may raise an error.
     */
    bool        (*scan_sample_next_block) (TableScanDesc scan,
                                           struct SampleScanState *scanstate);

    /*
     * This callback, only called after scan_sample_next_block has returned
     * true, should determine the next tuple to be returned from the selected
     * block using the TsmRoutine's NextSampleTuple() callback.
     *
     * The callback needs to perform visibility checks, and only return
     * visible tuples. That obviously can mean calling NextSampleTuple()
     * multiple times.
     *
     * The TsmRoutine interface assumes that there's a maximum offset on a
     * given page, so if that doesn't apply to an AM, it needs to emulate that
     * assumption somehow.
     */
    bool        (*scan_sample_next_tuple) (TableScanDesc scan,
                                           struct SampleScanState *scanstate,
                                           TupleTableSlot *slot);
} TableAmRoutine;
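
/*
 * Illustrative sketch, not part of this header: the usual shape of a table
 * AM handler, per the comment above (server-lifetime static const struct
 * returned by the amhandler function). The names are hypothetical, almost
 * all callbacks are omitted, and fmgr.h is assumed for PG_FUNCTION_ARGS;
 * the guard keeps the sketch from ever being compiled.
 */
#ifdef TABLEAM_EXAMPLES
static const TableAmRoutine example_methods = {
    .type = T_TableAmRoutine,
    /* .slot_callbacks = ..., .scan_begin = ..., and so on for every
     * required callback; GetTableAmRoutine() asserts they are all set. */
};

Datum
example_tableam_handler(PG_FUNCTION_ARGS)
{
    PG_RETURN_POINTER(&example_methods);
}
#endif                          /* TABLEAM_EXAMPLES */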
/* ----------------------------------------------------------------------------
 * Slot functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Returns slot callbacks suitable for holding tuples of the appropriate type
 * for the relation. Works for tables, views, foreign tables and partitioned
 * tables.
 */
extern const TupleTableSlotOps *table_slot_callbacks(Relation rel);

/*
 * Returns slot using the callbacks returned by table_slot_callbacks(), and
 * registers it on *reglist.
 */
extern TupleTableSlot *table_slot_create(Relation rel, List **reglist);


/* ----------------------------------------------------------------------------
 * Table scan functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Start a scan of `rel`. Returned tuples pass a visibility test of
 * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
 */
static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot,
                int nkeys, struct ScanKeyData *key)
{
    uint32      flags = SO_TYPE_SEQSCAN |
    SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * Like table_beginscan(), but for scanning catalog. It'll automatically use a
 * snapshot appropriate for scanning catalog relations.
 */
extern TableScanDesc table_beginscan_catalog(Relation rel, int nkeys,
                                             struct ScanKeyData *key);

/*
 * Like table_beginscan(), but table_beginscan_strat() offers an extended API
 * that lets the caller control whether a nondefault buffer access strategy
 * can be used, and whether syncscan can be chosen (possibly resulting in the
 * scan not starting from block zero). Both of these default to true with
 * plain table_beginscan.
 */
static inline TableScanDesc
table_beginscan_strat(Relation rel, Snapshot snapshot,
                      int nkeys, struct ScanKeyData *key,
                      bool allow_strat, bool allow_sync)
{
    uint32      flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE;

    if (allow_strat)
        flags |= SO_ALLOW_STRAT;
    if (allow_sync)
        flags |= SO_ALLOW_SYNC;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_bm is an alternative entry point for setting up a
 * TableScanDesc for a bitmap heap scan. Although that scan technology is
 * really quite unlike a standard seqscan, there is just enough commonality to
 * make it worth using the same data structure.
 */
static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
                   int nkeys, struct ScanKeyData *key)
{
    uint32      flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_sampling is an alternative entry point for setting up a
 * TableScanDesc for a TABLESAMPLE scan. As with bitmap scans, it's worth
 * using the same data structure although the behavior is rather different.
 * In addition to the options offered by table_beginscan_strat, this call
 * also allows control of whether page-mode visibility checking is used.
 */
static inline TableScanDesc
table_beginscan_sampling(Relation rel, Snapshot snapshot,
                         int nkeys, struct ScanKeyData *key,
                         bool allow_strat, bool allow_sync,
                         bool allow_pagemode)
{
    uint32      flags = SO_TYPE_SAMPLESCAN;

    if (allow_strat)
        flags |= SO_ALLOW_STRAT;
    if (allow_sync)
        flags |= SO_ALLOW_SYNC;
    if (allow_pagemode)
        flags |= SO_ALLOW_PAGEMODE;

    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
}

/*
 * table_beginscan_tid is an alternative entry point for setting up a
 * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_tid(Relation rel, Snapshot snapshot)
{
    uint32      flags = SO_TYPE_TIDSCAN;

    return rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
}

/*
 * table_beginscan_analyze is an alternative entry point for setting up a
 * TableScanDesc for an ANALYZE scan. As with bitmap scans, it's worth using
 * the same data structure although the behavior is rather different.
 */
static inline TableScanDesc
table_beginscan_analyze(Relation rel)
{
    uint32      flags = SO_TYPE_ANALYZE;

    return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
}

/*
 * End relation scan.
 */
static inline void
table_endscan(TableScanDesc scan)
{
    scan->rs_rd->rd_tableam->scan_end(scan);
}

/*
 * Restart a relation scan.
 */
static inline void
table_rescan(TableScanDesc scan,
             struct ScanKeyData *key)
{
    scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
}

/*
 * Restart a relation scan after changing params.
 *
 * This call allows changing the buffer strategy, syncscan, and pagemode
 * options before starting a fresh scan. Note that although the actual use of
 * syncscan might change (effectively, enabling or disabling reporting), the
 * previously selected startblock will be kept.
 */
static inline void
table_rescan_set_params(TableScanDesc scan, struct ScanKeyData *key,
                        bool allow_strat, bool allow_sync, bool allow_pagemode)
{
    scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
                                         allow_strat, allow_sync,
                                         allow_pagemode);
}

/*
 * Update snapshot used by the scan.
 */
extern void table_scan_update_snapshot(TableScanDesc scan, Snapshot snapshot);

/*
 * Return next tuple from `scan`, store in slot.
 */
static inline bool
table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction,
                       TupleTableSlot *slot)
{
    slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);
    return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
}
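
/*
 * Illustrative sketch, not part of this header: a plain sequential scan
 * built from the wrappers above. Assumes the caller holds a suitable lock
 * on `rel` and a registered `snapshot`; ExecDropSingleTupleTableSlot() is
 * from executor/tuptable.h. Guarded so it is never compiled.
 */
#ifdef TABLEAM_EXAMPLES
static void
example_seqscan(Relation rel, Snapshot snapshot)
{
    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);

    while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* process the tuple now stored in `slot` */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
}
#endif                          /* TABLEAM_EXAMPLES */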
/* ----------------------------------------------------------------------------
 * Parallel table scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Estimate the size of shared memory needed for a parallel scan of this
 * relation.
 */
extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);

/*
 * Initialize ParallelTableScanDesc for a parallel scan of this
 * relation. `pscan` needs to be sized according to parallelscan_estimate()
 * for the same relation. Call this just once in the leader process; then,
 * individual workers attach via table_beginscan_parallel.
 */
extern void table_parallelscan_initialize(Relation rel,
                                          ParallelTableScanDesc pscan,
                                          Snapshot snapshot);

/*
 * Begin a parallel scan. `pscan` needs to have been initialized with
 * table_parallelscan_initialize(), for the same relation. The initialization
 * does not need to have happened in this backend.
 *
 * Caller must hold a suitable lock on the relation.
 */
extern TableScanDesc table_beginscan_parallel(Relation rel,
                                              ParallelTableScanDesc pscan);

/*
 * Restart a parallel scan. Call this in the leader process. Caller is
 * responsible for making sure that all workers have finished the scan
 * beforehand.
 */
static inline void
table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
{
    rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
}
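
/*
 * Illustrative sketch, not part of this header: how leader and workers
 * share one parallel scan. Allocation of `pscan` in dynamic shared memory
 * (sized with table_parallelscan_estimate()) is elided; `is_leader` is a
 * hypothetical parameter standing in for the caller's role. Guarded so it
 * is never compiled.
 */
#ifdef TABLEAM_EXAMPLES
static TableScanDesc
example_parallel_attach(Relation rel, ParallelTableScanDesc pscan,
                        Snapshot snapshot, bool is_leader)
{
    /* exactly one participant, the leader, initializes the shared state */
    if (is_leader)
        table_parallelscan_initialize(rel, pscan, snapshot);

    /* every participant, leader included, attaches the same way */
    return table_beginscan_parallel(rel, pscan);
}
#endif                          /* TABLEAM_EXAMPLES */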
/* ----------------------------------------------------------------------------
 * Index scan related functions.
 * ----------------------------------------------------------------------------
 */

/*
 * Prepare to fetch tuples from the relation, as needed when fetching tuples
 * for an index scan.
 *
 * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
 */
static inline IndexFetchTableData *
table_index_fetch_begin(Relation rel)
{
    return rel->rd_tableam->index_fetch_begin(rel);
}

/*
 * Reset index fetch. Typically this will release cross index fetch resources
 * held in IndexFetchTableData.
 */
static inline void
table_index_fetch_reset(struct IndexFetchTableData *scan)
{
    scan->rel->rd_tableam->index_fetch_reset(scan);
}

/*
 * Release resources and deallocate index fetch.
 */
static inline void
table_index_fetch_end(struct IndexFetchTableData *scan)
{
    scan->rel->rd_tableam->index_fetch_end(scan);
}

/*
 * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
 * a visibility test according to `snapshot`. If a tuple was found and passed
 * the visibility test, returns true, false otherwise.
 *
 * *call_again needs to be false on the first call to
 * table_index_fetch_tuple() for a tid. If there potentially is another tuple
 * matching the tid, *call_again will be set to true, signalling that
 * table_index_fetch_tuple() should be called again for the same tid.
 *
 * *all_dead, if all_dead is not NULL, will be set to true by
 * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see
 * that tuple. Index AMs can use that to avoid returning that tid in future
 * searches.
 *
 * The difference between this function and table_tuple_fetch_row_version()
 * is that this function returns the currently visible version of a row if
 * the AM supports storing multiple row versions reachable via a single index
 * entry (like heap's HOT). Whereas table_tuple_fetch_row_version() only
 * evaluates the tuple exactly at `tid`. Outside of index entry->table tuple
 * lookups, table_tuple_fetch_row_version() is what's usually needed.
 */
static inline bool
table_index_fetch_tuple(struct IndexFetchTableData *scan,
                        ItemPointer tid,
                        Snapshot snapshot,
                        TupleTableSlot *slot,
                        bool *call_again, bool *all_dead)
{
    return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
                                                    slot, call_again,
                                                    all_dead);
}
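
/*
 * Illustrative sketch, not part of this header: fetching every tuple
 * version reachable from a single index TID, honoring the *call_again
 * protocol described above. Guarded so it is never compiled.
 */
#ifdef TABLEAM_EXAMPLES
static bool
example_fetch_from_index(Relation rel, ItemPointer tid, Snapshot snapshot,
                         TupleTableSlot *slot)
{
    IndexFetchTableData *fetch = table_index_fetch_begin(rel);
    bool        call_again = false;
    bool        found;

    do
    {
        found = table_index_fetch_tuple(fetch, tid, snapshot, slot,
                                        &call_again, NULL);
    } while (!found && call_again);

    table_index_fetch_end(fetch);
    return found;
}
#endif                          /* TABLEAM_EXAMPLES */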
/*
 * This is a convenience wrapper around table_index_fetch_tuple() which
 * returns whether there are table tuple items corresponding to an index
 * entry. This likely is only useful to verify if there's a conflict in a
 * unique index.
 */
extern bool table_index_fetch_tuple_check(Relation rel,
                                          ItemPointer tid,
                                          Snapshot snapshot,
                                          bool *all_dead);


/* ------------------------------------------------------------------------
 * Functions for non-modifying operations on individual tuples
 * ------------------------------------------------------------------------
 */

/*
 * Fetch tuple at `tid` into `slot`, after doing a visibility test according to
 * `snapshot`. If a tuple was found and passed the visibility test, returns
 * true, false otherwise.
 *
 * See table_index_fetch_tuple's comment about what the difference between
 * these functions is. It is correct to use this function outside of index
 * entry->table tuple lookups.
 */
static inline bool
table_tuple_fetch_row_version(Relation rel,
                              ItemPointer tid,
                              Snapshot snapshot,
                              TupleTableSlot *slot)
{
    return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
}

/*
 * Verify that `tid` is a potentially valid tuple identifier. That doesn't
 * mean that the pointed to row needs to exist or be visible, but that
 * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or
 * table_tuple_fetch_row_version()) should not error out if called with that
 * tid.
 *
 * `scan` needs to have been started via table_beginscan().
 */
static inline bool
table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
    return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
}

/*
 * Return the latest version of the tuple at `tid`, by updating `tid` to
 * point at the newest version.
 */
extern void table_tuple_get_latest_tid(TableScanDesc scan, ItemPointer tid);

/*
 * Return true iff tuple in slot satisfies the snapshot.
 *
 * This assumes the slot's tuple is valid, and of the appropriate type for the
 * AM.
 *
 * Some AMs might modify the data underlying the tuple as a side-effect. If so
 * they ought to mark the relevant buffer dirty.
 */
static inline bool
table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
                               Snapshot snapshot)
{
    return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
}

/*
 * Compute the newest xid among the tuples pointed to by items. This is used
 * to compute what snapshots to conflict with when replaying WAL records for
 * page-level index vacuums.
 */
static inline TransactionId
table_compute_xid_horizon_for_tuples(Relation rel,
                                     ItemPointerData *items,
                                     int nitems)
{
    return rel->rd_tableam->compute_xid_horizon_for_tuples(rel, items, nitems);
}


/* ----------------------------------------------------------------------------
 * Functions for manipulations of physical tuples.
 * ----------------------------------------------------------------------------
 */

/*
 * Insert a tuple from a slot into table AM routine.
 *
 * The options bitmask allows the caller to specify options that may change the
 * behaviour of the AM. The AM will ignore options that it does not support.
 *
 * If the TABLE_INSERT_SKIP_WAL option is specified, the new tuple doesn't
 * need to be logged to WAL, even for a non-temp relation. It is the AMs
 * choice whether this optimization is supported.
 *
 * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
 * free space in the relation. This can save some cycles when we know the
 * relation is new and doesn't contain useful amounts of free space.
 * TABLE_INSERT_SKIP_FSM is commonly passed directly to
 * RelationGetBufferForTuple. See that method for more information.
 *
 * TABLE_INSERT_FROZEN should only be specified for inserts into
 * relfilenodes created during the current subtransaction and when
 * there are no prior snapshots or pre-existing portals open.
 * This causes rows to be frozen, which is an MVCC violation and
 * requires explicit options chosen by user.
 *
 * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
 * information for the tuple. This should solely be used during table rewrites
 * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
 * relation.
 *
 * Note that most of these options will be applied when inserting into the
 * heap's TOAST table, too, if the tuple requires any out-of-line data.
 *
 * The BulkInsertState object (if any; bistate can be NULL for default
 * behavior) is also just passed through to RelationGetBufferForTuple. If
 * `bistate` is provided, table_finish_bulk_insert() needs to be called.
 *
 * On return the slot's tts_tid and tts_tableOid are updated to reflect the
 * insertion. But note that any toasting of fields within the slot is NOT
 * reflected in the slots contents.
 */
static inline void
table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
                   int options, struct BulkInsertStateData *bistate)
{
    rel->rd_tableam->tuple_insert(rel, slot, cid, options,
                                  bistate);
}
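
/*
 * Illustrative sketch, not part of this header: a single-tuple insert with
 * options suited to filling a relation created earlier in the same
 * transaction. GetCurrentCommandId() is from access/xact.h. Guarded so it
 * is never compiled.
 */
#ifdef TABLEAM_EXAMPLES
static void
example_insert(Relation rel, TupleTableSlot *slot)
{
    /* new relation: no point searching the FSM for free space */
    int         options = TABLE_INSERT_SKIP_FSM;

    table_tuple_insert(rel, slot, GetCurrentCommandId(true), options, NULL);
}
#endif                          /* TABLEAM_EXAMPLES */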
/*
 * Perform a "speculative insertion". These can be backed out afterwards
 * without aborting the whole transaction. Other sessions can wait for the
 * speculative insertion to be confirmed, turning it into a regular tuple, or
 * aborted, as if it never existed. Speculatively inserted tuples behave as
 * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
 *
 * A transaction having performed a speculative insertion has to either abort,
 * or finish the speculative insertion with
 * table_tuple_complete_speculative(succeeded = ...).
 */
static inline void
table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
                               CommandId cid, int options,
                               struct BulkInsertStateData *bistate,
                               uint32 specToken)
{
    rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
                                              bistate, specToken);
}

/*
 * Complete "speculative insertion" started in the same transaction. If
 * succeeded is true, the tuple is fully inserted, if false, it's removed.
 */
static inline void
table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
                                 uint32 specToken, bool succeeded)
{
    rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
                                                succeeded);
}
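
/*
 * Illustrative sketch, not part of this header: the two-step speculative
 * insertion protocol described above, as INSERT ... ON CONFLICT uses it.
 * Token acquisition and the conflict recheck are elided; `conflicted`
 * stands in for that recheck's result. Guarded so it is never compiled.
 */
#ifdef TABLEAM_EXAMPLES
static void
example_speculative_insert(Relation rel, TupleTableSlot *slot,
                           CommandId cid, uint32 specToken, bool conflicted)
{
    table_tuple_insert_speculative(rel, slot, cid, 0, NULL, specToken);

    /* confirm the insertion, or back it out as if it never happened */
    table_tuple_complete_speculative(rel, slot, specToken, !conflicted);
}
#endif                          /* TABLEAM_EXAMPLES */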
/*
 * Insert multiple tuples into a table.
 *
 * This is like table_tuple_insert(), but inserts multiple tuples in one
 * operation. That's often faster than calling table_tuple_insert() in a loop,
 * because e.g. the AM can reduce WAL logging and page locking overhead.
 *
 * Except for taking `nslots` tuples as input, as an array of TupleTableSlots
 * in `slots`, the parameters for table_multi_insert() are the same as for
 * table_tuple_insert().
 *
 * Note: this leaks memory into the current memory context. You can create a
 * temporary context before calling this, if that's a problem.
 */
static inline void
table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
                   CommandId cid, int options,
                   struct BulkInsertStateData *bistate)
{
    rel->rd_tableam->multi_insert(rel, slots, nslots,
                                  cid, options, bistate);
}

/*
 * Delete a tuple.
 *
 * NB: do not call this directly unless prepared to deal with
 * concurrent-update conditions. Use simple_table_tuple_delete instead.
 *
 * Input parameters:
 *  relation - table to be modified (caller must hold suitable lock)
 *  tid - TID of tuple to be deleted
 *  cid - delete command ID (used for visibility test, and stored into
 *      cmax if successful)
 *  crosscheck - if not InvalidSnapshot, also check tuple against this
 *  wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *  tmfd - filled in failure cases (see below)
 *  changingPart - true iff the tuple is being moved to another partition
 *      table due to an update of the partition key. Otherwise, false.
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * delete it. Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax. See comments for
 * struct TM_FailureData for additional info.
 */
static inline TM_Result
table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
                   Snapshot snapshot, Snapshot crosscheck, bool wait,
                   TM_FailureData *tmfd, bool changingPart)
{
    return rel->rd_tableam->tuple_delete(rel, tid, cid,
                                         snapshot, crosscheck,
                                         wait, tmfd, changingPart);
}
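
/*
 * Illustrative sketch, not part of this header: handling the result codes
 * of table_tuple_delete() along the lines of the executor, minus
 * EvalPlanQual rechecking. Guarded so it is never compiled.
 */
#ifdef TABLEAM_EXAMPLES
static void
example_delete(Relation rel, ItemPointer tid, Snapshot snapshot)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = table_tuple_delete(rel, tid, GetCurrentCommandId(true),
                                snapshot, InvalidSnapshot,
                                true /* wait */ , &tmfd, false);

    switch (result)
    {
        case TM_Ok:
            break;              /* deleted */
        case TM_SelfModified:
            break;              /* already deleted by this command; skip */
        case TM_Updated:
        case TM_Deleted:
            /* concurrently modified; tmfd.ctid/xmax identify the culprit */
            break;
        default:
            elog(ERROR, "unexpected table_tuple_delete status: %d", result);
    }
}
#endif                          /* TABLEAM_EXAMPLES */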
/*
 * Update a tuple.
 *
 * NB: do not call this directly unless you are prepared to deal with
 * concurrent-update conditions. Use simple_table_tuple_update instead.
 *
 * Input parameters:
 *  relation - table to be modified (caller must hold suitable lock)
 *  otid - TID of old tuple to be replaced
 *  slot - newly constructed tuple data to store
 *  cid - update command ID (used for visibility test, and stored into
 *      cmax/cmin if successful)
 *  crosscheck - if not InvalidSnapshot, also check old tuple against this
 *  wait - true if should wait for any conflicting update to commit/abort
 * Output parameters:
 *  tmfd - filled in failure cases (see below)
 *  lockmode - filled with lock mode acquired on tuple
 *  update_indexes - in success cases this is set to true if new index entries
 *      are required for this tuple
 *
 * Normal, successful return value is TM_Ok, which means we did actually
 * update it. Failure return codes are TM_SelfModified, TM_Updated, and
 * TM_BeingModified (the last only possible if wait == false).
 *
 * On success, the slot's tts_tid and tts_tableOid are updated to match the new
 * stored tuple; in particular, slot->tts_tid is set to the TID where the
 * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
 * update was done. However, any TOAST changes in the new tuple's
 * data are not reflected into *newtup.
 *
 * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
 * t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
 * for additional info.
 */
static inline TM_Result
table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
                   CommandId cid, Snapshot snapshot, Snapshot crosscheck,
                   bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
                   bool *update_indexes)
{
    return rel->rd_tableam->tuple_update(rel, otid, slot,
                                         cid, snapshot, crosscheck,
                                         wait, tmfd,
                                         lockmode, update_indexes);
}

/*
 * Lock a tuple in the specified mode.
 *
 * Input parameters:
 *  relation: relation containing tuple (caller must hold suitable lock)
 *  tid: TID of tuple to lock
 *  snapshot: snapshot to use for visibility determinations
 *  cid: current command ID (used for visibility test, and stored into
 *      tuple's cmax if lock is successful)
 *  mode: lock mode desired
 *  wait_policy: what to do if tuple lock is not available
 *  flags:
 *      If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
 *      also lock descendant tuples if lock modes don't conflict.
 *      If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
 *      latest version.
 *
 * Output parameters:
 *  *slot: contains the target tuple
 *  *tmfd: filled in failure cases (see below)
 *
 * Function result may be:
 *  TM_Ok: lock was successfully acquired
 *  TM_Invisible: lock failed because tuple was never visible to us
 *  TM_SelfModified: lock failed because tuple updated by self
 *  TM_Updated: lock failed because tuple updated by other xact
 *  TM_Deleted: lock failed because tuple deleted by other xact
 *  TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
 *
 * In the failure cases other than TM_Invisible and TM_Deleted, the routine
 * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax. See
 * comments for struct TM_FailureData for additional info.
 */
static inline TM_Result
table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
                 TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
                 LockWaitPolicy wait_policy, uint8 flags,
                 TM_FailureData *tmfd)
{
    return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
                                       cid, mode, wait_policy,
                                       flags, tmfd);
}
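
/*
 * Illustrative sketch, not part of this header: locking the latest version
 * of a row while skipping rows somebody else holds locked, in the style of
 * SELECT ... FOR UPDATE SKIP LOCKED. Guarded so it is never compiled.
 */
#ifdef TABLEAM_EXAMPLES
static bool
example_lock_latest(Relation rel, ItemPointer tid, Snapshot snapshot,
                    TupleTableSlot *slot)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = table_tuple_lock(rel, tid, snapshot, slot,
                              GetCurrentCommandId(false),
                              LockTupleExclusive, LockWaitSkip,
                              TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
                              &tmfd);

    /* TM_WouldBlock and the other failure codes count as "not locked" */
    return result == TM_Ok;
}
#endif                          /* TABLEAM_EXAMPLES */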
/*
 * Perform operations necessary to complete insertions made via
 * tuple_insert and multi_insert with a BulkInsertState specified. This
 * may, e.g., be used to flush the relation when inserting with
 * TABLE_INSERT_SKIP_WAL specified.
 */
static inline void
table_finish_bulk_insert(Relation rel, int options)
{
    /* optional callback */
    if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
        rel->rd_tableam->finish_bulk_insert(rel, options);
}
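
/*
 * Illustrative sketch, not part of this header: a bulk load pairing a
 * BulkInsertState with the table_finish_bulk_insert() call required when
 * `bistate` is provided. GetBulkInsertState()/FreeBulkInsertState() are
 * from access/heapam.h. Guarded so it is never compiled.
 */
#ifdef TABLEAM_EXAMPLES
static void
example_bulk_load(Relation rel, TupleTableSlot **slots, int nslots)
{
    struct BulkInsertStateData *bistate = GetBulkInsertState();
    int         options = TABLE_INSERT_SKIP_FSM;

    table_multi_insert(rel, slots, nslots,
                       GetCurrentCommandId(true), options, bistate);

    table_finish_bulk_insert(rel, options);
    FreeBulkInsertState(bistate);
}
#endif                          /* TABLEAM_EXAMPLES */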
  1196. /* ------------------------------------------------------------------------
  1197. * DDL related functionality.
  1198. * ------------------------------------------------------------------------
  1199. */
  1200. /*
  1201. * Create storage for `rel` in `newrnode`, with persistence set to
  1202. * `persistence`.
  1203. *
  1204. * This is used both during relation creation and various DDL operations to
  1205. * create a new relfilenode that can be filled from scratch. When creating
  1206. * new storage for an existing relfilenode, this should be called before the
  1207. * relcache entry has been updated.
  1208. *
  1209. * *freezeXid, *minmulti are set to the xid / multixact horizon for the table
  1210. * that pg_class.{relfrozenxid, relminmxid} have to be set to.
  1211. */
  1212. static inline void
  1213. table_relation_set_new_filenode(Relation rel,
  1214. const RelFileNode *newrnode,
  1215. char persistence,
  1216. TransactionId *freezeXid,
  1217. MultiXactId *minmulti)
  1218. {
  1219. rel->rd_tableam->relation_set_new_filenode(rel, newrnode, persistence,
  1220. freezeXid, minmulti);
  1221. }
  1222. /*
  1223. * Remove all table contents from `rel`, in a non-transactional manner.
  1224. * Non-transactional meaning that there's no need to support rollbacks. This
  1225. * commonly only is used to perform truncations for relfilenodes created in the
  1226. * current transaction.
  1227. */
  1228. static inline void
  1229. table_relation_nontransactional_truncate(Relation rel)
  1230. {
  1231. rel->rd_tableam->relation_nontransactional_truncate(rel);
  1232. }
  1233. /*
  1234. * Copy data from `rel` into the new relfilenode `newrnode`. The new
  1235. * relfilenode may not have storage associated before this function is
  1236. * called. This is only supposed to be used for low level operations like
  1237. * changing a relation's tablespace.
  1238. */
  1239. static inline void
  1240. table_relation_copy_data(Relation rel, const RelFileNode *newrnode)
  1241. {
  1242. rel->rd_tableam->relation_copy_data(rel, newrnode);
  1243. }
  1244. /*
  1245. * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM
  1246. * FULL.
  1247. *
  1248. * Additional Input parameters:
  1249. * - use_sort - if true, the table contents are sorted appropriate for
  1250. * `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied
  1251. * in that index's order; if false and OldIndex is InvalidOid, no sorting is
  1252. * performed
  1253. * - OldIndex - see use_sort
  1254. * - OldestXmin - computed by vacuum_set_xid_limits(), even when
  1255. * not needed for the relation's AM
  1256. * - *xid_cutoff - ditto
  1257. * - *multi_cutoff - ditto
  1258. *
  1259. * Output parameters:
  1260. * - *xid_cutoff - rel's new relfrozenxid value, may be invalid
  1261. * - *multi_cutoff - rel's new relminmxid value, may be invalid
  1262. * - *tups_vacuumed - stats, for logging, if appropriate for AM
  1263. * - *tups_recently_dead - stats, for logging, if appropriate for AM
  1264. */
  1265. static inline void
  1266. table_relation_copy_for_cluster(Relation OldTable, Relation NewTable,
  1267. Relation OldIndex,
  1268. bool use_sort,
  1269. TransactionId OldestXmin,
  1270. TransactionId *xid_cutoff,
  1271. MultiXactId *multi_cutoff,
  1272. double *num_tuples,
  1273. double *tups_vacuumed,
  1274. double *tups_recently_dead)
  1275. {
  1276. OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex,
  1277. use_sort, OldestXmin,
  1278. xid_cutoff, multi_cutoff,
  1279. num_tuples, tups_vacuumed,
  1280. tups_recently_dead);
  1281. }
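/*
 * Illustrative sketch: how a CLUSTER / VACUUM FULL rewrite might drive the
 * callback above, loosely following copy_table_data() in commands/cluster.c
 * (the cutoff variables are assumed to come from vacuum_set_xid_limits()):
 *
 *     double num_tuples = 0, tups_vacuumed = 0, tups_recently_dead = 0;
 *
 *     table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
 *                                     OldestXmin, &FreezeXid, &MultiXactCutoff,
 *                                     &num_tuples, &tups_vacuumed,
 *                                     &tups_recently_dead);
 */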
  1282. /*
  1283. * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by
  1284. * autovacuum. The specific actions performed will depend heavily on the
  1285. * individual AM.
  1286. *
  1287. * On entry a transaction needs to already have been established, and the
  1288. * table must be locked with a ShareUpdateExclusive lock.
  1289. *
  1290. * Note that neither VACUUM FULL (and CLUSTER) nor ANALYZE goes through this
  1291. * routine, even if (for ANALYZE) it is part of the same VACUUM command.
  1292. */
  1293. static inline void
  1294. table_relation_vacuum(Relation rel, struct VacuumParams *params,
  1295. BufferAccessStrategy bstrategy)
  1296. {
  1297. rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
  1298. }
  1299. /*
  1300. * Prepare to analyze block `blockno` of `scan`. The scan needs to have been
  1301. * started with table_beginscan_analyze(). Note that this routine might
  1302. * acquire resources like locks that are held until
  1303. * table_scan_analyze_next_tuple() returns false.
  1304. *
  1305. * Returns false if the block is unsuitable for sampling, true otherwise.
  1306. */
  1307. static inline bool
  1308. table_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
  1309. BufferAccessStrategy bstrategy)
  1310. {
  1311. return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, blockno,
  1312. bstrategy);
  1313. }
  1314. /*
  1315. * Iterate over tuples in the block selected with
  1316. * table_scan_analyze_next_block() (which needs to have returned true, and
  1317. * this routine may not have returned false for the same block before). If a
  1318. * tuple that's suitable for sampling is found, true is returned and a tuple
  1319. * is stored in `slot`.
  1320. *
  1321. * *liverows and *deadrows are incremented according to the encountered
  1322. * tuples.
  1323. */
  1324. static inline bool
  1325. table_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
  1326. double *liverows, double *deadrows,
  1327. TupleTableSlot *slot)
  1328. {
  1329. return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan, OldestXmin,
  1330. liverows, deadrows,
  1331. slot);
  1332. }
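/*
 * Illustrative sketch of the ANALYZE sampling protocol formed by the two
 * routines above, loosely modelled on acquire_sample_rows() in
 * commands/analyze.c; `scan`, `blockno`, `bstrategy`, `OldestXmin` and
 * `slot` are assumed to be set up by the caller:
 *
 *     double liverows = 0, deadrows = 0;
 *
 *     if (table_scan_analyze_next_block(scan, blockno, bstrategy))
 *     {
 *         while (table_scan_analyze_next_tuple(scan, OldestXmin,
 *                                              &liverows, &deadrows, slot))
 *         {
 *             (process the sampled tuple stored in `slot`)
 *         }
 *     }
 */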
  1333. /*
  1334. * table_index_build_scan - scan the table to find tuples to be indexed
  1335. *
  1336. * This is called back from an access-method-specific index build procedure
  1337. * after the AM has done whatever setup it needs. The parent table relation
  1338. * is scanned to find tuples that should be entered into the index. Each
  1339. * such tuple is passed to the AM's callback routine, which does the right
  1340. * things to add it to the new index. After we return, the AM's index
  1341. * build procedure does whatever cleanup it needs.
  1342. *
  1343. * The total count of live tuples is returned. This is for updating pg_class
  1344. * statistics. (It's annoying not to be able to do that here, but we want to
  1345. * merge that update with others; see index_update_stats.) Note that the
  1346. * index AM itself must keep track of the number of index tuples; we don't do
  1347. * so here because the AM might reject some of the tuples for its own reasons,
  1348. * such as being unable to store NULLs.
  1349. *
  1350. * If 'progress' is true, the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
  1351. * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
  1352. *
  1353. * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
  1354. * any potentially broken HOT chains. Currently, we set this if there are any
  1355. * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
  1356. * very hard to detect whether they're really incompatible with the chain tip.
  1357. * This only really makes sense for the heap AM; it might need to be
  1358. * generalized for other AMs later.
  1359. */
  1360. static inline double
  1361. table_index_build_scan(Relation table_rel,
  1362. Relation index_rel,
  1363. struct IndexInfo *index_info,
  1364. bool allow_sync,
  1365. bool progress,
  1366. IndexBuildCallback callback,
  1367. void *callback_state,
  1368. TableScanDesc scan)
  1369. {
  1370. return table_rel->rd_tableam->index_build_range_scan(table_rel,
  1371. index_rel,
  1372. index_info,
  1373. allow_sync,
  1374. false,
  1375. progress,
  1376. 0,
  1377. InvalidBlockNumber,
  1378. callback,
  1379. callback_state,
  1380. scan);
  1381. }
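/*
 * Illustrative sketch: an index AM's build routine typically passes a
 * per-tuple callback matching IndexBuildCallback, much as nbtree's build
 * code does. The callback, its state, and the relation variables below are
 * hypothetical:
 *
 *     static void
 *     my_build_callback(Relation index, HeapTuple htup, Datum *values,
 *                       bool *isnull, bool tupleIsAlive, void *state)
 *     {
 *         (form and insert an index entry for this tuple)
 *     }
 *
 *     reltuples = table_index_build_scan(heapRel, indexRel, indexInfo,
 *                                        true, true,
 *                                        my_build_callback, (void *) state,
 *                                        NULL);
 */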
  1382. /*
  1383. * As table_index_build_scan(), except that instead of scanning the complete
  1384. * table, only the given number of blocks are scanned. Scanning to end-of-rel
  1385. * can be signalled by passing InvalidBlockNumber as numblocks. Note that the
  1386. * range to scan cannot be restricted when a syncscan is requested.
  1387. *
  1388. * When "anyvisible" mode is requested, all tuples visible to any transaction
  1389. * are indexed and counted as live, including those inserted or deleted by
  1390. * transactions that are still in progress.
  1391. */
  1392. static inline double
  1393. table_index_build_range_scan(Relation table_rel,
  1394. Relation index_rel,
  1395. struct IndexInfo *index_info,
  1396. bool allow_sync,
  1397. bool anyvisible,
  1398. bool progress,
  1399. BlockNumber start_blockno,
  1400. BlockNumber numblocks,
  1401. IndexBuildCallback callback,
  1402. void *callback_state,
  1403. TableScanDesc scan)
  1404. {
  1405. return table_rel->rd_tableam->index_build_range_scan(table_rel,
  1406. index_rel,
  1407. index_info,
  1408. allow_sync,
  1409. anyvisible,
  1410. progress,
  1411. start_blockno,
  1412. numblocks,
  1413. callback,
  1414. callback_state,
  1415. scan);
  1416. }
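/*
 * Illustrative sketch: restricting the scan to a single block range, roughly
 * as BRIN's summarize_range() in access/brin/brin.c does when summarizing
 * `scanNumBlks` blocks starting at `heapBlk` (names taken from that caller):
 *
 *     table_index_build_range_scan(heapRel, indexRel, indexInfo,
 *                                  false,    (no syncscan with a range)
 *                                  true,     (anyvisible mode)
 *                                  false,    (no progress reporting)
 *                                  heapBlk, scanNumBlks,
 *                                  brinbuildCallback, (void *) state, NULL);
 */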
  1417. /*
  1418. * table_index_validate_scan - second table scan for concurrent index build
  1419. *
  1420. * See validate_index() for an explanation.
  1421. */
  1422. static inline void
  1423. table_index_validate_scan(Relation table_rel,
  1424. Relation index_rel,
  1425. struct IndexInfo *index_info,
  1426. Snapshot snapshot,
  1427. struct ValidateIndexState *state)
  1428. {
  1429. table_rel->rd_tableam->index_validate_scan(table_rel,
  1430. index_rel,
  1431. index_info,
  1432. snapshot,
  1433. state);
  1434. }
  1435. /* ----------------------------------------------------------------------------
  1436. * Miscellaneous functionality
  1437. * ----------------------------------------------------------------------------
  1438. */
  1439. /*
  1440. * Return the current size of `rel` in bytes. If `forkNumber` is
  1441. * InvalidForkNumber, return the relation's overall size, otherwise the size
  1442. * for the indicated fork.
  1443. *
  1444. * Note that for some AMs the overall size might not equal the sum of the
  1445. * sizes of the individual forks, e.g. because the AM's storage does not
  1446. * neatly map onto the builtin types of forks.
  1447. */
  1448. static inline uint64
  1449. table_relation_size(Relation rel, ForkNumber forkNumber)
  1450. {
  1451. return rel->rd_tableam->relation_size(rel, forkNumber);
  1452. }
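/*
 * Minimal usage sketch for the size callback; both a single fork and the
 * overall size can be requested:
 *
 *     uint64 main_bytes = table_relation_size(rel, MAIN_FORKNUM);
 *     uint64 total_bytes = table_relation_size(rel, InvalidForkNumber);
 */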
  1453. /*
  1454. * table_relation_needs_toast_table - does this relation need a toast table?
  1455. */
  1456. static inline bool
  1457. table_relation_needs_toast_table(Relation rel)
  1458. {
  1459. return rel->rd_tableam->relation_needs_toast_table(rel);
  1460. }
  1461. /* ----------------------------------------------------------------------------
  1462. * Planner related functionality
  1463. * ----------------------------------------------------------------------------
  1464. */
  1465. /*
  1466. * Estimate the current size of the relation, as an AM specific workhorse for
  1467. * estimate_rel_size(). Look there for an explanation of the parameters.
  1468. */
  1469. static inline void
  1470. table_relation_estimate_size(Relation rel, int32 *attr_widths,
  1471. BlockNumber *pages, double *tuples,
  1472. double *allvisfrac)
  1473. {
  1474. rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
  1475. allvisfrac);
  1476. }
  1477. /* ----------------------------------------------------------------------------
  1478. * Executor related functionality
  1479. * ----------------------------------------------------------------------------
  1480. */
  1481. /*
  1482. * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of
  1483. * a bitmap table scan. `scan` needs to have been started via
  1484. * table_beginscan_bm(). Returns false if there are no tuples to be found on
  1485. * the page, true otherwise.
  1486. *
  1487. * Note that this is an optional callback; it should therefore only be used
  1488. * after verifying that it is implemented (at plan time or such).
  1489. */
  1490. static inline bool
  1491. table_scan_bitmap_next_block(TableScanDesc scan,
  1492. struct TBMIterateResult *tbmres)
  1493. {
  1494. return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
  1495. tbmres);
  1496. }
  1497. /*
  1498. * Fetch the next tuple of a bitmap table scan into `slot` and return true if
  1499. * a visible tuple was found, false otherwise.
  1500. * table_scan_bitmap_next_block() needs to previously have selected a
  1501. * block (i.e. returned true), and no previous
  1502. * table_scan_bitmap_next_tuple() for the same block may have
  1503. * returned false.
  1504. */
  1505. static inline bool
  1506. table_scan_bitmap_next_tuple(TableScanDesc scan,
  1507. struct TBMIterateResult *tbmres,
  1508. TupleTableSlot *slot)
  1509. {
  1510. return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
  1511. tbmres,
  1512. slot);
  1513. }
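/*
 * Illustrative sketch of the bitmap-scan protocol formed by the two routines
 * above, loosely following BitmapHeapNext() in executor/nodeBitmapHeapscan.c;
 * `tbmres` is assumed to come from tbm_iterate():
 *
 *     if (table_scan_bitmap_next_block(scan, tbmres))
 *     {
 *         while (table_scan_bitmap_next_tuple(scan, tbmres, slot))
 *         {
 *             (process the tuple stored in `slot`)
 *         }
 *     }
 *     (then advance the iterator and repeat for the next tbmres)
 */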
  1514. /*
  1515. * Prepare to fetch tuples from the next block in a sample scan. Returns false
  1516. * if the sample scan is finished, true otherwise. `scan` needs to have been
  1517. * started via table_beginscan_sampling().
  1518. *
  1519. * This will call the TsmRoutine's NextSampleBlock() callback if necessary
  1520. * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the
  1521. * underlying relation.
  1522. */
  1523. static inline bool
  1524. table_scan_sample_next_block(TableScanDesc scan,
  1525. struct SampleScanState *scanstate)
  1526. {
  1527. return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
  1528. }
  1529. /*
  1530. * Fetch the next sample tuple into `slot` and return true if a visible tuple
  1531. * was found, false otherwise. table_scan_sample_next_block() needs to
  1532. * previously have selected a block (i.e. returned true), and no previous
  1533. * table_scan_sample_next_tuple() for the same block may have returned false.
  1534. *
  1535. * This will call the TsmRoutine's NextSampleTuple() callback.
  1536. */
  1537. static inline bool
  1538. table_scan_sample_next_tuple(TableScanDesc scan,
  1539. struct SampleScanState *scanstate,
  1540. TupleTableSlot *slot)
  1541. {
  1542. return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
  1543. slot);
  1544. }
  1545. /* ----------------------------------------------------------------------------
  1546. * Functions to make modifications a bit simpler.
  1547. * ----------------------------------------------------------------------------
  1548. */
  1549. extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
  1550. extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
  1551. Snapshot snapshot);
  1552. extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
  1553. TupleTableSlot *slot, Snapshot snapshot,
  1554. bool *update_indexes);
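/*
 * Usage sketch (hypothetical caller): these helpers perform the modification
 * and raise an error on concurrency conflicts instead of returning a
 * TM_Result, so callers need no retry logic:
 *
 *     bool update_indexes;
 *
 *     simple_table_tuple_insert(rel, slot);
 *     simple_table_tuple_update(rel, otid, slot, snapshot, &update_indexes);
 *     if (update_indexes)
 *         (insert index entries for the new tuple version)
 */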
  1555. /* ----------------------------------------------------------------------------
  1556. * Helper functions to implement parallel scans for block oriented AMs.
  1557. * ----------------------------------------------------------------------------
  1558. */
  1559. extern Size table_block_parallelscan_estimate(Relation rel);
  1560. extern Size table_block_parallelscan_initialize(Relation rel,
  1561. ParallelTableScanDesc pscan);
  1562. extern void table_block_parallelscan_reinitialize(Relation rel,
  1563. ParallelTableScanDesc pscan);
  1564. extern BlockNumber table_block_parallelscan_nextpage(Relation rel,
  1565. ParallelBlockTableScanDesc pbscan);
  1566. extern void table_block_parallelscan_startblock_init(Relation rel,
  1567. ParallelBlockTableScanDesc pbscan);
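/*
 * Sketch: a block-oriented AM can typically point its parallel-scan
 * callbacks straight at these helpers, as the heap AM does in
 * heapam_handler.c (struct shown abridged, with a hypothetical AM name):
 *
 *     static const TableAmRoutine my_am_methods = {
 *         ...
 *         .parallelscan_estimate = table_block_parallelscan_estimate,
 *         .parallelscan_initialize = table_block_parallelscan_initialize,
 *         .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
 *         ...
 *     };
 */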
  1568. /* ----------------------------------------------------------------------------
  1569. * Functions in tableamapi.c
  1570. * ----------------------------------------------------------------------------
  1571. */
  1572. extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
  1573. extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
  1574. extern bool check_default_table_access_method(char **newval, void **extra,
  1575. GucSource source);
  1576. #endif /* TABLEAM_H */