gooderp18绿色标准版
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

555 lines
19KB

  1. /*-------------------------------------------------------------------------
  2. *
  3. * gist_private.h
  4. * private declarations for GiST -- declarations related to the
  5. * internal implementation of GiST, not the public API
  6. *
  7. * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  8. * Portions Copyright (c) 1994, Regents of the University of California
  9. *
  10. * src/include/access/gist_private.h
  11. *
  12. *-------------------------------------------------------------------------
  13. */
  14. #ifndef GIST_PRIVATE_H
  15. #define GIST_PRIVATE_H
  16. #include "access/amapi.h"
  17. #include "access/gist.h"
  18. #include "access/itup.h"
  19. #include "fmgr.h"
  20. #include "lib/pairingheap.h"
  21. #include "storage/bufmgr.h"
  22. #include "storage/buffile.h"
  23. #include "utils/hsearch.h"
  24. #include "access/genam.h"
  /*
   * Maximum number of "halves" a page can be split into in one operation.
   * Typically a split produces 2 halves, but can be more if keys have very
   * different lengths, or when inserting multiple keys in one operation (as
   * when inserting downlinks to an internal node).  There is no theoretical
   * limit on this, but in practice if you get more than a handful of page
   * halves in one split, there's something wrong with the opclass
   * implementation.  GIST_MAX_SPLIT_PAGES is an arbitrary limit on that, used
   * to size some local arrays used during split.  Note that there is also a
   * limit on the number of buffers that can be held locked at a time,
   * MAX_SIMUL_LWLOCKS, so if you raise this higher than that limit, you'll
   * just get a different error.
   */
  #define GIST_MAX_SPLIT_PAGES 75
  /*
   * Buffer lock modes.
   *
   * These are plain aliases for the BUFFER_LOCK_* constants, which must be
   * declared wherever the macros are expanded.
   */
  #define GIST_SHARE      BUFFER_LOCK_SHARE
  #define GIST_EXCLUSIVE  BUFFER_LOCK_EXCLUSIVE
  #define GIST_UNLOCK     BUFFER_LOCK_UNLOCK
  43. typedef struct
  44. {
  45. BlockNumber prev;
  46. uint32 freespace;
  47. char tupledata[FLEXIBLE_ARRAY_MEMBER];
  48. } GISTNodeBufferPage;
  /* MAXALIGN'ed offset of the tuple data area within a node buffer page */
  #define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata))

  /*
   * Returns free space in node buffer page.
   *
   * The argument is parenthesized so that any pointer-valued expression
   * (e.g. "&page") can be passed safely.
   */
  #define PAGE_FREE_SPACE(nbp) ((nbp)->freespace)

  /* Checks if node buffer page is empty */
  #define PAGE_IS_EMPTY(nbp) \
      ((nbp)->freespace == BLCKSZ - BUFFER_PAGE_DATA_OFFSET)

  /*
   * Checks if node buffer page doesn't contain sufficient space for the
   * (MAXALIGN'ed) index tuple
   */
  #define PAGE_NO_SPACE(nbp, itup) \
      (PAGE_FREE_SPACE(nbp) < MAXALIGN(IndexTupleSize(itup)))
  57. /*
  58. * GISTSTATE: information needed for any GiST index operation
  59. *
  60. * This struct retains call info for the index's opclass-specific support
  61. * functions (per index column), plus the index's tuple descriptor.
  62. *
  63. * scanCxt holds the GISTSTATE itself as well as any data that lives for the
  64. * lifetime of the index operation. We pass this to the support functions
  65. * via fn_mcxt, so that they can store scan-lifespan data in it. The
  66. * functions are invoked in tempCxt, which is typically short-lifespan
  67. * (that is, it's reset after each tuple). However, tempCxt can be the same
  68. * as scanCxt if we're not bothering with per-tuple context resets.
  69. */
  70. typedef struct GISTSTATE
  71. {
  72. MemoryContext scanCxt; /* context for scan-lifespan data */
  73. MemoryContext tempCxt; /* short-term context for calling functions */
  74. TupleDesc leafTupdesc; /* index's tuple descriptor */
  75. TupleDesc nonLeafTupdesc; /* truncated tuple descriptor for non-leaf
  76. * pages */
  77. TupleDesc fetchTupdesc; /* tuple descriptor for tuples returned in an
  78. * index-only scan */
  79. FmgrInfo consistentFn[INDEX_MAX_KEYS];
  80. FmgrInfo unionFn[INDEX_MAX_KEYS];
  81. FmgrInfo compressFn[INDEX_MAX_KEYS];
  82. FmgrInfo decompressFn[INDEX_MAX_KEYS];
  83. FmgrInfo penaltyFn[INDEX_MAX_KEYS];
  84. FmgrInfo picksplitFn[INDEX_MAX_KEYS];
  85. FmgrInfo equalFn[INDEX_MAX_KEYS];
  86. FmgrInfo distanceFn[INDEX_MAX_KEYS];
  87. FmgrInfo fetchFn[INDEX_MAX_KEYS];
  88. /* Collations to pass to the support functions */
  89. Oid supportCollation[INDEX_MAX_KEYS];
  90. } GISTSTATE;
  91. /*
  92. * During a GiST index search, we must maintain a queue of unvisited items,
  93. * which can be either individual heap tuples or whole index pages. If it
  94. * is an ordered search, the unvisited items should be visited in distance
  95. * order. Unvisited items at the same distance should be visited in
  96. * depth-first order, that is heap items first, then lower index pages, then
  97. * upper index pages; this rule avoids doing extra work during a search that
  98. * ends early due to LIMIT.
  99. *
  100. * To perform an ordered search, we use a pairing heap to manage the
  101. * distance-order queue. In a non-ordered search (no order-by operators),
  102. * we use it to return heap tuples before unvisited index pages, to
  103. * ensure depth-first order, but all entries are otherwise considered
  104. * equal.
  105. */
  106. /* Individual heap tuple to be visited */
  107. typedef struct GISTSearchHeapItem
  108. {
  109. ItemPointerData heapPtr;
  110. bool recheck; /* T if quals must be rechecked */
  111. bool recheckDistances; /* T if distances must be rechecked */
  112. HeapTuple recontup; /* data reconstructed from the index, used in
  113. * index-only scans */
  114. OffsetNumber offnum; /* track offset in page to mark tuple as
  115. * LP_DEAD */
  116. } GISTSearchHeapItem;
  117. /* Unvisited item, either index page or heap tuple */
  118. typedef struct GISTSearchItem
  119. {
  120. pairingheap_node phNode;
  121. BlockNumber blkno; /* index page number, or InvalidBlockNumber */
  122. union
  123. {
  124. GistNSN parentlsn; /* parent page's LSN, if index page */
  125. /* we must store parentlsn to detect whether a split occurred */
  126. GISTSearchHeapItem heap; /* heap info, if heap tuple */
  127. } data;
  128. /* numberOfOrderBys entries */
  129. IndexOrderByDistance distances[FLEXIBLE_ARRAY_MEMBER];
  130. } GISTSearchItem;
  /*
   * True if the search item describes a heap tuple rather than an index page.
   * Note that the argument is the item itself (an lvalue), not a pointer.
   */
  #define GISTSearchItemIsHeap(item)  ((item).blkno == InvalidBlockNumber)

  /* Allocation size of a GISTSearchItem carrying n_distances distance entries */
  #define SizeOfGISTSearchItem(n_distances) \
      (offsetof(GISTSearchItem, distances) + \
       sizeof(IndexOrderByDistance) * (n_distances))
  135. /*
  136. * GISTScanOpaqueData: private state for a scan of a GiST index
  137. */
  138. typedef struct GISTScanOpaqueData
  139. {
  140. GISTSTATE *giststate; /* index information, see above */
  141. Oid *orderByTypes; /* datatypes of ORDER BY expressions */
  142. pairingheap *queue; /* queue of unvisited items */
  143. MemoryContext queueCxt; /* context holding the queue */
  144. bool qual_ok; /* false if qual can never be satisfied */
  145. bool firstCall; /* true until first gistgettuple call */
  146. /* pre-allocated workspace arrays */
  147. IndexOrderByDistance *distances; /* output area for gistindex_keytest */
  148. /* info about killed items if any (killedItems is NULL if never used) */
  149. OffsetNumber *killedItems; /* offset numbers of killed items */
  150. int numKilled; /* number of currently stored items */
  151. BlockNumber curBlkno; /* current number of block */
  152. GistNSN curPageLSN; /* pos in the WAL stream when page was read */
  153. /* In a non-ordered search, returnable heap items are stored here: */
  154. GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
  155. OffsetNumber nPageData; /* number of valid items in array */
  156. OffsetNumber curPageData; /* next item to return */
  157. MemoryContext pageDataCxt; /* context holding the fetched tuples, for
  158. * index-only scans */
  159. } GISTScanOpaqueData;
  160. typedef GISTScanOpaqueData *GISTScanOpaque;
  161. /* despite the name, gistxlogPage is not part of any xlog record */
  162. typedef struct gistxlogPage
  163. {
  164. BlockNumber blkno;
  165. int num; /* number of index tuples following */
  166. } gistxlogPage;
  167. /* SplitedPageLayout - gistSplit function result */
  168. typedef struct SplitedPageLayout
  169. {
  170. gistxlogPage block;
  171. IndexTupleData *list;
  172. int lenlist;
  173. IndexTuple itup; /* union key for page */
  174. Page page; /* to operate */
  175. Buffer buffer; /* to write after all proceed */
  176. struct SplitedPageLayout *next;
  177. } SplitedPageLayout;
  178. /*
  179. * GISTInsertStack used for locking buffers and transfer arguments during
  180. * insertion
  181. */
  182. typedef struct GISTInsertStack
  183. {
  184. /* current page */
  185. BlockNumber blkno;
  186. Buffer buffer;
  187. Page page;
  188. /*
  189. * log sequence number from page->lsn to recognize page update and compare
  190. * it with page's nsn to recognize page split
  191. */
  192. GistNSN lsn;
  193. /*
  194. * If set, we split the page while descending the tree to find an
  195. * insertion target. It means that we need to retry from the parent,
  196. * because the downlink of this page might no longer cover the new key.
  197. */
  198. bool retry_from_parent;
  199. /* offset of the downlink in the parent page, that points to this page */
  200. OffsetNumber downlinkoffnum;
  201. /* pointer to parent */
  202. struct GISTInsertStack *parent;
  203. } GISTInsertStack;
  204. /* Working state and results for multi-column split logic in gistsplit.c */
  205. typedef struct GistSplitVector
  206. {
  207. GIST_SPLITVEC splitVector; /* passed to/from user PickSplit method */
  208. Datum spl_lattr[INDEX_MAX_KEYS]; /* Union of subkeys in
  209. * splitVector.spl_left */
  210. bool spl_lisnull[INDEX_MAX_KEYS];
  211. Datum spl_rattr[INDEX_MAX_KEYS]; /* Union of subkeys in
  212. * splitVector.spl_right */
  213. bool spl_risnull[INDEX_MAX_KEYS];
  214. bool *spl_dontcare; /* flags tuples which could go to either side
  215. * of the split for zero penalty */
  216. } GistSplitVector;
  217. typedef struct
  218. {
  219. Relation r;
  220. Relation heapRel;
  221. Size freespace; /* free space to be left */
  222. bool is_build;
  223. GISTInsertStack *stack;
  224. } GISTInsertState;
  /* root page of a gist index */
  #define GIST_ROOT_BLKNO 0
  /*
   * Before PostgreSQL 9.1, we used to rely on so-called "invalid tuples" on
   * inner pages to finish crash recovery of incomplete page splits.  If a crash
   * happened in the middle of a page split, so that the downlink pointers were
   * not yet inserted, crash recovery inserted a special downlink pointer.  The
   * semantics of an invalid tuple was that if you encounter one in a scan,
   * it must always be followed, because we don't know if the tuples on the
   * child page match or not.
   *
   * We no longer create such invalid tuples, we now mark the left-half of such
   * an incomplete split with the F_FOLLOW_RIGHT flag instead, and finish the
   * split properly the next time we need to insert on that page.  To retain
   * on-disk compatibility for the sake of pg_upgrade, we still store 0xffff as
   * the offset number of all inner tuples.  If we encounter any invalid tuples
   * with 0xfffe during insertion, we throw an error, though scans still handle
   * them.  You should only encounter invalid tuples if you pg_upgrade a pre-9.1
   * gist index which already has invalid tuples in it because of a crash.  That
   * should be rare, and you are recommended to REINDEX anyway if you have any
   * invalid tuples in an index, so throwing an error is as far as we go with
   * supporting that.
   */
  #define TUPLE_IS_VALID      0xffff
  #define TUPLE_IS_INVALID    0xfffe

  /* Tests whether the offset number stored in the tuple's t_tid marks it invalid */
  #define GistTupleIsInvalid(itup) \
      (ItemPointerGetOffsetNumber(&((itup)->t_tid)) == TUPLE_IS_INVALID)

  /* Stores the "valid" marker as the offset number of the tuple's t_tid */
  #define GistTupleSetValid(itup) \
      ItemPointerSetOffsetNumber(&((itup)->t_tid), TUPLE_IS_VALID)
  252. /*
  253. * A buffer attached to an internal node, used when building an index in
  254. * buffering mode.
  255. */
  256. typedef struct
  257. {
  258. BlockNumber nodeBlocknum; /* index block # this buffer is for */
  259. int32 blocksCount; /* current # of blocks occupied by buffer */
  260. BlockNumber pageBlocknum; /* temporary file block # */
  261. GISTNodeBufferPage *pageBuffer; /* in-memory buffer page */
  262. /* is this buffer queued for emptying? */
  263. bool queuedForEmptying;
  264. /* is this a temporary copy, not in the hash table? */
  265. bool isTemp;
  266. int level; /* 0 == leaf */
  267. } GISTNodeBuffer;
  /*
   * Does specified level have buffers?  (Beware of multiple evaluation of
   * arguments.)
   *
   * True for any non-zero level that is a multiple of levelStep, except the
   * root level.
   */
  #define LEVEL_HAS_BUFFERS(nlevel, gfbb) \
      ((nlevel) != 0 && (nlevel) % (gfbb)->levelStep == 0 && \
       (nlevel) != (gfbb)->rootlevel)

  /* Is specified buffer at least half-filled (should be queued for emptying)? */
  #define BUFFER_HALF_FILLED(nodeBuffer, gfbb) \
      ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer / 2)

  /*
   * Is specified buffer full?  Our buffers can actually grow indefinitely,
   * beyond the "maximum" size, so this just means whether the buffer has grown
   * beyond the nominal maximum size.
   */
  #define BUFFER_OVERFLOWED(nodeBuffer, gfbb) \
      ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer)
  285. /*
  286. * Data structure with general information about build buffers.
  287. */
  288. typedef struct GISTBuildBuffers
  289. {
  290. /* Persistent memory context for the buffers and metadata. */
  291. MemoryContext context;
  292. BufFile *pfile; /* Temporary file to store buffers in */
  293. long nFileBlocks; /* Current size of the temporary file */
  294. /*
  295. * resizable array of free blocks.
  296. */
  297. long *freeBlocks;
  298. int nFreeBlocks; /* # of currently free blocks in the array */
  299. int freeBlocksLen; /* current allocated length of the array */
  300. /* Hash for buffers by block number */
  301. HTAB *nodeBuffersTab;
  302. /* List of buffers scheduled for emptying */
  303. List *bufferEmptyingQueue;
  304. /*
  305. * Parameters to the buffering build algorithm. levelStep determines which
  306. * levels in the tree have buffers, and pagesPerBuffer determines how
  307. * large each buffer is.
  308. */
  309. int levelStep;
  310. int pagesPerBuffer;
  311. /* Array of lists of buffers on each level, for final emptying */
  312. List **buffersOnLevels;
  313. int buffersOnLevelsLen;
  314. /*
  315. * Dynamically-sized array of buffers that currently have their last page
  316. * loaded in main memory.
  317. */
  318. GISTNodeBuffer **loadedBuffers;
  319. int loadedBuffersCount; /* # of entries in loadedBuffers */
  320. int loadedBuffersLen; /* allocated size of loadedBuffers */
  321. /* Level of the current root node (= height of the index tree - 1) */
  322. int rootlevel;
  323. } GISTBuildBuffers;
  324. /*
  325. * Storage type for GiST's reloptions
  326. */
  327. typedef struct GiSTOptions
  328. {
  329. int32 vl_len_; /* varlena header (do not touch directly!) */
  330. int fillfactor; /* page fill factor in percent (0..100) */
  331. int bufferingModeOffset; /* use buffering build? */
  332. } GiSTOptions;
  333. /* gist.c */
  334. extern void gistbuildempty(Relation index);
  335. extern bool gistinsert(Relation r, Datum *values, bool *isnull,
  336. ItemPointer ht_ctid, Relation heapRel,
  337. IndexUniqueCheck checkUnique,
  338. struct IndexInfo *indexInfo);
  339. extern MemoryContext createTempGistContext(void);
  340. extern GISTSTATE *initGISTstate(Relation index);
  341. extern void freeGISTstate(GISTSTATE *giststate);
  342. extern void gistdoinsert(Relation r,
  343. IndexTuple itup,
  344. Size freespace,
  345. GISTSTATE *GISTstate,
  346. Relation heapRel,
  347. bool is_build);
  348. /* A List of these is returned from gistplacetopage() in *splitinfo */
  349. typedef struct
  350. {
  351. Buffer buf; /* the split page "half" */
  352. IndexTuple downlink; /* downlink for this half. */
  353. } GISTPageSplitInfo;
  354. extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
  355. Buffer buffer,
  356. IndexTuple *itup, int ntup,
  357. OffsetNumber oldoffnum, BlockNumber *newblkno,
  358. Buffer leftchildbuf,
  359. List **splitinfo,
  360. bool markleftchild,
  361. Relation heapRel,
  362. bool is_build);
  363. extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
  364. int len, GISTSTATE *giststate);
  365. /* gistxlog.c */
  366. extern XLogRecPtr gistXLogPageDelete(Buffer buffer,
  367. FullTransactionId xid, Buffer parentBuffer,
  368. OffsetNumber downlinkOffset);
  369. extern void gistXLogPageReuse(Relation rel, BlockNumber blkno,
  370. FullTransactionId latestRemovedXid);
  371. extern XLogRecPtr gistXLogUpdate(Buffer buffer,
  372. OffsetNumber *todelete, int ntodelete,
  373. IndexTuple *itup, int ntup,
  374. Buffer leftchild);
  375. extern XLogRecPtr gistXLogDelete(Buffer buffer, OffsetNumber *todelete,
  376. int ntodelete, TransactionId latestRemovedXid);
  377. extern XLogRecPtr gistXLogSplit(bool page_is_leaf,
  378. SplitedPageLayout *dist,
  379. BlockNumber origrlink, GistNSN oldnsn,
  380. Buffer leftchild, bool markfollowright);
  381. /* gistget.c */
  382. extern bool gistgettuple(IndexScanDesc scan, ScanDirection dir);
  383. extern int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
  384. extern bool gistcanreturn(Relation index, int attno);
  385. /* gistvalidate.c */
  386. extern bool gistvalidate(Oid opclassoid);
  /* gistutil.c */

  /*
   * Block size minus the page header and the MAXALIGN'ed GiST opaque area.
   */
  #define GiSTPageSize \
      (BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)))

  /* Fillfactor constants (NOTE(review): presumably percentages -- confirm) */
  #define GIST_MIN_FILLFACTOR         10
  #define GIST_DEFAULT_FILLFACTOR     90
  392. extern bytea *gistoptions(Datum reloptions, bool validate);
  393. extern bool gistproperty(Oid index_oid, int attno,
  394. IndexAMProperty prop, const char *propname,
  395. bool *res, bool *isnull);
  396. extern bool gistfitpage(IndexTuple *itvec, int len);
  397. extern bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace);
  398. extern void gistcheckpage(Relation rel, Buffer buf);
  399. extern Buffer gistNewBuffer(Relation r);
  400. extern bool gistPageRecyclable(Page page);
  401. extern void gistfillbuffer(Page page, IndexTuple *itup, int len,
  402. OffsetNumber off);
  403. extern IndexTuple *gistextractpage(Page page, int *len /* out */ );
  404. extern IndexTuple *gistjoinvector(IndexTuple *itvec, int *len,
  405. IndexTuple *additvec, int addlen);
  406. extern IndexTupleData *gistfillitupvec(IndexTuple *vec, int veclen, int *memlen);
  407. extern IndexTuple gistunion(Relation r, IndexTuple *itvec,
  408. int len, GISTSTATE *giststate);
  409. extern IndexTuple gistgetadjusted(Relation r,
  410. IndexTuple oldtup,
  411. IndexTuple addtup,
  412. GISTSTATE *giststate);
  413. extern IndexTuple gistFormTuple(GISTSTATE *giststate,
  414. Relation r, Datum *attdata, bool *isnull, bool isleaf);
  415. extern OffsetNumber gistchoose(Relation r, Page p,
  416. IndexTuple it,
  417. GISTSTATE *giststate);
  418. extern void GISTInitBuffer(Buffer b, uint32 f);
  419. extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
  420. Datum k, Relation r, Page pg, OffsetNumber o,
  421. bool l, bool isNull);
  422. extern float gistpenalty(GISTSTATE *giststate, int attno,
  423. GISTENTRY *key1, bool isNull1,
  424. GISTENTRY *key2, bool isNull2);
  425. extern void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
  426. Datum *attr, bool *isnull);
  427. extern bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b);
  428. extern void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
  429. OffsetNumber o, GISTENTRY *attdata, bool *isnull);
  430. extern HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r,
  431. IndexTuple tuple);
  432. extern void gistMakeUnionKey(GISTSTATE *giststate, int attno,
  433. GISTENTRY *entry1, bool isnull1,
  434. GISTENTRY *entry2, bool isnull2,
  435. Datum *dst, bool *dstisnull);
  436. extern XLogRecPtr gistGetFakeLSN(Relation rel);
  437. /* gistvacuum.c */
  438. extern IndexBulkDeleteResult *gistbulkdelete(IndexVacuumInfo *info,
  439. IndexBulkDeleteResult *stats,
  440. IndexBulkDeleteCallback callback,
  441. void *callback_state);
  442. extern IndexBulkDeleteResult *gistvacuumcleanup(IndexVacuumInfo *info,
  443. IndexBulkDeleteResult *stats);
  444. /* gistsplit.c */
  445. extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup,
  446. int len, GISTSTATE *giststate,
  447. GistSplitVector *v,
  448. int attno);
  449. /* gistbuild.c */
  450. extern IndexBuildResult *gistbuild(Relation heap, Relation index,
  451. struct IndexInfo *indexInfo);
  452. extern void gistValidateBufferingOption(const char *value);
  453. /* gistbuildbuffers.c */
  454. extern GISTBuildBuffers *gistInitBuildBuffers(int pagesPerBuffer, int levelStep,
  455. int maxLevel);
  456. extern GISTNodeBuffer *gistGetNodeBuffer(GISTBuildBuffers *gfbb,
  457. GISTSTATE *giststate,
  458. BlockNumber blkno, int level);
  459. extern void gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb,
  460. GISTNodeBuffer *nodeBuffer, IndexTuple item);
  461. extern bool gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb,
  462. GISTNodeBuffer *nodeBuffer, IndexTuple *item);
  463. extern void gistFreeBuildBuffers(GISTBuildBuffers *gfbb);
  464. extern void gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb,
  465. GISTSTATE *giststate, Relation r,
  466. int level, Buffer buffer,
  467. List *splitinfo);
  468. extern void gistUnloadNodeBuffers(GISTBuildBuffers *gfbb);
  469. #endif /* GIST_PRIVATE_H */
上海开阖软件有限公司 沪ICP备12045867号-1