-
Notifications
You must be signed in to change notification settings - Fork 0
/
kNNJoin.c
13683 lines (11193 loc) · 452 KB
/
kNNJoin.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* this extension is for building a custom scan node to implement the K-inCircle search
* algorithm mentioned in the paper
*/
#define _GNU_SOURCE
#include "postgres.h"
#include "miscadmin.h"
#include "fmgr.h"
#include "pgstat.h"
#include <sys/time.h>
#include <time.h>
#include <math.h>
#include "utils/elog.h"
#include "utils/geo_decls.h"
#include "utils/lsyscache.h"
#include "utils/relcache.h"
#include "utils/rel.h"
#include "utils/memutils.h"
#include "utils/builtins.h"
#include "utils/geo_decls.h"
#include "utils/datum.h"
#include "utils/index_selfuncs.h"
#include "utils/selfuncs.h"
#include "utils/pg_locale.h"
#include "libpq/pqformat.h" /* needed for send/recv functions */
#include "libpq/libpq.h"
#include "optimizer/planner.h"
#include "optimizer/paths.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/var.h"
#include "optimizer/planmain.h"
#include "optimizer/subselect.h"
#include "optimizer/tlist.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/placeholder.h"
#include "catalog/pg_class.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "nodes/print.h"
#include "nodes/pg_list.h"
#include "nodes/nodes.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/bitmapset.h"
#include "nodes/extensible.h"
#include "access/sysattr.h"
#include "access/parallel.h"
#include "access/xact.h"
#include "access/gist.h"
#include "access/relscan.h"
#include "access/amapi.h"
#include "access/spgist_private.h"
#include "executor/executor.h"
#include "executor/nodeIndexscan.h"
#include "rewrite/rewriteManip.h"
#include "parser/parsetree.h"
#include "storage/dsm_impl.h"
#include "storage/off.h"
#include "storage/bufpage.h"
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
//#ifdef PG_MODULE_MAGIC
/* >= 8.2 */
PG_MODULE_MAGIC;
//#endif
#ifndef OPTIMIZER_DEBUG
#define OPTIMIZER_DEBUG
#endif
#undef SPGISTNProc
#define SPGISTNProc 6
#define SPGIST_DISTANCE_POINT_PROC 6
// #define MY_SPGIST_POINT_DISTANCE 6
// ================================
// For hook functions
//=================================
static planner_hook_type prev_planner = NULL;
static set_rel_pathlist_hook_type prev_set_rel_pathlist = NULL;
static join_search_hook_type prev_join_search_hook = NULL;
static set_join_pathlist_hook_type prev_set_join_pathlist_hook = NULL;
static create_upper_paths_hook_type prev_create_upper_paths_hook = NULL;
static ExecutorStart_hook_type prev_ExecutorStart_hook = NULL;
static ExecutorRun_hook_type prev_ExecutorRun_hook = NULL;
static ExecutorFinish_hook_type prev_ExecutorFinish_hook = NULL;
static ExecutorEnd_hook_type prev_ExecutorEnd_hook = NULL;
void _PG_init(void);
void _PG_fini(void);
static PlannedStmt *myplanner(Query *parse, int cursorOptions,
ParamListInfo boundParams);
static void my_set_relpathlist(PlannerInfo *root,
RelOptInfo *rel,
Index rti,
RangeTblEntry *rte);
static RelOptInfo * my_join_search_hook(PlannerInfo *root,
int levels_needed,
List *initial_rels);
static void my_set_join_pathlist_hook(PlannerInfo * root,
RelOptInfo * joinrel,
RelOptInfo * outerrel,
RelOptInfo * innerrel,
JoinType jointype,
JoinPathExtraData *extra);
static void my_create_upper_paths_hook (PlannerInfo *root,
UpperRelationKind stage,
RelOptInfo *input_rel,
RelOptInfo *output_rel);
static void my_ExecutorStart_hook(QueryDesc *queryDesc, int eflags);
//Version 9.6.4
static void my_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 count);
//Version 4
//void my_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once);
static void my_ExecutorFinish_hook();
static void my_ExecutorEnd_hook();
void _PG_init(void)
{
// install hooks
elog(NOTICE, "\nKNN Hook lib is loaded\n\n\n");
prev_planner = planner_hook;
planner_hook = myplanner;
prev_set_rel_pathlist = set_rel_pathlist_hook;
set_rel_pathlist_hook = my_set_relpathlist;
prev_create_upper_paths_hook = create_upper_paths_hook;
create_upper_paths_hook = my_create_upper_paths_hook;
prev_join_search_hook = join_search_hook;
join_search_hook = my_join_search_hook;
prev_set_join_pathlist_hook = set_join_pathlist_hook;
set_join_pathlist_hook = my_set_join_pathlist_hook;
prev_ExecutorStart_hook = ExecutorStart_hook;
ExecutorStart_hook = my_ExecutorStart_hook;
prev_ExecutorRun_hook = ExecutorRun_hook;
ExecutorRun_hook = my_ExecutorRun_hook;
// prev_ExecutorFinish_hook = ExecutorFinish_hook;
// ExecutorFinish_hook = my_ExecutorFinish_hook;
// prev_ExecutorEnd_hook = ExecutorEnd_hook;
// ExecutorEnd_hook = my_ExecutorEnd_hook;
}
void
_PG_fini(void)
{
/* Uninstall hooks. */
planner_hook = prev_planner;
set_rel_pathlist_hook = prev_set_rel_pathlist;
create_upper_paths_hook = prev_create_upper_paths_hook;
join_search_hook = prev_join_search_hook;
}
//===========================================
// other DataStructure Definitions
//===========================================
typedef struct
{
pairingheap_node ph_node;
HeapTuple htup;
Datum *orderbyvals;
bool *orderbynulls;
} ReorderTuple;
//-----------------
typedef struct KInCircle_data
{
int k ;
IndexScan indexscanNode;
// OpExpr* opr; // to hold the <-> operator with both arguments
// Oid indexid; /* OID of index to scan */
} KInCircle_data;
//-----------------
//define a larger sttructure for customscanstate
typedef struct KInCircleState
{
CustomScanState base;
IndexScanState indexstate;
//List *indexorderbyorig;
//Relation iss_RelationDesc; // Index Realation Desc
Oid indexid; /* OID of index to scan */
} KInCircleState;
typedef struct CacheCustomScanState
{
CustomScanState base;
/* define the hash table for the caching */
} CacheCustomScanState;
/*
* During a GiST index search, we must maintain a queue of unvisited items,
* which can be either individual heap tuples or whole index pages. If it
* is an ordered search, the unvisited items should be visited in distance
* order. Unvisited items at the same distance should be visited in
* depth-first order, that is heap items first, then lower index pages, then
* upper index pages; this rule avoids doing extra work during a search that
* ends early due to LIMIT.
*
* To perform an ordered search, we use a pairing heap to manage the
* distance-order queue. In a non-ordered search (no order-by operators),
* we use it to return heap tuples before unvisited index pages, to
* ensure depth-first order, but all entries are otherwise considered
* equal.
*/
/* Individual heap tuple to be visited */
typedef struct SPGISTSearchHeapItem
{
ItemPointerData heapPtr;
bool recheck; /* T if quals must be rechecked */
bool recheckDistances; /* T if distances must be rechecked */
IndexTuple ftup; /* data fetched back from the index, used in
* index-only scans */
OffsetNumber offnum; /* track offset in page to mark tuple as
* LP_DEAD */
} SPGISTSearchHeapItem;
/* Unvisited item, either index page or heap tuple */
typedef struct SPGISTSearchItem
{
pairingheap_node phNode;
BlockNumber blkno; /* index page number, or InvalidBlockNumber */
ItemPointerData ptr; /* block and offset to scan from */
int level;
Point P_center; /* parent center */
Point P_min, P_max; /* corner points of parent bounding box*/
union
{
//GistNSN parentlsn; /* parent page's LSN, if index page */
/* we must store parentlsn to detect whether a split occurred */
SPGISTSearchHeapItem heap; /* heap info, if heap tuple */
} data;
double distances[FLEXIBLE_ARRAY_MEMBER]; /* numberOfOrderBys
* entries */
} SPGISTSearchItem;
#define SPGISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
// #define SPGISTSearchItemIsHeap(item) ((item).ptr. == InvalidBlockNumber)
#define SizeOfSPGISTSearchItem(n_distances) (offsetof(SPGISTSearchItem, distances) + sizeof(double) * (n_distances))
typedef struct my_SpGistScanOpaqueData
{
/* This is the original defenition of the opaque data*/
SpGistState state; /* see above */
MemoryContext tempCxt; /* short-lived memory context */
/* Control flags showing whether to search nulls and/or non-nulls */
bool searchNulls; /* scan matches (all) null entries */
bool searchNonNulls; /* scan matches (some) non-null entries */
/* Index quals to be passed to opclass (null-related quals removed) */
int numberOfKeys; /* number of index qualifier conditions */
ScanKey keyData; /* array of index qualifier descriptors */
/* Stack of yet-to-be-visited pages */
List *scanStack; /* List of ScanStackEntrys */
/* These fields are only used in amgetbitmap scans: */
TIDBitmap *tbm; /* bitmap being filled */
int64 ntids; /* number of TIDs passed to bitmap */
/* These fields are only used in amgettuple scans: */
bool want_itup; /* are we reconstructing tuples? */
TupleDesc indexTupDesc; /* if so, tuple descriptor for them */
int nPtrs; /* number of TIDs found on current page */
int iPtr; /* index for scanning through same */
ItemPointerData heapPtrs[MaxIndexTuplesPerPage]; /* TIDs from cur page */
bool recheck[MaxIndexTuplesPerPage]; /* their recheck flags */
IndexTuple indexTups[MaxIndexTuplesPerPage]; /* reconstructed tuples */
/* --------------------------------------
* ---- ORDER By support
* -------------------------------------- */
MemoryContext scanCxt; /* context for scan-lifespan data */
Oid *orderByTypes; /* datatypes of ORDER BY expressions */
pairingheap *queue; /* queue of unvisited items */
MemoryContext queueCxt; /* context holding the queue */
bool qual_ok; /* false if qual can never be satisfied */
bool firstCall; /* true until first gistgettuple call */
/* pre-allocated workspace arrays */
double *distances; /* output area for gistindex_keytest */
/* info about killed items if any (killedItems is NULL if never used) */
OffsetNumber *killedItems; /* offset numbers of killed items */
int numKilled; /* number of currently stored items */
BlockNumber curBlkno; /* current number of block */
//GistNSN curPageLSN; /* pos in the WAL stream when page was read */
/* In a non-ordered search, returnable heap items are stored here: */
SPGISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
OffsetNumber nPageData; /* number of valid items in array */
OffsetNumber curPageData; /* next item to return */
} my_SpGistScanOpaqueData;
typedef my_SpGistScanOpaqueData *my_SpGistScanOpaque;
typedef struct mySpGistScanOpaqueData
{
/* This is the original defenition of the opaque data*/
SpGistState state; /* see above */
MemoryContext tempCxt; /* short-lived memory context */
/* Control flags showing whether to search nulls and/or non-nulls */
bool searchNulls; /* scan matches (all) null entries */
bool searchNonNulls; /* scan matches (some) non-null entries */
/* Index quals to be passed to opclass (null-related quals removed) */
int numberOfKeys; /* number of index qualifier conditions */
ScanKey keyData; /* array of index qualifier descriptors */
/* Stack of yet-to-be-visited pages */
List *scanStack; /* List of ScanStackEntrys */
/* These fields are only used in amgetbitmap scans: */
TIDBitmap *tbm; /* bitmap being filled */
int64 ntids; /* number of TIDs passed to bitmap */
/* These fields are only used in amgettuple scans: */
bool want_itup; /* are we reconstructing tuples? */
TupleDesc indexTupDesc; /* if so, tuple descriptor for them */
int nPtrs; /* number of TIDs found on current page */
int iPtr; /* index for scanning through same */
ItemPointerData heapPtrs[MaxIndexTuplesPerPage]; /* TIDs from cur page */
bool recheck[MaxIndexTuplesPerPage]; /* their recheck flags */
IndexTuple indexTups[MaxIndexTuplesPerPage]; /* reconstructed tuples */
/* --------------------------------------
* ---- ORDER By support
* -------------------------------------- */
MemoryContext scanCxt; /* context for scan-lifespan data */
Oid *orderByTypes; /* datatypes of ORDER BY expressions */
pairingheap *queue; /* queue of unvisited items */
MemoryContext queueCxt; /* context holding the queue */
bool qual_ok; /* false if qual can never be satisfied */
bool firstCall; /* true until first gistgettuple call */
/* pre-allocated workspace arrays */
double *distances; /* output area for gistindex_keytest */
/* info about killed items if any (killedItems is NULL if never used) */
OffsetNumber *killedItems; /* offset numbers of killed items */
int numKilled; /* number of currently stored items */
BlockNumber curBlkno; /* current number of block */
//GistNSN curPageLSN; /* pos in the WAL stream when page was read */
/* In a non-ordered search, returnable heap items are stored here: */
// SPGISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
// OffsetNumber nPageData; /* number of valid items in array */
// OffsetNumber curPageData; /* next item to return */
} mySpGistScanOpaqueData;
typedef mySpGistScanOpaqueData *mySpGistScanOpaque;
typedef struct {
CustomPath custompath;
JoinType jointype;
Path *outerjoinpath; /* path for the outer side of the join */
Path *innerjoinpath; /* path for the inner side of the join */
List *joinrestrictinfo; /* RestrictInfos to apply to join */
}KInCircleJoinPath;
//===========================================
// other functions definitions
//===========================================
PlannerInfo * my_set_subquery_pathlist(PlannerInfo *root,RelOptInfo *rel, Index rti, RangeTblEntry *rte);
void my_set_Customsubquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte);
void my_set_relpathlist2(PlannerInfo *root,RelOptInfo *rel,Index rti,RangeTblEntry *rte);
void my_set_relpathlist3(PlannerInfo *root,RelOptInfo *rel, Index rti, RangeTblEntry *rte);
void my_set_relpathlist4(PlannerInfo *root,RelOptInfo *rel,Index rti,RangeTblEntry *rte);
void my_set_relpathlist5(PlannerInfo *root,RelOptInfo *rel,Index rti,RangeTblEntry *rte);
void my_set_relpathlist6(PlannerInfo *root,RelOptInfo *rel,Index rti,RangeTblEntry *rte);
// debug functions
void my_debug_print_rel(PlannerInfo *root, RelOptInfo *rel);
static void my_print_path(PlannerInfo *root, Path *path, int indent);
static void my_print_restrictclauses(PlannerInfo *root, List *clauses);
static void my_print_relids(PlannerInfo *root, Relids relids);
//helper functions for re-planning
void my_recurse_push_qual(Node *setOp, Query *topquery, RangeTblEntry *rte, Index rti, Node *qual);
void my_subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual);
void my_remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel);
// static void my_get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info, QualCost *qpqual_cost);
//CustomPath * create_KInCircle_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer, int parallel_workers, OpExpr* KNN_op , int k , List *pathkeys ,Path *subpath);
CustomPath * create_KInCircle_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer, OpExpr* KNN_op , int k ,Path *subpath);
CustomPath * create_basicCustomScan_path(PlannerInfo *root, RelOptInfo *rel , Path * child_path, OpExpr* KNN_op, int k);
// CustomPath * create_cacheCustomScan_path(PlannerInfo *root, RelOptInfo *rel , List * child_path);
CustomPath * replace_cacheCustomScan_path(PlannerInfo *root, RelOptInfo *rel , Path * currPath, List * child_path);
CustomPath * create_materialCustom_path(Path* currentPath , Path* childPath);
CustomPath * create_materialCustom_path2(Path* currentPath , Path* childPath);
CustomPath * create_materialGenericCustom_path(Path* currentPath , Path* childPath);
CustomPath * create_CacheGenericCustom_path(Path* currentPath , Path* childPath);
// KInCircleJoinPath * create_kInCircle_Join_path(PlannerInfo *root, JoinPath * currentPath);
CustomPath * create_kInCircle_Join_path(PlannerInfo *root, JoinPath * currentPath);
CustomPath* create_kInCircle_Join_path2(PlannerInfo *root, JoinPath * currentPath, Path * innerPath);
static Plan * Plan_KInCirclePath(PlannerInfo *root,RelOptInfo *rel,struct CustomPath *best_path,List *tlist,List *clauses,List *custom_plans);
static Plan * Plan_BasicCustomPath(PlannerInfo *root, RelOptInfo *rel, struct CustomPath *best_path, List *tlist, List *clauses, List *custom_plans);
static Plan * Plan_CacheCustomPath(PlannerInfo *root, RelOptInfo *rel, struct CustomPath *best_path, List *tlist, List *clauses, List *custom_plans);
static Plan * Plan_CacheCustomPath2(PlannerInfo *root, RelOptInfo *rel, struct CustomPath *best_path, List *tlist, List *clauses, List *custom_plans);
static Plan * Plan_MaterialCustomPath(PlannerInfo *root, RelOptInfo *rel, struct CustomPath *best_path, List *tlist, List *clauses, List *custom_plans);
static Plan * Plan_MaterialGenericCustomPath(PlannerInfo *root, RelOptInfo *rel, struct CustomPath *best_path, List *tlist, List *clauses, List *custom_plans);
static Plan * Plan_CacheGenericCustomPath(PlannerInfo *root, RelOptInfo *rel, struct CustomPath *best_path, List *tlist, List *clauses, List *custom_plans);
static Plan * Plan_KInCircleJoinCustomPath(PlannerInfo *root, RelOptInfo *rel, struct CustomPath *best_path, List *tlist, List *clauses, List *custom_plans);
Node *create_KInCircleScan_state(CustomScan *cscan);
Node *create_BasicCustomScan_state(CustomScan *cscan);
Node *create_CacheCustomScan_state(CustomScan *cscan);
Node *create_CacheCustomScan_state2(CustomScan *cscan);
Node *create_MaterialCustomScan_state(CustomScan *cscan);
Node *create_MaterialGenericCustomScan_state(CustomScan *cscan);
Node *create_CacheGenericCustomScan_state(CustomScan *cscan);
Node *create_KInCircleJoinCustomScan_state(CustomScan *cscan);
void Begin_KInCircleScan (CustomScanState *node, EState *estate, int eflags);
void Begin_BasicCustomScan (CustomScanState *node, EState *estate, int eflags);
void Begin_CacheCustomScan (CustomScanState *node, EState *estate, int eflags);
void Begin_MaterialCustomScan2 (CustomScanState *node, EState *estate, int eflags);
void Begin_MaterialGenericCustomScan (CustomScanState *node, EState *estate, int eflags);
void Begin_CacheGenericCustomScan (CustomScanState *node, EState *estate, int eflags);
void Begin_KInCircleJoinCustomScan (CustomScanState *node, EState *estate, int eflags);
TupleTableSlot * Exec_KInCircleScan (CustomScanState *node);
TupleTableSlot * Exec_BasicCustomScan (CustomScanState *node);
TupleTableSlot * Exec_CacheCustomScan (CustomScanState *node);
TupleTableSlot * Exec_MaterialCustomScan (CustomScanState *node);
TupleTableSlot * Exec_MaterialGenericCustomScan (CustomScanState *node);
TupleTableSlot * Exec_CacheGenericCustomScan (CustomScanState *node);
TupleTableSlot * Exec_KInCircleJoinCustomScan (CustomScanState *node);
void End_KInCircleScan (CustomScanState *node);
void End_BasicCustomScan (CustomScanState *node);
void End_CacheCustomScan (CustomScanState *node);
void End_MaterialCustomScan (CustomScanState *node);
void End_MaterialGenericCustomScan (CustomScanState *node);
void End_CacheGenericCustomScan (CustomScanState *node);
void End_KInCircleJoinCustomScan (CustomScanState *node);
void ReScan_KInCircleScan (CustomScanState *node);
void ReScan_BasicCustomScan (CustomScanState *node);
void ReScan_CacheCustomScan (CustomScanState *node);
void ReScan_MaterialCustomScan (CustomScanState *node);
void ReScan_MaterialGenericCustomScan (CustomScanState *node);
void ReScan_CacheGenericCustomScan (CustomScanState *node);
void ReScan_KInCircleJoinCustomScan (CustomScanState *node);
static bool BasicCustomRecheck(CustomScanState *node, TupleTableSlot *slot);
static bool KInCircle_Recheck(CustomScanState *node, TupleTableSlot *slot);
static bool CacheCustomRecheck(CustomScanState *node, TupleTableSlot *slot);
static bool MaterialCustomRecheck(CustomScanState *node, TupleTableSlot *slot);
static bool MaterialGenericCustomRecheck(CustomScanState *node, TupleTableSlot *slot);
static bool CacheGenericCustomRecheck(CustomScanState *node, TupleTableSlot *slot);
static TupleTableSlot * KInCircle_Next(CustomScanState *node);
static TupleTableSlot * BasicCustomNext(CustomScanState *node);
static TupleTableSlot * CacheCustomNext(CustomScanState *node);
static TupleTableSlot * MaterialCustomNext(CustomScanState *node);
static TupleTableSlot * MaterialGenericCustomNext(CustomScanState *node);
static TupleTableSlot * CacheGenericCustomNext(CustomScanState *node);
static void Explain_CacheCustomScan(CustomScanState *node, List *ancestors, ExplainState *es);
static void Explain_MaterialGenericCustomScan(CustomScanState *node, List *ancestors, ExplainState *es);
static void Explain_CacheGenericCustomScan(CustomScanState *node, List *ancestors, ExplainState *es);
Path * walkPath( Path * pathRoot );
void walkPlan( Plan * planRoot, List ** custom_plans );
void walkPlanState( PlanState * planRoot, List ** custom_plans );
void cost_KInCircleScan(CustomPath *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info , int k);
static CustomScan * make_customScan(List *tlist, List *qual, Index scanrelid, List * custom_plans, List * custom_private);
void my_ExecIndexBuildScanKeys(PlanState *planstate, Relation index, List *quals, bool isorderby, ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, IndexArrayKeyInfo **arrayKeys, int *numArrayKeys);
static int pairingheap_SpGISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg);
void my_spgrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys);
void myspgrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, ScanKey orderbys, int norderbys);
void my_index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys);
IndexScanDesc my_spgbeginscan(Relation rel, int keysz, int orderbysz);
IndexScanDesc myspgbeginscan(Relation rel, int keysz, int orderbysz);
// static IndexScanDesc my_index_beginscan_internal(Relation indexRelation, int nkeys, int norderbys, Snapshot snapshot);
IndexScanDesc my_index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, int norderbys);
// static int cmp_orderbyvals(const Datum *adist, const bool *anulls, const Datum *bdist, const bool *bnulls, IndexScanState *node);
// static int reorderqueue_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg);
// static HeapTuple reorderqueue_pop(IndexScanState *node);
// static void EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext);
// static void reorderqueue_push(IndexScanState *node, HeapTuple tuple, Datum *orderbyvals, bool *orderbynulls);
// ItemPointer my_index_getnext_tid(IndexScanDesc scan, ScanDirection direction);
// HeapTuple my_index_getnext(IndexScanDesc scan, ScanDirection direction);
// static void resetSpGistScanOpaque(my_SpGistScanOpaque so);
// static void spgPrepareScanKeys(IndexScanDesc scan);
// static void spgistScanPage(IndexScanDesc scan, SPGISTSearchItem *pageItem);
/* this function scans the index and compute the distance from the bounding boxes computed by compute Bounding Box function*/
// static void spgistScanPage2(IndexScanDesc scan, SPGISTSearchItem *pageItem, bool *);
/* this function should scan the index and compute the distance from the corner points that are calculated on the fly */
static void spgistScanPage3(IndexScanDesc scan, SPGISTSearchItem *pageItem, bool *);
// bool my_spggettuple(IndexScanDesc scan, ScanDirection dir);
bool myspggettuple(IndexScanDesc scan, ScanDirection dir);
// static bool spgistindex_keytest(IndexScanDesc scan, IndexTuple tuple, Page page, OffsetNumber offset, bool *recheck_p, bool *recheck_distances_p);
// static bool spgistindex_keytest_computeDistance(IndexScanDesc scan, SpGistNodeTuple tuple, Page page, OffsetNumber offset, bool *recheck_p, bool *recheck_distances_p);
// static bool my_computeDistance(IndexScanDesc scan, SPGISTSearchItem * item, int which, bool isLeaf , Point * leafVal);
static SPGISTSearchItem * getNextSPGISTSearchItem(mySpGistScanOpaque so);
static bool my_getNextNearest(IndexScanDesc scan);
// static void spgistGetBlock(IndexScanDesc scan, SPGISTSearchItem *pageItem, bool *);
// bool my_spggettuple2(IndexScanDesc scan, ScanDirection dir);
BOX Compute_BoundingBox(ItemPointer itptr, Relation index );
void start_Compute_BoundingBox(Relation index, Oid indexid);
void Compute_BoundingBoxes(Relation index);
PG_FUNCTION_INFO_V1(myspgist_point_distance);
Datum myspgist_point_distance(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(myspghandler);
Datum myspghandler(PG_FUNCTION_ARGS);
void myspgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Cost *indexStartupCost, Cost *indexTotalCost,
Selectivity *indexSelectivity, double *indexCorrelation);
//=========================================
// Catalog builder
//=========================================
//----------------------------
/*
catalog Builder for kNN-Select Pre-filter
*/
// Build Catalog function
// Input : index name
#include "catalog/pg_am.h"
#include "catalog/namespace.h"
#define MAX_NO_LEAF_PAGE 50000
#define MAX_NO_LEAF_OFFSETS 2000
#define MAX_SIZE_KEY_CATALOG 100001
#define MAX_NO_POINTS_BLOCK 10000
#define MAX_K 10000
#define PAIRINGHEAP_DEBUG
#define max(a,b) \
({ __typeof__ (a) _a = (a); \
__typeof__ (b) _b = (b); \
_a > _b ? _a : _b; })
#define min(a,b) \
({ __typeof__ (a) _a = (a); \
__typeof__ (b) _b = (b); \
_a < _b ? _a : _b; })
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX) // rd_rel relation typle from pg_class
#define IS_SPGIST(r) ((r)->rd_rel->relam == SPGIST_AM_OID)
#define SPGIST2_AM_OID 65731 // MAC
//#define SPGIST2_AM_OID 24590 // Linux Server
#define IS_SPGIST2(r) ((r)->rd_rel->relam == SPGIST2_AM_OID)
/* instead of storing the catalog as ([K1,K2], cost)
store the catalog as an array of K2 values only , and the cost array is the corresponding cost for each K
apply binary search on this ordered array to find the cost for the specific k */
typedef struct {
int size;
int key[MAX_SIZE_KEY_CATALOG];
int cost[MAX_SIZE_KEY_CATALOG];
} CATALOG;
// all the info and statistics required to be known about the data block
typedef struct {
/* related to the tree*/
BlockNumber blkno; /* index page number, or InvalidBlockNumber */
OffsetNumber offset[MAX_NO_LEAF_PAGE];
ItemPointerData ptr[MAX_NO_LEAF_PAGE]; /* block and offset to scan from */
int level;
Point P_center; /* parent center */
Point P_min, P_max; /* corner points of parent bounding box*/
/* related to the catalog-builder logic*/
double dist; // distance between this data block and the center of the block we are focus on
double dist_c1,dist_c2,dist_c3,dist_c4;
CATALOG catalog_center;
CATALOG catalog_corner_UL; // upper Left
CATALOG catalog_corner_UR; // upper Right
CATALOG catalog_corner_LL; // Lower Left
CATALOG catalog_corner_LR; // Lower Right
} dataBlock;
typedef struct {
BlockNumber blkno; /* index page number, or InvalidBlockNumber */
OffsetNumber offset;
ItemPointerData ptr; /* block and offset to scan from */
int level;
Point P_center; /* parent center */
Point P_min, P_max; /* corner points of parent bounding box*/
} stackItemData;
typedef struct
{
pairingheap_node phNode;
double dist;
Point p;
} TuplePoint_info;
typedef struct
{
pairingheap_node phNode;
double dist;
int blkno; // the block no in the data block array so I can retrieve its data
int offset;
} DataBlockHeap_info;
void init_Block_arr(void);
void ReadDataBlocks(Relation index, SpGistState *state);
static int pairingheap_dataBlockCenter_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg);
static int pairingheap_TuplePointInfo_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg);
void fill_blockQ(pairingheap * blockQ , Point q );
void FillTupleQ(pairingheap* tupleQ , int p_blkno, int p_offset , Point q, Relation index, SpGistState *state);
void Fill_catalog_center(pairingheap* blockQ, int blkno, int offset, Point q ,Relation index, SpGistState *state);
void BuildCatalogLogic(SpGistState *state , Relation index);
void print_catalog(CATALOG* _catalog);
void add_newItem_Catalog(CATALOG* _catalog, int cost , int key , int size);
void print_block_arr(Relation index , SpGistState * state);
void print_pairingheap(pairingheap *heap);
// static void print_pairingheap_recurse( pairingheap_node *node, int depth, pairingheap_node *prev_or_parent);
static TuplePoint_info * getNextTuple(pairingheap* tupleQ);
static DataBlockHeap_info * getNextDataBlock(pairingheap* blockQ);
static TuplePoint_info * Tuple_top(pairingheap* tupleQ);
static DataBlockHeap_info * DataBlock_top(pairingheap* blockQ);
PG_FUNCTION_INFO_V1(build_catalog2);
Datum build_catalog2(PG_FUNCTION_ARGS);
// dataBlock *Block_arr[MAX_NO_LEAF_PAGE][MAX_NO_LEAF_PAGE]; // list of the datablocks pointers
static dataBlock *Block_arr[MAX_NO_LEAF_PAGE]; // list of the datablocks pointers
static Point minP;
static Point maxP;
int FindPoint(Relation index , SpGistState *state , Point * queryPoint);
int myspgWalk(Oid oid , Point * queryPoint, char * indexName);
static bool searchCatalog(CATALOG* _catalog, int k, int *i);
int FindCost_catalog(int blkno , int k);
int FindCost_catalogTbl(char * indexName, BlockNumber blkno, int k);
int FindCost_catalogTbl2(char * indexName, BlockNumber blkno, int k);
int FindCost_catalogTbl_Bin(Oid indexoid, BlockNumber blkno, int k);
int ReadGrid(Oid , Point*);
//==========================================
// Function implementations
//==========================================
#define KNN_SELECT_PRE_FILTER false
#define KNN_SELECT_POST_FILTER false
#define KNN_JOIN_PRE_FILTER true
#define KNN_JOIN_POST_FILTER false
/*=====================*/
static void my_ExecutorStart_hook(QueryDesc *queryDesc, int eflags)
{
// elog(NOTICE, "\n\n\n ExecutorStart() - start");
clock_t start, end;
double cpu_time_used;
start = clock();
standard_ExecutorStart(queryDesc, eflags);
// elog(NOTICE, "ExecutorStart() - Finish\n\n\n");
end = clock();
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
// elog(NOTICE, "\n\n\n ExecutorStart() - time = %f secs\n\n\n" , cpu_time_used);
printf("\n%f\n" , cpu_time_used);
}
//Postgresql version 9.6.4
static void my_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
{
// elog(NOTICE, "\n\n\n my_ExecutorRun_hook() - start");
clock_t start, end;
double cpu_time_used;
start = clock();
standard_ExecutorRun(queryDesc, direction, count);
end = clock();
cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
// elog(NOTICE, "\n\n\n ExecutorRun() - time = %f secs\n\n\n" , cpu_time_used);
printf("\n%f\n" , cpu_time_used);
// elog(NOTICE, "my_ExecutorRun_hook() - Finish\n\n\n");
}
//PostgreSQL version 4
// void my_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 count, bool execute_once)
// {
// standard_ExecutorRun(queryDesc, direction, count, execute_once);
// }
static void my_create_upper_paths_hook (PlannerInfo *root,
UpperRelationKind stage,
RelOptInfo *input_rel,
RelOptInfo *output_rel)
{
// printf("\n------------------------------------ my_create_upper_paths_hook:\n\n");
// elog(NOTICE , "my_create_upper_paths_hook --------------- start :");
// switch(stage)
// {
// case UPPERREL_SETOP: /* result of UNION/INTERSECT/EXCEPT, if any */
// printf("UPPERREL_SETOP\n");
// break;
// case UPPERREL_GROUP_AGG: /* result of grouping/aggregation, if any */
// printf("UPPERREL_GROUP_AGG\n");
// elog(NOTICE , "my_create_upper_paths_hook --------------- UPPERREL_GROUP_AGG :");
// break;
// case UPPERREL_WINDOW: result of window functions, if any
// printf("UPPERREL_WINDOW\n");
// break;
// case UPPERREL_DISTINCT: /* result of "SELECT DISTINCT", if any */
// printf("UPPERREL_DISTINCT\n");
// break;
// case UPPERREL_ORDERED:
// printf("UPPERREL_ORDERED\n");
// elog(NOTICE , "my_create_upper_paths_hook --------------- UPPERREL_ORDERED :");
// break;
// case UPPERREL_FINAL:
// printf("UPPERREL_FINAL\n");
// elog(NOTICE , "my_create_upper_paths_hook --------------- UPPERREL_FINAL:");
// //=======================================
// this is for knn-Join pre-Filter
// I need to cache the output of the knn-Join
// {
// RelOptInfo * rel = output_rel;
// ListCell * cell2;
// Path * p = NULL;
// foreach(cell2, rel->pathlist)
// {
// if(IsA( (Path*) lfirst(cell2) , LimitPath))
// {
// p = (Path *) lfirst(cell2);
// break;
// }
// }
// if(p != NULL && IsA( p , LimitPath))
// {
// rel->pathlist = NIL;//list_delete_first(rel->pathlist);
// elog(NOTICE, "\n\nmy_create_upper_paths_hook =========== Add Material Path \n\n");
// printf("\n\nMaterial Path 1\n\n");
// pprint(p);
// MaterialPath * pp = create_material_path(rel ,(Path*) p);
// printf("\n\nMaterial Path\n\n");
// pprint(pp);
// add_path(rel, (Path *) pp);
// p = NULL;
// }
// }
//=======================================
//=======================================
// this is a trial to add materialze above the relational predicate output
// to cache the output for pre-Filter KNN-Join
// {
// RelOptInfo * rel = output_rel;
// ListCell * cell2;
// if(rel->rows == 10000)
// {
// Path * p = NULL;
// foreach(cell2, rel->pathlist)
// {
// p = (Path *) lfirst(cell2);
// elog(NOTICE, "\n\nmy_create_upper_paths_hook =========== Add Material Path \n\n");
// printf("\n\nMaterial Path 1\n\n");
// pprint(p);
// MaterialPath * pp = create_material_path(rel ,(Path*) p);
// printf("\n\nMaterial Path\n\n");
// pprint(pp);
// p = (Path *) pp;
// // add_path(rel, (Path *) pp);
// // p = NULL;
// }
// }
// }
//=======================================
//add cutom path representing K-In-Circle path instead of Limit path:
// inputs: K , operator <-> args
// int k = 6;
// OpExpr* KNN_op = NULL;
// ListCell * lc;
// ListCell * next = NULL;
// ListCell * prev = NULL;
// for(lc = list_head(output_rel->pathlist) ; lc != NULL ; lc = next)
// {
// next = lnext(lc);
// if(IsA(lfirst(lc), SubqueryScanPath))
// {
// SubqueryScanPath * pathnode = lfirst(lc);
// Path * subpath = pathnode->subpath;
// Path * p = (Path *) create_KInCircle_path(root, output_rel, NULL, KNN_op , k ,subpath);
// // TODO : assign pathtarget to input_rel->reltarget OR pass the input_rel instead of output_rel
// p->pathtarget = input_rel->reltarget;
// //^_^
// root->glob->subroots = lappend(root->glob->subroots , input_rel->subroot);
// output_rel->pathlist = list_delete_cell(output_rel->pathlist , lc , prev);
// add_path(output_rel , p);
// }
// else
// prev = lc;
// }
// break;
// default:
// break;
// }
// printf("\nmy_create_upper_paths_hook==============root\n");
// pprint(root);
// printf("\nmy_create_upper_paths_hook==============input_rel\n");
// pprint(input_rel);
// printf("\nmy_create_upper_paths_hook==============output_rel\n");
// pprint(output_rel);
}
//Version 9.6.4
static PlannedStmt *
myplanner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
// printf("\n============================================\n");
// printf("=========== myPlanner is called ===========\n");
// printf("============================================\n\n");
PlannedStmt *result;
PlannerGlobal *glob;
double tuple_fraction;
PlannerInfo *root;
RelOptInfo *final_rel;
Path *best_path;
Plan *top_plan;
ListCell *lp,
*lr;
/* Cursor options may come from caller or from DECLARE CURSOR stmt */
if (parse->utilityStmt &&
IsA(parse->utilityStmt, DeclareCursorStmt))
cursorOptions |= ((DeclareCursorStmt *) parse->utilityStmt)->options;
/*
* Set up global state for this planner invocation. This data is needed
* across all levels of sub-Query that might exist in the given command,
* so we keep it in a separate struct that's linked to by each per-Query
* PlannerInfo.
*/
glob = makeNode(PlannerGlobal);
glob->boundParams = boundParams;
glob->subplans = NIL;
glob->subroots = NIL;
glob->rewindPlanIDs = NULL;
glob->finalrtable = NIL;
glob->finalrowmarks = NIL;
glob->resultRelations = NIL;
glob->relationOids = NIL;
glob->invalItems = NIL;
glob->nParamExec = 0;
glob->lastPHId = 0;
glob->lastRowMarkId = 0;
glob->lastPlanNodeId = 0;
glob->transientPlan = false;
glob->dependsOnRole = false;
/*
* Assess whether it's feasible to use parallel mode for this query. We
* can't do this in a standalone backend, or if the command will try to
* modify any data, or if this is a cursor operation, or if GUCs are set
* to values that don't permit parallelism, or if parallel-unsafe
* functions are present in the query tree.
*
* For now, we don't try to use parallel mode if we're running inside a
* parallel worker. We might eventually be able to relax this
* restriction, but for now it seems best not to have parallel workers
* trying to create their own parallel workers.
*
* We can't use parallelism in serializable mode because the predicate
* locking code is not parallel-aware. It's not catastrophic if someone
* tries to run a parallel plan in serializable mode; it just won't get
* any workers and will run serially. But it seems like a good heuristic
* to assume that the same serialization level will be in effect at plan
* time and execution time, so don't generate a parallel plan if we're in
* serializable mode.
*/
glob->parallelModeOK = (cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
IsUnderPostmaster && dynamic_shared_memory_type != DSM_IMPL_NONE &&
parse->commandType == CMD_SELECT && !parse->hasModifyingCTE &&
parse->utilityStmt == NULL && max_parallel_workers_per_gather > 0 &&
!IsParallelWorker() && !IsolationIsSerializable() &&
!has_parallel_hazard((Node *) parse, true);
/*
* glob->parallelModeNeeded should tell us whether it's necessary to
* impose the parallel mode restrictions, but we don't actually want to
* impose them unless we choose a parallel plan, so it is normally set
* only if a parallel plan is chosen (see create_gather_plan). That way,
* people who mislabel their functions but don't use parallelism anyway
* aren't harmed. But when force_parallel_mode is set, we enable the
* restrictions whenever possible for testing purposes.
*/
glob->parallelModeNeeded = glob->parallelModeOK &&
(force_parallel_mode != FORCE_PARALLEL_OFF);
/* Determine what fraction of the plan is likely to be scanned */
if (cursorOptions & CURSOR_OPT_FAST_PLAN)
{
/*
* We have no real idea how many tuples the user will ultimately FETCH
* from a cursor, but it is often the case that he doesn't want 'em