xref: /petsc/src/vec/is/utils/kdtree.c (revision 98480730077ed2eb9d6c4a96b667530324fc8426)
1*98480730SJames Wright #include <petsc.h>
2*98480730SJames Wright #include <petscis.h>
3*98480730SJames Wright #include <petsc/private/petscimpl.h>
4*98480730SJames Wright 
5*98480730SJames Wright typedef struct {
6*98480730SJames Wright   PetscInt   axis;                                // Could make this into a uint8_t to save on memory and bandwidth?
7*98480730SJames Wright   PetscBool  is_greater_leaf, is_less_equal_leaf; // Could possibly use PetscBT to save on memory and bandwidth?
8*98480730SJames Wright   PetscReal  split;
9*98480730SJames Wright   PetscCount greater_handle, less_equal_handle;
10*98480730SJames Wright } KDStem;
11*98480730SJames Wright 
12*98480730SJames Wright typedef struct {
13*98480730SJames Wright   PetscInt   count;
14*98480730SJames Wright   PetscCount indices_handle, coords_handle;
15*98480730SJames Wright } KDLeaf;
16*98480730SJames Wright 
17*98480730SJames Wright struct _n_PetscKDTree {
18*98480730SJames Wright   PetscInt dim;
19*98480730SJames Wright   PetscInt max_bucket_size;
20*98480730SJames Wright 
21*98480730SJames Wright   PetscBool  is_root_leaf;
22*98480730SJames Wright   PetscCount root_handle;
23*98480730SJames Wright 
24*98480730SJames Wright   PetscCount       num_coords, num_leaves, num_stems, num_bucket_indices;
25*98480730SJames Wright   const PetscReal *coords, *coords_owned; // Only free owned on Destroy
26*98480730SJames Wright   KDLeaf          *leaves;
27*98480730SJames Wright   KDStem          *stems;
28*98480730SJames Wright   PetscCount      *bucket_indices;
29*98480730SJames Wright };
30*98480730SJames Wright 
31*98480730SJames Wright /*@C
32*98480730SJames Wright   PetscKDTreeDestroy - destroy a `PetscKDTree`
33*98480730SJames Wright 
34*98480730SJames Wright   Not Collective, No Fortran Support
35*98480730SJames Wright 
36*98480730SJames Wright   Input Parameters:
37*98480730SJames Wright . tree - tree to destroy
38*98480730SJames Wright 
39*98480730SJames Wright   Level: advanced
40*98480730SJames Wright 
41*98480730SJames Wright .seealso: `PetscKDTree`, `PetscKDTreeCreate()`
42*98480730SJames Wright @*/
43*98480730SJames Wright PetscErrorCode PetscKDTreeDestroy(PetscKDTree *tree)
44*98480730SJames Wright {
45*98480730SJames Wright   PetscFunctionBeginUser;
46*98480730SJames Wright   if (*tree == NULL) PetscFunctionReturn(PETSC_SUCCESS);
47*98480730SJames Wright   PetscCall(PetscFree((*tree)->stems));
48*98480730SJames Wright   PetscCall(PetscFree((*tree)->leaves));
49*98480730SJames Wright   PetscCall(PetscFree((*tree)->bucket_indices));
50*98480730SJames Wright   PetscCall(PetscFree((*tree)->coords_owned));
51*98480730SJames Wright   PetscCall(PetscFree(*tree));
52*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
53*98480730SJames Wright }
54*98480730SJames Wright 
55*98480730SJames Wright PetscLogEvent         PetscKDTree_Build, PetscKDTree_Query;
56*98480730SJames Wright static PetscErrorCode PetscKDTreeRegisterLogEvents()
57*98480730SJames Wright {
58*98480730SJames Wright   static PetscBool is_initialized = PETSC_FALSE;
59*98480730SJames Wright 
60*98480730SJames Wright   PetscFunctionBeginUser;
61*98480730SJames Wright   if (is_initialized) PetscFunctionReturn(PETSC_SUCCESS);
62*98480730SJames Wright   PetscCall(PetscLogEventRegister("KDTreeBuild", IS_CLASSID, &PetscKDTree_Build));
63*98480730SJames Wright   PetscCall(PetscLogEventRegister("KDTreeQuery", IS_CLASSID, &PetscKDTree_Query));
64*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
65*98480730SJames Wright }
66*98480730SJames Wright 
67*98480730SJames Wright // From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
68*98480730SJames Wright static inline uint32_t RoundToNextPowerOfTwo(uint32_t v)
69*98480730SJames Wright {
70*98480730SJames Wright   v--;
71*98480730SJames Wright   v |= v >> 1;
72*98480730SJames Wright   v |= v >> 2;
73*98480730SJames Wright   v |= v >> 4;
74*98480730SJames Wright   v |= v >> 8;
75*98480730SJames Wright   v |= v >> 16;
76*98480730SJames Wright   v++;
77*98480730SJames Wright   return v;
78*98480730SJames Wright }
79*98480730SJames Wright 
80*98480730SJames Wright typedef struct {
81*98480730SJames Wright   PetscInt    initial_axis;
82*98480730SJames Wright   PetscKDTree tree;
83*98480730SJames Wright } *KDTreeSortContext;
84*98480730SJames Wright 
85*98480730SJames Wright // Sort nodes based on "superkey"
86*98480730SJames Wright // See "Building a Balanced k-d Tree in O(kn log n) Time" https://jcgt.org/published/0004/01/03/
87*98480730SJames Wright static inline int PetscKDTreeSortFunc(PetscCount left, PetscCount right, PetscKDTree tree, PetscInt axis)
88*98480730SJames Wright {
89*98480730SJames Wright   const PetscReal *coords = tree->coords;
90*98480730SJames Wright   const PetscInt   dim    = tree->dim;
91*98480730SJames Wright 
92*98480730SJames Wright   for (PetscInt i = 0; i < dim; i++) {
93*98480730SJames Wright     PetscReal diff = coords[left * dim + axis] - coords[right * dim + axis];
94*98480730SJames Wright     if (PetscUnlikely(diff == 0)) {
95*98480730SJames Wright       axis = (axis + 1) % dim;
96*98480730SJames Wright       continue;
97*98480730SJames Wright     } else return PetscSign(diff);
98*98480730SJames Wright   }
99*98480730SJames Wright   return 0; // All components are the same
100*98480730SJames Wright }
101*98480730SJames Wright 
102*98480730SJames Wright static int PetscKDTreeTimSort(const void *l, const void *r, void *ctx)
103*98480730SJames Wright {
104*98480730SJames Wright   KDTreeSortContext kd_ctx = (KDTreeSortContext)ctx;
105*98480730SJames Wright   return PetscKDTreeSortFunc(*(PetscCount *)l, *(PetscCount *)r, kd_ctx->tree, kd_ctx->initial_axis);
106*98480730SJames Wright }
107*98480730SJames Wright 
108*98480730SJames Wright static PetscErrorCode PetscKDTreeVerifySortedIndices(PetscKDTree tree, PetscCount sorted_indices[], PetscCount temp[], PetscCount start, PetscCount end)
109*98480730SJames Wright {
110*98480730SJames Wright   PetscCount num_coords = tree->num_coords, range_size = end - start, location;
111*98480730SJames Wright   PetscBool  has_duplicates;
112*98480730SJames Wright 
113*98480730SJames Wright   PetscFunctionBeginUser;
114*98480730SJames Wright   PetscCall(PetscArraycpy(temp, &sorted_indices[0 * num_coords + start], range_size));
115*98480730SJames Wright   PetscCall(PetscSortCount(range_size, temp));
116*98480730SJames Wright   PetscCall(PetscSortedCheckDupsCount(range_size, temp, &has_duplicates));
117*98480730SJames Wright   PetscCheck(has_duplicates == PETSC_FALSE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sorted indices must have unique entries, but found duplicates");
118*98480730SJames Wright   for (PetscInt d = 1; d < tree->dim; d++) {
119*98480730SJames Wright     for (PetscCount i = start; i < end; i++) {
120*98480730SJames Wright       PetscCall(PetscFindCount(sorted_indices[d * num_coords + i], range_size, temp, &location));
121*98480730SJames Wright       PetscCheck(location > -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sorted indices are not consistent. Could not find %" PetscCount_FMT " from %" PetscInt_FMT " dimensional index in 0th dimension", sorted_indices[d * num_coords + i], d);
122*98480730SJames Wright     }
123*98480730SJames Wright   }
124*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
125*98480730SJames Wright }
126*98480730SJames Wright 
127*98480730SJames Wright typedef struct {
128*98480730SJames Wright   PetscKDTree    tree;
129*98480730SJames Wright   PetscSegBuffer stems, leaves, bucket_indices, bucket_coords;
130*98480730SJames Wright   PetscBool      debug_build, copy_coords;
131*98480730SJames Wright } *KDTreeBuild;
132*98480730SJames Wright 
133*98480730SJames Wright // The range is end exclusive, so [start,end).
134*98480730SJames Wright static PetscErrorCode PetscKDTreeBuildStemAndLeaves(KDTreeBuild kd_build, PetscCount sorted_indices[], PetscCount temp[], PetscCount start, PetscCount end, PetscInt depth, PetscBool *is_node_leaf, PetscCount *node_handle)
135*98480730SJames Wright {
136*98480730SJames Wright   PetscKDTree tree = kd_build->tree;
137*98480730SJames Wright   PetscInt    dim  = tree->dim;
138*98480730SJames Wright 
139*98480730SJames Wright   PetscFunctionBeginUser;
140*98480730SJames Wright   PetscCheck(start <= end, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Start %" PetscCount_FMT " must be less than or equal to end %" PetscCount_FMT, start, end);
141*98480730SJames Wright   if (kd_build->debug_build) PetscCall(PetscKDTreeVerifySortedIndices(tree, sorted_indices, temp, start, end));
142*98480730SJames Wright   if (end - start <= tree->max_bucket_size) {
143*98480730SJames Wright     KDLeaf     *leaf;
144*98480730SJames Wright     PetscCount *bucket_indices;
145*98480730SJames Wright 
146*98480730SJames Wright     PetscCall(PetscSegBufferGetSize(kd_build->leaves, node_handle));
147*98480730SJames Wright     PetscCall(PetscSegBufferGet(kd_build->leaves, 1, &leaf));
148*98480730SJames Wright     PetscCall(PetscMemzero(leaf, sizeof(KDLeaf)));
149*98480730SJames Wright     *is_node_leaf = PETSC_TRUE;
150*98480730SJames Wright 
151*98480730SJames Wright     PetscCall(PetscIntCast(end - start, &leaf->count));
152*98480730SJames Wright     PetscCall(PetscSegBufferGetSize(kd_build->bucket_indices, &leaf->indices_handle));
153*98480730SJames Wright     PetscCall(PetscSegBufferGet(kd_build->bucket_indices, leaf->count, &bucket_indices));
154*98480730SJames Wright     PetscCall(PetscArraycpy(bucket_indices, &sorted_indices[start], leaf->count));
155*98480730SJames Wright     if (kd_build->copy_coords) {
156*98480730SJames Wright       PetscReal *bucket_coords;
157*98480730SJames Wright       PetscCall(PetscSegBufferGetSize(kd_build->bucket_coords, &leaf->coords_handle));
158*98480730SJames Wright       PetscCall(PetscSegBufferGet(kd_build->bucket_coords, leaf->count * dim, &bucket_coords));
159*98480730SJames Wright       // Coords are saved in axis-major ordering for better vectorization
160*98480730SJames Wright       for (PetscCount i = 0; i < leaf->count; i++) {
161*98480730SJames Wright         for (PetscInt d = 0; d < dim; d++) bucket_coords[d * leaf->count + i] = tree->coords[bucket_indices[i] * dim + d];
162*98480730SJames Wright       }
163*98480730SJames Wright     } else leaf->coords_handle = -1;
164*98480730SJames Wright   } else {
165*98480730SJames Wright     KDStem    *stem;
166*98480730SJames Wright     PetscCount num_coords = tree->num_coords;
167*98480730SJames Wright     PetscInt   axis       = depth % dim;
168*98480730SJames Wright     PetscCount median     = start + PetscCeilInt64(end - start, 2) - 1, lower;
169*98480730SJames Wright     PetscCount median_idx = sorted_indices[median], medianp1_idx = sorted_indices[median + 1];
170*98480730SJames Wright 
171*98480730SJames Wright     PetscCall(PetscSegBufferGetSize(kd_build->stems, node_handle));
172*98480730SJames Wright     PetscCall(PetscSegBufferGet(kd_build->stems, 1, &stem));
173*98480730SJames Wright     PetscCall(PetscMemzero(stem, sizeof(KDStem)));
174*98480730SJames Wright     *is_node_leaf = PETSC_FALSE;
175*98480730SJames Wright 
176*98480730SJames Wright     stem->axis = axis;
177*98480730SJames Wright     // Place split halfway between the "boundary" nodes of the partitioning
178*98480730SJames Wright     stem->split = (tree->coords[tree->dim * median_idx + axis] + tree->coords[tree->dim * medianp1_idx + axis]) / 2;
179*98480730SJames Wright     PetscCall(PetscArraycpy(temp, &sorted_indices[0 * num_coords + start], end - start));
180*98480730SJames Wright     lower = median; // Set lower in case dim == 1
181*98480730SJames Wright     for (PetscInt d = 1; d < tree->dim; d++) {
182*98480730SJames Wright       PetscCount upper = median;
183*98480730SJames Wright       lower            = start - 1;
184*98480730SJames Wright       for (PetscCount i = start; i < end; i++) {
185*98480730SJames Wright         // In case of duplicate coord point equal to the median coord point, limit lower partition to median, ensuring balanced tree
186*98480730SJames Wright         if (lower < median && PetscKDTreeSortFunc(sorted_indices[d * num_coords + i], median_idx, tree, axis) <= 0) {
187*98480730SJames Wright           sorted_indices[(d - 1) * num_coords + (++lower)] = sorted_indices[d * num_coords + i];
188*98480730SJames Wright         } else {
189*98480730SJames Wright           sorted_indices[(d - 1) * num_coords + (++upper)] = sorted_indices[d * num_coords + i];
190*98480730SJames Wright         }
191*98480730SJames Wright       }
192*98480730SJames Wright       PetscCheck(lower == median, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Partitioning into less_equal bin failed. Range upper bound should be %" PetscCount_FMT " but partitioning resulted in %" PetscCount_FMT, median, lower);
193*98480730SJames Wright       PetscCheck(upper == end - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Partitioning into greater bin failed. Range upper bound should be %" PetscCount_FMT " but partitioning resulted in %" PetscCount_FMT, upper, end - 1);
194*98480730SJames Wright     }
195*98480730SJames Wright     PetscCall(PetscArraycpy(&sorted_indices[(tree->dim - 1) * num_coords + start], temp, end - start));
196*98480730SJames Wright 
197*98480730SJames Wright     PetscCall(PetscKDTreeBuildStemAndLeaves(kd_build, sorted_indices, temp, start, lower + 1, depth + 1, &stem->is_less_equal_leaf, &stem->less_equal_handle));
198*98480730SJames Wright     PetscCall(PetscKDTreeBuildStemAndLeaves(kd_build, sorted_indices, temp, lower + 1, end, depth + 1, &stem->is_greater_leaf, &stem->greater_handle));
199*98480730SJames Wright   }
200*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
201*98480730SJames Wright }
202*98480730SJames Wright 
203*98480730SJames Wright /*@C
204*98480730SJames Wright   PetscKDTreeCreate - create a `PetscKDTree`
205*98480730SJames Wright 
206*98480730SJames Wright   Not Collective, No Fortran Support
207*98480730SJames Wright 
208*98480730SJames Wright   Input Parameters:
209*98480730SJames Wright + num_coords      - number of coordinate points to build the `PetscKDTree`
210*98480730SJames Wright . dim             - the dimension of the coordinates
211*98480730SJames Wright . coords          - array of the coordinates, in point-major order
212*98480730SJames Wright . copy_mode       - behavior handling `coords`, `PETSC_COPY_VALUES` generally more performant
213*98480730SJames Wright - max_bucket_size - maximum number of points stored at each leaf
214*98480730SJames Wright 
215*98480730SJames Wright   Output Parameter:
216*98480730SJames Wright . new_tree - the resulting `PetscKDTree`
217*98480730SJames Wright 
218*98480730SJames Wright   Level: advanced
219*98480730SJames Wright 
220*98480730SJames Wright   Note:
221*98480730SJames Wright   When `copy_mode == PETSC_COPY_VALUES`, the coordinates are copied and organized to optimize vectorization and cache-coherency.
222*98480730SJames Wright   It is recommended to run this way if the extra memory use is not a concern and it has very little impact on the `PetscKDTree` creation time.
223*98480730SJames Wright 
224*98480730SJames Wright   Developer Note:
225*98480730SJames Wright   Building algorithm detailed in 'Building a Balanced k-d Tree in O(kn log n) Time' Brown, 2015
226*98480730SJames Wright 
227*98480730SJames Wright .seealso: `PetscKDTree`, `PetscKDTreeDestroy()`, `PetscKDTreeQueryPointsNearestNeighbor()`
228*98480730SJames Wright @*/
229*98480730SJames Wright PetscErrorCode PetscKDTreeCreate(PetscCount num_coords, PetscInt dim, const PetscReal coords[], PetscCopyMode copy_mode, PetscInt max_bucket_size, PetscKDTree *new_tree)
230*98480730SJames Wright {
231*98480730SJames Wright   PetscKDTree tree;
232*98480730SJames Wright   PetscCount *sorted_indices, *temp;
233*98480730SJames Wright 
234*98480730SJames Wright   PetscFunctionBeginUser;
235*98480730SJames Wright   PetscCall(PetscKDTreeRegisterLogEvents());
236*98480730SJames Wright   PetscCall(PetscLogEventBegin(PetscKDTree_Build, 0, 0, 0, 0));
237*98480730SJames Wright   PetscCheck(dim > 0, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Dimension of PetscKDTree must be greater than 0, recieved %" PetscInt_FMT, dim);
238*98480730SJames Wright   PetscCheck(num_coords > -1, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Number of coordinates may not be negative, recieved %" PetscCount_FMT, num_coords);
239*98480730SJames Wright   if (num_coords == 0) {
240*98480730SJames Wright     *new_tree = NULL;
241*98480730SJames Wright     PetscFunctionReturn(PETSC_SUCCESS);
242*98480730SJames Wright   }
243*98480730SJames Wright   PetscAssertPointer(coords, 3);
244*98480730SJames Wright   PetscAssertPointer(new_tree, 6);
245*98480730SJames Wright   PetscCall(PetscNew(&tree));
246*98480730SJames Wright   tree->dim             = dim;
247*98480730SJames Wright   tree->max_bucket_size = max_bucket_size == PETSC_DECIDE ? 32 : max_bucket_size;
248*98480730SJames Wright   tree->num_coords      = num_coords;
249*98480730SJames Wright 
250*98480730SJames Wright   switch (copy_mode) {
251*98480730SJames Wright   case PETSC_OWN_POINTER:
252*98480730SJames Wright     tree->coords_owned = coords;
253*98480730SJames Wright   case PETSC_USE_POINTER:
254*98480730SJames Wright     tree->coords = coords;
255*98480730SJames Wright     break;
256*98480730SJames Wright   case PETSC_COPY_VALUES:
257*98480730SJames Wright     PetscCall(PetscMalloc1(num_coords * dim, &tree->coords_owned));
258*98480730SJames Wright     PetscCall(PetscArraycpy((PetscReal *)tree->coords_owned, coords, num_coords * dim));
259*98480730SJames Wright     tree->coords = tree->coords_owned;
260*98480730SJames Wright     break;
261*98480730SJames Wright   }
262*98480730SJames Wright 
263*98480730SJames Wright   KDTreeSortContext kd_ctx;
264*98480730SJames Wright   PetscCall(PetscMalloc2(num_coords * dim, &sorted_indices, num_coords, &temp));
265*98480730SJames Wright   PetscCall(PetscNew(&kd_ctx));
266*98480730SJames Wright   kd_ctx->tree = tree;
267*98480730SJames Wright   for (PetscInt j = 0; j < dim; j++) {
268*98480730SJames Wright     for (PetscCount i = 0; i < num_coords; i++) sorted_indices[num_coords * j + i] = i;
269*98480730SJames Wright     kd_ctx->initial_axis = j;
270*98480730SJames Wright     PetscCall(PetscTimSort((PetscInt)num_coords, &sorted_indices[num_coords * j], sizeof(*sorted_indices), PetscKDTreeTimSort, kd_ctx));
271*98480730SJames Wright   }
272*98480730SJames Wright   PetscCall(PetscFree(kd_ctx));
273*98480730SJames Wright 
274*98480730SJames Wright   PetscInt    num_leaves = (PetscInt)PetscCeilInt64(num_coords, tree->max_bucket_size);
275*98480730SJames Wright   PetscInt    num_stems  = RoundToNextPowerOfTwo((uint32_t)num_leaves);
276*98480730SJames Wright   KDTreeBuild kd_build;
277*98480730SJames Wright   PetscCall(PetscNew(&kd_build));
278*98480730SJames Wright   kd_build->tree        = tree;
279*98480730SJames Wright   kd_build->copy_coords = copy_mode == PETSC_COPY_VALUES ? PETSC_TRUE : PETSC_FALSE;
280*98480730SJames Wright   PetscCall(PetscOptionsGetBool(NULL, NULL, "-kdtree_debug", &kd_build->debug_build, NULL));
281*98480730SJames Wright   PetscCall(PetscSegBufferCreate(sizeof(KDStem), num_stems, &kd_build->stems));
282*98480730SJames Wright   PetscCall(PetscSegBufferCreate(sizeof(KDLeaf), num_leaves, &kd_build->leaves));
283*98480730SJames Wright   PetscCall(PetscSegBufferCreate(sizeof(PetscCount), num_coords, &kd_build->bucket_indices));
284*98480730SJames Wright   if (kd_build->copy_coords) PetscCall(PetscSegBufferCreate(sizeof(PetscReal), num_coords * dim, &kd_build->bucket_coords));
285*98480730SJames Wright 
286*98480730SJames Wright   PetscCall(PetscKDTreeBuildStemAndLeaves(kd_build, sorted_indices, temp, 0, num_coords, 0, &tree->is_root_leaf, &tree->root_handle));
287*98480730SJames Wright 
288*98480730SJames Wright   PetscCall(PetscSegBufferGetSize(kd_build->stems, &tree->num_stems));
289*98480730SJames Wright   PetscCall(PetscSegBufferGetSize(kd_build->leaves, &tree->num_leaves));
290*98480730SJames Wright   PetscCall(PetscSegBufferGetSize(kd_build->bucket_indices, &tree->num_bucket_indices));
291*98480730SJames Wright   PetscCall(PetscSegBufferExtractAlloc(kd_build->stems, &tree->stems));
292*98480730SJames Wright   PetscCall(PetscSegBufferExtractAlloc(kd_build->leaves, &tree->leaves));
293*98480730SJames Wright   PetscCall(PetscSegBufferExtractAlloc(kd_build->bucket_indices, &tree->bucket_indices));
294*98480730SJames Wright   if (kd_build->copy_coords) {
295*98480730SJames Wright     PetscCall(PetscFree(tree->coords_owned));
296*98480730SJames Wright     PetscCall(PetscSegBufferExtractAlloc(kd_build->bucket_coords, &tree->coords_owned));
297*98480730SJames Wright     tree->coords = tree->coords_owned;
298*98480730SJames Wright     PetscCall(PetscSegBufferDestroy(&kd_build->bucket_coords));
299*98480730SJames Wright   }
300*98480730SJames Wright   PetscCall(PetscSegBufferDestroy(&kd_build->stems));
301*98480730SJames Wright   PetscCall(PetscSegBufferDestroy(&kd_build->leaves));
302*98480730SJames Wright   PetscCall(PetscSegBufferDestroy(&kd_build->bucket_indices));
303*98480730SJames Wright   PetscCall(PetscFree(kd_build));
304*98480730SJames Wright   PetscCall(PetscFree2(sorted_indices, temp));
305*98480730SJames Wright   *new_tree = tree;
306*98480730SJames Wright   PetscCall(PetscLogEventEnd(PetscKDTree_Build, 0, 0, 0, 0));
307*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
308*98480730SJames Wright }
309*98480730SJames Wright 
310*98480730SJames Wright static inline PetscReal PetscSquareDistance(PetscInt dim, const PetscReal *PETSC_RESTRICT x, const PetscReal *PETSC_RESTRICT y)
311*98480730SJames Wright {
312*98480730SJames Wright   PetscReal dist = 0;
313*98480730SJames Wright   for (PetscInt j = 0; j < dim; j++) dist += PetscSqr(x[j] - y[j]);
314*98480730SJames Wright   return dist;
315*98480730SJames Wright }
316*98480730SJames Wright 
317*98480730SJames Wright static inline PetscErrorCode PetscKDTreeQueryLeaf(PetscKDTree tree, KDLeaf leaf, const PetscReal point[], PetscCount *index, PetscReal *distance_sqr)
318*98480730SJames Wright {
319*98480730SJames Wright   PetscInt dim = tree->dim;
320*98480730SJames Wright 
321*98480730SJames Wright   PetscFunctionBeginUser;
322*98480730SJames Wright   *distance_sqr = PETSC_MAX_REAL;
323*98480730SJames Wright   *index        = -1;
324*98480730SJames Wright   for (PetscInt i = 0; i < leaf.count; i++) {
325*98480730SJames Wright     PetscCount point_index = tree->bucket_indices[leaf.indices_handle + i];
326*98480730SJames Wright     PetscReal  dist        = PetscSquareDistance(dim, point, &tree->coords[point_index * dim]);
327*98480730SJames Wright     if (dist < *distance_sqr) {
328*98480730SJames Wright       *distance_sqr = dist;
329*98480730SJames Wright       *index        = point_index;
330*98480730SJames Wright     }
331*98480730SJames Wright   }
332*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
333*98480730SJames Wright }
334*98480730SJames Wright 
335*98480730SJames Wright static inline PetscErrorCode PetscKDTreeQueryLeaf_CopyCoords(PetscKDTree tree, KDLeaf leaf, const PetscReal point[], PetscCount *index, PetscReal *distance_sqr)
336*98480730SJames Wright {
337*98480730SJames Wright   PetscInt dim = tree->dim;
338*98480730SJames Wright 
339*98480730SJames Wright   PetscFunctionBeginUser;
340*98480730SJames Wright   *distance_sqr = PETSC_MAX_REAL;
341*98480730SJames Wright   *index        = -1;
342*98480730SJames Wright   for (PetscInt i = 0; i < leaf.count; i++) {
343*98480730SJames Wright     // Coord data saved in axis-major ordering for vectorization
344*98480730SJames Wright     PetscReal dist = 0.;
345*98480730SJames Wright     for (PetscInt d = 0; d < dim; d++) dist += PetscSqr(point[d] - tree->coords[leaf.coords_handle + d * leaf.count + i]);
346*98480730SJames Wright     if (dist < *distance_sqr) {
347*98480730SJames Wright       *distance_sqr = dist;
348*98480730SJames Wright       *index        = tree->bucket_indices[leaf.indices_handle + i];
349*98480730SJames Wright     }
350*98480730SJames Wright   }
351*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
352*98480730SJames Wright }
353*98480730SJames Wright 
354*98480730SJames Wright // Recursive point query from 'Algorithms for Fast Vector Quantization' by  Sunil Arya and David Mount
355*98480730SJames Wright // Variant also implemented in pykdtree
356*98480730SJames Wright static PetscErrorCode PetscKDTreeQuery_Recurse(PetscKDTree tree, const PetscReal point[], PetscCount node_handle, PetscBool is_node_leaf, PetscReal offset[], PetscReal rd, PetscReal tol_sqr, PetscCount *index, PetscReal *dist_sqr)
357*98480730SJames Wright {
358*98480730SJames Wright   PetscFunctionBeginUser;
359*98480730SJames Wright   if (*dist_sqr < tol_sqr) PetscFunctionReturn(PETSC_SUCCESS);
360*98480730SJames Wright   if (is_node_leaf) {
361*98480730SJames Wright     KDLeaf     leaf = tree->leaves[node_handle];
362*98480730SJames Wright     PetscReal  dist;
363*98480730SJames Wright     PetscCount point_index;
364*98480730SJames Wright 
365*98480730SJames Wright     if (leaf.coords_handle > -1) PetscCall(PetscKDTreeQueryLeaf_CopyCoords(tree, leaf, point, &point_index, &dist));
366*98480730SJames Wright     else PetscCall(PetscKDTreeQueryLeaf(tree, leaf, point, &point_index, &dist));
367*98480730SJames Wright     if (dist < *dist_sqr) {
368*98480730SJames Wright       *dist_sqr = dist;
369*98480730SJames Wright       *index    = point_index;
370*98480730SJames Wright     }
371*98480730SJames Wright     PetscFunctionReturn(PETSC_SUCCESS);
372*98480730SJames Wright   }
373*98480730SJames Wright 
374*98480730SJames Wright   KDStem    stem       = tree->stems[node_handle];
375*98480730SJames Wright   PetscReal old_offset = offset[stem.axis], new_offset = point[stem.axis] - stem.split;
376*98480730SJames Wright   if (new_offset <= 0) {
377*98480730SJames Wright     PetscCall(PetscKDTreeQuery_Recurse(tree, point, stem.less_equal_handle, stem.is_less_equal_leaf, offset, rd, tol_sqr, index, dist_sqr));
378*98480730SJames Wright     rd += -PetscSqr(old_offset) + PetscSqr(new_offset);
379*98480730SJames Wright     if (rd < *dist_sqr) {
380*98480730SJames Wright       offset[stem.axis] = new_offset;
381*98480730SJames Wright       PetscCall(PetscKDTreeQuery_Recurse(tree, point, stem.greater_handle, stem.is_greater_leaf, offset, rd, tol_sqr, index, dist_sqr));
382*98480730SJames Wright       offset[stem.axis] = old_offset;
383*98480730SJames Wright     }
384*98480730SJames Wright   } else {
385*98480730SJames Wright     PetscCall(PetscKDTreeQuery_Recurse(tree, point, stem.greater_handle, stem.is_greater_leaf, offset, rd, tol_sqr, index, dist_sqr));
386*98480730SJames Wright     rd += -PetscSqr(old_offset) + PetscSqr(new_offset);
387*98480730SJames Wright     if (rd < *dist_sqr) {
388*98480730SJames Wright       offset[stem.axis] = new_offset;
389*98480730SJames Wright       PetscCall(PetscKDTreeQuery_Recurse(tree, point, stem.less_equal_handle, stem.is_less_equal_leaf, offset, rd, tol_sqr, index, dist_sqr));
390*98480730SJames Wright       offset[stem.axis] = old_offset;
391*98480730SJames Wright     }
392*98480730SJames Wright   }
393*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
394*98480730SJames Wright }
395*98480730SJames Wright 
396*98480730SJames Wright /*@C
397*98480730SJames Wright   PetscKDTreeQueryPointsNearestNeighbor - find the nearest neighbor in a `PetscKDTree`
398*98480730SJames Wright 
399*98480730SJames Wright   Not Collective, No Fortran Support
400*98480730SJames Wright 
401*98480730SJames Wright   Input Parameters:
402*98480730SJames Wright + tree       - tree to query
403*98480730SJames Wright . num_points - number of points to query
404*98480730SJames Wright . points     - array of the coordinates, in point-major order
405*98480730SJames Wright - tolerance  - tolerance for nearest neighbor
406*98480730SJames Wright 
407*98480730SJames Wright   Output Parameter:
408*98480730SJames Wright + indices   - indices of the nearest neighbor to the query point
409*98480730SJames Wright - distances - distance between the queried point and the nearest neighbor
410*98480730SJames Wright 
411*98480730SJames Wright   Level: advanced
412*98480730SJames Wright 
413*98480730SJames Wright   Notes:
414*98480730SJames Wright   When traversing the tree, if a point has been found to be closer than the `tolerance`, the function short circuits and doesn't check for any closer points.
415*98480730SJames Wright 
416*98480730SJames Wright   The `indices` and `distances` arrays should be at least of size `num_points`.
417*98480730SJames Wright 
418*98480730SJames Wright .seealso: `PetscKDTree`, `PetscKDTreeCreate()`
419*98480730SJames Wright @*/
420*98480730SJames Wright PetscErrorCode PetscKDTreeQueryPointsNearestNeighbor(PetscKDTree tree, PetscCount num_points, const PetscReal points[], PetscReal tolerance, PetscCount indices[], PetscReal distances[])
421*98480730SJames Wright {
422*98480730SJames Wright   PetscReal *offsets, rd;
423*98480730SJames Wright 
424*98480730SJames Wright   PetscFunctionBeginUser;
425*98480730SJames Wright   PetscCall(PetscLogEventBegin(PetscKDTree_Query, 0, 0, 0, 0));
426*98480730SJames Wright   if (tree == NULL) {
427*98480730SJames Wright     PetscCheck(num_points == 0, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "num_points may only be zero, if tree is NULL");
428*98480730SJames Wright     PetscFunctionReturn(PETSC_SUCCESS);
429*98480730SJames Wright   }
430*98480730SJames Wright   PetscAssertPointer(points, 3);
431*98480730SJames Wright   PetscAssertPointer(indices, 5);
432*98480730SJames Wright   PetscAssertPointer(distances, 6);
433*98480730SJames Wright   PetscCall(PetscCalloc1(tree->dim, &offsets));
434*98480730SJames Wright 
435*98480730SJames Wright   for (PetscCount p = 0; p < num_points; p++) {
436*98480730SJames Wright     rd           = 0.;
437*98480730SJames Wright     distances[p] = PETSC_MAX_REAL;
438*98480730SJames Wright     indices[p]   = -1;
439*98480730SJames Wright     PetscCall(PetscKDTreeQuery_Recurse(tree, &points[p * tree->dim], tree->root_handle, tree->is_root_leaf, offsets, rd, PetscSqr(tolerance), &indices[p], &distances[p]));
440*98480730SJames Wright     distances[p] = PetscSqrtReal(distances[p]);
441*98480730SJames Wright   }
442*98480730SJames Wright   PetscCall(PetscFree(offsets));
443*98480730SJames Wright   PetscCall(PetscLogEventEnd(PetscKDTree_Query, 0, 0, 0, 0));
444*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
445*98480730SJames Wright }
446*98480730SJames Wright 
447*98480730SJames Wright /*@C
448*98480730SJames Wright   PetscKDTreeView - view a `PetscKDTree`
449*98480730SJames Wright 
450*98480730SJames Wright   Not Collective, No Fortran Support
451*98480730SJames Wright 
452*98480730SJames Wright   Input Parameters:
453*98480730SJames Wright + tree   - tree to view
454*98480730SJames Wright - viewer - visualization context
455*98480730SJames Wright 
456*98480730SJames Wright   Level: advanced
457*98480730SJames Wright 
458*98480730SJames Wright .seealso: `PetscKDTree`, `PetscKDTreeCreate()`, `PetscViewer`
459*98480730SJames Wright @*/
460*98480730SJames Wright PetscErrorCode PetscKDTreeView(PetscKDTree tree, PetscViewer viewer)
461*98480730SJames Wright {
462*98480730SJames Wright   PetscFunctionBeginUser;
463*98480730SJames Wright   if (viewer) PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
464*98480730SJames Wright   else PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_SELF, &viewer));
465*98480730SJames Wright   if (tree == NULL) PetscFunctionReturn(PETSC_SUCCESS);
466*98480730SJames Wright 
467*98480730SJames Wright   PetscCall(PetscViewerASCIIPrintf(viewer, "KDTree:\n"));
468*98480730SJames Wright   PetscCall(PetscViewerASCIIPushTab(viewer)); // KDTree:
469*98480730SJames Wright   PetscCall(PetscViewerASCIIPrintf(viewer, "Stems:\n"));
470*98480730SJames Wright   PetscCall(PetscViewerASCIIPushTab(viewer)); // Stems:
471*98480730SJames Wright   for (PetscCount i = 0; i < tree->num_stems; i++) {
472*98480730SJames Wright     KDStem stem = tree->stems[i];
473*98480730SJames Wright     PetscCall(PetscViewerASCIIPrintf(viewer, "Stem %" PetscCount_FMT ": Axis=%" PetscInt_FMT " Split=%g Greater_%s=%" PetscCount_FMT " Lesser_Equal_%s=%" PetscCount_FMT "\n", i, stem.axis, (double)stem.split, stem.is_greater_leaf ? "Leaf" : "Stem",
474*98480730SJames Wright                                      stem.greater_handle, stem.is_less_equal_leaf ? "Leaf" : "Stem", stem.less_equal_handle));
475*98480730SJames Wright   }
476*98480730SJames Wright   PetscCall(PetscViewerASCIIPopTab(viewer)); // Stems:
477*98480730SJames Wright 
478*98480730SJames Wright   PetscCall(PetscViewerASCIIPrintf(viewer, "Leaves:\n"));
479*98480730SJames Wright   PetscCall(PetscViewerASCIIPushTab(viewer)); // Leaves:
480*98480730SJames Wright   for (PetscCount i = 0; i < tree->num_leaves; i++) {
481*98480730SJames Wright     KDLeaf leaf = tree->leaves[i];
482*98480730SJames Wright     PetscCall(PetscViewerASCIIPrintf(viewer, "Leaf %" PetscCount_FMT ": Count=%" PetscInt_FMT, i, leaf.count));
483*98480730SJames Wright     PetscCall(PetscViewerASCIIPushTab(viewer)); // Coords:
484*98480730SJames Wright     for (PetscInt j = 0; j < leaf.count; j++) {
485*98480730SJames Wright       PetscInt   tabs;
486*98480730SJames Wright       PetscCount bucket_index = tree->bucket_indices[leaf.indices_handle + j];
487*98480730SJames Wright       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
488*98480730SJames Wright       PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscCount_FMT ": ", bucket_index));
489*98480730SJames Wright 
490*98480730SJames Wright       PetscCall(PetscViewerASCIIGetTab(viewer, &tabs));
491*98480730SJames Wright       PetscCall(PetscViewerASCIISetTab(viewer, 0));
492*98480730SJames Wright       if (leaf.coords_handle > -1) {
493*98480730SJames Wright         for (PetscInt k = 0; k < tree->dim; k++) PetscCall(PetscViewerASCIIPrintf(viewer, "%g ", (double)tree->coords[leaf.coords_handle + leaf.count * k + j]));
494*98480730SJames Wright         PetscCall(PetscViewerASCIIPrintf(viewer, " (stored at leaf)"));
495*98480730SJames Wright       } else {
496*98480730SJames Wright         for (PetscInt k = 0; k < tree->dim; k++) PetscCall(PetscViewerASCIIPrintf(viewer, "%g ", (double)tree->coords[bucket_index * tree->dim + k]));
497*98480730SJames Wright       }
498*98480730SJames Wright       PetscCall(PetscViewerASCIISetTab(viewer, tabs));
499*98480730SJames Wright     }
500*98480730SJames Wright     PetscCall(PetscViewerASCIIPopTab(viewer)); // Coords:
501*98480730SJames Wright     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
502*98480730SJames Wright   }
503*98480730SJames Wright   PetscCall(PetscViewerASCIIPopTab(viewer)); // Leaves:
504*98480730SJames Wright   PetscCall(PetscViewerASCIIPopTab(viewer)); // KDTree:
505*98480730SJames Wright   PetscFunctionReturn(PETSC_SUCCESS);
506*98480730SJames Wright }
507