/*
   Routines to compute overlapping regions of a parallel MPI matrix.
   Used for finding submatrices that are shared across processors.
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <petscbt.h>

static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat, PetscInt, IS *);
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat, PetscInt *, PetscInt, PetscInt *, PetscBT *);

PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C, PetscInt is_max, IS is[], PetscInt ov)
{
  PetscInt        i, N = C->cmap->N, bs = C->rmap->bs, M = C->rmap->N, Mbs = M / bs, *nidx, isz, iov;
  IS             *is_new, *is_row;
  Mat            *submats;
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ *)C->data;
  Mat_SeqSBAIJ   *asub_i;
  PetscBT         table;
  PetscInt       *ai, brow, nz, nis, l, nmax, nstages_local, nstages, max_no, pos;
  const PetscInt *idx;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(is_max, &is_new));
  /* Convert the indices into block format */
  PetscCall(ISCompressIndicesGeneral(N, C->rmap->n, bs, is_max, is, is_new));
  PetscCheck(ov >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative overlap specified");

  /* check whether the previous, non-scalable implementation was requested */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsHasName(NULL, NULL, "-IncreaseOverlap_old", &flg));
  if (flg) { /* previous non-scalable implementation */
    printf("use previous non-scalable implementation...\n");
    for (i = 0; i < ov; ++i) PetscCall(MatIncreaseOverlap_MPISBAIJ_Once(C, is_max, is_new));
  } else { /* implementation using modified BAIJ routines */

    PetscCall(PetscMalloc1(Mbs + 1, &nidx));
    PetscCall(PetscBTCreate(Mbs, &table)); /* for column search */

    /* Create is_row */
    PetscCall(PetscMalloc1(is_max, &is_row));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, Mbs, 0, 1, &is_row[0]));

    for (i = 1; i < is_max; i++) { is_row[i] = is_row[0]; /* reuse is_row[0] */ }

    /* Allocate memory to hold all the submatrices - modified from MatCreateSubMatrices_MPIBAIJ() */
    PetscCall(PetscMalloc1(is_max + 1, &submats));

    /* Determine the number of stages through which submatrices are done */
    nmax = 20 * 1000000 / (c->Nbs * sizeof(PetscInt));
    if (!nmax) nmax = 1;
    nstages_local = is_max / nmax + ((is_max % nmax) ? 1 : 0);
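    /* For example (hypothetical sizes): with c->Nbs = 100000 block columns and 8-byte PetscInt,
       nmax = 20000000 / 800000 = 25 index sets per stage, so is_max = 60 gives nstages_local = 3 */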

    /* Make sure every processor loops through the nstages */
    PetscCall(MPIU_Allreduce(&nstages_local, &nstages, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)C)));

    for (iov = 0; iov < ov; ++iov) {
      /* 1) Get submats for column search */
      for (i = 0, pos = 0; i < nstages; i++) {
        if (pos + nmax <= is_max) max_no = nmax;
        else if (pos == is_max) max_no = 0;
        else max_no = is_max - pos;
        c->ijonly = PETSC_TRUE; /* only matrix data structures are requested */
        /* The resulting submatrices should be BAIJ, not SBAIJ, hence we change this value to trigger that */
        PetscCall(PetscStrcpy(((PetscObject)c->A)->type_name, MATSEQBAIJ));
        PetscCall(MatCreateSubMatrices_MPIBAIJ_local(C, max_no, is_row + pos, is_new + pos, MAT_INITIAL_MATRIX, submats + pos));
        PetscCall(PetscStrcpy(((PetscObject)c->A)->type_name, MATSEQSBAIJ));
        pos += max_no;
      }

      /* 2) Row search */
      PetscCall(MatIncreaseOverlap_MPIBAIJ_Once(C, is_max, is_new));

      /* 3) Column search */
      for (i = 0; i < is_max; i++) {
        asub_i = (Mat_SeqSBAIJ *)submats[i]->data;
        ai     = asub_i->i;

        /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() into table */
        PetscCall(PetscBTMemzero(Mbs, table));

        PetscCall(ISGetIndices(is_new[i], &idx));
        PetscCall(ISGetLocalSize(is_new[i], &nis));
        for (l = 0; l < nis; l++) {
          PetscCall(PetscBTSet(table, idx[l]));
          nidx[l] = idx[l];
        }
        isz = nis;

        /* add column entries to table */
        for (brow = 0; brow < Mbs; brow++) {
          nz = ai[brow + 1] - ai[brow];
          if (nz) {
            if (!PetscBTLookupSet(table, brow)) nidx[isz++] = brow;
          }
        }
        PetscCall(ISRestoreIndices(is_new[i], &idx));
        PetscCall(ISDestroy(&is_new[i]));

        /* create updated is_new */
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, isz, nidx, PETSC_COPY_VALUES, is_new + i));
      }

      /* Free tmp spaces */
      for (i = 0; i < is_max; i++) PetscCall(MatDestroy(&submats[i]));
    }

    PetscCall(PetscBTDestroy(&table));
    PetscCall(PetscFree(submats));
    PetscCall(ISDestroy(&is_row[0]));
    PetscCall(PetscFree(is_row));
    PetscCall(PetscFree(nidx));
  }

  for (i = 0; i < is_max; i++) PetscCall(ISDestroy(&is[i]));
  PetscCall(ISExpandIndicesGeneral(N, N, bs, is_max, is_new, is));

  for (i = 0; i < is_max; i++) PetscCall(ISDestroy(&is_new[i]));
  PetscCall(PetscFree(is_new));
  PetscFunctionReturn(PETSC_SUCCESS);
}

typedef enum {
  MINE,
  OTHER
} WhoseOwner;
/* data1, odata1 and odata2 are packed in the following format (for communication):
     data[0]          = is_max, the number of index sets
     data[1]          = size of is[0]
     ...
     data[is_max]     = size of is[is_max-1]
     data[is_max + 1] = data(is[0])
     ...
     data[is_max + 1 + sum(size of is[k]), k=0,...,i-1] = data(is[i])
     ...
   data2 is packed in the following format (for creating output is[]):
     data[0]          = is_max, the number of index sets
     data[1]          = size of is[0]
     ...
     data[is_max]     = size of is[is_max-1]
     data[is_max + 1] = data(is[0])
     ...
     data[is_max + 1 + Mbs*i] = data(is[i])
     ...
*/
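/* For illustration only (hypothetical sizes): with is_max = 2, |is[0]| = 3 and |is[1]| = 2,
   data1/odata1 are packed back to back as
     [2, 3, 2, is0_0, is0_1, is0_2, is1_0, is1_1]
   whereas data2 reserves a fixed stride of Mbs entries per index set, so data(is[1]) always starts at
   data[1 + is_max + Mbs] no matter how many indices is[0] actually holds */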
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C, PetscInt is_max, IS is[])
{
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ *)C->data;
  PetscMPIInt     size, rank, tag1, tag2, *len_s, nrqr, nrqs, *id_r1, *len_r1, flag, len, *iwork;
  const PetscInt *idx_i;
  PetscInt        idx, isz, col, *n, *data1, **data1_start, *data2, *data2_i, *data, *data_i;
  PetscInt        Mbs, i, j, k, *odata1, *odata2;
  PetscInt        proc_id, **odata2_ptr, *ctable = NULL, *btable, len_max, len_est;
  PetscInt        proc_end = 0, len_unused, nodata2;
  PetscInt        ois_max; /* max number of is[] over all processors */
  char           *t_p;
  MPI_Comm        comm;
  MPI_Request    *s_waits1, *s_waits2, r_req;
  MPI_Status     *s_status, r_status;
  PetscBT        *table; /* mark indices of this processor's is[] */
  PetscBT         table_i;
  PetscBT         otable; /* mark indices of other processors' is[] */
  PetscInt        bs = C->rmap->bs, Bn = c->B->cmap->n, Bnbs = Bn / bs, *Bowners;
  IS              garray_local, garray_gl;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;

  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag1));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag2));

  /* create tables used in
     step 1: table[i] - mark c->garray of proc [i]
     step 3: table[i] - mark indices of is[i] when whose=MINE
             table[0] - mark indices of is[] when whose=OTHER */
  len = PetscMax(is_max, size);
  PetscCall(PetscMalloc2(len, &table, (Mbs / PETSC_BITS_PER_BYTE + 1) * len, &t_p));
  for (i = 0; i < len; i++) table[i] = t_p + (Mbs / PETSC_BITS_PER_BYTE + 1) * i;

  PetscCall(MPIU_Allreduce(&is_max, &ois_max, 1, MPIU_INT, MPI_MAX, comm));

  /* 1. Send this processor's is[] to other processors */
  /* allocate spaces */
  PetscCall(PetscMalloc1(is_max, &n));
  len = 0;
  for (i = 0; i < is_max; i++) {
    PetscCall(ISGetLocalSize(is[i], &n[i]));
    len += n[i];
  }
  if (!len) {
    is_max = 0;
  } else {
    len += 1 + is_max; /* max length of data1 for one processor */
  }

  PetscCall(PetscMalloc1(size * len + 1, &data1));
  PetscCall(PetscMalloc1(size, &data1_start));
  for (i = 0; i < size; i++) data1_start[i] = data1 + i * len;

  PetscCall(PetscMalloc4(size, &len_s, size, &btable, size, &iwork, size + 1, &Bowners));

  /* gather c->garray from all processors */
  PetscCall(ISCreateGeneral(comm, Bnbs, c->garray, PETSC_COPY_VALUES, &garray_local));
  PetscCall(ISAllGather(garray_local, &garray_gl));
  PetscCall(ISDestroy(&garray_local));
  PetscCallMPI(MPI_Allgather(&Bnbs, 1, MPIU_INT, Bowners + 1, 1, MPIU_INT, comm));

  Bowners[0] = 0;
  for (i = 0; i < size; i++) Bowners[i + 1] += Bowners[i];

  if (is_max) {
    /* hash table ctable which maps block row c->row to its owner proc_id */
    PetscCall(PetscMalloc1(Mbs, &ctable));
    for (proc_id = 0, j = 0; proc_id < size; proc_id++) {
      for (; j < C->rmap->range[proc_id + 1] / bs; j++) ctable[j] = proc_id;
    }

    /* hash tables marking c->garray */
    PetscCall(ISGetIndices(garray_gl, &idx_i));
    for (i = 0; i < size; i++) {
      table_i = table[i];
      PetscCall(PetscBTMemzero(Mbs, table_i));
      for (j = Bowners[i]; j < Bowners[i + 1]; j++) { /* go through B cols of proc[i] */
        PetscCall(PetscBTSet(table_i, idx_i[j]));
      }
    }
    PetscCall(ISRestoreIndices(garray_gl, &idx_i));
  } /* if (is_max) */
  PetscCall(ISDestroy(&garray_gl));
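  /* Note (informal): with this setup, an index idx whose block row is owned by process ctable[idx] is
     sent only to that owner and to lower-ranked processes whose off-diagonal columns (c->garray)
     contain idx; since only the upper triangle is stored, higher-ranked processes cannot reference idx */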

  /* evaluate communication - message to whom, length, and buffer space */
  for (i = 0; i < size; i++) len_s[i] = 0;

  /* header of data1 */
  for (proc_id = 0; proc_id < size; proc_id++) {
    iwork[proc_id]        = 0;
    *data1_start[proc_id] = is_max;
    data1_start[proc_id]++;
    for (j = 0; j < is_max; j++) {
      if (proc_id == rank) {
        *data1_start[proc_id] = n[j];
      } else {
        *data1_start[proc_id] = 0;
      }
      data1_start[proc_id]++;
    }
  }

  for (i = 0; i < is_max; i++) {
    PetscCall(ISGetIndices(is[i], &idx_i));
    for (j = 0; j < n[i]; j++) {
      idx                = idx_i[j];
      *data1_start[rank] = idx;
      data1_start[rank]++; /* for local processing */
      proc_end = ctable[idx];
      for (proc_id = 0; proc_id <= proc_end; proc_id++) {                       /* for others to process */
        if (proc_id == rank) continue;                                          /* done before this loop */
        if (proc_id < proc_end && !PetscBTLookup(table[proc_id], idx)) continue; /* no need to send idx to [proc_id] */
        *data1_start[proc_id] = idx;
        data1_start[proc_id]++;
        len_s[proc_id]++;
      }
    }
    /* update header data */
    for (proc_id = 0; proc_id < size; proc_id++) {
      if (proc_id == rank) continue;
      *(data1 + proc_id * len + 1 + i) = len_s[proc_id] - iwork[proc_id];
      iwork[proc_id]                   = len_s[proc_id];
    }
    PetscCall(ISRestoreIndices(is[i], &idx_i));
  }

  nrqs = 0;
  nrqr = 0;
  for (i = 0; i < size; i++) {
    data1_start[i] = data1 + i * len;
    if (len_s[i]) {
      nrqs++;
      len_s[i] += 1 + is_max; /* add the length of the header */
    }
  }

  for (i = 0; i < is_max; i++) PetscCall(ISDestroy(&is[i]));
  PetscCall(PetscFree(n));
  PetscCall(PetscFree(ctable));

  /* Determine the number of messages to expect, their lengths, and their sources */
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &nrqr));
  PetscCall(PetscGatherMessageLengths(comm, nrqs, nrqr, len_s, &id_r1, &len_r1));

  /* Now post the sends */
  PetscCall(PetscMalloc2(size, &s_waits1, size, &s_waits2));
  k = 0;
  for (proc_id = 0; proc_id < size; proc_id++) { /* send data1 to processor [proc_id] */
    if (len_s[proc_id]) {
      PetscCallMPI(MPI_Isend(data1_start[proc_id], len_s[proc_id], MPIU_INT, proc_id, tag1, comm, s_waits1 + k));
      k++;
    }
  }
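  /* Buffer-management note: the reply buffer odata2 used below is grown in chunks of len_est entries;
     whenever fewer than len_max entries (an upper bound for a single reply) remain, a new chunk is
     allocated and recorded in odata2_ptr[] so that every chunk can be freed at the end */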

  /* 2. Receive other processors' is[], process them, and send the results back */
  len = 0;
  for (i = 0; i < nrqr; i++) {
    if (len_r1[i] > len) len = len_r1[i];
  }
  PetscCall(PetscFree(len_r1));
  PetscCall(PetscFree(id_r1));

  for (proc_id = 0; proc_id < size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;

  PetscCall(PetscMalloc1(len + 1, &odata1));
  PetscCall(PetscMalloc1(size, &odata2_ptr));
  PetscCall(PetscBTCreate(Mbs, &otable));

  len_max = ois_max * (Mbs + 1); /* max space needed to store all is[] of a single receive */
  len_est = 2 * len_max;         /* estimated space needed to store is[] for all received messages */
  PetscCall(PetscMalloc1(len_est + 1, &odata2));
  nodata2 = 0; /* nodata2+1: number of chunks allocated for odata2_ptr[] so far */

  odata2_ptr[nodata2] = odata2;

  len_unused = len_est; /* unused space in the chunk odata2_ptr[nodata2]; must stay >= len_max */

  k = 0;
  while (k < nrqr) {
    /* Receive messages */
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &r_status));
    if (flag) {
      PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &len));
      proc_id = r_status.MPI_SOURCE;
      PetscCallMPI(MPI_Irecv(odata1, len, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
      PetscCallMPI(MPI_Wait(&r_req, &r_status));

      /* Process messages */
      /* make sure there is enough unused space in the odata2 array */
      if (len_unused < len_max) { /* allocate a new chunk for odata2 */
        PetscCall(PetscMalloc1(len_est + 1, &odata2));

        odata2_ptr[++nodata2] = odata2;

        len_unused = len_est;
      }

      PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, odata1, OTHER, odata2, &otable));
      len = 1 + odata2[0];
      for (i = 0; i < odata2[0]; i++) len += odata2[1 + i];

      /* Send messages back */
      PetscCallMPI(MPI_Isend(odata2, len, MPIU_INT, proc_id, tag2, comm, s_waits2 + k));
      k++;
      odata2 += len;
      len_unused -= len;
      len_s[proc_id] = len; /* length of the message sent back to [proc_id] by this processor */
    }
  }
  PetscCall(PetscFree(odata1));
  PetscCall(PetscBTDestroy(&otable));

  /* 3. Do local work on this processor's is[] */
  /* make sure there is enough unused space in the odata2 (= data) array */
  len_max = is_max * (Mbs + 1); /* max space needed to store all is[] of this processor */
  if (len_unused < len_max) {   /* allocate a new chunk for odata2 */
    PetscCall(PetscMalloc1(len_est + 1, &odata2));

    odata2_ptr[++nodata2] = odata2;
  }

  data = odata2;
  PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, data1_start[rank], MINE, data, table));
  PetscCall(PetscFree(data1_start));
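  /* Note: 'data' now uses the fixed-stride data2 layout (Mbs entries reserved per index set) and
     table[i] already marks every index stored for is[i], so the merge in step 4 can append new
     indices in place while PetscBTLookupSet filters out duplicates coming from the replies */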

  /* 4. Receive the work done by other processors, then merge */
  /* get the max length of the reply messages that this processor expects to receive */
  PetscCall(MPIU_Allreduce(len_s, iwork, size, MPI_INT, MPI_MAX, comm));
  PetscCall(PetscMalloc1(iwork[rank] + 1, &data2));
  PetscCall(PetscFree4(len_s, btable, iwork, Bowners));

  k = 0;
  while (k < nrqs) {
    /* Receive messages */
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag2, comm, &flag, &r_status));
    if (flag) {
      PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &len));

      proc_id = r_status.MPI_SOURCE;

      PetscCallMPI(MPI_Irecv(data2, len, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
      PetscCallMPI(MPI_Wait(&r_req, &r_status));
      if (len > 1 + is_max) { /* Add data2 into data */
        data2_i = data2 + 1 + is_max;
        for (i = 0; i < is_max; i++) {
          table_i = table[i];
          data_i  = data + 1 + is_max + Mbs * i;
          isz     = data[1 + i];
          for (j = 0; j < data2[1 + i]; j++) {
            col = data2_i[j];
            if (!PetscBTLookupSet(table_i, col)) data_i[isz++] = col;
          }
          data[1 + i] = isz;
          if (i < is_max - 1) data2_i += data2[1 + i];
        }
      }
      k++;
    }
  }
  PetscCall(PetscFree(data2));
  PetscCall(PetscFree2(table, t_p));

  /* phase 1 sends are complete */
  PetscCall(PetscMalloc1(size, &s_status));
  if (nrqs) PetscCallMPI(MPI_Waitall(nrqs, s_waits1, s_status));
  PetscCall(PetscFree(data1));

  /* phase 2 sends are complete */
  if (nrqr) PetscCallMPI(MPI_Waitall(nrqr, s_waits2, s_status));
  PetscCall(PetscFree2(s_waits1, s_waits2));
  PetscCall(PetscFree(s_status));

  /* 5. Create new is[] */
  for (i = 0; i < is_max; i++) {
    data_i = data + 1 + is_max + Mbs * i;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, data[1 + i], data_i, PETSC_COPY_VALUES, is + i));
  }
  for (k = 0; k <= nodata2; k++) PetscCall(PetscFree(odata2_ptr[k]));
  PetscCall(PetscFree(odata2_ptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap_MPISBAIJ_Once() to do
   the work on the local processor.

   Inputs:
     C     - MAT_MPISBAIJ;
     data  - holds is[]. See MatIncreaseOverlap_MPISBAIJ_Once() for the format.
     whose - whose is[] to be processed,
             MINE:  this processor's is[]
             OTHER: another processor's is[]
   Output:
     nidx  - whose = MINE:
               holds the input and the newly found indices in the same format as data
             whose = OTHER:
               holds only the newly found indices
     table - table[i]: marks the indices of is[i], i=0,...,is_max. Used only when whose=MINE.
*/
/* Would computation be reduced by swapping the loops 'for each is' and 'for each row'? */
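/* Output layout sketch (informal, hypothetical is_max = 2): for whose = MINE, nidx uses the
   fixed-stride data2 format, i.e. nidx[1] and nidx[2] hold the new sizes and the indices of is[i]
   start at nidx[1 + is_max + Mbs*i]; for whose = OTHER, the newly found indices of the is[] are
   packed back to back starting at nidx[1 + is_max], ready to be sent back with tag2 */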
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat C, PetscInt *data, PetscInt whose, PetscInt *nidx, PetscBT *table)
{
  Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(c->A)->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(c->B)->data;
  PetscInt      row, mbs, Mbs, *nidx_i, col, col_max, isz, isz0, *ai, *aj, *bi, *bj, *garray, rstart, l;
  PetscInt      a_start, a_end, b_start, b_end, i, j, k, is_max, *idx_i, n;
  PetscBT       table0;  /* mark the indices of the input is[] for look-up */
  PetscBT       table_i; /* points to the i-th table; when whose=OTHER, a single table is used for all is[] */

  PetscFunctionBegin;
  Mbs    = c->Mbs;
  mbs    = a->mbs;
  ai     = a->i;
  aj     = a->j;
  bi     = b->i;
  bj     = b->j;
  garray = c->garray;
  rstart = c->rstartbs;
  is_max = data[0];

  PetscCall(PetscBTCreate(Mbs, &table0));

  nidx[0] = is_max;
  idx_i   = data + is_max + 1;   /* ptr to input is[0] array */
  nidx_i  = nidx + is_max + 1;   /* ptr to output is[0] array */
  for (i = 0; i < is_max; i++) { /* for each is */
    isz = 0;
    n   = data[1 + i]; /* size of input is[i] */

    /* initialize and set table_i (marks idx and nidx) and table0 (marks idx only) */
    if (whose == MINE) { /* process this processor's is[] */
      table_i = table[i];
      nidx_i  = nidx + 1 + is_max + Mbs * i;
    } else { /* process another processor's is[] - only use one temp table */
      table_i = table[0];
    }
    PetscCall(PetscBTMemzero(Mbs, table_i));
    PetscCall(PetscBTMemzero(Mbs, table0));
    if (n == 0) {
      nidx[1 + i] = 0; /* size of new is[i] */
      continue;
    }

    isz0    = 0;
    col_max = 0;
    for (j = 0; j < n; j++) {
      col = idx_i[j];
      PetscCheck(col < Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index col %" PetscInt_FMT " >= Mbs %" PetscInt_FMT, col, Mbs);
      if (!PetscBTLookupSet(table_i, col)) {
        PetscCall(PetscBTSet(table0, col));
        if (whose == MINE) nidx_i[isz0] = col;
        if (col_max < col) col_max = col;
        isz0++;
      }
    }

    if (whose == MINE) isz = isz0;
    k = 0; /* number of indices from input is[i] that have been examined */
    for (row = 0; row < mbs; row++) {
      a_start = ai[row];
      a_end   = ai[row + 1];
      b_start = bi[row];
      b_end   = bi[row + 1];
      if (PetscBTLookup(table0, row + rstart)) { /* row is in input is[i]:
                                                    do row search - collect all cols in this row */
        for (l = a_start; l < a_end; l++) { /* Amat */
          col = aj[l] + rstart;
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        k++;
        if (k >= isz0) break; /* for (row=0; row<mbs; row++) */
      } else { /* row is not in input is[i]:
                  do col search - add row to nidx_i if any col of this row is in input is[i] */
        for (l = a_start; l < a_end; l++) { /* Amat */
          col = aj[l] + rstart;
          if (col > col_max) break;
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for (l = a_start; l < a_end; l++) */
          }
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (col > col_max) break;
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for (l = b_start; l < b_end; l++) */
          }
        }
      }
    }

    if (i < is_max - 1) {
      idx_i += n;    /* ptr to input is[i+1] array */
      nidx_i += isz; /* ptr to output is[i+1] array */
    }
    nidx[1 + i] = isz; /* size of new is[i] */
  } /* for each is */
  PetscCall(PetscBTDestroy(&table0));
  PetscFunctionReturn(PETSC_SUCCESS);
}