/*
  Routines to compute overlapping regions of a parallel MPI matrix.
  Used for finding submatrices that are shared across processors.
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <petscbt.h>

static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat, PetscInt, IS *);
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat, PetscInt *, PetscInt, PetscInt *, PetscBT *);

PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C, PetscInt is_max, IS is[], PetscInt ov)
{
  PetscInt        i, N = C->cmap->N, bs = C->rmap->bs, M = C->rmap->N, Mbs = M / bs, *nidx, isz, iov;
  IS             *is_new, *is_row;
  Mat            *submats;
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ *)C->data;
  Mat_SeqSBAIJ   *asub_i;
  PetscBT         table;
  PetscInt       *ai, brow, nz, nis, l, nmax, nstages_local, nstages, max_no, pos;
  const PetscInt *idx;
  PetscBool       flg;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(is_max, &is_new));
  /* Convert the indices into block format */
  PetscCall(ISCompressIndicesGeneral(N, C->rmap->n, bs, is_max, is, is_new));
  PetscCheck(ov >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative overlap specified");

  /* previous non-scalable implementation */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsHasName(NULL, NULL, "-IncreaseOverlap_old", &flg));
  if (flg) { /* previous non-scalable implementation */
    printf("use previous non-scalable implementation...\n");
    for (i = 0; i < ov; ++i) PetscCall(MatIncreaseOverlap_MPISBAIJ_Once(C, is_max, is_new));
  } else { /* implementation using modified BAIJ routines */

    PetscCall(PetscMalloc1(Mbs + 1, &nidx));
    PetscCall(PetscBTCreate(Mbs, &table)); /* for column search */

    /* Create is_row */
    PetscCall(PetscMalloc1(is_max, &is_row));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, Mbs, 0, 1, &is_row[0]));

    for (i = 1; i < is_max; i++) { is_row[i] = is_row[0]; /* reuse is_row[0] */ }

    /* Allocate memory to hold all the submatrices - modified from MatCreateSubMatrices_MPIBAIJ() */
    PetscCall(PetscMalloc1(is_max + 1, &submats));

    /* Determine the number of stages through which submatrices are done */
    nmax = 20 * 1000000 / (c->Nbs * sizeof(PetscInt));
    if (!nmax) nmax = 1;
    nstages_local = is_max / nmax + ((is_max % nmax) ? 1 : 0);

    /* Make sure every processor loops through the nstages */
    PetscCall(MPIU_Allreduce(&nstages_local, &nstages, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)C)));

    {
      const PetscObject obj = (PetscObject)c->A;
      size_t            new_len, cur_len, max_len;

      PetscCall(PetscStrlen(MATSEQBAIJ, &new_len));
      PetscCall(PetscStrlen(MATSEQSBAIJ, &cur_len));
      max_len = PetscMax(cur_len, new_len) + 1;
      PetscCall(PetscRealloc(max_len * sizeof(*(obj->type_name)), &obj->type_name));
      /* The resulting submatrices should be BAIJ, not SBAIJ, hence we change this value to trigger that */
      for (iov = 0; iov < ov; ++iov) {
        /* 1) Get submats for column search */
        PetscCall(PetscStrncpy(obj->type_name, MATSEQBAIJ, max_len));
        for (i = 0, pos = 0; i < nstages; i++) {
          if (pos + nmax <= is_max) max_no = nmax;
          else if (pos == is_max) max_no = 0;
          else max_no = is_max - pos;
          c->ijonly = PETSC_TRUE; /* only matrix data structures are requested */

          PetscCall(MatCreateSubMatrices_MPIBAIJ_local(C, max_no, is_row + pos, is_new + pos, MAT_INITIAL_MATRIX, submats + pos));
          pos += max_no;
        }
        PetscCall(PetscStrncpy(obj->type_name, MATSEQSBAIJ, max_len));

        /* 2) Row search */
        PetscCall(MatIncreaseOverlap_MPIBAIJ_Once(C, is_max, is_new));

        /* 3) Column search */
        for (i = 0; i < is_max; i++) {
          asub_i = (Mat_SeqSBAIJ *)submats[i]->data;
          ai     = asub_i->i;

          /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() into table */
          PetscCall(PetscBTMemzero(Mbs, table));

          PetscCall(ISGetIndices(is_new[i], &idx));
          PetscCall(ISGetLocalSize(is_new[i], &nis));
          for (l = 0; l < nis; l++) {
            PetscCall(PetscBTSet(table, idx[l]));
            nidx[l] = idx[l];
          }
          isz = nis;

          /* add column entries to table */
          for (brow = 0; brow < Mbs; brow++) {
            nz = ai[brow + 1] - ai[brow];
            if (nz) {
              if (!PetscBTLookupSet(table, brow)) nidx[isz++] = brow;
            }
          }
          PetscCall(ISRestoreIndices(is_new[i], &idx));
          PetscCall(ISDestroy(&is_new[i]));

          /* create updated is_new */
          PetscCall(ISCreateGeneral(PETSC_COMM_SELF, isz, nidx, PETSC_COPY_VALUES, is_new + i));
        }

        /* Free tmp spaces */
        for (i = 0; i < is_max; i++) PetscCall(MatDestroy(&submats[i]));
      }

      PetscCall(PetscBTDestroy(&table));
      PetscCall(PetscFree(submats));
      PetscCall(ISDestroy(&is_row[0]));
      PetscCall(PetscFree(is_row));
      PetscCall(PetscFree(nidx));
    }
  }

  for (i = 0; i < is_max; i++) PetscCall(ISDestroy(&is[i]));
  PetscCall(ISExpandIndicesGeneral(N, N, bs, is_max, is_new, is));

  for (i = 0; i < is_max; i++) PetscCall(ISDestroy(&is_new[i]));
  PetscCall(PetscFree(is_new));
  PetscFunctionReturn(PETSC_SUCCESS);
}

typedef enum {
  MINE,
  OTHER
} WhoseOwner;
/* data1, odata1 and odata2 are packed in the format (for communication):
     data[0]          = is_max, no. of is
     data[1]          = size of is[0]
     ...
     data[is_max]     = size of is[is_max-1]
     data[is_max + 1] = data(is[0])
     ...
     data[is_max + 1 + sum(size of is[k]), k=0,...,i-1] = data(is[i])
     ...
   data2 is packed in the format (for creating output is[]):
     data[0]          = is_max, no. of is
     data[1]          = size of is[0]
     ...
     data[is_max]     = size of is[is_max-1]
     data[is_max + 1] = data(is[0])
     ...
     data[is_max + 1 + Mbs*i] = data(is[i])
     ...
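
   For illustration (hypothetical values, not taken from this file): with is_max = 2,
   is[0] = {3, 7} and is[1] = {5}, the first (communication) format packs
     data[] = {2, 2, 1, 3, 7, 5},
   i.e. the number of index sets, their two sizes, then the concatenated indices. In the
   second (output) format the indices of is[1] would instead start at the fixed offset
   is_max + 1 + Mbs, so consecutive index sets are Mbs entries apart regardless of size.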
*/
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C, PetscInt is_max, IS is[])
{
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ *)C->data;
  PetscMPIInt     size, rank, tag1, tag2, *len_s, nrqr, nrqs, *id_r1, *len_r1, flag, len, *iwork;
  const PetscInt *idx_i;
  PetscInt        idx, isz, col, *n, *data1, **data1_start, *data2, *data2_i, *data, *data_i;
  PetscInt        Mbs, i, j, k, *odata1, *odata2;
  PetscInt        proc_id, **odata2_ptr, *ctable = NULL, *btable, len_max, len_est;
  PetscInt        proc_end = 0, len_unused, nodata2;
  PetscInt        ois_max; /* max no. of is[] over all processors */
  char           *t_p;
  MPI_Comm        comm;
  MPI_Request    *s_waits1, *s_waits2, r_req;
  MPI_Status     *s_status, r_status;
  PetscBT        *table; /* mark indices of this processor's is[] */
  PetscBT         table_i;
  PetscBT         otable; /* mark indices of other processors' is[] */
  PetscInt        bs = C->rmap->bs, Bn = c->B->cmap->n, Bnbs = Bn / bs, *Bowners;
  IS              garray_local, garray_gl;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;

  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag1));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag2));

  /* create tables used in
     step 1: table[i] - mark c->garray of proc [i]
     step 3: table[i] - mark indices of is[i] when whose=MINE
             table[0] - mark indices of is[] when whose=OTHER */
  len = PetscMax(is_max, size);
  PetscCall(PetscMalloc2(len, &table, (Mbs / PETSC_BITS_PER_BYTE + 1) * len, &t_p));
  for (i = 0; i < len; i++) table[i] = t_p + (Mbs / PETSC_BITS_PER_BYTE + 1) * i;

  PetscCall(MPIU_Allreduce(&is_max, &ois_max, 1, MPIU_INT, MPI_MAX, comm));

  /* 1. Send this processor's is[] to other processors */
  /* allocate spaces */
  PetscCall(PetscMalloc1(is_max, &n));
  len = 0;
  for (i = 0; i < is_max; i++) {
    PetscCall(ISGetLocalSize(is[i], &n[i]));
    len += n[i];
  }
  if (!len) {
    is_max = 0;
  } else {
    len += 1 + is_max; /* max length of data1 for one processor */
  }

  PetscCall(PetscMalloc1(size * len + 1, &data1));
  PetscCall(PetscMalloc1(size, &data1_start));
  for (i = 0; i < size; i++) data1_start[i] = data1 + i * len;

  PetscCall(PetscMalloc4(size, &len_s, size, &btable, size, &iwork, size + 1, &Bowners));

  /* gather c->garray from all processors */
  PetscCall(ISCreateGeneral(comm, Bnbs, c->garray, PETSC_COPY_VALUES, &garray_local));
  PetscCall(ISAllGather(garray_local, &garray_gl));
  PetscCall(ISDestroy(&garray_local));
  PetscCallMPI(MPI_Allgather(&Bnbs, 1, MPIU_INT, Bowners + 1, 1, MPIU_INT, comm));

  Bowners[0] = 0;
  for (i = 0; i < size; i++) Bowners[i + 1] += Bowners[i];

  if (is_max) {
    /* hash table ctable which maps c->row to proc_id */
    PetscCall(PetscMalloc1(Mbs, &ctable));
    for (proc_id = 0, j = 0; proc_id < size; proc_id++) {
      for (; j < C->rmap->range[proc_id + 1] / bs; j++) ctable[j] = proc_id;
    }

    /* hash tables marking c->garray */
    PetscCall(ISGetIndices(garray_gl, &idx_i));
    for (i = 0; i < size; i++) {
      table_i = table[i];
      PetscCall(PetscBTMemzero(Mbs, table_i));
      for (j = Bowners[i]; j < Bowners[i + 1]; j++) { /* go through B cols of proc[i] */
        PetscCall(PetscBTSet(table_i, idx_i[j]));
      }
    }
    PetscCall(ISRestoreIndices(garray_gl, &idx_i));
  } /* if (is_max) */
  PetscCall(ISDestroy(&garray_gl));

  /* evaluate communication - mesg to who, length, and buffer space */
  for (i = 0; i < size; i++) len_s[i] = 0;

  /* header of data1 */
  for (proc_id = 0; proc_id < size; proc_id++) {
    iwork[proc_id]        = 0;
    *data1_start[proc_id] = is_max;
    data1_start[proc_id]++;
    for (j = 0; j < is_max; j++) {
      if (proc_id == rank) {
        *data1_start[proc_id] = n[j];
      } else {
        *data1_start[proc_id] = 0;
      }
      data1_start[proc_id]++;
    }
  }

  for (i = 0; i < is_max; i++) {
    PetscCall(ISGetIndices(is[i], &idx_i));
    for (j = 0; j < n[i]; j++) {
      idx                = idx_i[j];
      *data1_start[rank] = idx;
      data1_start[rank]++; /* for local processing */
      proc_end = ctable[idx];
      for (proc_id = 0; proc_id <= proc_end; proc_id++) {                        /* for others to process */
        if (proc_id == rank) continue;                                           /* done before this loop */
        if (proc_id < proc_end && !PetscBTLookup(table[proc_id], idx)) continue; /* no need for sending idx to [proc_id] */
        *data1_start[proc_id] = idx;
        data1_start[proc_id]++;
        len_s[proc_id]++;
      }
    }
    /* update header data */
    for (proc_id = 0; proc_id < size; proc_id++) {
      if (proc_id == rank) continue;
      *(data1 + proc_id * len + 1 + i) = len_s[proc_id] - iwork[proc_id];
      iwork[proc_id]                   = len_s[proc_id];
    }
    PetscCall(ISRestoreIndices(is[i], &idx_i));
  }

  nrqs = 0;
  nrqr = 0;
  for (i = 0; i < size; i++) {
    data1_start[i] = data1 + i * len;
    if (len_s[i]) {
      nrqs++;
      len_s[i] += 1 + is_max; /* add the header length */
    }
  }

  for (i = 0; i < is_max; i++) PetscCall(ISDestroy(&is[i]));
  PetscCall(PetscFree(n));
  PetscCall(PetscFree(ctable));

  /* Determine the number of messages to expect, their lengths, and from-ids */
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &nrqr));
  PetscCall(PetscGatherMessageLengths(comm, nrqs, nrqr, len_s, &id_r1, &len_r1));

  /* Now post the sends */
  PetscCall(PetscMalloc2(size, &s_waits1, size, &s_waits2));
  k = 0;
  for (proc_id = 0; proc_id < size; proc_id++) { /* send data1 to processor [proc_id] */
    if (len_s[proc_id]) {
      PetscCallMPI(MPI_Isend(data1_start[proc_id], len_s[proc_id], MPIU_INT, proc_id, tag1, comm, s_waits1 + k));
      k++;
    }
  }

  /* 2. Receive other's is[] and process, then send back */
  len = 0;
  for (i = 0; i < nrqr; i++) {
    if (len_r1[i] > len) len = len_r1[i];
  }
  PetscCall(PetscFree(len_r1));
  PetscCall(PetscFree(id_r1));

  for (proc_id = 0; proc_id < size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;

  PetscCall(PetscMalloc1(len + 1, &odata1));
  PetscCall(PetscMalloc1(size, &odata2_ptr));
  PetscCall(PetscBTCreate(Mbs, &otable));

  len_max = ois_max * (Mbs + 1); /* max space storing all is[] for each receive */
  len_est = 2 * len_max;         /* estimated space of storing is[] for all receiving messages */
  PetscCall(PetscMalloc1(len_est + 1, &odata2));
  nodata2 = 0; /* nodata2+1: number of malloc'ed blocks referenced by odata2_ptr[] */

  odata2_ptr[nodata2] = odata2;

  len_unused = len_est; /* unused space in the array odata2_ptr[nodata2] -- needs to be >= len_max */

  k = 0;
  while (k < nrqr) {
    /* Receive messages */
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &r_status));
    if (flag) {
      PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &len));
      proc_id = r_status.MPI_SOURCE;
      PetscCallMPI(MPI_Irecv(odata1, len, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
      PetscCallMPI(MPI_Wait(&r_req, &r_status));

      /* Process messages */
      /* make sure there is enough unused space in odata2 array */
      if (len_unused < len_max) { /* allocate more space for odata2 */
        PetscCall(PetscMalloc1(len_est + 1, &odata2));

        odata2_ptr[++nodata2] = odata2;

        len_unused = len_est;
      }

      PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, odata1, OTHER, odata2, &otable));
      len = 1 + odata2[0];
      for (i = 0; i < odata2[0]; i++) len += odata2[1 + i];

      /* Send messages back */
      PetscCallMPI(MPI_Isend(odata2, len, MPIU_INT, proc_id, tag2, comm, s_waits2 + k));
      k++;
      odata2 += len;
      len_unused -= len;
      len_s[proc_id] = len; /* length of the message sent back to [proc_id] by this proc */
    }
  }
  PetscCall(PetscFree(odata1));
  PetscCall(PetscBTDestroy(&otable));

  /* 3. Do local work on this processor's is[] */
  /* make sure there is enough unused space in odata2(=data) array */
  len_max = is_max * (Mbs + 1); /* max space storing all is[] for this processor */
  if (len_unused < len_max) {   /* allocate more space for odata2 */
    PetscCall(PetscMalloc1(len_est + 1, &odata2));

    odata2_ptr[++nodata2] = odata2;
  }

  data = odata2;
  PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, data1_start[rank], MINE, data, table));
  PetscCall(PetscFree(data1_start));

  /* 4. Receive work done on other processors, then merge */
  /* get max number of messages that this processor expects to recv */
  PetscCall(MPIU_Allreduce(len_s, iwork, size, MPI_INT, MPI_MAX, comm));
  PetscCall(PetscMalloc1(iwork[rank] + 1, &data2));
  PetscCall(PetscFree4(len_s, btable, iwork, Bowners));

  k = 0;
  while (k < nrqs) {
    /* Receive messages */
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag2, comm, &flag, &r_status));
    if (flag) {
      PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &len));

      proc_id = r_status.MPI_SOURCE;

      PetscCallMPI(MPI_Irecv(data2, len, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
      PetscCallMPI(MPI_Wait(&r_req, &r_status));
      if (len > 1 + is_max) { /* Add data2 into data */
        data2_i = data2 + 1 + is_max;
        for (i = 0; i < is_max; i++) {
          table_i = table[i];
          data_i  = data + 1 + is_max + Mbs * i;
          isz     = data[1 + i];
          for (j = 0; j < data2[1 + i]; j++) {
            col = data2_i[j];
            if (!PetscBTLookupSet(table_i, col)) data_i[isz++] = col;
          }
          data[1 + i] = isz;
          if (i < is_max - 1) data2_i += data2[1 + i];
        }
      }
      k++;
    }
  }
  PetscCall(PetscFree(data2));
  PetscCall(PetscFree2(table, t_p));

  /* phase 1 sends are complete */
  PetscCall(PetscMalloc1(size, &s_status));
  if (nrqs) PetscCallMPI(MPI_Waitall(nrqs, s_waits1, s_status));
  PetscCall(PetscFree(data1));

  /* phase 2 sends are complete */
  if (nrqr) PetscCallMPI(MPI_Waitall(nrqr, s_waits2, s_status));
  PetscCall(PetscFree2(s_waits1, s_waits2));
  PetscCall(PetscFree(s_status));

  /* 5. Create new is[] */
  for (i = 0; i < is_max; i++) {
    data_i = data + 1 + is_max + Mbs * i;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, data[1 + i], data_i, PETSC_COPY_VALUES, is + i));
  }
  for (k = 0; k <= nodata2; k++) PetscCall(PetscFree(odata2_ptr[k]));
  PetscCall(PetscFree(odata2_ptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap, to do
   the work on the local processor.

   Inputs:
     C     - MAT_MPISBAIJ;
     data  - holds is[]. See MatIncreaseOverlap_MPISBAIJ_Once() for the format.
     whose - whose is[] to be processed,
             MINE:  this processor's is[]
             OTHER: other processor's is[]
   Output:
     nidx  - whose = MINE:
                     holds input and newly found indices in the same format as data
             whose = OTHER:
                     only holds the newly found indices
     table - table[i]: mark the indices of is[i], i=0,...,is_max-1. Used only in the case 'whose=MINE'.
*/
/* Would computation be reduced by swapping the loop 'for each is' and 'for each row'? */
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat C, PetscInt *data, PetscInt whose, PetscInt *nidx, PetscBT *table)
{
  Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(c->A)->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(c->B)->data;
  PetscInt      row, mbs, Mbs, *nidx_i, col, col_max, isz, isz0, *ai, *aj, *bi, *bj, *garray, rstart, l;
  PetscInt      a_start, a_end, b_start, b_end, i, j, k, is_max, *idx_i, n;
  PetscBT       table0;  /* mark the indices of input is[] for look up */
  PetscBT       table_i; /* points to i-th table; when whose=OTHER, a single table is used for all is[] */

  PetscFunctionBegin;
  Mbs    = c->Mbs;
  mbs    = a->mbs;
  ai     = a->i;
  aj     = a->j;
  bi     = b->i;
  bj     = b->j;
  garray = c->garray;
  rstart = c->rstartbs;
  is_max = data[0];

  PetscCall(PetscBTCreate(Mbs, &table0));

  nidx[0] = is_max;
  idx_i   = data + is_max + 1; /* ptr to input is[0] array */
  nidx_i  = nidx + is_max + 1; /* ptr to output is[0] array */
  for (i = 0; i < is_max; i++) { /* for each is */
    isz = 0;
    n   = data[1 + i]; /* size of input is[i] */

    /* initialize and set table_i(mark idx and nidx) and table0(only mark idx) */
    if (whose == MINE) { /* process this processor's is[] */
      table_i = table[i];
      nidx_i  = nidx + 1 + is_max + Mbs * i;
    } else { /* process other processor's is[] - only use one temp table */
      table_i = table[0];
    }
    PetscCall(PetscBTMemzero(Mbs, table_i));
    PetscCall(PetscBTMemzero(Mbs, table0));
    if (n == 0) {
      nidx[1 + i] = 0; /* size of new is[i] */
      continue;
    }

    isz0    = 0;
    col_max = 0;
    for (j = 0; j < n; j++) {
      col = idx_i[j];
      PetscCheck(col < Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index col %" PetscInt_FMT " >= Mbs %" PetscInt_FMT, col, Mbs);
      if (!PetscBTLookupSet(table_i, col)) {
        PetscCall(PetscBTSet(table0, col));
        if (whose == MINE) nidx_i[isz0] = col;
        if (col_max < col) col_max = col;
        isz0++;
      }
    }

    if (whose == MINE) isz = isz0;
    k = 0; /* no. of indices from input is[i] that have been examined */
    for (row = 0; row < mbs; row++) {
      a_start = ai[row];
      a_end   = ai[row + 1];
      b_start = bi[row];
      b_end   = bi[row + 1];
      if (PetscBTLookup(table0, row + rstart)) { /* row is on input is[i]:
                                                    do row search: collect all col in this row */
        for (l = a_start; l < a_end; l++) { /* Amat */
          col = aj[l] + rstart;
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        k++;
        if (k >= isz0) break; /* for (row = 0; row < mbs; row++) */
      } else { /* row is not on input is[i]:
                  do col search: add row onto nidx_i if there is a col in nidx_i */
        for (l = a_start; l < a_end; l++) { /* Amat */
          col = aj[l] + rstart;
          if (col > col_max) break;
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for (l = a_start; l < a_end; l++) */
          }
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (col > col_max) break;
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for (l = b_start; l < b_end; l++) */
          }
        }
      }
    }

    if (i < is_max - 1) {
      idx_i += n;    /* ptr to input is[i+1] array */
      nidx_i += isz; /* ptr to output is[i+1] array */
    }
    nidx[1 + i] = isz; /* size of new is[i] */
  } /* for each is */
  PetscCall(PetscBTDestroy(&table0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
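
/*
   Illustrative usage sketch (not part of the original file; the matrix A, the index set is0,
   and the overlap value 2 are assumptions). MatIncreaseOverlap() dispatches to
   MatIncreaseOverlap_MPISBAIJ() for MPISBAIJ matrices; the grown index sets are then
   typically passed to MatCreateSubMatrices() to extract the overlapping submatrices:

     IS   is[1];
     Mat *submat;

     is[0] = is0;
     PetscCall(MatIncreaseOverlap(A, 1, is, 2));
     PetscCall(MatCreateSubMatrices(A, 1, is, is, MAT_INITIAL_MATRIX, &submat));
*/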