1 /* 2 Routines to compute overlapping regions of a parallel MPI matrix. 3 Used for finding submatrices that were shared across processors. 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 #include <petscbt.h> 7 8 static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat, PetscInt, IS *); 9 static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat, PetscInt *, PetscInt, PetscInt *, PetscBT *); 10 11 PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C, PetscInt is_max, IS is[], PetscInt ov) 12 { 13 PetscInt i, N = C->cmap->N, bs = C->rmap->bs, M = C->rmap->N, Mbs = M / bs, *nidx, isz, iov; 14 IS *is_new, *is_row; 15 Mat *submats; 16 Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data; 17 Mat_SeqSBAIJ *asub_i; 18 PetscBT table; 19 PetscInt *ai, brow, nz, nis, l, nmax, nstages_local, nstages, max_no, pos; 20 const PetscInt *idx; 21 PetscBool flg; 22 23 PetscFunctionBegin; 24 PetscCall(PetscMalloc1(is_max, &is_new)); 25 /* Convert the indices into block format */ 26 PetscCall(ISCompressIndicesGeneral(N, C->rmap->n, bs, is_max, is, is_new)); 27 PetscCheck(ov >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative overlap specified"); 28 29 /* previous non-scalable implementation */ 30 flg = PETSC_FALSE; 31 PetscCall(PetscOptionsHasName(NULL, NULL, "-IncreaseOverlap_old", &flg)); 32 if (flg) { /* previous non-scalable implementation */ 33 printf("use previous non-scalable implementation...\n"); 34 for (i = 0; i < ov; ++i) PetscCall(MatIncreaseOverlap_MPISBAIJ_Once(C, is_max, is_new)); 35 } else { /* implementation using modified BAIJ routines */ 36 37 PetscCall(PetscMalloc1(Mbs + 1, &nidx)); 38 PetscCall(PetscBTCreate(Mbs, &table)); /* for column search */ 39 40 /* Create is_row */ 41 PetscCall(PetscMalloc1(is_max, &is_row)); 42 PetscCall(ISCreateStride(PETSC_COMM_SELF, Mbs, 0, 1, &is_row[0])); 43 44 for (i = 1; i < is_max; i++) { is_row[i] = is_row[0]; /* reuse is_row[0] */ } 45 46 /* Allocate memory to hold all the submatrices - Modified from 
MatCreateSubMatrices_MPIBAIJ() */ 47 PetscCall(PetscMalloc1(is_max + 1, &submats)); 48 49 /* Determine the number of stages through which submatrices are done */ 50 nmax = 20 * 1000000 / (c->Nbs * sizeof(PetscInt)); 51 if (!nmax) nmax = 1; 52 nstages_local = is_max / nmax + ((is_max % nmax) ? 1 : 0); 53 54 /* Make sure every processor loops through the nstages */ 55 PetscCallMPI(MPIU_Allreduce(&nstages_local, &nstages, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)C))); 56 57 { 58 const PetscObject obj = (PetscObject)c->A; 59 size_t new_len, cur_len, max_len; 60 61 PetscCall(PetscStrlen(MATSEQBAIJ, &new_len)); 62 PetscCall(PetscStrlen(MATSEQSBAIJ, &cur_len)); 63 max_len = PetscMax(cur_len, new_len) + 1; 64 PetscCall(PetscRealloc(max_len * sizeof(*obj->type_name), &obj->type_name)); 65 /* The resulting submatrices should be BAIJ, not SBAIJ, hence we change this value to 66 trigger that */ 67 for (iov = 0; iov < ov; ++iov) { 68 /* 1) Get submats for column search */ 69 PetscCall(PetscStrncpy(obj->type_name, MATSEQBAIJ, max_len)); 70 for (i = 0, pos = 0; i < nstages; i++) { 71 if (pos + nmax <= is_max) max_no = nmax; 72 else if (pos == is_max) max_no = 0; 73 else max_no = is_max - pos; 74 c->ijonly = PETSC_TRUE; /* only matrix data structures are requested */ 75 76 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(C, max_no, is_row + pos, is_new + pos, MAT_INITIAL_MATRIX, submats + pos, PETSC_TRUE)); 77 pos += max_no; 78 } 79 PetscCall(PetscStrncpy(obj->type_name, MATSEQSBAIJ, max_len)); 80 81 /* 2) Row search */ 82 PetscCall(MatIncreaseOverlap_MPIBAIJ_Once(C, is_max, is_new)); 83 84 /* 3) Column search */ 85 for (i = 0; i < is_max; i++) { 86 asub_i = (Mat_SeqSBAIJ *)submats[i]->data; 87 ai = asub_i->i; 88 89 /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() to table */ 90 PetscCall(PetscBTMemzero(Mbs, table)); 91 92 PetscCall(ISGetIndices(is_new[i], &idx)); 93 PetscCall(ISGetLocalSize(is_new[i], &nis)); 94 for (l = 0; l < nis; l++) { 95 
PetscCall(PetscBTSet(table, idx[l])); 96 nidx[l] = idx[l]; 97 } 98 isz = nis; 99 100 /* add column entries to table */ 101 for (brow = 0; brow < Mbs; brow++) { 102 nz = ai[brow + 1] - ai[brow]; 103 if (nz) { 104 if (!PetscBTLookupSet(table, brow)) nidx[isz++] = brow; 105 } 106 } 107 PetscCall(ISRestoreIndices(is_new[i], &idx)); 108 PetscCall(ISDestroy(&is_new[i])); 109 110 /* create updated is_new */ 111 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, isz, nidx, PETSC_COPY_VALUES, is_new + i)); 112 } 113 114 /* Free tmp spaces */ 115 for (i = 0; i < is_max; i++) PetscCall(MatDestroy(&submats[i])); 116 } 117 118 PetscCall(PetscBTDestroy(&table)); 119 PetscCall(PetscFree(submats)); 120 PetscCall(ISDestroy(&is_row[0])); 121 PetscCall(PetscFree(is_row)); 122 PetscCall(PetscFree(nidx)); 123 } 124 } 125 for (i = 0; i < is_max; i++) { 126 PetscCall(ISDestroy(&is[i])); 127 PetscCall(ISGetLocalSize(is_new[i], &nis)); 128 PetscCall(ISGetIndices(is_new[i], &idx)); 129 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)is_new[i]), bs, nis, idx, PETSC_COPY_VALUES, &is[i])); 130 PetscCall(ISDestroy(&is_new[i])); 131 } 132 PetscCall(PetscFree(is_new)); 133 PetscFunctionReturn(PETSC_SUCCESS); 134 } 135 136 typedef enum { 137 MINE, 138 OTHER 139 } WhoseOwner; 140 /* data1, odata1 and odata2 are packed in the format (for communication): 141 data[0] = is_max, no of is 142 data[1] = size of is[0] 143 ... 144 data[is_max] = size of is[is_max-1] 145 data[is_max + 1] = data(is[0]) 146 ... 147 data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i]) 148 ... 149 data2 is packed in the format (for creating output is[]): 150 data[0] = is_max, no of is 151 data[1] = size of is[0] 152 ... 153 data[is_max] = size of is[is_max-1] 154 data[is_max + 1] = data(is[0]) 155 ... 156 data[is_max + 1 + Mbs*i) = data(is[i]) 157 ... 
 */
/*
   MatIncreaseOverlap_MPISBAIJ_Once - perform one level of overlap increase for is[]
   by exchanging the index sets with all other processors (see the packing format
   described in the comment above).

   Protocol (all sends/receives use nonblocking MPI with two distinct tags):
     1. pack this rank's is[] into data1 and send pieces to the ranks that may own
        related rows (pruned via ctable and the per-rank garray bit tables);
     2. probe/receive other ranks' sets, process them locally
        (MatIncreaseOverlap_MPISBAIJ_Local with whose=OTHER), send results back;
     3. process this rank's own sets locally (whose=MINE);
     4. receive the remote results and merge them into data, deduplicating
        through the per-set bit tables;
     5. build the output is[] from the merged data.
*/
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C, PetscInt is_max, IS is[])
{
  Mat_MPISBAIJ   *c = (Mat_MPISBAIJ *)C->data;
  PetscMPIInt     proc_end = 0, size, rank, tag1, tag2, *len_s, nrqr, nrqs, *id_r1, *len_r1, flag, *iwork;
  const PetscInt *idx_i;
  PetscInt        idx, isz, col, *n, *data1, **data1_start, *data2, *data2_i, *data, *data_i, len;
  PetscInt        Mbs, i, j, k, *odata1, *odata2;
  PetscInt      **odata2_ptr, *ctable = NULL, *btable, len_max, len_est;
  PetscInt        len_unused, nodata2;
  PetscInt        ois_max; /* max no of is[] in each of processor */
  char           *t_p;
  MPI_Comm        comm;
  MPI_Request    *s_waits1, *s_waits2, r_req;
  MPI_Status     *s_status, r_status;
  PetscBT        *table; /* mark indices of this processor's is[] */
  PetscBT         table_i;
  PetscBT         otable; /* mark indices of other processors' is[] */
  PetscInt        bs = C->rmap->bs, Bn = c->B->cmap->n, Bnbs = Bn / bs, *Bowners;
  IS              garray_local, garray_gl;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)C, &comm));
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;

  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag1));
  PetscCall(PetscObjectGetNewTag((PetscObject)C, &tag2));

  /* create tables used in
     step 1: table[i] - mark c->garray of proc [i]
     step 3: table[i] - mark indices of is[i] when whose=MINE
             table[0] - mark indices of is[] when whose=OTHER */
  len = PetscMax(is_max, size);
  /* one bit-table per entry, all carved out of the single allocation t_p */
  PetscCall(PetscMalloc2(len, &table, (Mbs / PETSC_BITS_PER_BYTE + 1) * len, &t_p));
  for (i = 0; i < len; i++) table[i] = t_p + (Mbs / PETSC_BITS_PER_BYTE + 1) * i;

  PetscCallMPI(MPIU_Allreduce(&is_max, &ois_max, 1, MPIU_INT, MPI_MAX, comm));

  /* 1. Send this processor's is[] to other processors */
  /* allocate spaces */
  PetscCall(PetscMalloc1(is_max, &n));
  len = 0;
  for (i = 0; i < is_max; i++) {
    PetscCall(ISGetLocalSize(is[i], &n[i]));
    len += n[i];
  }
  if (!len) {
    is_max = 0; /* all local sets empty: nothing to contribute */
  } else {
    len += 1 + is_max; /* max length of data1 for one processor */
  }

  PetscCall(PetscMalloc1(size * len + 1, &data1));
  PetscCall(PetscMalloc1(size, &data1_start));
  for (i = 0; i < size; i++) data1_start[i] = data1 + i * len;

  PetscCall(PetscMalloc4(size, &len_s, size, &btable, size, &iwork, size + 1, &Bowners));

  /* gather c->garray from all processors */
  PetscCall(ISCreateGeneral(comm, Bnbs, c->garray, PETSC_COPY_VALUES, &garray_local));
  PetscCall(ISAllGather(garray_local, &garray_gl));
  PetscCall(ISDestroy(&garray_local));
  PetscCallMPI(MPI_Allgather(&Bnbs, 1, MPIU_INT, Bowners + 1, 1, MPIU_INT, comm));

  /* Bowners becomes the prefix-sum offsets of each rank's garray inside garray_gl */
  Bowners[0] = 0;
  for (i = 0; i < size; i++) Bowners[i + 1] += Bowners[i];

  if (is_max) {
    /* hash table ctable which maps c->row to proc_id) */
    PetscCall(PetscMalloc1(Mbs, &ctable));
    for (PetscMPIInt proc_id = 0, j = 0; proc_id < size; proc_id++) {
      for (; j < C->rmap->range[proc_id + 1] / bs; j++) ctable[j] = proc_id;
    }

    /* hash tables marking c->garray */
    PetscCall(ISGetIndices(garray_gl, &idx_i));
    for (i = 0; i < size; i++) {
      table_i = table[i];
      PetscCall(PetscBTMemzero(Mbs, table_i));
      for (j = Bowners[i]; j < Bowners[i + 1]; j++) { /* go through B cols of proc[i] */
        PetscCall(PetscBTSet(table_i, idx_i[j]));
      }
    }
    PetscCall(ISRestoreIndices(garray_gl, &idx_i));
  } /* if (is_max) */
  PetscCall(ISDestroy(&garray_gl));

  /* evaluate communication - mesg to who, length, and buffer space */
  for (i = 0; i < size; i++) len_s[i] = 0;

  /* header of data1: entry 0 is is_max, followed by the per-set sizes
     (only filled in for the local copy; remote headers are patched below) */
  for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) {
    iwork[proc_id]        = 0;
    *data1_start[proc_id] = is_max;
    data1_start[proc_id]++;
    for (j = 0; j < is_max; j++) {
      if (proc_id == rank) {
        *data1_start[proc_id] = n[j];
      } else {
        *data1_start[proc_id] = 0;
      }
      data1_start[proc_id]++;
    }
  }

  for (i = 0; i < is_max; i++) {
    PetscCall(ISGetIndices(is[i], &idx_i));
    for (j = 0; j < n[i]; j++) {
      idx                = idx_i[j];
      *data1_start[rank] = idx;
      data1_start[rank]++; /* for local processing */
      PetscCall(PetscMPIIntCast(ctable[idx], &proc_end));
      for (PetscMPIInt proc_id = 0; proc_id <= proc_end; proc_id++) {         /* for others to process */
        if (proc_id == rank) continue;                                        /* done before this loop */
        if (proc_id < proc_end && !PetscBTLookup(table[proc_id], idx)) continue; /* no need for sending idx to [proc_id] */
        *data1_start[proc_id] = idx;
        data1_start[proc_id]++;
        len_s[proc_id]++;
      }
    }
    /* update header data: per-set counts sent to each remote rank */
    for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) {
      if (proc_id == rank) continue;
      *(data1 + proc_id * len + 1 + i) = len_s[proc_id] - iwork[proc_id];
      iwork[proc_id]                   = len_s[proc_id];
    }
    PetscCall(ISRestoreIndices(is[i], &idx_i));
  }

  /* reset the per-rank write cursors and count outgoing requests */
  nrqs = 0;
  nrqr = 0;
  for (i = 0; i < size; i++) {
    data1_start[i] = data1 + i * len;
    if (len_s[i]) {
      nrqs++;
      len_s[i] += 1 + is_max; /* add no. of header msg */
    }
  }

  for (i = 0; i < is_max; i++) PetscCall(ISDestroy(&is[i]));
  PetscCall(PetscFree(n));
  PetscCall(PetscFree(ctable));

  /* Determine the number of messages to expect, their lengths, and from-ids */
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &nrqr));
  PetscCall(PetscGatherMessageLengths(comm, nrqs, nrqr, len_s, &id_r1, &len_r1));

  /* Now post the sends */
  PetscCall(PetscMalloc2(size, &s_waits1, size, &s_waits2));
  k = 0;
  for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) { /* send data1 to processor [proc_id] */
    if (len_s[proc_id]) {
      PetscCallMPI(MPIU_Isend(data1_start[proc_id], len_s[proc_id], MPIU_INT, proc_id, tag1, comm, s_waits1 + k));
      k++;
    }
  }

  /* 2. Receive other's is[] and process. Then send back */
  /* odata1 receive buffer must hold the longest incoming message */
  len = 0;
  for (i = 0; i < nrqr; i++) {
    if (len_r1[i] > len) len = len_r1[i];
  }
  PetscCall(PetscFree(len_r1));
  PetscCall(PetscFree(id_r1));

  for (PetscMPIInt proc_id = 0; proc_id < size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;

  PetscCall(PetscMalloc1(len + 1, &odata1));
  PetscCall(PetscMalloc1(size, &odata2_ptr));
  PetscCall(PetscBTCreate(Mbs, &otable));

  len_max = ois_max * (Mbs + 1); /* max space storing all is[] for each receive */
  len_est = 2 * len_max;         /* estimated space of storing is[] for all receiving messages */
  PetscCall(PetscMalloc1(len_est + 1, &odata2));
  nodata2 = 0; /* nodata2+1: num of PetscMalloc(,&odata2_ptr[]) called */

  odata2_ptr[nodata2] = odata2;

  len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]-- needs to be >= len_max */

  k = 0;
  while (k < nrqr) {
    PetscMPIInt ilen;

    /* Receive messages */
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &r_status));
    if (flag) {
      PetscMPIInt proc_id;

      PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &ilen));
      proc_id = r_status.MPI_SOURCE;
      PetscCallMPI(MPIU_Irecv(odata1, ilen, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
      PetscCallMPI(MPI_Wait(&r_req, &r_status));

      /* Process messages */
      /* make sure there is enough unused space in odata2 array
         (results still in flight via Isend must not be overwritten, hence a fresh arena) */
      if (len_unused < len_max) { /* allocate more space for odata2 */
        PetscCall(PetscMalloc1(len_est + 1, &odata2));
        odata2_ptr[++nodata2] = odata2;
        len_unused            = len_est;
      }

      PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, odata1, OTHER, odata2, &otable));
      /* actual packed length of the result: header plus each set's size */
      len = 1 + odata2[0];
      for (i = 0; i < odata2[0]; i++) len += odata2[1 + i];

      /* Send messages back */
      PetscCallMPI(MPIU_Isend(odata2, len, MPIU_INT, proc_id, tag2, comm, s_waits2 + k));
      k++;
      odata2 += len; /* bump the arena cursor past the in-flight data */
      len_unused -= len;
      PetscCall(PetscMPIIntCast(len, &len_s[proc_id])); /* length of message sending back to proc_id */
    }
  }
  PetscCall(PetscFree(odata1));
  PetscCall(PetscBTDestroy(&otable));

  /* 3. Do local work on this processor's is[] */
  /* make sure there is enough unused space in odata2(=data) array */
  len_max = is_max * (Mbs + 1); /* max space storing all is[] for this processor */
  if (len_unused < len_max) {   /* allocate more space for odata2 */
    PetscCall(PetscMalloc1(len_est + 1, &odata2));

    odata2_ptr[++nodata2] = odata2;
  }

  data = odata2;
  PetscCall(MatIncreaseOverlap_MPISBAIJ_Local(C, data1_start[rank], MINE, data, table));
  PetscCall(PetscFree(data1_start));

  /* 4. Receive work done on other processors, then merge */
  /* get max number of messages that this processor expects to recv */
  PetscCallMPI(MPIU_Allreduce(len_s, iwork, size, MPI_INT, MPI_MAX, comm));
  PetscCall(PetscMalloc1(iwork[rank] + 1, &data2));
  PetscCall(PetscFree4(len_s, btable, iwork, Bowners));

  k = 0;
  while (k < nrqs) {
    /* Receive messages */
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag2, comm, &flag, &r_status));
    if (flag) {
      PetscMPIInt proc_id, ilen;
      PetscCallMPI(MPI_Get_count(&r_status, MPIU_INT, &ilen));
      proc_id = r_status.MPI_SOURCE;
      PetscCallMPI(MPIU_Irecv(data2, ilen, MPIU_INT, proc_id, r_status.MPI_TAG, comm, &r_req));
      PetscCallMPI(MPI_Wait(&r_req, &r_status));
      if (ilen > 1 + is_max) { /* Add data2 into data; header-only messages carry nothing new */
        data2_i = data2 + 1 + is_max;
        for (i = 0; i < is_max; i++) {
          table_i = table[i];
          data_i  = data + 1 + is_max + Mbs * i;
          isz     = data[1 + i];
          for (j = 0; j < data2[1 + i]; j++) {
            col = data2_i[j];
            if (!PetscBTLookupSet(table_i, col)) data_i[isz++] = col; /* deduplicate on merge */
          }
          data[1 + i] = isz;
          if (i < is_max - 1) data2_i += data2[1 + i];
        }
      }
      k++;
    }
  }
  PetscCall(PetscFree(data2));
  PetscCall(PetscFree2(table, t_p));

  /* phase 1 sends are complete */
  PetscCall(PetscMalloc1(size, &s_status));
  if (nrqs) PetscCallMPI(MPI_Waitall(nrqs, s_waits1, s_status));
  PetscCall(PetscFree(data1));

  /* phase 2 sends are complete */
  if (nrqr) PetscCallMPI(MPI_Waitall(nrqr, s_waits2, s_status));
  PetscCall(PetscFree2(s_waits1, s_waits2));
  PetscCall(PetscFree(s_status));

  /* 5. Create new is[] */
  for (i = 0; i < is_max; i++) {
    data_i = data + 1 + is_max + Mbs * i;
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, data[1 + i], data_i, PETSC_COPY_VALUES, is + i));
  }
  for (k = 0; k <= nodata2; k++) PetscCall(PetscFree(odata2_ptr[k]));
  PetscCall(PetscFree(odata2_ptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap, to do
   the work on the local processor.

   Inputs:
     C     - MAT_MPISBAIJ;
     data  - holds is[]. See MatIncreaseOverlap_MPISBAIJ_Once() for the format.
     whose - whose is[] to be processed,
             MINE:  this processor's is[]
             OTHER: other processor's is[]
   Output:
     nidx  - whose = MINE:
                 holds input and newly found indices in the same format as data
             whose = OTHER:
                 only holds the newly found indices
     table - table[i]: mark the indices of is[i], i=0,...,is_max. Used only in the case 'whose=MINE'.
*/
/* Would computation be reduced by swapping the loop 'for each is' and 'for each row'? */
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat C, PetscInt *data, PetscInt whose, PetscInt *nidx, PetscBT *table)
{
  Mat_MPISBAIJ *c = (Mat_MPISBAIJ *)C->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)c->A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)c->B->data;
  PetscInt      row, mbs, Mbs, *nidx_i, col, col_max, isz, isz0, *ai, *aj, *bi, *bj, *garray, rstart, l;
  PetscInt      a_start, a_end, b_start, b_end, i, j, k, is_max, *idx_i, n;
  PetscBT       table0;  /* mark the indices of input is[] for look up */
  PetscBT       table_i; /* points to i-th table. When whose=OTHER, a single table is used for all is[] */

  PetscFunctionBegin;
  Mbs    = c->Mbs;
  mbs    = a->mbs;
  ai     = a->i;
  aj     = a->j;
  bi     = b->i;
  bj     = b->j;
  garray = c->garray; /* maps B's local block columns to global block columns */
  rstart = c->rstartbs;
  is_max = data[0];

  PetscCall(PetscBTCreate(Mbs, &table0));

  nidx[0] = is_max;
  idx_i   = data + is_max + 1; /* ptr to input is[0] array */
  nidx_i  = nidx + is_max + 1; /* ptr to output is[0] array */
  for (i = 0; i < is_max; i++) { /* for each is */
    isz = 0;
    n   = data[1 + i]; /* size of input is[i] */

    /* initialize and set table_i(mark idx and nidx) and table0(only mark idx) */
    if (whose == MINE) { /* process this processor's is[] */
      table_i = table[i];
      nidx_i  = nidx + 1 + is_max + Mbs * i;
    } else { /* process other processor's is[] - only use one temp table */
      table_i = table[0];
    }
    PetscCall(PetscBTMemzero(Mbs, table_i));
    PetscCall(PetscBTMemzero(Mbs, table0));
    if (n == 0) {
      nidx[1 + i] = 0; /* size of new is[i] */
      continue;
    }

    /* mark the (deduplicated) input indices; track their count and maximum */
    isz0    = 0;
    col_max = 0;
    for (j = 0; j < n; j++) {
      col = idx_i[j];
      PetscCheck(col < Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "index col %" PetscInt_FMT " >= Mbs %" PetscInt_FMT, col, Mbs);
      if (!PetscBTLookupSet(table_i, col)) {
        PetscCall(PetscBTSet(table0, col));
        if (whose == MINE) nidx_i[isz0] = col;
        if (col_max < col) col_max = col;
        isz0++;
      }
    }

    if (whose == MINE) isz = isz0;
    k = 0; /* no. of indices from input is[i] that have been examined */
    for (row = 0; row < mbs; row++) {
      a_start = ai[row];
      a_end   = ai[row + 1];
      b_start = bi[row];
      b_end   = bi[row + 1];
      if (PetscBTLookup(table0, row + rstart)) { /* row is on input is[i]:
                                                    do row search: collect all col in this row */
        for (l = a_start; l < a_end; l++) { /* Amat */
          col = aj[l] + rstart;
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (!PetscBTLookupSet(table_i, col)) nidx_i[isz++] = col;
        }
        k++;
        if (k >= isz0) break; /* for (row=0; row<mbs; row++) */
      } else { /* row is not on input is[i]:
                  do col search: add row onto nidx_i if there is a col in nidx_i */
        for (l = a_start; l < a_end; l++) { /* Amat */
          col = aj[l] + rstart;
          if (col > col_max) break; /* cols are sorted; none beyond col_max can be in table0 */
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for l = start; l<end ; l++) */
          }
        }
        for (l = b_start; l < b_end; l++) { /* Bmat */
          col = garray[bj[l]];
          if (col > col_max) break;
          if (PetscBTLookup(table0, col)) {
            if (!PetscBTLookupSet(table_i, row + rstart)) nidx_i[isz++] = row + rstart;
            break; /* for l = start; l<end ; l++) */
          }
        }
      }
    }

    if (i < is_max - 1) {
      idx_i += n;    /* ptr to input is[i+1] array */
      nidx_i += isz; /* ptr to output is[i+1] array */
    }
    nidx[1 + i] = isz; /* size of new is[i] */
  } /* for each is */
  PetscCall(PetscBTDestroy(&table0));
  PetscFunctionReturn(PETSC_SUCCESS);
}