#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *va, *vv;
  Vec                vB, vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayWrite(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) {
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) {
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs  = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(0);
}
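/*
   Example (sketch, not part of the library): how the colmap built above is
   consulted to translate a global column index gcol into a local column of
   the off-diagonal block B, as done in MatSetValues_MPIBAIJ() below. The
   +1/-1 shift lets the value 0 mean "not present", since the table/array is
   zero-initialized; bs is the block size.

     PetscInt gbcol = gcol / bs;                  // global block column
   #if defined(PETSC_USE_CTABLE)
     PetscInt lcol;
     PetscCall(PetscTableFind(baij->colmap, gbcol + 1, &lcol));
     lcol--;                                      // -1 now means "column not in B"
   #else
     PetscInt lcol = baij->colmap[gbcol] - 1;
   #endif
     if (lcol >= 0) lcol += gcol % bs;            // point column within B
*/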
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
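/*
   Illustration (sketch only, the helper name FindBlockCol is hypothetical):
   the macros above locate a block column within one row by a truncated
   binary search that narrows [low,high) to a few candidates, followed by a
   linear scan, which is cheap for the short rows typical of BAIJ matrices.

     static PetscInt FindBlockCol(const PetscInt *rp, PetscInt nrow, PetscInt bcol)
     {
       PetscInt low = 0, high = nrow, t, i;
       while (high - low > 3) {
         t = (low + high) / 2;
         if (rp[t] > bcol) high = t;
         else low = t;
       }
       for (i = low; i < high; i++) {
         if (rp[i] == bcol) return i;   // existing block found
         if (rp[i] > bcol) break;       // would need to be inserted here
       }
       return -(i + 1);                 // insertion point, encoded as negative
     }
*/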
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required by the macros */
  Mat          A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
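/*
   Usage sketch (standard PETSc API; the indices here are made up): any
   process may set any global entry. Entries whose row is owned by another
   process are stashed by the routine above and only communicated and
   inserted during assembly.

     PetscScalar one = 1.0;
     PetscInt    row = 7, col = 3;                        // global point indices
     PetscCall(MatSetValues(A, 1, &row, 1, &col, &one, ADD_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));  // sends stashed entries
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));    // receives and inserts them
*/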
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
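/*
   Usage sketch (standard PETSc API; indices made up): with block size bs = 2,
   MatSetValuesBlocked() addresses bs x bs blocks by block row/column, and v
   holds each block in row-major order by default (see MAT_ROW_ORIENTED).

     PetscInt    brow = 4, bcol = 2;           // block indices, not point indices
     PetscScalar blk[4] = {1.0, 2.0,
                           3.0, 4.0};          // one 2x2 block
     PetscCall(MatSetValuesBlocked(A, 1, &brow, 1, &bcol, blk, INSERT_VALUES));
*/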
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
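/*
   Worked example of the HASH macro above (values rounded): with size = 1000
   and key = 12345, and tmp a PetscReal scratch variable,

     tmp  = 12345 * 0.6180339887 = 7629.6296...   fractional part 0.6296...
     HASH = (PetscInt)(1000 * 0.6296...) = 629

   Multiplying by the golden-ratio conjugate and keeping the fractional part
   (Fibonacci hashing) spreads consecutive keys nearly uniformly over the
   table; collisions are resolved by the linear probing in the routines below.
*/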
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  = total_ct;
    baij->ht_insert_ct = insert_ct;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  = total_ct;
    baij->ht_insert_ct = insert_ct;
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->cstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
/*
   Creates and fills the hash table.
   This table is created only once.
   If new entries need to be added to the matrix,
   then the hash table has to be destroyed and
   recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(0);
}
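/*
   Sketch (illustrative only) of the lookup that MatSetValues_MPIBAIJ_HT()
   performs against the table built above: probe linearly from the hashed
   slot, wrapping once around the table; HD[idx] then points at the bs x bs
   block of values inside a->a or b->a.

     key = row * Nbs + col + 1;               // +1 so that 0 marks an empty slot
     h1  = HASH(ht_size, key, tmp);
     for (idx = h1; idx < ht_size && HT[idx] != key; idx++);
     if (idx == ht_size)                      // wrap around to the front
       for (idx = 0; idx < h1 && HT[idx] != key; idx++);
     // HT[idx] == key here, unless the block was never preallocated
*/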
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}
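/*
   Usage sketch (standard PETSc API): the hash-table insertion path above is
   activated per matrix; after the first MAT_FINAL_ASSEMBLY builds the table,
   MatSetValues()/MatSetValuesBlocked() dispatch to the _HT variants.

     PetscCall(MatSetOption(A, MAT_USE_HASH_TABLE, PETSC_TRUE));
     PetscCall(MatSetHashTableFactor(A, 1.6));   // table size = factor * (local nonzero blocks)

   If the factor is not set explicitly, the default of 1.39 from
   MatSetOption_MPIBAIJ() below is used.
*/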
extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) {
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb = jb, jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}
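/*
   Layout of the file produced above (PETSc binary matrix format; the
   per-process pieces are gathered by PetscViewerBinaryWriteAll()):

     header[4]   : MAT_FILE_CLASSID, M, N, global nnz      (PETSC_INT)
     rowlens[M]  : number of nonzeros in each point row    (PETSC_INT)
     colidxs[nnz]: column indices, each row sorted ascending; the
                   off-diagonal part is merged around the diagonal block,
                   split at the owning column start cs     (PETSC_INT)
     matvals[nnz]: values in the same order                (PETSC_SCALAR)
*/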
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) {
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}
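/*
   The multiply kernels above follow the usual PETSc pattern of overlapping
   communication with computation; a sketch of the structure:

     VecScatterBegin(Mvctx, x, lvec, ...);   // start fetching ghost values
     y = A_diag * x_local;                   // local work hides communication
     VecScatterEnd(Mvctx, x, lvec, ...);     // ghost values now available
     y += B_offdiag * lvec;                  // finish with off-process columns
*/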
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) {
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row. */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
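/*
   Usage sketch (standard PETSc API): MatGetRow() on this type only supports
   locally owned rows, and each call must be paired with MatRestoreRow()
   before the next row is requested (note the getrowactive guard above).

     PetscInt           ncols, rstart, rend, row;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (row = rstart; row < rend; row++) {
       PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
       // ... use cols[0..ncols) and vals[0..ncols) ...
       PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
     }
*/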
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) {
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1500 1501 for (i = 0; i < mbs; i++) { 1502 rvals[0] = bs * (baij->rstartbs + i); 1503 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1504 for (j = ai[i]; j < ai[i + 1]; j++) { 1505 col = (baij->cstartbs + aj[j]) * bs; 1506 for (k = 0; k < bs; k++) { 1507 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1508 1509 col++; 1510 a += bs; 1511 } 1512 } 1513 } 1514 /* copy over the B part */ 1515 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1516 ai = Aloc->i; 1517 aj = Aloc->j; 1518 a = Aloc->a; 1519 for (i = 0; i < mbs; i++) { 1520 rvals[0] = bs * (baij->rstartbs + i); 1521 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1522 for (j = ai[i]; j < ai[i + 1]; j++) { 1523 col = baij->garray[aj[j]] * bs; 1524 for (k = 0; k < bs; k++) { 1525 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1526 col++; 1527 a += bs; 1528 } 1529 } 1530 } 1531 PetscCall(PetscFree(rvals)); 1532 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1533 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1534 1535 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1536 else PetscCall(MatHeaderMerge(A, &B)); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) { 1541 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1542 Mat a = baij->A, b = baij->B; 1543 PetscInt s1, s2, s3; 1544 1545 PetscFunctionBegin; 1546 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1547 if (rr) { 1548 PetscCall(VecGetLocalSize(rr, &s1)); 1549 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1550 /* Overlap communication with computation. */ 1551 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1552 } 1553 if (ll) { 1554 PetscCall(VecGetLocalSize(ll, &s1)); 1555 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1556 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1557 } 1558 /* scale the diagonal block */ 1559 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1560 1561 if (rr) { 1562 /* Do a scatter end and then right scale the off-diagonal block */ 1563 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1564 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1565 } 1566 PetscFunctionReturn(0); 1567 } 1568 1569 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 1570 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1571 PetscInt *lrows; 1572 PetscInt r, len; 1573 PetscBool cong; 1574 1575 PetscFunctionBegin; 1576 /* get locally owned rows */ 1577 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1578 /* fix right hand side if needed */ 1579 if (x && b) { 1580 const PetscScalar *xx; 1581 PetscScalar *bb; 1582 1583 PetscCall(VecGetArrayRead(x, &xx)); 1584 PetscCall(VecGetArray(b, &bb)); 1585 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1586 PetscCall(VecRestoreArrayRead(x, &xx)); 1587 PetscCall(VecRestoreArray(b, &bb)); 1588 } 1589 1590 /* actually zap the local rows */ 1591 /* 1592 Zero the required rows. If the "diagonal block" of the matrix 1593 is square and the user wishes to set the diagonal we use separate 1594 code so that MatSetValues() is not called for each diagonal allocating 1595 new memory, thus calling lots of mallocs and slowing things down. 
1596 1597 */ 1598 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1599 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1600 PetscCall(MatHasCongruentLayouts(A, &cong)); 1601 if ((diag != 0.0) && cong) { 1602 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1603 } else if (diag != 0.0) { 1604 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1605 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1606 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1607 for (r = 0; r < len; ++r) { 1608 const PetscInt row = lrows[r] + A->rmap->rstart; 1609 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1610 } 1611 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1612 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1613 } else { 1614 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1615 } 1616 PetscCall(PetscFree(lrows)); 1617 1618 /* only change matrix nonzero state if pattern was allowed to be changed */ 1619 if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1620 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1621 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1622 } 1623 PetscFunctionReturn(0); 1624 } 1625 1626 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 1627 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1628 PetscMPIInt n = A->rmap->n, p = 0; 1629 PetscInt i, j, k, r, len = 0, row, col, count; 1630 PetscInt *lrows, *owners = A->rmap->range; 1631 PetscSFNode *rrows; 1632 PetscSF sf; 1633 const PetscScalar *xx; 1634 PetscScalar *bb, *mask; 1635 Vec xmask, lmask; 1636 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1637 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1638 PetscScalar *aa; 1639 1640 PetscFunctionBegin; 1641 /* Create SF where leaves are input rows and roots are owned rows */ 1642 PetscCall(PetscMalloc1(n, &lrows)); 1643 for (r = 0; r < n; ++r) lrows[r] = -1; 1644 PetscCall(PetscMalloc1(N, &rrows)); 1645 for (r = 0; r < N; ++r) { 1646 const PetscInt idx = rows[r]; 1647 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1648 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1649 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1650 } 1651 rrows[r].rank = p; 1652 rrows[r].index = rows[r] - owners[p]; 1653 } 1654 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1655 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1656 /* Collect flags for rows to be zeroed */ 1657 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1658 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1659 PetscCall(PetscSFDestroy(&sf)); 1660 /* Compress and put in row numbers */ 1661 for (r = 0; r < n; ++r) 1662 if (lrows[r] >= 0) lrows[len++] = r; 1663 /* zero diagonal part of matrix */ 1664 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1665 /* handle off diagonal part of matrix */ 1666 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1667 PetscCall(VecDuplicate(l->lvec, &lmask)); 1668 
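/* Build a mask vector with 1.0 in every locally zeroed row, then scatter it forward into the ghosted layout (lmask) so each process knows which ghost columns of its off-diagonal block must be cleared */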
PetscCall(VecGetArray(xmask, &bb)); 1669 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1670 PetscCall(VecRestoreArray(xmask, &bb)); 1671 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1672 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1673 PetscCall(VecDestroy(&xmask)); 1674 if (x) { 1675 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1676 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1677 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1678 PetscCall(VecGetArray(b, &bb)); 1679 } 1680 PetscCall(VecGetArray(lmask, &mask)); 1681 /* remove zeroed rows of off diagonal matrix */ 1682 for (i = 0; i < len; ++i) { 1683 row = lrows[i]; 1684 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1685 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 1686 for (k = 0; k < count; ++k) { 1687 aa[0] = 0.0; 1688 aa += bs; 1689 } 1690 } 1691 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1692 for (i = 0; i < l->B->rmap->N; ++i) { 1693 row = i / bs; 1694 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1695 for (k = 0; k < bs; ++k) { 1696 col = bs * baij->j[j] + k; 1697 if (PetscAbsScalar(mask[col])) { 1698 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 1699 if (x) bb[i] -= aa[0] * xx[col]; 1700 aa[0] = 0.0; 1701 } 1702 } 1703 } 1704 } 1705 if (x) { 1706 PetscCall(VecRestoreArray(b, &bb)); 1707 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1708 } 1709 PetscCall(VecRestoreArray(lmask, &mask)); 1710 PetscCall(VecDestroy(&lmask)); 1711 PetscCall(PetscFree(lrows)); 1712 1713 /* only change matrix nonzero state if pattern was allowed to be changed */ 1714 if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1715 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1716 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) { 1722 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1723 1724 PetscFunctionBegin; 1725 PetscCall(MatSetUnfactored(a->A)); 1726 PetscFunctionReturn(0); 1727 } 1728 1729 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1730 1731 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) { 1732 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1733 Mat a, b, c, d; 1734 PetscBool flg; 1735 1736 PetscFunctionBegin; 1737 a = matA->A; 1738 b = matA->B; 1739 c = matB->A; 1740 d = matB->B; 1741 1742 PetscCall(MatEqual(a, c, &flg)); 1743 if (flg) PetscCall(MatEqual(b, d, &flg)); 1744 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1745 PetscFunctionReturn(0); 1746 } 1747 1748 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) { 1749 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1750 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1751 1752 PetscFunctionBegin; 1753 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1754 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1755 PetscCall(MatCopy_Basic(A, B, str)); 1756 } else { 1757 PetscCall(MatCopy(a->A, b->A, str)); 1758 PetscCall(MatCopy(a->B, b->B, str)); 1759 } 1760 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1761 PetscFunctionReturn(0); 1762 } 1763 1764 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) { 1765 PetscFunctionBegin; 1766 PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 1767 PetscFunctionReturn(0); 1768 } 1769 1770 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) { 1771 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1772 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1773 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1774 1775 PetscFunctionBegin; 1776 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1777 PetscFunctionReturn(0); 1778 } 1779 1780 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) { 1781 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1782 PetscBLASInt bnz, one = 1; 1783 Mat_SeqBAIJ *x, *y; 1784 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1785 1786 PetscFunctionBegin; 1787 if (str == SAME_NONZERO_PATTERN) { 1788 PetscScalar alpha = a; 1789 x = (Mat_SeqBAIJ *)xx->A->data; 1790 y = (Mat_SeqBAIJ *)yy->A->data; 1791 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1792 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1793 x = (Mat_SeqBAIJ *)xx->B->data; 1794 y = (Mat_SeqBAIJ *)yy->B->data; 1795 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1796 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1797 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1798 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1799 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1800 } else { 1801 Mat B; 1802 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1803 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1804 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1805 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1806 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1807 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1808 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1809 PetscCall(MatSetType(B, MATMPIBAIJ)); 1810 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1811 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1812 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1813 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1814 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1815 PetscCall(MatHeaderMerge(Y, &B)); 1816 PetscCall(PetscFree(nnz_d)); 1817 PetscCall(PetscFree(nnz_o)); 1818 } 1819 PetscFunctionReturn(0); 1820 } 1821 1822 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) { 1823 PetscFunctionBegin; 1824 if (PetscDefined(USE_COMPLEX)) { 1825 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1826 1827 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1828 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1829 } 1830 PetscFunctionReturn(0); 1831 } 1832 1833 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) { 1834 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1835 1836 PetscFunctionBegin; 1837 PetscCall(MatRealPart(a->A)); 1838 PetscCall(MatRealPart(a->B)); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) { 1843 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1844 1845 PetscFunctionBegin; 1846 PetscCall(MatImaginaryPart(a->A)); 1847 PetscCall(MatImaginaryPart(a->B)); 1848 PetscFunctionReturn(0); 1849 } 1850 1851 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) { 1852 IS iscol_local; 1853 PetscInt csize; 1854 1855 PetscFunctionBegin; 1856 PetscCall(ISGetLocalSize(iscol, &csize)); 1857 if (call == MAT_REUSE_MATRIX) { 1858 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1859 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1860 } else { 1861 PetscCall(ISAllGather(iscol, &iscol_local)); 1862 } 1863 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1864 if (call == MAT_INITIAL_MATRIX) { 1865 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1866 PetscCall(ISDestroy(&iscol_local)); 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 /* 1872 Not great since it makes two copies of the submatrix, first a SeqBAIJ 1873 on each process and then the final result by concatenating the local matrices. 1874 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1875 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1876 */ 1877 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) { 1878 PetscMPIInt rank, size; 1879 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1880 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1881 Mat M, Mreuse; 1882 MatScalar *vwork, *aa; 1883 MPI_Comm comm; 1884 IS isrow_new, iscol_new; 1885 Mat_SeqBAIJ *aij; 1886 1887 PetscFunctionBegin; 1888 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1889 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1890 PetscCallMPI(MPI_Comm_size(comm, &size)); 1891 /* The compression and expansion should be avoided.
It does not flag 1892 errors and may change the indices, hence it is buggy */ 1893 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1894 PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1895 1896 if (call == MAT_REUSE_MATRIX) { 1897 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1898 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1899 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse)); 1900 } else { 1901 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse)); 1902 } 1903 PetscCall(ISDestroy(&isrow_new)); 1904 PetscCall(ISDestroy(&iscol_new)); 1905 /* 1906 m - number of local rows 1907 n - number of columns (same on all processors) 1908 rstart - first row in new global matrix generated 1909 */ 1910 PetscCall(MatGetBlockSize(mat, &bs)); 1911 PetscCall(MatGetSize(Mreuse, &m, &n)); 1912 m = m / bs; 1913 n = n / bs; 1914 1915 if (call == MAT_INITIAL_MATRIX) { 1916 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1917 ii = aij->i; 1918 jj = aij->j; 1919 1920 /* 1921 Determine the number of non-zeros in the diagonal and off-diagonal 1922 portions of the matrix in order to do correct preallocation 1923 */ 1924 1925 /* first get start and end of "diagonal" columns */ 1926 if (csize == PETSC_DECIDE) { 1927 PetscCall(ISGetSize(isrow, &mglobal)); 1928 if (mglobal == n * bs) { /* square matrix */ 1929 nlocal = m; 1930 } else { 1931 nlocal = n / size + ((n % size) > rank); 1932 } 1933 } else { 1934 nlocal = csize / bs; 1935 } 1936 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1937 rstart = rend - nlocal; 1938 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1939 1940 /* next, compute all the lengths */ 1941 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1942 for (i = 0; i < m; i++) { 1943 jend = ii[i + 1] - ii[i]; 1944 olen = 0; 1945 dlen = 0; 1946 for (j = 0; j < jend; j++) { 1947 if (*jj < rstart || *jj >= rend) olen++; 1948 else dlen++; 1949 jj++; 1950 } 1951 olens[i] = olen; 1952 dlens[i] = dlen; 1953 } 1954 PetscCall(MatCreate(comm, &M)); 1955 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 1956 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 1957 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1958 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1959 PetscCall(PetscFree2(dlens, olens)); 1960 } else { 1961 PetscInt ml, nl; 1962 1963 M = *newmat; 1964 PetscCall(MatGetLocalSize(M, &ml, &nl)); 1965 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 1966 PetscCall(MatZeroEntries(M)); 1967 /* 1968 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 1969 rather than the slower MatSetValues().
1970 */ 1971 M->was_assembled = PETSC_TRUE; 1972 M->assembled = PETSC_FALSE; 1973 } 1974 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 1975 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 1976 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1977 ii = aij->i; 1978 jj = aij->j; 1979 aa = aij->a; 1980 for (i = 0; i < m; i++) { 1981 row = rstart / bs + i; 1982 nz = ii[i + 1] - ii[i]; 1983 cwork = jj; 1984 jj += nz; 1985 vwork = aa; 1986 aa += nz * bs * bs; 1987 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 1988 } 1989 1990 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 1991 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 1992 *newmat = M; 1993 1994 /* save submatrix used in processor for next request */ 1995 if (call == MAT_INITIAL_MATRIX) { 1996 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 1997 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 1998 } 1999 PetscFunctionReturn(0); 2000 } 2001 2002 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) { 2003 MPI_Comm comm, pcomm; 2004 PetscInt clocal_size, nrows; 2005 const PetscInt *rows; 2006 PetscMPIInt size; 2007 IS crowp, lcolp; 2008 2009 PetscFunctionBegin; 2010 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2011 /* make a collective version of 'rowp' */ 2012 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2013 if (pcomm == comm) { 2014 crowp = rowp; 2015 } else { 2016 PetscCall(ISGetSize(rowp, &nrows)); 2017 PetscCall(ISGetIndices(rowp, &rows)); 2018 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2019 PetscCall(ISRestoreIndices(rowp, &rows)); 2020 } 2021 PetscCall(ISSetPermutation(crowp)); 2022 /* make a local version of 'colp' */ 2023 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2024 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2025 if (size == 1) { 2026 lcolp = colp; 2027 } else { 2028 PetscCall(ISAllGather(colp, &lcolp)); 2029 } 2030 PetscCall(ISSetPermutation(lcolp)); 2031 /* now we just get the submatrix */ 2032 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2033 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B)); 2034 /* clean up */ 2035 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2036 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2037 PetscFunctionReturn(0); 2038 } 2039 2040 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) { 2041 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2042 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2043 2044 PetscFunctionBegin; 2045 if (nghosts) *nghosts = B->nbs; 2046 if (ghosts) *ghosts = baij->garray; 2047 PetscFunctionReturn(0); 2048 } 2049 2050 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) { 2051 Mat B; 2052 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2053 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2054 Mat_SeqAIJ *b; 2055 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2056 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2057 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2058 2059 PetscFunctionBegin; 2060 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2061 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2062 2063 /* ---------------------------------------------------------------- 2064 Tell every processor the number of nonzeros per row 2065 */ 2066 
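/* lens[] holds the nonzero count of every global block row; each process fills in its own rows and the in-place MPI_Allgatherv() below completes the rest */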
PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2067 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2068 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2069 displs = recvcounts + size; 2070 for (i = 0; i < size; i++) { 2071 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2072 displs[i] = A->rmap->range[i] / bs; 2073 } 2074 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2075 /* --------------------------------------------------------------- 2076 Create the sequential matrix of the same type as the local block diagonal 2077 */ 2078 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2079 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2080 PetscCall(MatSetType(B, MATSEQAIJ)); 2081 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2082 b = (Mat_SeqAIJ *)B->data; 2083 2084 /*-------------------------------------------------------------------- 2085 Copy my part of matrix column indices over 2086 */ 2087 sendcount = ad->nz + bd->nz; 2088 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2089 a_jsendbuf = ad->j; 2090 b_jsendbuf = bd->j; 2091 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2092 cnt = 0; 2093 for (i = 0; i < n; i++) { 2094 /* put in lower diagonal portion */ 2095 m = bd->i[i + 1] - bd->i[i]; 2096 while (m > 0) { 2097 /* is it above diagonal (in bd (compressed) numbering) */ 2098 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2099 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2100 m--; 2101 } 2102 2103 /* put in diagonal portion */ 2104 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2105 2106 /* put in upper diagonal portion */ 2107 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2108 } 2109 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2110 2111 /*-------------------------------------------------------------------- 2112 Gather all column indices to all processors 2113 */ 2114 for (i = 0; i < size; i++) { 2115 recvcounts[i] = 0; 2116 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2117 } 2118 displs[0] = 0; 2119 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2120 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2121 /*-------------------------------------------------------------------- 2122 Assemble the matrix into useable form (note numerical values not yet set) 2123 */ 2124 /* set the b->ilen (length of each row) values */ 2125 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2126 /* set the b->i indices */ 2127 b->i[0] = 0; 2128 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2129 PetscCall(PetscFree(lens)); 2130 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2131 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2132 PetscCall(PetscFree(recvcounts)); 2133 2134 PetscCall(MatPropagateSymmetryOptions(A, B)); 2135 *newmat = B; 2136 PetscFunctionReturn(0); 2137 } 2138 2139 PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec 
xx) { 2140 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2141 Vec bb1 = NULL; 2142 2143 PetscFunctionBegin; 2144 if (flag == SOR_APPLY_UPPER) { 2145 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2150 2151 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2152 if (flag & SOR_ZERO_INITIAL_GUESS) { 2153 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2154 its--; 2155 } 2156 2157 while (its--) { 2158 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2159 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2160 2161 /* update rhs: bb1 = bb - B*x */ 2162 PetscCall(VecScale(mat->lvec, -1.0)); 2163 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2164 2165 /* local sweep */ 2166 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2167 } 2168 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2169 if (flag & SOR_ZERO_INITIAL_GUESS) { 2170 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2171 its--; 2172 } 2173 while (its--) { 2174 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2175 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2176 2177 /* update rhs: bb1 = bb - B*x */ 2178 PetscCall(VecScale(mat->lvec, -1.0)); 2179 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2180 2181 /* local sweep */ 2182 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2183 } 2184 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2185 if (flag & SOR_ZERO_INITIAL_GUESS) { 2186 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2187 its--; 2188 } 2189 while (its--) { 2190 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2191 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2192 2193 /* update rhs: bb1 = bb - B*x */ 2194 PetscCall(VecScale(mat->lvec, -1.0)); 2195 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2196 2197 /* local sweep */ 2198 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2199 } 2200 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2201 2202 PetscCall(VecDestroy(&bb1)); 2203 PetscFunctionReturn(0); 2204 } 2205 2206 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) { 2207 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2208 PetscInt m, N, i, *garray = aij->garray; 2209 PetscInt ib, jb, bs = A->rmap->bs; 2210 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2211 MatScalar *a_val = a_aij->a; 2212 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2213 MatScalar *b_val = b_aij->a; 2214 PetscReal *work; 2215 2216 PetscFunctionBegin; 2217 PetscCall(MatGetSize(A, &m, &N)); 2218 PetscCall(PetscCalloc1(N, &work)); 2219 if (type == NORM_2) { 2220 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2221 for (jb = 0; jb < bs; jb++) { 2222 for (ib = 0; ib < bs; ib++) { 2223 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2224 a_val++; 2225 } 2226 } 2227 } 2228 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 
2229 for (jb = 0; jb < bs; jb++) { 2230 for (ib = 0; ib < bs; ib++) { 2231 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2232 b_val++; 2233 } 2234 } 2235 } 2236 } else if (type == NORM_1) { 2237 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2238 for (jb = 0; jb < bs; jb++) { 2239 for (ib = 0; ib < bs; ib++) { 2240 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2241 a_val++; 2242 } 2243 } 2244 } 2245 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2246 for (jb = 0; jb < bs; jb++) { 2247 for (ib = 0; ib < bs; ib++) { 2248 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2249 b_val++; 2250 } 2251 } 2252 } 2253 } else if (type == NORM_INFINITY) { 2254 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2255 for (jb = 0; jb < bs; jb++) { 2256 for (ib = 0; ib < bs; ib++) { 2257 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2258 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2259 a_val++; 2260 } 2261 } 2262 } 2263 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2264 for (jb = 0; jb < bs; jb++) { 2265 for (ib = 0; ib < bs; ib++) { 2266 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2267 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2268 b_val++; 2269 } 2270 } 2271 } 2272 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2273 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2274 for (jb = 0; jb < bs; jb++) { 2275 for (ib = 0; ib < bs; ib++) { 2276 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2277 a_val++; 2278 } 2279 } 2280 } 2281 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2282 for (jb = 0; jb < bs; jb++) { 2283 for (ib = 0; ib < bs; ib++) { 2284 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2285 b_val++; 2286 } 2287 } 2288 } 2289 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2290 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2291 for (jb = 0; jb < bs; jb++) { 2292 for (ib = 0; ib < bs; ib++) { 2293 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2294 a_val++; 2295 } 2296 } 2297 } 2298 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2299 for (jb = 0; jb < bs; jb++) { 2300 for (ib = 0; ib < bs; ib++) { 2301 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2302 b_val++; 2303 } 2304 } 2305 } 2306 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2307 if (type == NORM_INFINITY) { 2308 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2309 } else { 2310 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2311 } 2312 PetscCall(PetscFree(work)); 2313 if (type == NORM_2) { 2314 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2315 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2316 for (i = 0; i < N; i++) reductions[i] /= m; 2317 } 2318 PetscFunctionReturn(0); 2319 } 2320 2321 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) { 2322 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2323 2324 PetscFunctionBegin; 2325 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2326 A->factorerrortype = a->A->factorerrortype; 2327 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2328
A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2329 PetscFunctionReturn(0); 2330 } 2331 2332 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) { 2333 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2334 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2335 2336 PetscFunctionBegin; 2337 if (!Y->preallocated) { 2338 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2339 } else if (!aij->nz) { 2340 PetscInt nonew = aij->nonew; 2341 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2342 aij->nonew = nonew; 2343 } 2344 PetscCall(MatShift_Basic(Y, a)); 2345 PetscFunctionReturn(0); 2346 } 2347 2348 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) { 2349 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2350 2351 PetscFunctionBegin; 2352 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2353 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2354 if (d) { 2355 PetscInt rstart; 2356 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2357 *d += rstart / A->rmap->bs; 2358 } 2359 PetscFunctionReturn(0); 2360 } 2361 2362 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) { 2363 PetscFunctionBegin; 2364 *a = ((Mat_MPIBAIJ *)A->data)->A; 2365 PetscFunctionReturn(0); 2366 } 2367 2368 /* -------------------------------------------------------------------*/ 2369 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2370 MatGetRow_MPIBAIJ, 2371 MatRestoreRow_MPIBAIJ, 2372 MatMult_MPIBAIJ, 2373 /* 4*/ MatMultAdd_MPIBAIJ, 2374 MatMultTranspose_MPIBAIJ, 2375 MatMultTransposeAdd_MPIBAIJ, 2376 NULL, 2377 NULL, 2378 NULL, 2379 /*10*/ NULL, 2380 NULL, 2381 NULL, 2382 MatSOR_MPIBAIJ, 2383 MatTranspose_MPIBAIJ, 2384 /*15*/ MatGetInfo_MPIBAIJ, 2385 MatEqual_MPIBAIJ, 2386 MatGetDiagonal_MPIBAIJ, 2387 MatDiagonalScale_MPIBAIJ, 2388 MatNorm_MPIBAIJ, 2389 /*20*/ MatAssemblyBegin_MPIBAIJ, 2390 MatAssemblyEnd_MPIBAIJ, 2391 MatSetOption_MPIBAIJ, 2392 MatZeroEntries_MPIBAIJ, 2393 /*24*/ MatZeroRows_MPIBAIJ, 2394 NULL, 2395 NULL, 2396 NULL, 2397 NULL, 2398 /*29*/ MatSetUp_MPIBAIJ, 2399 NULL, 2400 NULL, 2401 MatGetDiagonalBlock_MPIBAIJ, 2402 NULL, 2403 /*34*/ MatDuplicate_MPIBAIJ, 2404 NULL, 2405 NULL, 2406 NULL, 2407 NULL, 2408 /*39*/ MatAXPY_MPIBAIJ, 2409 MatCreateSubMatrices_MPIBAIJ, 2410 MatIncreaseOverlap_MPIBAIJ, 2411 MatGetValues_MPIBAIJ, 2412 MatCopy_MPIBAIJ, 2413 /*44*/ NULL, 2414 MatScale_MPIBAIJ, 2415 MatShift_MPIBAIJ, 2416 NULL, 2417 MatZeroRowsColumns_MPIBAIJ, 2418 /*49*/ NULL, 2419 NULL, 2420 NULL, 2421 NULL, 2422 NULL, 2423 /*54*/ MatFDColoringCreate_MPIXAIJ, 2424 NULL, 2425 MatSetUnfactored_MPIBAIJ, 2426 MatPermute_MPIBAIJ, 2427 MatSetValuesBlocked_MPIBAIJ, 2428 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2429 MatDestroy_MPIBAIJ, 2430 MatView_MPIBAIJ, 2431 NULL, 2432 NULL, 2433 /*64*/ NULL, 2434 NULL, 2435 NULL, 2436 NULL, 2437 NULL, 2438 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2439 NULL, 2440 NULL, 2441 NULL, 2442 NULL, 2443 /*74*/ NULL, 2444 MatFDColoringApply_BAIJ, 2445 NULL, 2446 NULL, 2447 NULL, 2448 /*79*/ NULL, 2449 NULL, 2450 NULL, 2451 NULL, 2452 MatLoad_MPIBAIJ, 2453 /*84*/ NULL, 2454 NULL, 2455 NULL, 2456 NULL, 2457 NULL, 2458 /*89*/ NULL, 2459 NULL, 2460 NULL, 2461 NULL, 2462 NULL, 2463 /*94*/ NULL, 2464 NULL, 2465 NULL, 2466 NULL, 2467 NULL, 2468 /*99*/ NULL, 2469 NULL, 2470 NULL, 2471 MatConjugate_MPIBAIJ, 2472 NULL, 2473 /*104*/ NULL, 2474 MatRealPart_MPIBAIJ, 2475 MatImaginaryPart_MPIBAIJ, 2476 NULL, 2477 NULL, 2478 /*109*/ NULL, 2479 NULL, 2480 NULL, 2481 NULL, 
2482 MatMissingDiagonal_MPIBAIJ, 2483 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2484 NULL, 2485 MatGetGhosts_MPIBAIJ, 2486 NULL, 2487 NULL, 2488 /*119*/ NULL, 2489 NULL, 2490 NULL, 2491 NULL, 2492 MatGetMultiProcBlock_MPIBAIJ, 2493 /*124*/ NULL, 2494 MatGetColumnReductions_MPIBAIJ, 2495 MatInvertBlockDiagonal_MPIBAIJ, 2496 NULL, 2497 NULL, 2498 /*129*/ NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 NULL, 2503 /*134*/ NULL, 2504 NULL, 2505 NULL, 2506 NULL, 2507 NULL, 2508 /*139*/ MatSetBlockSizes_Default, 2509 NULL, 2510 NULL, 2511 MatFDColoringSetUp_MPIXAIJ, 2512 NULL, 2513 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2514 NULL, 2515 NULL, 2516 NULL, 2517 NULL, 2518 NULL, 2519 /*150*/ NULL}; 2520 2521 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2522 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2523 2524 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) { 2525 PetscInt m, rstart, cstart, cend; 2526 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2527 const PetscInt *JJ = NULL; 2528 PetscScalar *values = NULL; 2529 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2530 PetscBool nooffprocentries; 2531 2532 PetscFunctionBegin; 2533 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2534 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2535 PetscCall(PetscLayoutSetUp(B->rmap)); 2536 PetscCall(PetscLayoutSetUp(B->cmap)); 2537 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2538 m = B->rmap->n / bs; 2539 rstart = B->rmap->rstart / bs; 2540 cstart = B->cmap->rstart / bs; 2541 cend = B->cmap->rend / bs; 2542 2543 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2544 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2545 for (i = 0; i < m; i++) { 2546 nz = ii[i + 1] - ii[i]; 2547 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2548 nz_max = PetscMax(nz_max, nz); 2549 dlen = 0; 2550 olen = 0; 2551 JJ = jj + ii[i]; 2552 for (j = 0; j < nz; j++) { 2553 if (*JJ < cstart || *JJ >= cend) olen++; 2554 else dlen++; 2555 JJ++; 2556 } 2557 d_nnz[i] = dlen; 2558 o_nnz[i] = olen; 2559 } 2560 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2561 PetscCall(PetscFree2(d_nnz, o_nnz)); 2562 2563 values = (PetscScalar *)V; 2564 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2565 for (i = 0; i < m; i++) { 2566 PetscInt row = i + rstart; 2567 PetscInt ncols = ii[i + 1] - ii[i]; 2568 const PetscInt *icols = jj + ii[i]; 2569 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2570 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2571 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2572 } else { /* block ordering does not match so we can only insert one block at a time. */ 2573 PetscInt j; 2574 for (j = 0; j < ncols; j++) { 2575 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2576 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2577 } 2578 } 2579 } 2580 2581 if (!V) PetscCall(PetscFree(values)); 2582 nooffprocentries = B->nooffprocentries; 2583 B->nooffprocentries = PETSC_TRUE; 2584 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2585 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2586 B->nooffprocentries = nooffprocentries; 2587 2588 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2589 PetscFunctionReturn(0); 2590 } 2591 2592 /*@C 2593 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2594 2595 Collective 2596 2597 Input Parameters: 2598 + B - the matrix 2599 . bs - the block size 2600 . i - the indices into j for the start of each local row (starts with zero) 2601 . j - the column indices for each local row (starts with zero); these must be sorted for each row 2602 - v - optional values in the matrix 2603 2604 Level: advanced 2605 2606 Notes: 2607 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2608 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2609 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2610 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2611 block column and the second index is over columns within a block. 2612 2613 Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2614 2615 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2616 @*/ 2617 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) { 2618 PetscFunctionBegin; 2619 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2620 PetscValidType(B, 1); 2621 PetscValidLogicalCollectiveInt(B, bs, 2); 2622 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2623 PetscFunctionReturn(0); 2624 } 2625 2626 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) { 2627 Mat_MPIBAIJ *b; 2628 PetscInt i; 2629 PetscMPIInt size; 2630 2631 PetscFunctionBegin; 2632 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2633 PetscCall(PetscLayoutSetUp(B->rmap)); 2634 PetscCall(PetscLayoutSetUp(B->cmap)); 2635 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2636 2637 if (d_nnz) { 2638 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2639 } 2640 if (o_nnz) { 2641 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2642 } 2643 2644 b = (Mat_MPIBAIJ *)B->data; 2645 b->bs2 = bs * bs; 2646 b->mbs = B->rmap->n / bs; 2647 b->nbs = B->cmap->n / bs; 2648 b->Mbs = B->rmap->N / bs; 2649
b->Nbs = B->cmap->N / bs; 2650 2651 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2652 b->rstartbs = B->rmap->rstart / bs; 2653 b->rendbs = B->rmap->rend / bs; 2654 b->cstartbs = B->cmap->rstart / bs; 2655 b->cendbs = B->cmap->rend / bs; 2656 2657 #if defined(PETSC_USE_CTABLE) 2658 PetscCall(PetscTableDestroy(&b->colmap)); 2659 #else 2660 PetscCall(PetscFree(b->colmap)); 2661 #endif 2662 PetscCall(PetscFree(b->garray)); 2663 PetscCall(VecDestroy(&b->lvec)); 2664 PetscCall(VecScatterDestroy(&b->Mvctx)); 2665 2666 /* Because B will have been resized we simply destroy it and create a new one each time */ 2667 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2668 PetscCall(MatDestroy(&b->B)); 2669 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2670 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2671 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2672 2673 if (!B->preallocated) { 2674 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2675 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2676 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2677 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2678 } 2679 2680 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2681 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2682 B->preallocated = PETSC_TRUE; 2683 B->was_assembled = PETSC_FALSE; 2684 B->assembled = PETSC_FALSE; 2685 PetscFunctionReturn(0); 2686 } 2687 2688 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2689 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2690 2691 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) { 2692 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2693 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2694 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2695 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2696 2697 PetscFunctionBegin; 2698 PetscCall(PetscMalloc1(M + 1, &ii)); 2699 ii[0] = 0; 2700 for (i = 0; i < M; i++) { 2701 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2702 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2703 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2704 /* remove one from the count if the row has a diagonal entry */ 2705 for (j = id[i]; j < id[i + 1]; j++) { 2706 if (jd[j] == i) { 2707 ii[i + 1]--; 2708 break; 2709 } 2710 } 2711 } 2712 PetscCall(PetscMalloc1(ii[M], &jj)); 2713 cnt = 0; 2714 for (i = 0; i < M; i++) { 2715 for (j = io[i]; j < io[i + 1]; j++) { 2716 if (garray[jo[j]] > rstart) break; 2717 jj[cnt++] = garray[jo[j]]; 2718 } 2719 for (k = id[i]; k < id[i + 1]; k++) { 2720 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2721 } 2722 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2723 } 2724 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2729 2730 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2731 2732 PETSC_INTERN PetscErrorCode
MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 2733 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2734 Mat_MPIAIJ *b; 2735 Mat B; 2736 2737 PetscFunctionBegin; 2738 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2739 2740 if (reuse == MAT_REUSE_MATRIX) { 2741 B = *newmat; 2742 } else { 2743 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2744 PetscCall(MatSetType(B, MATMPIAIJ)); 2745 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2746 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2747 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2748 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2749 } 2750 b = (Mat_MPIAIJ *)B->data; 2751 2752 if (reuse == MAT_REUSE_MATRIX) { 2753 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2754 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2755 } else { 2756 PetscCall(MatDestroy(&b->A)); 2757 PetscCall(MatDestroy(&b->B)); 2758 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2759 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2760 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2761 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2762 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2763 } 2764 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2765 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2766 2767 if (reuse == MAT_INPLACE_MATRIX) { 2768 PetscCall(MatHeaderReplace(A, &B)); 2769 } else { 2770 *newmat = B; 2771 } 2772 PetscFunctionReturn(0); 2773 } 2774 2775 /*MC 2776 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2777 2778 Options Database Keys: 2779 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2780 . -mat_block_size <bs> - set the blocksize used to store the matrix 2781 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2782 - -mat_use_hash_table <fact> - set hash table factor 2783 2784 Level: beginner 2785 2786 Note: 2787 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this case no 2788 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored. 2789 2790 .seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()` 2791 M*/ 2792 2793 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2794 2795 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) { 2796 Mat_MPIBAIJ *b; 2797 PetscBool flg = PETSC_FALSE; 2798 2799 PetscFunctionBegin; 2800 PetscCall(PetscNew(&b)); 2801 B->data = (void *)b; 2802 2803 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2804 B->assembled = PETSC_FALSE; 2805 2806 B->insertmode = NOT_SET_VALUES; 2807 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2808 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2809 2810 /* build local table of row and column ownerships */ 2811 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2812 2813 /* build cache for off array entries formed */ 2814 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2815 2816 b->donotstash = PETSC_FALSE; 2817 b->colmap = NULL; 2818 b->garray = NULL; 2819 b->roworiented = PETSC_TRUE; 2820 2821 /* stuff used in block assembly */ 2822 b->barray = NULL; 2823 2824 /* stuff used for matrix vector multiply */ 2825 b->lvec = NULL; 2826 b->Mvctx = NULL; 2827 2828 /* stuff for MatGetRow() */ 2829 b->rowindices = NULL; 2830 b->rowvalues = NULL; 2831 b->getrowactive = PETSC_FALSE; 2832 2833 /* hash table stuff */ 2834 b->ht = NULL; 2835 b->hd = NULL; 2836 b->ht_size = 0; 2837 b->ht_flag = PETSC_FALSE; 2838 b->ht_fact = 0; 2839 b->ht_total_ct = 0; 2840 b->ht_insert_ct = 0; 2841 2842 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2843 b->ijonly = PETSC_FALSE; 2844 2845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2847 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2848 #if defined(PETSC_HAVE_HYPRE) 2849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2850 #endif 2851 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2852 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2853 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2854 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2856 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2858 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2859 2860 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2861 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2862 if (flg) { 2863 PetscReal fact = 1.39; 2864 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2865
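/* allow the user to tune the hash table size factor; values <= 1.0 fall back to the default of 1.39 */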
PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2866 if (fact <= 1.0) fact = 1.39; 2867 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2868 PetscCall(PetscInfo(B, "Hash table factor used %5.2g\n", (double)fact)); 2869 } 2870 PetscOptionsEnd(); 2871 PetscFunctionReturn(0); 2872 } 2873 2874 /*MC 2875 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2876 2877 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2878 and `MATMPIBAIJ` otherwise. 2879 2880 Options Database Keys: 2881 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2882 2883 Level: beginner 2884 2885 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2886 M*/ 2887 2888 /*@C 2889 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2890 (block compressed row). For good matrix assembly performance 2891 the user should preallocate the matrix storage by setting the parameters 2892 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2893 performance can be increased by more than a factor of 50. 2894 2895 Collective on Mat 2896 2897 Input Parameters: 2898 + B - the matrix 2899 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2900 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2901 . d_nz - number of block nonzeros per block row in diagonal portion of local 2902 submatrix (same for all local rows) 2903 . d_nnz - array containing the number of block nonzeros in the various block rows 2904 of the diagonal portion of the local submatrix (possibly different for each block 2905 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 2906 set it even if it is zero. 2907 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2908 submatrix (same for all local rows). 2909 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2910 off-diagonal portion of the local submatrix (possibly different for 2911 each block row) or NULL. 2912 2913 If the *_nnz parameter is given then the *_nz parameter is ignored 2914 2915 Options Database Keys: 2916 + -mat_block_size - size of the blocks to use 2917 - -mat_use_hash_table <fact> - set hash table factor 2918 2919 Notes: 2920 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2921 then it must be used on all processors that share the object for that argument. 2922 2923 Storage Information: 2924 For a square global matrix we define each processor's diagonal portion 2925 to be its local rows and the corresponding columns (a square submatrix); 2926 each processor's off-diagonal portion encompasses the remainder of the 2927 local matrix (a rectangular submatrix). 2928 2929 The user can specify preallocated storage for the diagonal part of 2930 the local submatrix with either d_nz or d_nnz (not both). Set 2931 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2932 memory allocation. Likewise, specify preallocated storage for the 2933 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
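For instance, a minimal sketch (the counts are illustrative only): a matrix with block size 2 expecting at most 3 block nonzeros per block row in the diagonal part and 1 in the off-diagonal part could be preallocated with
.vb
   PetscCall(MatMPIBAIJSetPreallocation(B, 2, 3, NULL, 1, NULL));
.ve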
2934 2935 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2936 the figure below we depict these three local rows and all columns (0-11). 2937 2938 .vb 2939 0 1 2 3 4 5 6 7 8 9 10 11 2940 -------------------------- 2941 row 3 |o o o d d d o o o o o o 2942 row 4 |o o o d d d o o o o o o 2943 row 5 |o o o d d d o o o o o o 2944 -------------------------- 2945 .ve 2946 2947 Thus, any entries in the d locations are stored in the d (diagonal) 2948 submatrix, and any entries in the o locations are stored in the 2949 o (off-diagonal) submatrix. Note that the d and the o submatrices are 2950 stored simply in the `MATSEQBAIJ` format for compressed row storage. 2951 2952 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2953 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2954 In general, for PDE problems in which most nonzeros are near the diagonal, 2955 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2956 or you will get TERRIBLE performance; see the users' manual chapter on 2957 matrices. 2958 2959 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2960 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2961 You can also run with the option -info and look for messages with the string 2962 malloc in them to see if additional memory allocation was needed. 2963 2964 Level: intermediate 2965 2966 .seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 2967 @*/ 2968 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) { 2969 PetscFunctionBegin; 2970 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2971 PetscValidType(B, 1); 2972 PetscValidLogicalCollectiveInt(B, bs, 2); 2973 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2974 PetscFunctionReturn(0); 2975 } 2976 2977 /*@C 2978 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 2979 (block compressed row). For good matrix assembly performance 2980 the user should preallocate the matrix storage by setting the parameters 2981 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2982 performance can be increased by more than a factor of 50. 2983 2984 Collective 2985 2986 Input Parameters: 2987 + comm - MPI communicator 2988 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2989 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2990 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2991 This value should be the same as the local size used in creating the 2992 y vector for the matrix-vector product y = Ax. 2993 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2994 This value should be the same as the local size used in creating the 2995 x vector for the matrix-vector product y = Ax. 2996 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 2997 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2998 . 
   Level: intermediate

.seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
       submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
       in the diagonal portion of the local submatrix (possibly different for each block
       row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
       and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
       submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
       off-diagonal portion of the local submatrix (possibly different for
       each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   followed by MatXXXXSetPreallocation(), instead of calling this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
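   Example Usage:
   A minimal sketch (all sizes here are illustrative): create a matrix with 2x2 blocks
   and 4 local rows and columns per process, letting PETSc determine the global sizes,
   preallocating 2 diagonal and 1 off-diagonal nonzero blocks per block row:

.vb
   Mat A;
   MatCreateBAIJ(PETSC_COMM_WORLD, 2, 4, 4, PETSC_DETERMINE, PETSC_DETERMINE, 2, NULL, 1, NULL, &A);
.ve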
   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
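/* A minimal usage sketch of the duplicate operation implemented above:

     Mat B;
     MatDuplicate(A, MAT_COPY_VALUES, &B);

   MAT_DO_NOT_COPY_VALUES copies only the nonzero structure, and
   MAT_SHARE_NONZERO_PATTERN shares the structure between the two matrices. */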
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) {
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
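  /* Counting strategy used below: block columns that land in this process's diagonal
     block, i.e. in [cs/bs, ce/bs), are tracked with a bit set (PetscBT) since that
     range is contiguous and bounded, while off-diagonal block columns can lie anywhere
     in [0, Nbs) and so are tracked with a hash set (PetscHSetI). Each structure counts
     a given block column at most once per block row. */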
  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) {
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}
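/* A minimal usage sketch (the file name is hypothetical): load a MATMPIBAIJ matrix
   from a PETSc binary file.

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer);
     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetType(A, MATMPIBAIJ);
     MatLoad(A, viewer);
     PetscViewerDestroy(&viewer);
*/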
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Options Database Key:
.  -mat_use_hash_table <fact> - provide the factor

   Level: advanced

.seealso: `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) {
  PetscFunctionBegin;
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) {
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}
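/* A minimal usage sketch: given an assembled MATMPIBAIJ matrix A, retrieve its
   sequential diagonal and off-diagonal blocks and the global block-column indices
   of the off-diagonal part.

     Mat             Ad, Ao;
     const PetscInt *garray;
     MatMPIBAIJGetSeqBAIJ(A, &Ad, &Ao, &garray);
*/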
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to `MatSetValuesBlocked()`

   Collective on Mat

   Input Parameters:
+  mat - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.  v - numerical values input
-  addvin - `INSERT_VALUES` or `ADD_VALUES`

   Developer Note:
   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()`, which is terrible code duplication.

   Level: advanced

.seealso: `MatSetValuesBlocked()`
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) {
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows
   in standard block CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block entries in block row row-1 of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0-based, and the i indices index into the local j array.
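   To illustrate the ordering described above (with a hypothetical bs = 2 block), the block
.vb
   1 2
   3 4
.ve
   contributes the values {1, 3, 2, 4} to a[] (column-major within the block).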
.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(0);
}
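/* A minimal usage sketch (hypothetical data, one process): a 2 x 4 matrix with bs = 1
   and three nonzeros, built from CSR arrays.

     PetscInt    i[] = {0, 2, 3};
     PetscInt    j[] = {0, 2, 1};
     PetscScalar a[] = {1.0, 2.0, 3.0};
     Mat         A;
     MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 4, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A);
*/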
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
    PetscInt    *dnz, *onz, mbs, Nbs, nbs;
    PetscInt    *bindx, rmax = a->rmax, j;
    PetscMPIInt  rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}