1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2c5d9258eSSatish Balay 3b51a4376SLisandro Dalcin #include <petsc/private/hashseti.h> 4c6db04a5SJed Brown #include <petscblaslapack.h> 565a92638SMatthew G. Knepley #include <petscsf.h> 679bdfe76SSatish Balay 77ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 87ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 97ea3e4caSstefano_zampini #endif 107ea3e4caSstefano_zampini 119371c9d4SSatish Balay PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) { 127843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 134e879edeSHong Zhang PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 144e879edeSHong Zhang PetscScalar *va, *vv; 154e879edeSHong Zhang Vec vB, vA; 164e879edeSHong Zhang const PetscScalar *vb; 177843d17aSBarry Smith 187843d17aSBarry Smith PetscFunctionBegin; 199566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 209566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 214e879edeSHong Zhang 229566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(vA, &va)); 23985db425SBarry Smith if (idx) { 244e879edeSHong Zhang for (i = 0; i < m; i++) { 2526fbe8dcSKarl Rupp if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2626fbe8dcSKarl Rupp } 27985db425SBarry Smith } 287843d17aSBarry Smith 299566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &idxb)); 319566063dSJacob Faibussowitsch PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 327843d17aSBarry Smith 339566063dSJacob Faibussowitsch PetscCall(VecGetArrayWrite(v, &vv)); 349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(vB, &vb)); 354e879edeSHong Zhang for (i = 0; i < m; i++) { 3626fbe8dcSKarl Rupp if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 374e879edeSHong Zhang vv[i] = vb[i]; 384e879edeSHong Zhang if 
(idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 394e879edeSHong Zhang } else { 404e879edeSHong Zhang vv[i] = va[i]; 419371c9d4SSatish Balay if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 4226fbe8dcSKarl Rupp } 437843d17aSBarry Smith } 449566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(vA, &vv)); 459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayWrite(vA, &va)); 469566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(vB, &vb)); 479566063dSJacob Faibussowitsch PetscCall(PetscFree(idxb)); 489566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vA)); 499566063dSJacob Faibussowitsch PetscCall(VecDestroy(&vB)); 507843d17aSBarry Smith PetscFunctionReturn(0); 517843d17aSBarry Smith } 527843d17aSBarry Smith 539371c9d4SSatish Balay PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) { 547fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 557fc3c18eSBarry Smith 567fc3c18eSBarry Smith PetscFunctionBegin; 579566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->A)); 589566063dSJacob Faibussowitsch PetscCall(MatStoreValues(aij->B)); 597fc3c18eSBarry Smith PetscFunctionReturn(0); 607fc3c18eSBarry Smith } 617fc3c18eSBarry Smith 629371c9d4SSatish Balay PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) { 637fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 647fc3c18eSBarry Smith 657fc3c18eSBarry Smith PetscFunctionBegin; 669566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->A)); 679566063dSJacob Faibussowitsch PetscCall(MatRetrieveValues(aij->B)); 687fc3c18eSBarry Smith PetscFunctionReturn(0); 697fc3c18eSBarry Smith } 707fc3c18eSBarry Smith 71537820f0SBarry Smith /* 72537820f0SBarry Smith Local utility routine that creates a mapping from the global column 7357b952d6SSatish Balay number to the local number in the off-diagonal part of the local 74e06f6af7SJed Brown 
storage of the matrix. This is done in a non scalable way since the 7557b952d6SSatish Balay length of colmap equals the global matrix length. 7657b952d6SSatish Balay */ 779371c9d4SSatish Balay PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) { 7857b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 7957b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 80d0f46423SBarry Smith PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 8157b952d6SSatish Balay 82d64ed03dSBarry Smith PetscFunctionBegin; 83aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 849566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap)); 8548a46eb9SPierre Jolivet for (i = 0; i < nbs; i++) PetscCall(PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES)); 8648e59246SSatish Balay #else 879566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 88928fc39bSSatish Balay for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 8948e59246SSatish Balay #endif 903a40ed3dSBarry Smith PetscFunctionReturn(0); 9157b952d6SSatish Balay } 9257b952d6SSatish Balay 93d40312a9SBarry Smith #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 9480c1aa95SSatish Balay { \ 9580c1aa95SSatish Balay brow = row / bs; \ 969371c9d4SSatish Balay rp = aj + ai[brow]; \ 979371c9d4SSatish Balay ap = aa + bs2 * ai[brow]; \ 989371c9d4SSatish Balay rmax = aimax[brow]; \ 999371c9d4SSatish Balay nrow = ailen[brow]; \ 10080c1aa95SSatish Balay bcol = col / bs; \ 1019371c9d4SSatish Balay ridx = row % bs; \ 1029371c9d4SSatish Balay cidx = col % bs; \ 1039371c9d4SSatish Balay low = 0; \ 1049371c9d4SSatish Balay high = nrow; \ 105ab26458aSBarry Smith while (high - low > 3) { \ 106ab26458aSBarry Smith t = (low + high) / 2; \ 107ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 108ab26458aSBarry Smith else low = t; \ 109ab26458aSBarry Smith } \ 110ab26458aSBarry Smith for (_i = low; _i < 
high; _i++) { \ 11180c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 11280c1aa95SSatish Balay if (rp[_i] == bcol) { \ 11380c1aa95SSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 114eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 115eada6651SSatish Balay else *bap = value; \ 116ac7a638eSSatish Balay goto a_noinsert; \ 11780c1aa95SSatish Balay } \ 11880c1aa95SSatish Balay } \ 11989280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 1205f80ce2aSJacob Faibussowitsch PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 121fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 12280c1aa95SSatish Balay N = nrow++ - 1; \ 12380c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 1249566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 1259566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 1269566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 12780c1aa95SSatish Balay rp[_i] = bcol; \ 12880c1aa95SSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 129ac7a638eSSatish Balay a_noinsert:; \ 13080c1aa95SSatish Balay ailen[brow] = nrow; \ 13180c1aa95SSatish Balay } 13257b952d6SSatish Balay 133d40312a9SBarry Smith #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 134ac7a638eSSatish Balay { \ 135ac7a638eSSatish Balay brow = row / bs; \ 1369371c9d4SSatish Balay rp = bj + bi[brow]; \ 1379371c9d4SSatish Balay ap = ba + bs2 * bi[brow]; \ 1389371c9d4SSatish Balay rmax = bimax[brow]; \ 1399371c9d4SSatish Balay nrow = bilen[brow]; \ 140ac7a638eSSatish Balay bcol = col / bs; \ 1419371c9d4SSatish Balay ridx = row % bs; \ 1429371c9d4SSatish Balay cidx = col % bs; \ 
1439371c9d4SSatish Balay low = 0; \ 1449371c9d4SSatish Balay high = nrow; \ 145ac7a638eSSatish Balay while (high - low > 3) { \ 146ac7a638eSSatish Balay t = (low + high) / 2; \ 147ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 148ac7a638eSSatish Balay else low = t; \ 149ac7a638eSSatish Balay } \ 150ac7a638eSSatish Balay for (_i = low; _i < high; _i++) { \ 151ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 152ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 153ac7a638eSSatish Balay bap = ap + bs2 * _i + bs * cidx + ridx; \ 154ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 155ac7a638eSSatish Balay else *bap = value; \ 156ac7a638eSSatish Balay goto b_noinsert; \ 157ac7a638eSSatish Balay } \ 158ac7a638eSSatish Balay } \ 15989280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 1605f80ce2aSJacob Faibussowitsch PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 161fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 162ac7a638eSSatish Balay N = nrow++ - 1; \ 163ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 1649566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 1659566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 1669566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 167ac7a638eSSatish Balay rp[_i] = bcol; \ 168ac7a638eSSatish Balay ap[bs2 * _i + bs * cidx + ridx] = value; \ 169ac7a638eSSatish Balay b_noinsert:; \ 170ac7a638eSSatish Balay bilen[brow] = nrow; \ 171ac7a638eSSatish Balay } 172ac7a638eSSatish Balay 1739371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], 
InsertMode addv) { 17457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 17593fea6afSBarry Smith MatScalar value; 176ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 177b24ad042SBarry Smith PetscInt i, j, row, col; 178d0f46423SBarry Smith PetscInt rstart_orig = mat->rmap->rstart; 179d0f46423SBarry Smith PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 180d0f46423SBarry Smith PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 18157b952d6SSatish Balay 182eada6651SSatish Balay /* Some Variables required in the macro */ 18380c1aa95SSatish Balay Mat A = baij->A; 18480c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 185b24ad042SBarry Smith PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 1863eda8832SBarry Smith MatScalar *aa = a->a; 187ac7a638eSSatish Balay 188ac7a638eSSatish Balay Mat B = baij->B; 189ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 190b24ad042SBarry Smith PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 1913eda8832SBarry Smith MatScalar *ba = b->a; 192ac7a638eSSatish Balay 193b24ad042SBarry Smith PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 194b24ad042SBarry Smith PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 1953eda8832SBarry Smith MatScalar *ap, *bap; 19680c1aa95SSatish Balay 197d64ed03dSBarry Smith PetscFunctionBegin; 19857b952d6SSatish Balay for (i = 0; i < m; i++) { 1995ef9f2a5SBarry Smith if (im[i] < 0) continue; 2005f80ce2aSJacob Faibussowitsch PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 20157b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 20257b952d6SSatish Balay row = im[i] - rstart_orig; 20357b952d6SSatish Balay for (j = 0; j < n; j++) { 20457b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { 20557b952d6SSatish Balay col = in[j] - cstart_orig; 
206db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 207db4deed7SKarl Rupp else value = v[i + j * m]; 208d40312a9SBarry Smith MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 209f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 210f7d195e4SLawrence Mitchell continue; 211f7d195e4SLawrence Mitchell } else { 212f7d195e4SLawrence Mitchell PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 21357b952d6SSatish Balay if (mat->was_assembled) { 21448a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 215aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2169566063dSJacob Faibussowitsch PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col)); 217bba1ac68SSatish Balay col = col - 1; 21848e59246SSatish Balay #else 219bba1ac68SSatish Balay col = baij->colmap[in[j] / bs] - 1; 22048e59246SSatish Balay #endif 221c9ef50b2SBarry Smith if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 2229566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPIBAIJ(mat)); 2238295de27SSatish Balay col = in[j]; 2249bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2259bf004c3SSatish Balay B = baij->B; 2269bf004c3SSatish Balay b = (Mat_SeqBAIJ *)(B)->data; 2279371c9d4SSatish Balay bimax = b->imax; 2289371c9d4SSatish Balay bi = b->i; 2299371c9d4SSatish Balay bilen = b->ilen; 2309371c9d4SSatish Balay bj = b->j; 2319bf004c3SSatish Balay ba = b->a; 232f7d195e4SLawrence Mitchell } else { 233f7d195e4SLawrence Mitchell PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 234f7d195e4SLawrence Mitchell col += in[j] % bs; 235f7d195e4SLawrence Mitchell } 2368295de27SSatish Balay } else col = in[j]; 237db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 
238db4deed7SKarl Rupp else value = v[i + j * m]; 239d40312a9SBarry Smith MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 2409566063dSJacob Faibussowitsch /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 24157b952d6SSatish Balay } 24257b952d6SSatish Balay } 243d64ed03dSBarry Smith } else { 2445f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 24590f02eecSBarry Smith if (!baij->donotstash) { 2465080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 247ff2fd236SBarry Smith if (roworiented) { 2489566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 249ff2fd236SBarry Smith } else { 2509566063dSJacob Faibussowitsch PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 25157b952d6SSatish Balay } 25257b952d6SSatish Balay } 25357b952d6SSatish Balay } 25490f02eecSBarry Smith } 2553a40ed3dSBarry Smith PetscFunctionReturn(0); 25657b952d6SSatish Balay } 25757b952d6SSatish Balay 2589371c9d4SSatish Balay static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) { 259880c6e6aSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2608ab52850SBarry Smith PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 261880c6e6aSBarry Smith PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2628ab52850SBarry Smith PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 263880c6e6aSBarry Smith PetscBool roworiented = a->roworiented; 264880c6e6aSBarry Smith const PetscScalar *value = v; 265880c6e6aSBarry Smith MatScalar *ap, *aa = a->a, *bap; 266880c6e6aSBarry Smith 267880c6e6aSBarry Smith PetscFunctionBegin; 268880c6e6aSBarry Smith rp = aj + ai[row]; 
269880c6e6aSBarry Smith ap = aa + bs2 * ai[row]; 270880c6e6aSBarry Smith rmax = imax[row]; 271880c6e6aSBarry Smith nrow = ailen[row]; 2728ab52850SBarry Smith value = v; 2738ab52850SBarry Smith low = 0; 2748ab52850SBarry Smith high = nrow; 275880c6e6aSBarry Smith while (high - low > 7) { 276880c6e6aSBarry Smith t = (low + high) / 2; 277880c6e6aSBarry Smith if (rp[t] > col) high = t; 278880c6e6aSBarry Smith else low = t; 279880c6e6aSBarry Smith } 280880c6e6aSBarry Smith for (i = low; i < high; i++) { 281880c6e6aSBarry Smith if (rp[i] > col) break; 282880c6e6aSBarry Smith if (rp[i] == col) { 283880c6e6aSBarry Smith bap = ap + bs2 * i; 284880c6e6aSBarry Smith if (roworiented) { 285880c6e6aSBarry Smith if (is == ADD_VALUES) { 2868ab52850SBarry Smith for (ii = 0; ii < bs; ii++) { 287ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 288880c6e6aSBarry Smith } 289880c6e6aSBarry Smith } else { 2908ab52850SBarry Smith for (ii = 0; ii < bs; ii++) { 291ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 292880c6e6aSBarry Smith } 293880c6e6aSBarry Smith } 294880c6e6aSBarry Smith } else { 295880c6e6aSBarry Smith if (is == ADD_VALUES) { 2968ab52850SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 297ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 298880c6e6aSBarry Smith bap += bs; 299880c6e6aSBarry Smith } 300880c6e6aSBarry Smith } else { 3018ab52850SBarry Smith for (ii = 0; ii < bs; ii++, value += bs) { 302ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 303880c6e6aSBarry Smith bap += bs; 304880c6e6aSBarry Smith } 305880c6e6aSBarry Smith } 306880c6e6aSBarry Smith } 307880c6e6aSBarry Smith goto noinsert2; 308880c6e6aSBarry Smith } 309880c6e6aSBarry Smith } 310880c6e6aSBarry Smith if (nonew == 1) goto noinsert2; 3115f80ce2aSJacob Faibussowitsch PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" 
PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 312880c6e6aSBarry Smith MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 3139371c9d4SSatish Balay N = nrow++ - 1; 3149371c9d4SSatish Balay high++; 315880c6e6aSBarry Smith /* shift up all the later entries in this row */ 3169566063dSJacob Faibussowitsch PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 3179566063dSJacob Faibussowitsch PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 318880c6e6aSBarry Smith rp[i] = col; 319880c6e6aSBarry Smith bap = ap + bs2 * i; 320880c6e6aSBarry Smith if (roworiented) { 3218ab52850SBarry Smith for (ii = 0; ii < bs; ii++) { 322ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 323880c6e6aSBarry Smith } 324880c6e6aSBarry Smith } else { 3258ab52850SBarry Smith for (ii = 0; ii < bs; ii++) { 326ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *bap++ = *value++; 327880c6e6aSBarry Smith } 328880c6e6aSBarry Smith } 329880c6e6aSBarry Smith noinsert2:; 330880c6e6aSBarry Smith ailen[row] = nrow; 331880c6e6aSBarry Smith PetscFunctionReturn(0); 332880c6e6aSBarry Smith } 333880c6e6aSBarry Smith 3348ab52850SBarry Smith /* 3358ab52850SBarry Smith This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 3368ab52850SBarry Smith by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 3378ab52850SBarry Smith */ 3389371c9d4SSatish Balay PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) { 339ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 340dd6ea824SBarry Smith const PetscScalar *value; 341f15d580aSBarry Smith MatScalar *barray = baij->barray; 342ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 343899cda47SBarry Smith PetscInt i, j, 
ii, jj, row, col, rstart = baij->rstartbs; 344899cda47SBarry Smith PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 345d0f46423SBarry Smith PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 346ab26458aSBarry Smith 347b16ae2b1SBarry Smith PetscFunctionBegin; 34830793edcSSatish Balay if (!barray) { 3499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs2, &barray)); 35082502324SSatish Balay baij->barray = barray; 35130793edcSSatish Balay } 35230793edcSSatish Balay 35326fbe8dcSKarl Rupp if (roworiented) stepval = (n - 1) * bs; 35426fbe8dcSKarl Rupp else stepval = (m - 1) * bs; 35526fbe8dcSKarl Rupp 356ab26458aSBarry Smith for (i = 0; i < m; i++) { 3575ef9f2a5SBarry Smith if (im[i] < 0) continue; 3586bdcaf15SBarry Smith PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 359ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 360ab26458aSBarry Smith row = im[i] - rstart; 361ab26458aSBarry Smith for (j = 0; j < n; j++) { 36215b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 36315b57d14SSatish Balay if ((roworiented) && (n == 1)) { 364f15d580aSBarry Smith barray = (MatScalar *)v + i * bs2; 36515b57d14SSatish Balay } else if ((!roworiented) && (m == 1)) { 366f15d580aSBarry Smith barray = (MatScalar *)v + j * bs2; 36715b57d14SSatish Balay } else { /* Here a copy is required */ 368ab26458aSBarry Smith if (roworiented) { 36953ef36baSBarry Smith value = v + (i * (stepval + bs) + j) * bs; 370ab26458aSBarry Smith } else { 37153ef36baSBarry Smith value = v + (j * (stepval + bs) + i) * bs; 372abef11f7SSatish Balay } 37353ef36baSBarry Smith for (ii = 0; ii < bs; ii++, value += bs + stepval) { 37426fbe8dcSKarl Rupp for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 37553ef36baSBarry Smith barray += bs; 37647513183SBarry Smith } 37730793edcSSatish Balay barray -= bs2; 37815b57d14SSatish Balay } 
379abef11f7SSatish Balay 380abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend) { 381abef11f7SSatish Balay col = in[j] - cstart; 3829566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 383f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 384f7d195e4SLawrence Mitchell continue; 385f7d195e4SLawrence Mitchell } else { 386f7d195e4SLawrence Mitchell PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 387ab26458aSBarry Smith if (mat->was_assembled) { 38848a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 389a5eb4965SSatish Balay 3902515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 391aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 3929371c9d4SSatish Balay { 3939371c9d4SSatish Balay PetscInt data; 3949566063dSJacob Faibussowitsch PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data)); 39508401ef6SPierre Jolivet PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 396fa46199cSSatish Balay } 39748e59246SSatish Balay #else 39808401ef6SPierre Jolivet PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 399a5eb4965SSatish Balay #endif 40048e59246SSatish Balay #endif 401aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 4029566063dSJacob Faibussowitsch PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col)); 403fa46199cSSatish Balay col = (col - 1) / bs; 40448e59246SSatish Balay #else 405a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1) / bs; 40648e59246SSatish Balay #endif 4070e9bae81SBarry Smith if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 4089566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPIBAIJ(mat)); 409ab26458aSBarry Smith col = in[j]; 4105f80ce2aSJacob Faibussowitsch } else PetscCheck(col >= 0, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 411db4deed7SKarl Rupp } else col = in[j]; 4129566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 413ab26458aSBarry Smith } 414ab26458aSBarry Smith } 415d64ed03dSBarry Smith } else { 4165f80ce2aSJacob Faibussowitsch PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 417ab26458aSBarry Smith if (!baij->donotstash) { 418ff2fd236SBarry Smith if (roworiented) { 4199566063dSJacob Faibussowitsch PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 420ff2fd236SBarry Smith } else { 4219566063dSJacob Faibussowitsch PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 422ff2fd236SBarry Smith } 423abef11f7SSatish Balay } 424ab26458aSBarry Smith } 425ab26458aSBarry Smith } 4263a40ed3dSBarry Smith PetscFunctionReturn(0); 427ab26458aSBarry Smith } 4286fa18ffdSBarry Smith 4290bdbc534SSatish Balay #define HASH_KEY 0.6180339887 430b24ad042SBarry Smith #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 431b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 432b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 4339371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) { 4340bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 435ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 436b24ad042SBarry Smith PetscInt i, j, row, col; 437d0f46423SBarry Smith PetscInt rstart_orig = 
mat->rmap->rstart; 438d0f46423SBarry Smith PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 439d0f46423SBarry Smith PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 440329f5518SBarry Smith PetscReal tmp; 4413eda8832SBarry Smith MatScalar **HD = baij->hd, value; 442b24ad042SBarry Smith PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 4430bdbc534SSatish Balay 4440bdbc534SSatish Balay PetscFunctionBegin; 4450bdbc534SSatish Balay for (i = 0; i < m; i++) { 44676bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 4475f80ce2aSJacob Faibussowitsch PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 4485f80ce2aSJacob Faibussowitsch PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 44976bd3646SJed Brown } 4500bdbc534SSatish Balay row = im[i]; 451c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4520bdbc534SSatish Balay for (j = 0; j < n; j++) { 4530bdbc534SSatish Balay col = in[j]; 454db4deed7SKarl Rupp if (roworiented) value = v[i * n + j]; 455db4deed7SKarl Rupp else value = v[i + j * m]; 456b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 457c2760754SSatish Balay key = (row / bs) * Nbs + (col / bs) + 1; 458c2760754SSatish Balay h1 = HASH(size, key, tmp); 4590bdbc534SSatish Balay 460c2760754SSatish Balay idx = h1; 46176bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 462187ce0cbSSatish Balay insert_ct++; 463187ce0cbSSatish Balay total_ct++; 464187ce0cbSSatish Balay if (HT[idx] != key) { 4659371c9d4SSatish Balay for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 4669371c9d4SSatish Balay ; 467187ce0cbSSatish Balay if (idx == size) { 4689371c9d4SSatish Balay for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 4699371c9d4SSatish Balay ; 4705f80ce2aSJacob Faibussowitsch PetscCheck(idx != h1, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 471187ce0cbSSatish Balay } 472187ce0cbSSatish Balay } 47376bd3646SJed Brown } else if (HT[idx] != key) { 4749371c9d4SSatish Balay for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 4759371c9d4SSatish Balay ; 476c2760754SSatish Balay if (idx == size) { 4779371c9d4SSatish Balay for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 4789371c9d4SSatish Balay ; 4795f80ce2aSJacob Faibussowitsch PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 480c2760754SSatish Balay } 481c2760754SSatish Balay } 482c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 483c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 484c2760754SSatish Balay else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 4850bdbc534SSatish Balay } 48626fbe8dcSKarl Rupp } else if (!baij->donotstash) { 487ff2fd236SBarry Smith if (roworiented) { 4889566063dSJacob Faibussowitsch PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 489ff2fd236SBarry Smith } else { 4909566063dSJacob Faibussowitsch PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 4910bdbc534SSatish Balay } 4920bdbc534SSatish Balay } 4930bdbc534SSatish Balay } 49476bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 495abf3b562SBarry Smith baij->ht_total_ct += total_ct; 496abf3b562SBarry Smith baij->ht_insert_ct += insert_ct; 49776bd3646SJed Brown } 4980bdbc534SSatish Balay PetscFunctionReturn(0); 4990bdbc534SSatish Balay } 5000bdbc534SSatish Balay 5019371c9d4SSatish Balay PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) { 5020bdbc534SSatish Balay Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ *)mat->data; 503ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 504b24ad042SBarry Smith PetscInt i, j, ii, jj, row, col; 505899cda47SBarry Smith PetscInt rstart = baij->rstartbs; 506d0f46423SBarry Smith PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 507b24ad042SBarry Smith PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 508329f5518SBarry Smith PetscReal tmp; 5093eda8832SBarry Smith MatScalar **HD = baij->hd, *baij_a; 510dd6ea824SBarry Smith const PetscScalar *v_t, *value; 511b24ad042SBarry Smith PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 5120bdbc534SSatish Balay 513d0a41580SSatish Balay PetscFunctionBegin; 51426fbe8dcSKarl Rupp if (roworiented) stepval = (n - 1) * bs; 51526fbe8dcSKarl Rupp else stepval = (m - 1) * bs; 51626fbe8dcSKarl Rupp 5170bdbc534SSatish Balay for (i = 0; i < m; i++) { 51876bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 5195f80ce2aSJacob Faibussowitsch PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 5205f80ce2aSJacob Faibussowitsch PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 52176bd3646SJed Brown } 5220bdbc534SSatish Balay row = im[i]; 523ab715e2cSSatish Balay v_t = v + i * nbs2; 524c2760754SSatish Balay if (row >= rstart && row < rend) { 5250bdbc534SSatish Balay for (j = 0; j < n; j++) { 5260bdbc534SSatish Balay col = in[j]; 5270bdbc534SSatish Balay 5280bdbc534SSatish Balay /* Look up into the Hash Table */ 529c2760754SSatish Balay key = row * Nbs + col + 1; 530c2760754SSatish Balay h1 = HASH(size, key, tmp); 5310bdbc534SSatish Balay 532c2760754SSatish Balay idx = h1; 53376bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 534187ce0cbSSatish Balay total_ct++; 535187ce0cbSSatish Balay insert_ct++; 536187ce0cbSSatish Balay if (HT[idx] != key) { 
5379371c9d4SSatish Balay for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 5389371c9d4SSatish Balay ; 539187ce0cbSSatish Balay if (idx == size) { 5409371c9d4SSatish Balay for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 5419371c9d4SSatish Balay ; 5425f80ce2aSJacob Faibussowitsch PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 543187ce0cbSSatish Balay } 544187ce0cbSSatish Balay } 54576bd3646SJed Brown } else if (HT[idx] != key) { 5469371c9d4SSatish Balay for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 5479371c9d4SSatish Balay ; 548c2760754SSatish Balay if (idx == size) { 5499371c9d4SSatish Balay for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 5509371c9d4SSatish Balay ; 5515f80ce2aSJacob Faibussowitsch PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 552c2760754SSatish Balay } 553c2760754SSatish Balay } 554c2760754SSatish Balay baij_a = HD[idx]; 5550bdbc534SSatish Balay if (roworiented) { 556c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 557187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 558187ce0cbSSatish Balay value = v_t; 559187ce0cbSSatish Balay v_t += bs; 560fef45726SSatish Balay if (addv == ADD_VALUES) { 561c2760754SSatish Balay for (ii = 0; ii < bs; ii++, value += stepval) { 562ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 563b4cc0f5aSSatish Balay } 564fef45726SSatish Balay } else { 565c2760754SSatish Balay for (ii = 0; ii < bs; ii++, value += stepval) { 566ad540459SPierre Jolivet for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 567fef45726SSatish Balay } 568fef45726SSatish Balay } 5690bdbc534SSatish Balay } else { 5700bdbc534SSatish Balay value = v + j * (stepval + bs) * bs + i * bs; 571fef45726SSatish Balay if (addv == ADD_VALUES) { 
572b4cc0f5aSSatish Balay for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 573ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 574fef45726SSatish Balay } 575fef45726SSatish Balay } else { 576fef45726SSatish Balay for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 577ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 578b4cc0f5aSSatish Balay } 5790bdbc534SSatish Balay } 5800bdbc534SSatish Balay } 5810bdbc534SSatish Balay } 5820bdbc534SSatish Balay } else { 5830bdbc534SSatish Balay if (!baij->donotstash) { 5840bdbc534SSatish Balay if (roworiented) { 5859566063dSJacob Faibussowitsch PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 5860bdbc534SSatish Balay } else { 5879566063dSJacob Faibussowitsch PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 5880bdbc534SSatish Balay } 5890bdbc534SSatish Balay } 5900bdbc534SSatish Balay } 5910bdbc534SSatish Balay } 59276bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 593abf3b562SBarry Smith baij->ht_total_ct += total_ct; 594abf3b562SBarry Smith baij->ht_insert_ct += insert_ct; 59576bd3646SJed Brown } 5960bdbc534SSatish Balay PetscFunctionReturn(0); 5970bdbc534SSatish Balay } 598133cdb44SSatish Balay 5999371c9d4SSatish Balay PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) { 600d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 601d0f46423SBarry Smith PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 602d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 603d6de1c52SSatish Balay 604133cdb44SSatish Balay PetscFunctionBegin; 605d6de1c52SSatish Balay for (i = 0; i < m; i++) { 60654c59aa7SJacob Faibussowitsch if (idxm[i] < 0) continue; /* negative row */ 60754c59aa7SJacob Faibussowitsch PetscCheck(idxm[i] < 
mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 608d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 609d6de1c52SSatish Balay row = idxm[i] - bsrstart; 610d6de1c52SSatish Balay for (j = 0; j < n; j++) { 61154c59aa7SJacob Faibussowitsch if (idxn[j] < 0) continue; /* negative column */ 61254c59aa7SJacob Faibussowitsch PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 613d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 614d6de1c52SSatish Balay col = idxn[j] - bscstart; 6159566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 616d64ed03dSBarry Smith } else { 61748a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 618aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 6199566063dSJacob Faibussowitsch PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data)); 620fa46199cSSatish Balay data--; 62148e59246SSatish Balay #else 62248e59246SSatish Balay data = baij->colmap[idxn[j] / bs] - 1; 62348e59246SSatish Balay #endif 62448e59246SSatish Balay if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 625d9d09a02SSatish Balay else { 62648e59246SSatish Balay col = data + idxn[j] % bs; 6279566063dSJacob Faibussowitsch PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 628d6de1c52SSatish Balay } 629d6de1c52SSatish Balay } 630d6de1c52SSatish Balay } 631f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 632d6de1c52SSatish Balay } 6333a40ed3dSBarry Smith PetscFunctionReturn(0); 634d6de1c52SSatish Balay } 635d6de1c52SSatish Balay 6369371c9d4SSatish Balay PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) { 
637d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 638d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 639d0f46423SBarry Smith PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 640329f5518SBarry Smith PetscReal sum = 0.0; 6413eda8832SBarry Smith MatScalar *v; 642d6de1c52SSatish Balay 643d64ed03dSBarry Smith PetscFunctionBegin; 644d6de1c52SSatish Balay if (baij->size == 1) { 6459566063dSJacob Faibussowitsch PetscCall(MatNorm(baij->A, type, nrm)); 646d6de1c52SSatish Balay } else { 647d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 648d6de1c52SSatish Balay v = amat->a; 6498a62d963SHong Zhang nz = amat->nz * bs2; 6508a62d963SHong Zhang for (i = 0; i < nz; i++) { 6519371c9d4SSatish Balay sum += PetscRealPart(PetscConj(*v) * (*v)); 6529371c9d4SSatish Balay v++; 653d6de1c52SSatish Balay } 654d6de1c52SSatish Balay v = bmat->a; 6558a62d963SHong Zhang nz = bmat->nz * bs2; 6568a62d963SHong Zhang for (i = 0; i < nz; i++) { 6579371c9d4SSatish Balay sum += PetscRealPart(PetscConj(*v) * (*v)); 6589371c9d4SSatish Balay v++; 659d6de1c52SSatish Balay } 6601c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 6618f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 6628a62d963SHong Zhang } else if (type == NORM_1) { /* max column sum */ 6638a62d963SHong Zhang PetscReal *tmp, *tmp2; 664899cda47SBarry Smith PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 6659566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 6669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 6679371c9d4SSatish Balay v = amat->a; 6689371c9d4SSatish Balay jj = amat->j; 6698a62d963SHong Zhang for (i = 0; i < amat->nz; i++) { 6708a62d963SHong Zhang for (j = 0; j < bs; j++) { 6718a62d963SHong Zhang col = bs * (cstart + *jj) + j; /* column index */ 6728a62d963SHong Zhang for (row = 0; row < bs; 
row++) { 6739371c9d4SSatish Balay tmp[col] += PetscAbsScalar(*v); 6749371c9d4SSatish Balay v++; 6758a62d963SHong Zhang } 6768a62d963SHong Zhang } 6778a62d963SHong Zhang jj++; 6788a62d963SHong Zhang } 6799371c9d4SSatish Balay v = bmat->a; 6809371c9d4SSatish Balay jj = bmat->j; 6818a62d963SHong Zhang for (i = 0; i < bmat->nz; i++) { 6828a62d963SHong Zhang for (j = 0; j < bs; j++) { 6838a62d963SHong Zhang col = bs * garray[*jj] + j; 6848a62d963SHong Zhang for (row = 0; row < bs; row++) { 6859371c9d4SSatish Balay tmp[col] += PetscAbsScalar(*v); 6869371c9d4SSatish Balay v++; 6878a62d963SHong Zhang } 6888a62d963SHong Zhang } 6898a62d963SHong Zhang jj++; 6908a62d963SHong Zhang } 6911c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 6928a62d963SHong Zhang *nrm = 0.0; 693d0f46423SBarry Smith for (j = 0; j < mat->cmap->N; j++) { 6948a62d963SHong Zhang if (tmp2[j] > *nrm) *nrm = tmp2[j]; 6958a62d963SHong Zhang } 6969566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp)); 6979566063dSJacob Faibussowitsch PetscCall(PetscFree(tmp2)); 6988a62d963SHong Zhang } else if (type == NORM_INFINITY) { /* max row sum */ 699577dd1f9SKris Buschelman PetscReal *sums; 7009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &sums)); 7018a62d963SHong Zhang sum = 0.0; 7028a62d963SHong Zhang for (j = 0; j < amat->mbs; j++) { 7038a62d963SHong Zhang for (row = 0; row < bs; row++) sums[row] = 0.0; 7048a62d963SHong Zhang v = amat->a + bs2 * amat->i[j]; 7058a62d963SHong Zhang nz = amat->i[j + 1] - amat->i[j]; 7068a62d963SHong Zhang for (i = 0; i < nz; i++) { 7078a62d963SHong Zhang for (col = 0; col < bs; col++) { 7088a62d963SHong Zhang for (row = 0; row < bs; row++) { 7099371c9d4SSatish Balay sums[row] += PetscAbsScalar(*v); 7109371c9d4SSatish Balay v++; 7118a62d963SHong Zhang } 7128a62d963SHong Zhang } 7138a62d963SHong Zhang } 7148a62d963SHong Zhang v = bmat->a + bs2 * bmat->i[j]; 7158a62d963SHong Zhang nz = bmat->i[j + 
1] - bmat->i[j]; 7168a62d963SHong Zhang for (i = 0; i < nz; i++) { 7178a62d963SHong Zhang for (col = 0; col < bs; col++) { 7188a62d963SHong Zhang for (row = 0; row < bs; row++) { 7199371c9d4SSatish Balay sums[row] += PetscAbsScalar(*v); 7209371c9d4SSatish Balay v++; 7218a62d963SHong Zhang } 7228a62d963SHong Zhang } 7238a62d963SHong Zhang } 7248a62d963SHong Zhang for (row = 0; row < bs; row++) { 7258a62d963SHong Zhang if (sums[row] > sum) sum = sums[row]; 7268a62d963SHong Zhang } 7278a62d963SHong Zhang } 7281c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 7299566063dSJacob Faibussowitsch PetscCall(PetscFree(sums)); 730ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 731d64ed03dSBarry Smith } 7323a40ed3dSBarry Smith PetscFunctionReturn(0); 733d6de1c52SSatish Balay } 73457b952d6SSatish Balay 735fef45726SSatish Balay /* 736fef45726SSatish Balay Creates the hash table, and sets the table 737fef45726SSatish Balay This table is created only once. 738fef45726SSatish Balay If new entried need to be added to the matrix 739fef45726SSatish Balay then the hash table has to be destroyed and 740fef45726SSatish Balay recreated. 
741fef45726SSatish Balay */ 7429371c9d4SSatish Balay PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) { 743596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 744596b8d2eSBarry Smith Mat A = baij->A, B = baij->B; 745596b8d2eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 746b24ad042SBarry Smith PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 747fca92195SBarry Smith PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 748899cda47SBarry Smith PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 749b24ad042SBarry Smith PetscInt *HT, key; 7503eda8832SBarry Smith MatScalar **HD; 751329f5518SBarry Smith PetscReal tmp; 7526cf91177SBarry Smith #if defined(PETSC_USE_INFO) 753b24ad042SBarry Smith PetscInt ct = 0, max = 0; 7544a15367fSSatish Balay #endif 755fef45726SSatish Balay 756d64ed03dSBarry Smith PetscFunctionBegin; 757fca92195SBarry Smith if (baij->ht) PetscFunctionReturn(0); 758fef45726SSatish Balay 759fca92195SBarry Smith baij->ht_size = (PetscInt)(factor * nz); 760fca92195SBarry Smith ht_size = baij->ht_size; 7610bdbc534SSatish Balay 762fef45726SSatish Balay /* Allocate Memory for Hash Table */ 7639566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 764b9e4cc15SSatish Balay HD = baij->hd; 765a07cd24cSSatish Balay HT = baij->ht; 766b9e4cc15SSatish Balay 767596b8d2eSBarry Smith /* Loop Over A */ 7680bdbc534SSatish Balay for (i = 0; i < a->mbs; i++) { 769596b8d2eSBarry Smith for (j = ai[i]; j < ai[i + 1]; j++) { 7700bdbc534SSatish Balay row = i + rstart; 7710bdbc534SSatish Balay col = aj[j] + cstart; 772596b8d2eSBarry Smith 773187ce0cbSSatish Balay key = row * Nbs + col + 1; 774fca92195SBarry Smith h1 = HASH(ht_size, key, tmp); 775fca92195SBarry Smith for (k = 0; k < ht_size; k++) { 776fca92195SBarry Smith if (!HT[(h1 + k) % ht_size]) { 777fca92195SBarry Smith HT[(h1 + k) 
% ht_size] = key; 778fca92195SBarry Smith HD[(h1 + k) % ht_size] = a->a + j * bs2; 779596b8d2eSBarry Smith break; 7806cf91177SBarry Smith #if defined(PETSC_USE_INFO) 781187ce0cbSSatish Balay } else { 782187ce0cbSSatish Balay ct++; 783187ce0cbSSatish Balay #endif 784596b8d2eSBarry Smith } 785187ce0cbSSatish Balay } 7866cf91177SBarry Smith #if defined(PETSC_USE_INFO) 787187ce0cbSSatish Balay if (k > max) max = k; 788187ce0cbSSatish Balay #endif 789596b8d2eSBarry Smith } 790596b8d2eSBarry Smith } 791596b8d2eSBarry Smith /* Loop Over B */ 7920bdbc534SSatish Balay for (i = 0; i < b->mbs; i++) { 793596b8d2eSBarry Smith for (j = bi[i]; j < bi[i + 1]; j++) { 7940bdbc534SSatish Balay row = i + rstart; 7950bdbc534SSatish Balay col = garray[bj[j]]; 796187ce0cbSSatish Balay key = row * Nbs + col + 1; 797fca92195SBarry Smith h1 = HASH(ht_size, key, tmp); 798fca92195SBarry Smith for (k = 0; k < ht_size; k++) { 799fca92195SBarry Smith if (!HT[(h1 + k) % ht_size]) { 800fca92195SBarry Smith HT[(h1 + k) % ht_size] = key; 801fca92195SBarry Smith HD[(h1 + k) % ht_size] = b->a + j * bs2; 802596b8d2eSBarry Smith break; 8036cf91177SBarry Smith #if defined(PETSC_USE_INFO) 804187ce0cbSSatish Balay } else { 805187ce0cbSSatish Balay ct++; 806187ce0cbSSatish Balay #endif 807596b8d2eSBarry Smith } 808187ce0cbSSatish Balay } 8096cf91177SBarry Smith #if defined(PETSC_USE_INFO) 810187ce0cbSSatish Balay if (k > max) max = k; 811187ce0cbSSatish Balay #endif 812596b8d2eSBarry Smith } 813596b8d2eSBarry Smith } 814596b8d2eSBarry Smith 815596b8d2eSBarry Smith /* Print Summary */ 8166cf91177SBarry Smith #if defined(PETSC_USE_INFO) 817fca92195SBarry Smith for (i = 0, j = 0; i < ht_size; i++) { 81826fbe8dcSKarl Rupp if (HT[i]) j++; 819c38d4ed2SBarry Smith } 8209566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 
(double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 821187ce0cbSSatish Balay #endif 8223a40ed3dSBarry Smith PetscFunctionReturn(0); 823596b8d2eSBarry Smith } 82457b952d6SSatish Balay 8259371c9d4SSatish Balay PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) { 826bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 827b24ad042SBarry Smith PetscInt nstash, reallocs; 828bbb85fb3SSatish Balay 829bbb85fb3SSatish Balay PetscFunctionBegin; 83026fbe8dcSKarl Rupp if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 831bbb85fb3SSatish Balay 8329566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 8339566063dSJacob Faibussowitsch PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 8349566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 8359566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 8369566063dSJacob Faibussowitsch PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 8379566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 838bbb85fb3SSatish Balay PetscFunctionReturn(0); 839bbb85fb3SSatish Balay } 840bbb85fb3SSatish Balay 8419371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) { 842bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 84391c97fd4SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 844b24ad042SBarry Smith PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 845e44c0bd4SBarry Smith PetscInt *row, *col; 846ace3abfcSBarry Smith PetscBool r1, r2, r3, other_disassembled; 8473eda8832SBarry Smith MatScalar *val; 848b24ad042SBarry Smith PetscMPIInt n; 849bbb85fb3SSatish Balay 850bbb85fb3SSatish Balay 
PetscFunctionBegin; 8515fd66863SKarl Rupp /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 8524cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 853a2d1c673SSatish Balay while (1) { 8549566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 855a2d1c673SSatish Balay if (!flg) break; 856a2d1c673SSatish Balay 857bbb85fb3SSatish Balay for (i = 0; i < n;) { 858bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 85926fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 86026fbe8dcSKarl Rupp if (row[j] != rstart) break; 86126fbe8dcSKarl Rupp } 862bbb85fb3SSatish Balay if (j < n) ncols = j - i; 863bbb85fb3SSatish Balay else ncols = n - i; 864bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 8659566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 866bbb85fb3SSatish Balay i = j; 867bbb85fb3SSatish Balay } 868bbb85fb3SSatish Balay } 8699566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->stash)); 870a2d1c673SSatish Balay /* Now process the block-stash. 
Since the values are stashed column-oriented, 871a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 872a2d1c673SSatish Balay restore the original flags */ 873a2d1c673SSatish Balay r1 = baij->roworiented; 874a2d1c673SSatish Balay r2 = a->roworiented; 87591c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 87626fbe8dcSKarl Rupp 8777c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 8787c922b88SBarry Smith a->roworiented = PETSC_FALSE; 87926fbe8dcSKarl Rupp 88091c97fd4SSatish Balay (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 881a2d1c673SSatish Balay while (1) { 8829566063dSJacob Faibussowitsch PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 883a2d1c673SSatish Balay if (!flg) break; 884a2d1c673SSatish Balay 885a2d1c673SSatish Balay for (i = 0; i < n;) { 886a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 88726fbe8dcSKarl Rupp for (j = i, rstart = row[j]; j < n; j++) { 88826fbe8dcSKarl Rupp if (row[j] != rstart) break; 88926fbe8dcSKarl Rupp } 890a2d1c673SSatish Balay if (j < n) ncols = j - i; 891a2d1c673SSatish Balay else ncols = n - i; 8929566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 893a2d1c673SSatish Balay i = j; 894a2d1c673SSatish Balay } 895a2d1c673SSatish Balay } 8969566063dSJacob Faibussowitsch PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 89726fbe8dcSKarl Rupp 898a2d1c673SSatish Balay baij->roworiented = r1; 899a2d1c673SSatish Balay a->roworiented = r2; 90026fbe8dcSKarl Rupp 90191c97fd4SSatish Balay ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */ 902bbb85fb3SSatish Balay } 903bbb85fb3SSatish Balay 9049566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->A, mode)); 9059566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->A, mode)); 906bbb85fb3SSatish Balay 
907bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 9086aad120cSJose E. Roman also disassemble ourselves, in order that we may reassemble. */ 909bbb85fb3SSatish Balay /* 910bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 911bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 912bbb85fb3SSatish Balay */ 913bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 9145f9db2b2SJunchao Zhang PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 91548a46eb9SPierre Jolivet if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 916bbb85fb3SSatish Balay } 917bbb85fb3SSatish Balay 91848a46eb9SPierre Jolivet if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 9199566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(baij->B, mode)); 9209566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(baij->B, mode)); 921bbb85fb3SSatish Balay 9226cf91177SBarry Smith #if defined(PETSC_USE_INFO) 923bbb85fb3SSatish Balay if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 9249566063dSJacob Faibussowitsch PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 92526fbe8dcSKarl Rupp 926bbb85fb3SSatish Balay baij->ht_total_ct = 0; 927bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 928bbb85fb3SSatish Balay } 929bbb85fb3SSatish Balay #endif 930bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 9319566063dSJacob Faibussowitsch PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 93226fbe8dcSKarl Rupp 933bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 934bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 935bbb85fb3SSatish Balay } 936bbb85fb3SSatish Balay 
9379566063dSJacob Faibussowitsch PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 93826fbe8dcSKarl Rupp 939f4259b30SLisandro Dalcin baij->rowvalues = NULL; 9404f9cfa9eSBarry Smith 9414f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 9424f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 943e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 9441c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 945e56f5c9eSBarry Smith } 946bbb85fb3SSatish Balay PetscFunctionReturn(0); 947bbb85fb3SSatish Balay } 94857b952d6SSatish Balay 9497da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 9509804daf3SBarry Smith #include <petscdraw.h> 9519371c9d4SSatish Balay static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) { 95257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 9537da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 954d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 955ace3abfcSBarry Smith PetscBool iascii, isdraw; 956b0a32e0cSBarry Smith PetscViewer sviewer; 957f3ef73ceSBarry Smith PetscViewerFormat format; 95857b952d6SSatish Balay 959d64ed03dSBarry Smith PetscFunctionBegin; 9609566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 9619566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 96232077d6dSBarry Smith if (iascii) { 9639566063dSJacob Faibussowitsch PetscCall(PetscViewerGetFormat(viewer, &format)); 964456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 9654e220ebcSLois Curfman McInnes MatInfo info; 9669566063dSJacob Faibussowitsch 
PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 9679566063dSJacob Faibussowitsch PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 9689566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 9699371c9d4SSatish Balay PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 9709371c9d4SSatish Balay mat->rmap->bs, (double)info.memory)); 9719566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 9729566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 9739566063dSJacob Faibussowitsch PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 9749566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 9759566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 9769566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 9779566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 9789566063dSJacob Faibussowitsch PetscCall(VecScatterView(baij->Mvctx, viewer)); 9793a40ed3dSBarry Smith PetscFunctionReturn(0); 980fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 9819566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 9823a40ed3dSBarry Smith PetscFunctionReturn(0); 98304929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 98404929863SHong Zhang PetscFunctionReturn(0); 98557b952d6SSatish Balay } 98657b952d6SSatish Balay } 98757b952d6SSatish Balay 9880f5bd95cSBarry Smith if (isdraw) { 989b0a32e0cSBarry Smith 
PetscDraw draw; 990ace3abfcSBarry Smith PetscBool isnull; 9919566063dSJacob Faibussowitsch PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 9929566063dSJacob Faibussowitsch PetscCall(PetscDrawIsNull(draw, &isnull)); 99345f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 99457b952d6SSatish Balay } 99557b952d6SSatish Balay 9967da1fb6eSBarry Smith { 99757b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 99857b952d6SSatish Balay Mat A; 99957b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1000d0f46423SBarry Smith PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 10013eda8832SBarry Smith MatScalar *a; 10023e219373SBarry Smith const char *matname; 100357b952d6SSatish Balay 1004f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1005f204ca49SKris Buschelman /* Perhaps this should be the type of mat? */ 10069566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1007dd400576SPatrick Sanan if (rank == 0) { 10089566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, M, N, M, N)); 1009d64ed03dSBarry Smith } else { 10109566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, 0, 0, M, N)); 101157b952d6SSatish Balay } 10129566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATMPIBAIJ)); 10139566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 10149566063dSJacob Faibussowitsch PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 101557b952d6SSatish Balay 101657b952d6SSatish Balay /* copy over the A part */ 101757b952d6SSatish Balay Aloc = (Mat_SeqBAIJ *)baij->A->data; 10189371c9d4SSatish Balay ai = Aloc->i; 10199371c9d4SSatish Balay aj = Aloc->j; 10209371c9d4SSatish Balay a = Aloc->a; 10219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &rvals)); 102257b952d6SSatish Balay 102357b952d6SSatish Balay for (i = 0; i < mbs; i++) { 1024899cda47SBarry 
Smith rvals[0] = bs * (baij->rstartbs + i); 102526fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 102657b952d6SSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 1027899cda47SBarry Smith col = (baij->cstartbs + aj[j]) * bs; 102857b952d6SSatish Balay for (k = 0; k < bs; k++) { 10299566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 10309371c9d4SSatish Balay col++; 10319371c9d4SSatish Balay a += bs; 103257b952d6SSatish Balay } 103357b952d6SSatish Balay } 103457b952d6SSatish Balay } 103557b952d6SSatish Balay /* copy over the B part */ 103657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ *)baij->B->data; 10379371c9d4SSatish Balay ai = Aloc->i; 10389371c9d4SSatish Balay aj = Aloc->j; 10399371c9d4SSatish Balay a = Aloc->a; 104057b952d6SSatish Balay for (i = 0; i < mbs; i++) { 1041899cda47SBarry Smith rvals[0] = bs * (baij->rstartbs + i); 104226fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 104357b952d6SSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 104457b952d6SSatish Balay col = baij->garray[aj[j]] * bs; 104557b952d6SSatish Balay for (k = 0; k < bs; k++) { 10469566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 10479371c9d4SSatish Balay col++; 10489371c9d4SSatish Balay a += bs; 104957b952d6SSatish Balay } 105057b952d6SSatish Balay } 105157b952d6SSatish Balay } 10529566063dSJacob Faibussowitsch PetscCall(PetscFree(rvals)); 10539566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 10549566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 105555843e3eSBarry Smith /* 105655843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1057b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 105855843e3eSBarry Smith */ 10599566063dSJacob Faibussowitsch PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, 
&sviewer)); 10609566063dSJacob Faibussowitsch PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1061dd400576SPatrick Sanan if (rank == 0) { 10629566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname)); 10639566063dSJacob Faibussowitsch PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer)); 106457b952d6SSatish Balay } 10659566063dSJacob Faibussowitsch PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 10669566063dSJacob Faibussowitsch PetscCall(PetscViewerFlush(viewer)); 10679566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 106857b952d6SSatish Balay } 10693a40ed3dSBarry Smith PetscFunctionReturn(0); 107057b952d6SSatish Balay } 107157b952d6SSatish Balay 1072618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 10739371c9d4SSatish Balay PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) { 1074b51a4376SLisandro Dalcin Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1075b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1076b51a4376SLisandro Dalcin Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1077b51a4376SLisandro Dalcin const PetscInt *garray = aij->garray; 1078b51a4376SLisandro Dalcin PetscInt header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l; 1079b51a4376SLisandro Dalcin PetscInt *rowlens, *colidxs; 1080b51a4376SLisandro Dalcin PetscScalar *matvals; 1081660746e0SBarry Smith 1082660746e0SBarry Smith PetscFunctionBegin; 10839566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 1084b51a4376SLisandro Dalcin 1085b51a4376SLisandro Dalcin M = mat->rmap->N; 1086b51a4376SLisandro Dalcin N = mat->cmap->N; 1087b51a4376SLisandro Dalcin m = mat->rmap->n; 1088b51a4376SLisandro Dalcin rs = mat->rmap->rstart; 1089b51a4376SLisandro Dalcin cs = mat->cmap->rstart; 1090b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1091b51a4376SLisandro Dalcin nz = bs * bs * (A->nz + B->nz); 1092b51a4376SLisandro Dalcin 
1093b51a4376SLisandro Dalcin /* write matrix header */ 1094660746e0SBarry Smith header[0] = MAT_FILE_CLASSID; 10959371c9d4SSatish Balay header[1] = M; 10969371c9d4SSatish Balay header[2] = N; 10979371c9d4SSatish Balay header[3] = nz; 10989566063dSJacob Faibussowitsch PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 10999566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1100660746e0SBarry Smith 1101b51a4376SLisandro Dalcin /* fill in and store row lengths */ 11029566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m, &rowlens)); 1103b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) 11049371c9d4SSatish Balay for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 11059566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 11069566063dSJacob Faibussowitsch PetscCall(PetscFree(rowlens)); 1107660746e0SBarry Smith 1108b51a4376SLisandro Dalcin /* fill in and store column indices */ 11099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &colidxs)); 1110b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) { 1111b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) { 1112b51a4376SLisandro Dalcin for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1113b51a4376SLisandro Dalcin if (garray[B->j[jb]] > cs / bs) break; 11149371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1115660746e0SBarry Smith } 1116b51a4376SLisandro Dalcin for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 11179371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1118b51a4376SLisandro Dalcin for (; jb < B->i[i + 1]; jb++) 11199371c9d4SSatish Balay for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1120660746e0SBarry Smith } 1121660746e0SBarry Smith } 11225f80ce2aSJacob Faibussowitsch PetscCheck(cnt == nz, PETSC_COMM_SELF, 
PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 11239566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 11249566063dSJacob Faibussowitsch PetscCall(PetscFree(colidxs)); 1125660746e0SBarry Smith 1126b51a4376SLisandro Dalcin /* fill in and store nonzero values */ 11279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nz, &matvals)); 1128b51a4376SLisandro Dalcin for (cnt = 0, i = 0; i < A->mbs; i++) { 1129b51a4376SLisandro Dalcin for (k = 0; k < bs; k++) { 1130b51a4376SLisandro Dalcin for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1131b51a4376SLisandro Dalcin if (garray[B->j[jb]] > cs / bs) break; 11329371c9d4SSatish Balay for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1133660746e0SBarry Smith } 1134b51a4376SLisandro Dalcin for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 11359371c9d4SSatish Balay for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1136b51a4376SLisandro Dalcin for (; jb < B->i[i + 1]; jb++) 11379371c9d4SSatish Balay for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1138660746e0SBarry Smith } 1139b51a4376SLisandro Dalcin } 11409566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 11419566063dSJacob Faibussowitsch PetscCall(PetscFree(matvals)); 1142660746e0SBarry Smith 1143b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 11449566063dSJacob Faibussowitsch PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1145660746e0SBarry Smith PetscFunctionReturn(0); 1146660746e0SBarry Smith } 1147660746e0SBarry Smith 11489371c9d4SSatish Balay PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) { 1149ace3abfcSBarry Smith PetscBool iascii, isdraw, issocket, isbinary; 115057b952d6SSatish Balay 1151d64ed03dSBarry Smith PetscFunctionBegin; 11529566063dSJacob Faibussowitsch 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 11539566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 11549566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 11559566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1156660746e0SBarry Smith if (iascii || isdraw || issocket) { 11579566063dSJacob Faibussowitsch PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 11581baa6e33SBarry Smith } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 11593a40ed3dSBarry Smith PetscFunctionReturn(0); 116057b952d6SSatish Balay } 116157b952d6SSatish Balay 11629371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) { 116379bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 116479bdfe76SSatish Balay 1165d64ed03dSBarry Smith PetscFunctionBegin; 1166aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1167c0aa6a63SJacob Faibussowitsch PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N); 116879bdfe76SSatish Balay #endif 11699566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->stash)); 11709566063dSJacob Faibussowitsch PetscCall(MatStashDestroy_Private(&mat->bstash)); 11719566063dSJacob Faibussowitsch PetscCall(MatDestroy(&baij->A)); 11729566063dSJacob Faibussowitsch PetscCall(MatDestroy(&baij->B)); 1173aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 11749566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&baij->colmap)); 117548e59246SSatish Balay #else 11769566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->colmap)); 117748e59246SSatish Balay #endif 11789566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->garray)); 11799566063dSJacob Faibussowitsch PetscCall(VecDestroy(&baij->lvec)); 11809566063dSJacob Faibussowitsch 
PetscCall(VecScatterDestroy(&baij->Mvctx)); 11819566063dSJacob Faibussowitsch PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 11829566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->barray)); 11839566063dSJacob Faibussowitsch PetscCall(PetscFree2(baij->hd, baij->ht)); 11849566063dSJacob Faibussowitsch PetscCall(PetscFree(baij->rangebs)); 11859566063dSJacob Faibussowitsch PetscCall(PetscFree(mat->data)); 1186901853e0SKris Buschelman 11879566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 11889566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 11899566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 11909566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 11919566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 11929566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 11939566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 11949566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 11952e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 11962e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 11977ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 11989566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 11997ea3e4caSstefano_zampini #endif 12009566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpibaij_is_C", NULL)); 12013a40ed3dSBarry Smith PetscFunctionReturn(0); 120279bdfe76SSatish Balay } 120379bdfe76SSatish Balay 12049371c9d4SSatish Balay PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) { 1205cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1206b24ad042SBarry Smith PetscInt nt; 1207cee3aa6bSSatish Balay 1208d64ed03dSBarry Smith PetscFunctionBegin; 12099566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(xx, &nt)); 12105f80ce2aSJacob Faibussowitsch PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 12119566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(yy, &nt)); 12125f80ce2aSJacob Faibussowitsch PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible parition of A and yy"); 12139566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 12149566063dSJacob Faibussowitsch PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 12159566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 12169566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 12173a40ed3dSBarry Smith PetscFunctionReturn(0); 1218cee3aa6bSSatish Balay } 1219cee3aa6bSSatish Balay 12209371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 1221cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1222d64ed03dSBarry Smith 1223d64ed03dSBarry Smith PetscFunctionBegin; 12249566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 12259566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 12269566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 12279566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 12283a40ed3dSBarry Smith 
PetscFunctionReturn(0); 1229cee3aa6bSSatish Balay } 1230cee3aa6bSSatish Balay 12319371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) { 1232cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1233cee3aa6bSSatish Balay 1234d64ed03dSBarry Smith PetscFunctionBegin; 1235cee3aa6bSSatish Balay /* do nondiagonal part */ 12369566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1237cee3aa6bSSatish Balay /* do local part */ 12389566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1239e4a140f6SJunchao Zhang /* add partial results together */ 12409566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 12419566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 12423a40ed3dSBarry Smith PetscFunctionReturn(0); 1243cee3aa6bSSatish Balay } 1244cee3aa6bSSatish Balay 12459371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 1246cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1247cee3aa6bSSatish Balay 1248d64ed03dSBarry Smith PetscFunctionBegin; 1249cee3aa6bSSatish Balay /* do nondiagonal part */ 12509566063dSJacob Faibussowitsch PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1251cee3aa6bSSatish Balay /* do local part */ 12529566063dSJacob Faibussowitsch PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1253e4a140f6SJunchao Zhang /* add partial results together */ 12549566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 12559566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 12563a40ed3dSBarry Smith PetscFunctionReturn(0); 1257cee3aa6bSSatish Balay } 1258cee3aa6bSSatish Balay 1259cee3aa6bSSatish Balay /* 1260cee3aa6bSSatish Balay This only works correctly for square matrices 
where the subblock A->A is the 1261cee3aa6bSSatish Balay diagonal block 1262cee3aa6bSSatish Balay */ 12639371c9d4SSatish Balay PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) { 1264d64ed03dSBarry Smith PetscFunctionBegin; 12655f80ce2aSJacob Faibussowitsch PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 12669566063dSJacob Faibussowitsch PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 12673a40ed3dSBarry Smith PetscFunctionReturn(0); 1268cee3aa6bSSatish Balay } 1269cee3aa6bSSatish Balay 12709371c9d4SSatish Balay PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) { 1271cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1272d64ed03dSBarry Smith 1273d64ed03dSBarry Smith PetscFunctionBegin; 12749566063dSJacob Faibussowitsch PetscCall(MatScale(a->A, aa)); 12759566063dSJacob Faibussowitsch PetscCall(MatScale(a->B, aa)); 12763a40ed3dSBarry Smith PetscFunctionReturn(0); 1277cee3aa6bSSatish Balay } 1278026e39d0SSatish Balay 12799371c9d4SSatish Balay PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1280acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 128187828ca2SBarry Smith PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1282d0f46423SBarry Smith PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1283d0f46423SBarry Smith PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1284899cda47SBarry Smith PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1285acdf5bf4SSatish Balay 1286d64ed03dSBarry Smith PetscFunctionBegin; 12875f80ce2aSJacob Faibussowitsch PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 12885f80ce2aSJacob Faibussowitsch PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1289acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 
1290acdf5bf4SSatish Balay 1291acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1292acdf5bf4SSatish Balay /* 1293acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1294acdf5bf4SSatish Balay */ 1295acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1296b24ad042SBarry Smith PetscInt max = 1, mbs = mat->mbs, tmp; 1297bd16c2feSSatish Balay for (i = 0; i < mbs; i++) { 1298acdf5bf4SSatish Balay tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 129926fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1300acdf5bf4SSatish Balay } 13019566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1302acdf5bf4SSatish Balay } 1303d9d09a02SSatish Balay lrow = row - brstart; 1304acdf5bf4SSatish Balay 13059371c9d4SSatish Balay pvA = &vworkA; 13069371c9d4SSatish Balay pcA = &cworkA; 13079371c9d4SSatish Balay pvB = &vworkB; 13089371c9d4SSatish Balay pcB = &cworkB; 13099371c9d4SSatish Balay if (!v) { 13109371c9d4SSatish Balay pvA = NULL; 13119371c9d4SSatish Balay pvB = NULL; 13129371c9d4SSatish Balay } 13139371c9d4SSatish Balay if (!idx) { 13149371c9d4SSatish Balay pcA = NULL; 13159371c9d4SSatish Balay if (!v) pcB = NULL; 13169371c9d4SSatish Balay } 13179566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 13189566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1319acdf5bf4SSatish Balay nztot = nzA + nzB; 1320acdf5bf4SSatish Balay 1321acdf5bf4SSatish Balay cmap = mat->garray; 1322acdf5bf4SSatish Balay if (v || idx) { 1323acdf5bf4SSatish Balay if (nztot) { 1324acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1325b24ad042SBarry Smith PetscInt imark = -1; 1326acdf5bf4SSatish Balay if (v) { 1327acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1328acdf5bf4SSatish Balay for (i = 0; i < nzB; i++) { 1329d9d09a02SSatish 
Balay if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1330acdf5bf4SSatish Balay else break; 1331acdf5bf4SSatish Balay } 1332acdf5bf4SSatish Balay imark = i; 1333acdf5bf4SSatish Balay for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1334acdf5bf4SSatish Balay for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1335acdf5bf4SSatish Balay } 1336acdf5bf4SSatish Balay if (idx) { 1337acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1338acdf5bf4SSatish Balay if (imark > -1) { 1339ad540459SPierre Jolivet for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1340acdf5bf4SSatish Balay } else { 1341acdf5bf4SSatish Balay for (i = 0; i < nzB; i++) { 134226fbe8dcSKarl Rupp if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1343acdf5bf4SSatish Balay else break; 1344acdf5bf4SSatish Balay } 1345acdf5bf4SSatish Balay imark = i; 1346acdf5bf4SSatish Balay } 1347d9d09a02SSatish Balay for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1348d9d09a02SSatish Balay for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1349acdf5bf4SSatish Balay } 1350d64ed03dSBarry Smith } else { 1351f4259b30SLisandro Dalcin if (idx) *idx = NULL; 1352f4259b30SLisandro Dalcin if (v) *v = NULL; 1353d212a18eSSatish Balay } 1354acdf5bf4SSatish Balay } 1355acdf5bf4SSatish Balay *nz = nztot; 13569566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 13579566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 13583a40ed3dSBarry Smith PetscFunctionReturn(0); 1359acdf5bf4SSatish Balay } 1360acdf5bf4SSatish Balay 13619371c9d4SSatish Balay PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1362acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1363d64ed03dSBarry Smith 1364d64ed03dSBarry Smith PetscFunctionBegin; 
13655f80ce2aSJacob Faibussowitsch PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1366acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 13673a40ed3dSBarry Smith PetscFunctionReturn(0); 1368acdf5bf4SSatish Balay } 1369acdf5bf4SSatish Balay 13709371c9d4SSatish Balay PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) { 137158667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1372d64ed03dSBarry Smith 1373d64ed03dSBarry Smith PetscFunctionBegin; 13749566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->A)); 13759566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(l->B)); 13763a40ed3dSBarry Smith PetscFunctionReturn(0); 137758667388SSatish Balay } 13780ac07820SSatish Balay 13799371c9d4SSatish Balay PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) { 13804e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 13814e220ebcSLois Curfman McInnes Mat A = a->A, B = a->B; 13823966268fSBarry Smith PetscLogDouble isend[5], irecv[5]; 13830ac07820SSatish Balay 1384d64ed03dSBarry Smith PetscFunctionBegin; 1385d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 138626fbe8dcSKarl Rupp 13879566063dSJacob Faibussowitsch PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 138826fbe8dcSKarl Rupp 13899371c9d4SSatish Balay isend[0] = info->nz_used; 13909371c9d4SSatish Balay isend[1] = info->nz_allocated; 13919371c9d4SSatish Balay isend[2] = info->nz_unneeded; 13929371c9d4SSatish Balay isend[3] = info->memory; 13939371c9d4SSatish Balay isend[4] = info->mallocs; 139426fbe8dcSKarl Rupp 13959566063dSJacob Faibussowitsch PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 139626fbe8dcSKarl Rupp 13979371c9d4SSatish Balay isend[0] += info->nz_used; 13989371c9d4SSatish Balay isend[1] += info->nz_allocated; 13999371c9d4SSatish Balay isend[2] += info->nz_unneeded; 14009371c9d4SSatish Balay isend[3] += info->memory; 14019371c9d4SSatish Balay isend[4] += info->mallocs; 140226fbe8dcSKarl Rupp 
14030ac07820SSatish Balay if (flag == MAT_LOCAL) { 14044e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14054e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14064e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14074e220ebcSLois Curfman McInnes info->memory = isend[3]; 14084e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14090ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 14101c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 141126fbe8dcSKarl Rupp 14124e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14134e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14144e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14154e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14164e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14170ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 14181c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 141926fbe8dcSKarl Rupp 14204e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14214e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14224e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14234e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14244e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 142598921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 14264e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 14274e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14284e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14293a40ed3dSBarry Smith PetscFunctionReturn(0); 14300ac07820SSatish Balay } 14310ac07820SSatish Balay 14329371c9d4SSatish Balay PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) { 
143358667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 143458667388SSatish Balay 1435d64ed03dSBarry Smith PetscFunctionBegin; 143612c028f9SKris Buschelman switch (op) { 1437512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 143812c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 143928b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1440a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 144112c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 144243674050SBarry Smith MatCheckPreallocated(A, 1); 14439566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 14449566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 144512c028f9SKris Buschelman break; 144612c028f9SKris Buschelman case MAT_ROW_ORIENTED: 144743674050SBarry Smith MatCheckPreallocated(A, 1); 14484e0d8c25SBarry Smith a->roworiented = flg; 144926fbe8dcSKarl Rupp 14509566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->A, op, flg)); 14519566063dSJacob Faibussowitsch PetscCall(MatSetOption(a->B, op, flg)); 145212c028f9SKris Buschelman break; 14538c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 14549371c9d4SSatish Balay case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 14559371c9d4SSatish Balay case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break; 145612c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 14574e0d8c25SBarry Smith a->ht_flag = flg; 1458abf3b562SBarry Smith a->ht_fact = 1.39; 145912c028f9SKris Buschelman break; 146077e54ba9SKris Buschelman case MAT_SYMMETRIC: 146177e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 14622188ac68SBarry Smith case MAT_HERMITIAN: 1463c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 14642188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 1465b94d7dedSBarry Smith case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1466b94d7dedSBarry Smith case MAT_SPD_ETERNAL: 1467b94d7dedSBarry Smith /* if the diagonal matrix is square it inherits some of the properties above 
*/ 146877e54ba9SKris Buschelman break; 14699371c9d4SSatish Balay default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1470d64ed03dSBarry Smith } 14713a40ed3dSBarry Smith PetscFunctionReturn(0); 147258667388SSatish Balay } 147358667388SSatish Balay 14749371c9d4SSatish Balay PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) { 14750ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 14760ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 14770ac07820SSatish Balay Mat B; 1478d0f46423SBarry Smith PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1479d0f46423SBarry Smith PetscInt bs = A->rmap->bs, mbs = baij->mbs; 14803eda8832SBarry Smith MatScalar *a; 14810ac07820SSatish Balay 1482d64ed03dSBarry Smith PetscFunctionBegin; 14837fb60732SBarry Smith if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1484cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 14859566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 14869566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 14879566063dSJacob Faibussowitsch PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 14882e72b8d9SBarry Smith /* Do not know preallocation information, but must set block size */ 14899566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1490fc4dec0aSBarry Smith } else { 1491fc4dec0aSBarry Smith B = *matout; 1492fc4dec0aSBarry Smith } 14930ac07820SSatish Balay 14940ac07820SSatish Balay /* copy over the A part */ 14950ac07820SSatish Balay Aloc = (Mat_SeqBAIJ *)baij->A->data; 14969371c9d4SSatish Balay ai = Aloc->i; 14979371c9d4SSatish Balay aj = Aloc->j; 14989371c9d4SSatish Balay a = Aloc->a; 14999566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs, &rvals)); 15000ac07820SSatish Balay 15010ac07820SSatish Balay 
for (i = 0; i < mbs; i++) { 1502899cda47SBarry Smith rvals[0] = bs * (baij->rstartbs + i); 150326fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 15040ac07820SSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 1505899cda47SBarry Smith col = (baij->cstartbs + aj[j]) * bs; 15060ac07820SSatish Balay for (k = 0; k < bs; k++) { 15079566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 150826fbe8dcSKarl Rupp 15099371c9d4SSatish Balay col++; 15109371c9d4SSatish Balay a += bs; 15110ac07820SSatish Balay } 15120ac07820SSatish Balay } 15130ac07820SSatish Balay } 15140ac07820SSatish Balay /* copy over the B part */ 15150ac07820SSatish Balay Aloc = (Mat_SeqBAIJ *)baij->B->data; 15169371c9d4SSatish Balay ai = Aloc->i; 15179371c9d4SSatish Balay aj = Aloc->j; 15189371c9d4SSatish Balay a = Aloc->a; 15190ac07820SSatish Balay for (i = 0; i < mbs; i++) { 1520899cda47SBarry Smith rvals[0] = bs * (baij->rstartbs + i); 152126fbe8dcSKarl Rupp for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 15220ac07820SSatish Balay for (j = ai[i]; j < ai[i + 1]; j++) { 15230ac07820SSatish Balay col = baij->garray[aj[j]] * bs; 15240ac07820SSatish Balay for (k = 0; k < bs; k++) { 15259566063dSJacob Faibussowitsch PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 152626fbe8dcSKarl Rupp col++; 152726fbe8dcSKarl Rupp a += bs; 15280ac07820SSatish Balay } 15290ac07820SSatish Balay } 15300ac07820SSatish Balay } 15319566063dSJacob Faibussowitsch PetscCall(PetscFree(rvals)); 15329566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 15339566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 15340ac07820SSatish Balay 1535cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 153648a46eb9SPierre Jolivet else PetscCall(MatHeaderMerge(A, &B)); 15373a40ed3dSBarry Smith PetscFunctionReturn(0); 15380ac07820SSatish Balay } 
15390e95ebc0SSatish Balay 15409371c9d4SSatish Balay PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) { 154136c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 154236c4a09eSSatish Balay Mat a = baij->A, b = baij->B; 1543b24ad042SBarry Smith PetscInt s1, s2, s3; 15440e95ebc0SSatish Balay 1545d64ed03dSBarry Smith PetscFunctionBegin; 15469566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &s2, &s3)); 154736c4a09eSSatish Balay if (rr) { 15489566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(rr, &s1)); 15495f80ce2aSJacob Faibussowitsch PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 155036c4a09eSSatish Balay /* Overlap communication with computation. */ 15519566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 155236c4a09eSSatish Balay } 15530e95ebc0SSatish Balay if (ll) { 15549566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(ll, &s1)); 15555f80ce2aSJacob Faibussowitsch PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1556dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, ll, NULL); 15570e95ebc0SSatish Balay } 155836c4a09eSSatish Balay /* scale the diagonal block */ 1559dbbe0bcdSBarry Smith PetscUseTypeMethod(a, diagonalscale, ll, rr); 156036c4a09eSSatish Balay 156136c4a09eSSatish Balay if (rr) { 156236c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 15639566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1564dbbe0bcdSBarry Smith PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 156536c4a09eSSatish Balay } 15663a40ed3dSBarry Smith PetscFunctionReturn(0); 15670e95ebc0SSatish Balay } 15680e95ebc0SSatish Balay 15699371c9d4SSatish Balay PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 
15700ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 157165a92638SMatthew G. Knepley PetscInt *lrows; 15726e520ac8SStefano Zampini PetscInt r, len; 157394342113SStefano Zampini PetscBool cong; 15740ac07820SSatish Balay 1575d64ed03dSBarry Smith PetscFunctionBegin; 15766e520ac8SStefano Zampini /* get locally owned rows */ 15779566063dSJacob Faibussowitsch PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 157897b48c8fSBarry Smith /* fix right hand side if needed */ 157997b48c8fSBarry Smith if (x && b) { 158065a92638SMatthew G. Knepley const PetscScalar *xx; 158165a92638SMatthew G. Knepley PetscScalar *bb; 158265a92638SMatthew G. Knepley 15839566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &xx)); 15849566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 158565a92638SMatthew G. Knepley for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 15869566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &xx)); 15879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 158897b48c8fSBarry Smith } 158997b48c8fSBarry Smith 15900ac07820SSatish Balay /* actually zap the local rows */ 159172dacd9aSBarry Smith /* 159272dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 1593a8c7a070SBarry Smith is square and the user wishes to set the diagonal we use separate 159472dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 159572dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 
159672dacd9aSBarry Smith 159772dacd9aSBarry Smith */ 15989c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 15999566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 16009566063dSJacob Faibussowitsch PetscCall(MatHasCongruentLayouts(A, &cong)); 160194342113SStefano Zampini if ((diag != 0.0) && cong) { 16029566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1603f4df32b1SMatthew Knepley } else if (diag != 0.0) { 16049566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 16055f80ce2aSJacob Faibussowitsch PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1606512a5fc5SBarry Smith MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 160765a92638SMatthew G. Knepley for (r = 0; r < len; ++r) { 160865a92638SMatthew G. 
Knepley const PetscInt row = lrows[r] + A->rmap->rstart; 16099566063dSJacob Faibussowitsch PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1610a07cd24cSSatish Balay } 16119566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 16129566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 16139c957beeSSatish Balay } else { 16149566063dSJacob Faibussowitsch PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1615a07cd24cSSatish Balay } 16169566063dSJacob Faibussowitsch PetscCall(PetscFree(lrows)); 16174f9cfa9eSBarry Smith 16184f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 16194f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1620e56f5c9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 16211c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1622e56f5c9eSBarry Smith } 16233a40ed3dSBarry Smith PetscFunctionReturn(0); 16240ac07820SSatish Balay } 162572dacd9aSBarry Smith 16269371c9d4SSatish Balay PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) { 16276f0a72daSMatthew G. Knepley Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1628131c27b5Sprj- PetscMPIInt n = A->rmap->n, p = 0; 1629131c27b5Sprj- PetscInt i, j, k, r, len = 0, row, col, count; 16306f0a72daSMatthew G. Knepley PetscInt *lrows, *owners = A->rmap->range; 16316f0a72daSMatthew G. Knepley PetscSFNode *rrows; 16326f0a72daSMatthew G. Knepley PetscSF sf; 16336f0a72daSMatthew G. Knepley const PetscScalar *xx; 16346f0a72daSMatthew G. Knepley PetscScalar *bb, *mask; 16356f0a72daSMatthew G. Knepley Vec xmask, lmask; 16366f0a72daSMatthew G. Knepley Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 16376f0a72daSMatthew G. Knepley PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 16386f0a72daSMatthew G. 
Knepley PetscScalar *aa; 16396f0a72daSMatthew G. Knepley 16406f0a72daSMatthew G. Knepley PetscFunctionBegin; 16416f0a72daSMatthew G. Knepley /* Create SF where leaves are input rows and roots are owned rows */ 16429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &lrows)); 16436f0a72daSMatthew G. Knepley for (r = 0; r < n; ++r) lrows[r] = -1; 16449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(N, &rrows)); 16456f0a72daSMatthew G. Knepley for (r = 0; r < N; ++r) { 16466f0a72daSMatthew G. Knepley const PetscInt idx = rows[r]; 16475f80ce2aSJacob Faibussowitsch PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 16485ba17502SJed Brown if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 16499566063dSJacob Faibussowitsch PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 16505ba17502SJed Brown } 16516f0a72daSMatthew G. Knepley rrows[r].rank = p; 16526f0a72daSMatthew G. Knepley rrows[r].index = rows[r] - owners[p]; 16536f0a72daSMatthew G. Knepley } 16549566063dSJacob Faibussowitsch PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 16559566063dSJacob Faibussowitsch PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 16566f0a72daSMatthew G. Knepley /* Collect flags for rows to be zeroed */ 16579566063dSJacob Faibussowitsch PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 16589566063dSJacob Faibussowitsch PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 16599566063dSJacob Faibussowitsch PetscCall(PetscSFDestroy(&sf)); 16606f0a72daSMatthew G. Knepley /* Compress and put in row numbers */ 16619371c9d4SSatish Balay for (r = 0; r < n; ++r) 16629371c9d4SSatish Balay if (lrows[r] >= 0) lrows[len++] = r; 16636f0a72daSMatthew G. 
Knepley /* zero diagonal part of matrix */ 16649566063dSJacob Faibussowitsch PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 16656f0a72daSMatthew G. Knepley /* handle off diagonal part of matrix */ 16669566063dSJacob Faibussowitsch PetscCall(MatCreateVecs(A, &xmask, NULL)); 16679566063dSJacob Faibussowitsch PetscCall(VecDuplicate(l->lvec, &lmask)); 16689566063dSJacob Faibussowitsch PetscCall(VecGetArray(xmask, &bb)); 16696f0a72daSMatthew G. Knepley for (i = 0; i < len; i++) bb[lrows[i]] = 1; 16709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xmask, &bb)); 16719566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 16729566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 16739566063dSJacob Faibussowitsch PetscCall(VecDestroy(&xmask)); 16746f0a72daSMatthew G. Knepley if (x) { 16759566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 16769566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 16779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(l->lvec, &xx)); 16789566063dSJacob Faibussowitsch PetscCall(VecGetArray(b, &bb)); 16796f0a72daSMatthew G. Knepley } 16809566063dSJacob Faibussowitsch PetscCall(VecGetArray(lmask, &mask)); 16816f0a72daSMatthew G. Knepley /* remove zeroed rows of off diagonal matrix */ 16826f0a72daSMatthew G. Knepley for (i = 0; i < len; ++i) { 16836f0a72daSMatthew G. Knepley row = lrows[i]; 16846f0a72daSMatthew G. Knepley count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 16856f0a72daSMatthew G. Knepley aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 16866f0a72daSMatthew G. Knepley for (k = 0; k < count; ++k) { 16876f0a72daSMatthew G. Knepley aa[0] = 0.0; 16886f0a72daSMatthew G. Knepley aa += bs; 16896f0a72daSMatthew G. Knepley } 16906f0a72daSMatthew G. 
Knepley } 16916f0a72daSMatthew G. Knepley /* loop over all elements of off process part of matrix zeroing removed columns*/ 16926f0a72daSMatthew G. Knepley for (i = 0; i < l->B->rmap->N; ++i) { 16936f0a72daSMatthew G. Knepley row = i / bs; 16946f0a72daSMatthew G. Knepley for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 16956f0a72daSMatthew G. Knepley for (k = 0; k < bs; ++k) { 16966f0a72daSMatthew G. Knepley col = bs * baij->j[j] + k; 16976f0a72daSMatthew G. Knepley if (PetscAbsScalar(mask[col])) { 16986f0a72daSMatthew G. Knepley aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 169989ae1891SBarry Smith if (x) bb[i] -= aa[0] * xx[col]; 17006f0a72daSMatthew G. Knepley aa[0] = 0.0; 17016f0a72daSMatthew G. Knepley } 17026f0a72daSMatthew G. Knepley } 17036f0a72daSMatthew G. Knepley } 17046f0a72daSMatthew G. Knepley } 17056f0a72daSMatthew G. Knepley if (x) { 17069566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(b, &bb)); 17079566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 17086f0a72daSMatthew G. Knepley } 17099566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(lmask, &mask)); 17109566063dSJacob Faibussowitsch PetscCall(VecDestroy(&lmask)); 17119566063dSJacob Faibussowitsch PetscCall(PetscFree(lrows)); 17124f9cfa9eSBarry Smith 17134f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 17144f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 17154f9cfa9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 17161c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 17174f9cfa9eSBarry Smith } 17186f0a72daSMatthew G. Knepley PetscFunctionReturn(0); 17196f0a72daSMatthew G. Knepley } 17206f0a72daSMatthew G. 
Knepley 17219371c9d4SSatish Balay PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) { 1722bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1723d64ed03dSBarry Smith 1724d64ed03dSBarry Smith PetscFunctionBegin; 17259566063dSJacob Faibussowitsch PetscCall(MatSetUnfactored(a->A)); 17263a40ed3dSBarry Smith PetscFunctionReturn(0); 1727bb5a7306SBarry Smith } 1728bb5a7306SBarry Smith 17296849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 17300ac07820SSatish Balay 17319371c9d4SSatish Balay PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) { 17327fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 17337fc3c18eSBarry Smith Mat a, b, c, d; 1734ace3abfcSBarry Smith PetscBool flg; 17357fc3c18eSBarry Smith 17367fc3c18eSBarry Smith PetscFunctionBegin; 17379371c9d4SSatish Balay a = matA->A; 17389371c9d4SSatish Balay b = matA->B; 17399371c9d4SSatish Balay c = matB->A; 17409371c9d4SSatish Balay d = matB->B; 17417fc3c18eSBarry Smith 17429566063dSJacob Faibussowitsch PetscCall(MatEqual(a, c, &flg)); 174348a46eb9SPierre Jolivet if (flg) PetscCall(MatEqual(b, d, &flg)); 17441c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 17457fc3c18eSBarry Smith PetscFunctionReturn(0); 17467fc3c18eSBarry Smith } 17477fc3c18eSBarry Smith 17489371c9d4SSatish Balay PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) { 17493c896bc6SHong Zhang Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 17503c896bc6SHong Zhang Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 17513c896bc6SHong Zhang 17523c896bc6SHong Zhang PetscFunctionBegin; 17533c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 17543c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 17559566063dSJacob Faibussowitsch PetscCall(MatCopy_Basic(A, B, str)); 17563c896bc6SHong Zhang } else { 17579566063dSJacob Faibussowitsch PetscCall(MatCopy(a->A, b->A, str)); 17589566063dSJacob Faibussowitsch PetscCall(MatCopy(a->B, b->B, str)); 17593c896bc6SHong Zhang } 17609566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)B)); 17613c896bc6SHong Zhang PetscFunctionReturn(0); 17623c896bc6SHong Zhang } 1763273d9f13SBarry Smith 17649371c9d4SSatish Balay PetscErrorCode MatSetUp_MPIBAIJ(Mat A) { 1765273d9f13SBarry Smith PetscFunctionBegin; 17669566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 1767273d9f13SBarry Smith PetscFunctionReturn(0); 1768273d9f13SBarry Smith } 1769273d9f13SBarry Smith 17709371c9d4SSatish Balay PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) { 1771001ddc4fSHong Zhang PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 17724de5dceeSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 17734de5dceeSHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 17744de5dceeSHong Zhang 17754de5dceeSHong Zhang PetscFunctionBegin; 17769566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 17774de5dceeSHong Zhang PetscFunctionReturn(0); 17784de5dceeSHong Zhang } 17794de5dceeSHong Zhang 17809371c9d4SSatish Balay PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) { 17814fe895cdSHong Zhang Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 17824fe895cdSHong Zhang PetscBLASInt bnz, one = 1; 17834fe895cdSHong Zhang Mat_SeqBAIJ *x, *y; 1784b31f67cfSBarry Smith PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 17854fe895cdSHong Zhang 17864fe895cdSHong Zhang PetscFunctionBegin; 17874fe895cdSHong 
Zhang if (str == SAME_NONZERO_PATTERN) { 17884fe895cdSHong Zhang PetscScalar alpha = a; 17894fe895cdSHong Zhang x = (Mat_SeqBAIJ *)xx->A->data; 17904fe895cdSHong Zhang y = (Mat_SeqBAIJ *)yy->A->data; 17919566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1792792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 17934fe895cdSHong Zhang x = (Mat_SeqBAIJ *)xx->B->data; 17944fe895cdSHong Zhang y = (Mat_SeqBAIJ *)yy->B->data; 17959566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1796792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 17979566063dSJacob Faibussowitsch PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1798ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 17999566063dSJacob Faibussowitsch PetscCall(MatAXPY_Basic(Y, a, X, str)); 18004fe895cdSHong Zhang } else { 18014de5dceeSHong Zhang Mat B; 18024de5dceeSHong Zhang PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 18039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 18049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 18059566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 18069566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 18079566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 18089566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 18099566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIBAIJ)); 18109566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 18119566063dSJacob Faibussowitsch PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 18129566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(B, 
bs, 0, nnz_d, 0, nnz_o)); 18134de5dceeSHong Zhang /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 18149566063dSJacob Faibussowitsch PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 18159566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(Y, &B)); 18169566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_d)); 18179566063dSJacob Faibussowitsch PetscCall(PetscFree(nnz_o)); 18184fe895cdSHong Zhang } 18194fe895cdSHong Zhang PetscFunctionReturn(0); 18204fe895cdSHong Zhang } 18214fe895cdSHong Zhang 18229371c9d4SSatish Balay PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) { 18235f80ce2aSJacob Faibussowitsch PetscFunctionBegin; 18245f80ce2aSJacob Faibussowitsch if (PetscDefined(USE_COMPLEX)) { 18252726fb6dSPierre Jolivet Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 18262726fb6dSPierre Jolivet 18279566063dSJacob Faibussowitsch PetscCall(MatConjugate_SeqBAIJ(a->A)); 18289566063dSJacob Faibussowitsch PetscCall(MatConjugate_SeqBAIJ(a->B)); 18295f80ce2aSJacob Faibussowitsch } 18302726fb6dSPierre Jolivet PetscFunctionReturn(0); 18312726fb6dSPierre Jolivet } 18322726fb6dSPierre Jolivet 18339371c9d4SSatish Balay PetscErrorCode MatRealPart_MPIBAIJ(Mat A) { 183499cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 183599cafbc1SBarry Smith 183699cafbc1SBarry Smith PetscFunctionBegin; 18379566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->A)); 18389566063dSJacob Faibussowitsch PetscCall(MatRealPart(a->B)); 183999cafbc1SBarry Smith PetscFunctionReturn(0); 184099cafbc1SBarry Smith } 184199cafbc1SBarry Smith 18429371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) { 184399cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 184499cafbc1SBarry Smith 184599cafbc1SBarry Smith PetscFunctionBegin; 18469566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->A)); 18479566063dSJacob Faibussowitsch PetscCall(MatImaginaryPart(a->B)); 184899cafbc1SBarry Smith PetscFunctionReturn(0); 184999cafbc1SBarry 
Smith } 185099cafbc1SBarry Smith 18519371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) { 18524aa3045dSJed Brown IS iscol_local; 18534aa3045dSJed Brown PetscInt csize; 18544aa3045dSJed Brown 18554aa3045dSJed Brown PetscFunctionBegin; 18569566063dSJacob Faibussowitsch PetscCall(ISGetLocalSize(iscol, &csize)); 1857b79d0421SJed Brown if (call == MAT_REUSE_MATRIX) { 18589566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 18595f80ce2aSJacob Faibussowitsch PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1860b79d0421SJed Brown } else { 18619566063dSJacob Faibussowitsch PetscCall(ISAllGather(iscol, &iscol_local)); 1862b79d0421SJed Brown } 18639566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1864b79d0421SJed Brown if (call == MAT_INITIAL_MATRIX) { 18659566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 18669566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_local)); 1867b79d0421SJed Brown } 18684aa3045dSJed Brown PetscFunctionReturn(0); 18694aa3045dSJed Brown } 187017df9f7cSHong Zhang 187182094794SBarry Smith /* 187282094794SBarry Smith Not great since it makes two copies of the submatrix, first an SeqBAIJ 187382094794SBarry Smith in local and then by concatenating the local matrices the end result. 18747dae84e0SHong Zhang Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 18758f46ffcaSHong Zhang This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 
187682094794SBarry Smith */ 18779371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) { 187882094794SBarry Smith PetscMPIInt rank, size; 187982094794SBarry Smith PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1880c9ffca76SHong Zhang PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 188129dcf524SDmitry Karpeev Mat M, Mreuse; 188282094794SBarry Smith MatScalar *vwork, *aa; 1883ce94432eSBarry Smith MPI_Comm comm; 188429dcf524SDmitry Karpeev IS isrow_new, iscol_new; 188582094794SBarry Smith Mat_SeqBAIJ *aij; 188682094794SBarry Smith 188782094794SBarry Smith PetscFunctionBegin; 18889566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 18899566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 18909566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 189129dcf524SDmitry Karpeev /* The compression and expansion should be avoided. 
Doesn't point 189229dcf524SDmitry Karpeev out errors, might change the indices, hence buggey */ 18939566063dSJacob Faibussowitsch PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 18949566063dSJacob Faibussowitsch PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 189582094794SBarry Smith 189682094794SBarry Smith if (call == MAT_REUSE_MATRIX) { 18979566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 18985f80ce2aSJacob Faibussowitsch PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 18999566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse)); 190082094794SBarry Smith } else { 19019566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse)); 190282094794SBarry Smith } 19039566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isrow_new)); 19049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&iscol_new)); 190582094794SBarry Smith /* 190682094794SBarry Smith m - number of local rows 190782094794SBarry Smith n - number of columns (same on all processors) 190882094794SBarry Smith rstart - first row in new global matrix generated 190982094794SBarry Smith */ 19109566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(mat, &bs)); 19119566063dSJacob Faibussowitsch PetscCall(MatGetSize(Mreuse, &m, &n)); 191282094794SBarry Smith m = m / bs; 191382094794SBarry Smith n = n / bs; 191482094794SBarry Smith 191582094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 191682094794SBarry Smith aij = (Mat_SeqBAIJ *)(Mreuse)->data; 191782094794SBarry Smith ii = aij->i; 191882094794SBarry Smith jj = aij->j; 191982094794SBarry Smith 192082094794SBarry Smith /* 192182094794SBarry Smith Determine the number 
of non-zeros in the diagonal and off-diagonal 192282094794SBarry Smith portions of the matrix in order to do correct preallocation 192382094794SBarry Smith */ 192482094794SBarry Smith 192582094794SBarry Smith /* first get start and end of "diagonal" columns */ 192682094794SBarry Smith if (csize == PETSC_DECIDE) { 19279566063dSJacob Faibussowitsch PetscCall(ISGetSize(isrow, &mglobal)); 192882094794SBarry Smith if (mglobal == n * bs) { /* square matrix */ 192982094794SBarry Smith nlocal = m; 193082094794SBarry Smith } else { 193182094794SBarry Smith nlocal = n / size + ((n % size) > rank); 193282094794SBarry Smith } 193382094794SBarry Smith } else { 193482094794SBarry Smith nlocal = csize / bs; 193582094794SBarry Smith } 19369566063dSJacob Faibussowitsch PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 193782094794SBarry Smith rstart = rend - nlocal; 1938aed4548fSBarry Smith PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 193982094794SBarry Smith 194082094794SBarry Smith /* next, compute all the lengths */ 19419566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 194282094794SBarry Smith for (i = 0; i < m; i++) { 194382094794SBarry Smith jend = ii[i + 1] - ii[i]; 194482094794SBarry Smith olen = 0; 194582094794SBarry Smith dlen = 0; 194682094794SBarry Smith for (j = 0; j < jend; j++) { 194782094794SBarry Smith if (*jj < rstart || *jj >= rend) olen++; 194882094794SBarry Smith else dlen++; 194982094794SBarry Smith jj++; 195082094794SBarry Smith } 195182094794SBarry Smith olens[i] = olen; 195282094794SBarry Smith dlens[i] = dlen; 195382094794SBarry Smith } 19549566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, &M)); 19559566063dSJacob Faibussowitsch PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 19569566063dSJacob Faibussowitsch PetscCall(MatSetType(M, 
((PetscObject)mat)->type_name)); 19579566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 19589566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 19599566063dSJacob Faibussowitsch PetscCall(PetscFree2(dlens, olens)); 196082094794SBarry Smith } else { 196182094794SBarry Smith PetscInt ml, nl; 196282094794SBarry Smith 196382094794SBarry Smith M = *newmat; 19649566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(M, &ml, &nl)); 19655f80ce2aSJacob Faibussowitsch PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 19669566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(M)); 196782094794SBarry Smith /* 196882094794SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 196982094794SBarry Smith rather than the slower MatSetValues(). 197082094794SBarry Smith */ 197182094794SBarry Smith M->was_assembled = PETSC_TRUE; 197282094794SBarry Smith M->assembled = PETSC_FALSE; 197382094794SBarry Smith } 19749566063dSJacob Faibussowitsch PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 19759566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 197682094794SBarry Smith aij = (Mat_SeqBAIJ *)(Mreuse)->data; 197782094794SBarry Smith ii = aij->i; 197882094794SBarry Smith jj = aij->j; 197982094794SBarry Smith aa = aij->a; 198082094794SBarry Smith for (i = 0; i < m; i++) { 198182094794SBarry Smith row = rstart / bs + i; 198282094794SBarry Smith nz = ii[i + 1] - ii[i]; 19839371c9d4SSatish Balay cwork = jj; 19849371c9d4SSatish Balay jj += nz; 19859371c9d4SSatish Balay vwork = aa; 19869371c9d4SSatish Balay aa += nz * bs * bs; 19879566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 198882094794SBarry Smith } 198982094794SBarry Smith 19909566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(M, 
MAT_FINAL_ASSEMBLY)); 19919566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 199282094794SBarry Smith *newmat = M; 199382094794SBarry Smith 199482094794SBarry Smith /* save submatrix used in processor for next request */ 199582094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 19969566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 19979566063dSJacob Faibussowitsch PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 199882094794SBarry Smith } 199982094794SBarry Smith PetscFunctionReturn(0); 200082094794SBarry Smith } 200182094794SBarry Smith 20029371c9d4SSatish Balay PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) { 200382094794SBarry Smith MPI_Comm comm, pcomm; 2004a0a83eb5SRémi Lacroix PetscInt clocal_size, nrows; 200582094794SBarry Smith const PetscInt *rows; 2006dbf0e21dSBarry Smith PetscMPIInt size; 2007a0a83eb5SRémi Lacroix IS crowp, lcolp; 200882094794SBarry Smith 200982094794SBarry Smith PetscFunctionBegin; 20109566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 201182094794SBarry Smith /* make a collective version of 'rowp' */ 20129566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 201382094794SBarry Smith if (pcomm == comm) { 201482094794SBarry Smith crowp = rowp; 201582094794SBarry Smith } else { 20169566063dSJacob Faibussowitsch PetscCall(ISGetSize(rowp, &nrows)); 20179566063dSJacob Faibussowitsch PetscCall(ISGetIndices(rowp, &rows)); 20189566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 20199566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(rowp, &rows)); 202082094794SBarry Smith } 20219566063dSJacob Faibussowitsch PetscCall(ISSetPermutation(crowp)); 2022a0a83eb5SRémi Lacroix /* make a local version of 'colp' */ 20239566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 
20249566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2025dbf0e21dSBarry Smith if (size == 1) { 202682094794SBarry Smith lcolp = colp; 202782094794SBarry Smith } else { 20289566063dSJacob Faibussowitsch PetscCall(ISAllGather(colp, &lcolp)); 202982094794SBarry Smith } 20309566063dSJacob Faibussowitsch PetscCall(ISSetPermutation(lcolp)); 203175f6568bSJed Brown /* now we just get the submatrix */ 20329566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 20339566063dSJacob Faibussowitsch PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B)); 2034a0a83eb5SRémi Lacroix /* clean up */ 203548a46eb9SPierre Jolivet if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 203648a46eb9SPierre Jolivet if (size > 1) PetscCall(ISDestroy(&lcolp)); 203782094794SBarry Smith PetscFunctionReturn(0); 203882094794SBarry Smith } 203982094794SBarry Smith 20409371c9d4SSatish Balay PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) { 20418c7482ecSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 20428c7482ecSBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 20438c7482ecSBarry Smith 20448c7482ecSBarry Smith PetscFunctionBegin; 204526fbe8dcSKarl Rupp if (nghosts) *nghosts = B->nbs; 204626fbe8dcSKarl Rupp if (ghosts) *ghosts = baij->garray; 20478c7482ecSBarry Smith PetscFunctionReturn(0); 20488c7482ecSBarry Smith } 20498c7482ecSBarry Smith 20509371c9d4SSatish Balay PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) { 2051f6d58c54SBarry Smith Mat B; 2052f6d58c54SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2053f6d58c54SBarry Smith Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2054f6d58c54SBarry Smith Mat_SeqAIJ *b; 2055f4259b30SLisandro Dalcin PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2056f6d58c54SBarry Smith PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = 
A->rmap->bs; 2057f6d58c54SBarry Smith PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2058f6d58c54SBarry Smith 2059f6d58c54SBarry Smith PetscFunctionBegin; 20609566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 20619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2062f6d58c54SBarry Smith 2063f6d58c54SBarry Smith /* ---------------------------------------------------------------- 2064f6d58c54SBarry Smith Tell every processor the number of nonzeros per row 2065f6d58c54SBarry Smith */ 20669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2067ad540459SPierre Jolivet for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 20689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2069f6d58c54SBarry Smith displs = recvcounts + size; 2070f6d58c54SBarry Smith for (i = 0; i < size; i++) { 2071f6d58c54SBarry Smith recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2072f6d58c54SBarry Smith displs[i] = A->rmap->range[i] / bs; 2073f6d58c54SBarry Smith } 20749566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2075f6d58c54SBarry Smith /* --------------------------------------------------------------- 2076f6d58c54SBarry Smith Create the sequential matrix of the same type as the local block diagonal 2077f6d58c54SBarry Smith */ 20789566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 20799566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 20809566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATSEQAIJ)); 20819566063dSJacob 
Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2082f6d58c54SBarry Smith b = (Mat_SeqAIJ *)B->data; 2083f6d58c54SBarry Smith 2084f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2085f6d58c54SBarry Smith Copy my part of matrix column indices over 2086f6d58c54SBarry Smith */ 2087f6d58c54SBarry Smith sendcount = ad->nz + bd->nz; 2088f6d58c54SBarry Smith jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2089f6d58c54SBarry Smith a_jsendbuf = ad->j; 2090f6d58c54SBarry Smith b_jsendbuf = bd->j; 2091f6d58c54SBarry Smith n = A->rmap->rend / bs - A->rmap->rstart / bs; 2092f6d58c54SBarry Smith cnt = 0; 2093f6d58c54SBarry Smith for (i = 0; i < n; i++) { 2094f6d58c54SBarry Smith /* put in lower diagonal portion */ 2095f6d58c54SBarry Smith m = bd->i[i + 1] - bd->i[i]; 2096f6d58c54SBarry Smith while (m > 0) { 2097f6d58c54SBarry Smith /* is it above diagonal (in bd (compressed) numbering) */ 2098f6d58c54SBarry Smith if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2099f6d58c54SBarry Smith jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2100f6d58c54SBarry Smith m--; 2101f6d58c54SBarry Smith } 2102f6d58c54SBarry Smith 2103f6d58c54SBarry Smith /* put in diagonal portion */ 2104ad540459SPierre Jolivet for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2105f6d58c54SBarry Smith 2106f6d58c54SBarry Smith /* put in upper diagonal portion */ 2107ad540459SPierre Jolivet while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2108f6d58c54SBarry Smith } 21095f80ce2aSJacob Faibussowitsch PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2110f6d58c54SBarry Smith 2111f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2112f6d58c54SBarry Smith Gather all column indices to all processors 2113f6d58c54SBarry Smith */ 2114f6d58c54SBarry Smith for (i = 
0; i < size; i++) { 2115f6d58c54SBarry Smith recvcounts[i] = 0; 2116ad540459SPierre Jolivet for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2117f6d58c54SBarry Smith } 2118f6d58c54SBarry Smith displs[0] = 0; 2119ad540459SPierre Jolivet for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 21209566063dSJacob Faibussowitsch PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2121f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2122f6d58c54SBarry Smith Assemble the matrix into useable form (note numerical values not yet set) 2123f6d58c54SBarry Smith */ 2124f6d58c54SBarry Smith /* set the b->ilen (length of each row) values */ 21259566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2126f6d58c54SBarry Smith /* set the b->i indices */ 2127f6d58c54SBarry Smith b->i[0] = 0; 2128ad540459SPierre Jolivet for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 21299566063dSJacob Faibussowitsch PetscCall(PetscFree(lens)); 21309566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 21319566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 21329566063dSJacob Faibussowitsch PetscCall(PetscFree(recvcounts)); 2133f6d58c54SBarry Smith 2134b94d7dedSBarry Smith PetscCall(MatPropagateSymmetryOptions(A, B)); 2135f6d58c54SBarry Smith *newmat = B; 2136f6d58c54SBarry Smith PetscFunctionReturn(0); 2137f6d58c54SBarry Smith } 2138f6d58c54SBarry Smith 21399371c9d4SSatish Balay PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 2140b1a666ecSBarry Smith Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2141f4259b30SLisandro Dalcin Vec bb1 = NULL; 2142b1a666ecSBarry Smith 2143b1a666ecSBarry Smith PetscFunctionBegin; 
2144b1a666ecSBarry Smith if (flag == SOR_APPLY_UPPER) { 21459566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2146b1a666ecSBarry Smith PetscFunctionReturn(0); 2147b1a666ecSBarry Smith } 2148b1a666ecSBarry Smith 214948a46eb9SPierre Jolivet if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 21504e980039SJed Brown 2151b1a666ecSBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2152b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 21539566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2154b1a666ecSBarry Smith its--; 2155b1a666ecSBarry Smith } 2156b1a666ecSBarry Smith 2157b1a666ecSBarry Smith while (its--) { 21589566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 21599566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2160b1a666ecSBarry Smith 2161b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 21629566063dSJacob Faibussowitsch PetscCall(VecScale(mat->lvec, -1.0)); 21639566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2164b1a666ecSBarry Smith 2165b1a666ecSBarry Smith /* local sweep */ 21669566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2167b1a666ecSBarry Smith } 2168b1a666ecSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2169b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 21709566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2171b1a666ecSBarry Smith its--; 2172b1a666ecSBarry Smith } 2173b1a666ecSBarry Smith while (its--) { 21749566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 21759566063dSJacob 
Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2176b1a666ecSBarry Smith 2177b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 21789566063dSJacob Faibussowitsch PetscCall(VecScale(mat->lvec, -1.0)); 21799566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2180b1a666ecSBarry Smith 2181b1a666ecSBarry Smith /* local sweep */ 21829566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2183b1a666ecSBarry Smith } 2184b1a666ecSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2185b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 21869566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2187b1a666ecSBarry Smith its--; 2188b1a666ecSBarry Smith } 2189b1a666ecSBarry Smith while (its--) { 21909566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 21919566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2192b1a666ecSBarry Smith 2193b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 21949566063dSJacob Faibussowitsch PetscCall(VecScale(mat->lvec, -1.0)); 21959566063dSJacob Faibussowitsch PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2196b1a666ecSBarry Smith 2197b1a666ecSBarry Smith /* local sweep */ 21989566063dSJacob Faibussowitsch PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2199b1a666ecSBarry Smith } 2200ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2201b1a666ecSBarry Smith 22029566063dSJacob Faibussowitsch PetscCall(VecDestroy(&bb1)); 2203b1a666ecSBarry Smith PetscFunctionReturn(0); 2204b1a666ecSBarry Smith } 2205b1a666ecSBarry Smith 22069371c9d4SSatish Balay 
PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) { 220747f7623dSRémi Lacroix Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2208a873a8cdSSam Reynolds PetscInt m, N, i, *garray = aij->garray; 220947f7623dSRémi Lacroix PetscInt ib, jb, bs = A->rmap->bs; 221047f7623dSRémi Lacroix Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 221147f7623dSRémi Lacroix MatScalar *a_val = a_aij->a; 221247f7623dSRémi Lacroix Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 221347f7623dSRémi Lacroix MatScalar *b_val = b_aij->a; 221447f7623dSRémi Lacroix PetscReal *work; 221547f7623dSRémi Lacroix 221647f7623dSRémi Lacroix PetscFunctionBegin; 22179566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &m, &N)); 22189566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(N, &work)); 2219857cbf51SRichard Tran Mills if (type == NORM_2) { 222047f7623dSRémi Lacroix for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 222147f7623dSRémi Lacroix for (jb = 0; jb < bs; jb++) { 222247f7623dSRémi Lacroix for (ib = 0; ib < bs; ib++) { 222347f7623dSRémi Lacroix work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 222447f7623dSRémi Lacroix a_val++; 222547f7623dSRémi Lacroix } 222647f7623dSRémi Lacroix } 222747f7623dSRémi Lacroix } 222847f7623dSRémi Lacroix for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 222947f7623dSRémi Lacroix for (jb = 0; jb < bs; jb++) { 223047f7623dSRémi Lacroix for (ib = 0; ib < bs; ib++) { 223147f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 223247f7623dSRémi Lacroix b_val++; 223347f7623dSRémi Lacroix } 223447f7623dSRémi Lacroix } 223547f7623dSRémi Lacroix } 2236857cbf51SRichard Tran Mills } else if (type == NORM_1) { 223747f7623dSRémi Lacroix for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 223847f7623dSRémi Lacroix for (jb = 0; jb < bs; jb++) { 223947f7623dSRémi Lacroix for (ib = 0; ib < bs; ib++) { 224047f7623dSRémi Lacroix 
work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 224147f7623dSRémi Lacroix a_val++; 224247f7623dSRémi Lacroix } 224347f7623dSRémi Lacroix } 224447f7623dSRémi Lacroix } 224547f7623dSRémi Lacroix for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 224647f7623dSRémi Lacroix for (jb = 0; jb < bs; jb++) { 224747f7623dSRémi Lacroix for (ib = 0; ib < bs; ib++) { 224847f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 224947f7623dSRémi Lacroix b_val++; 225047f7623dSRémi Lacroix } 225147f7623dSRémi Lacroix } 225247f7623dSRémi Lacroix } 2253857cbf51SRichard Tran Mills } else if (type == NORM_INFINITY) { 225447f7623dSRémi Lacroix for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 225547f7623dSRémi Lacroix for (jb = 0; jb < bs; jb++) { 225647f7623dSRémi Lacroix for (ib = 0; ib < bs; ib++) { 225747f7623dSRémi Lacroix int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 225847f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 225947f7623dSRémi Lacroix a_val++; 226047f7623dSRémi Lacroix } 226147f7623dSRémi Lacroix } 226247f7623dSRémi Lacroix } 226347f7623dSRémi Lacroix for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 226447f7623dSRémi Lacroix for (jb = 0; jb < bs; jb++) { 226547f7623dSRémi Lacroix for (ib = 0; ib < bs; ib++) { 226647f7623dSRémi Lacroix int col = garray[b_aij->j[i]] * bs + jb; 226747f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 226847f7623dSRémi Lacroix b_val++; 226947f7623dSRémi Lacroix } 227047f7623dSRémi Lacroix } 227147f7623dSRémi Lacroix } 2272857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2273a873a8cdSSam Reynolds for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2274a873a8cdSSam Reynolds for (jb = 0; jb < bs; jb++) { 2275a873a8cdSSam Reynolds for (ib = 0; ib < bs; ib++) { 2276857cbf51SRichard Tran Mills work[A->cmap->rstart + 
a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2277a873a8cdSSam Reynolds a_val++; 2278a873a8cdSSam Reynolds } 2279a873a8cdSSam Reynolds } 2280a873a8cdSSam Reynolds } 2281a873a8cdSSam Reynolds for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2282a873a8cdSSam Reynolds for (jb = 0; jb < bs; jb++) { 2283a873a8cdSSam Reynolds for (ib = 0; ib < bs; ib++) { 2284857cbf51SRichard Tran Mills work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2285a873a8cdSSam Reynolds b_val++; 2286a873a8cdSSam Reynolds } 2287a873a8cdSSam Reynolds } 2288a873a8cdSSam Reynolds } 2289857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2290857cbf51SRichard Tran Mills for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2291857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 2292857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 2293857cbf51SRichard Tran Mills work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2294857cbf51SRichard Tran Mills a_val++; 2295857cbf51SRichard Tran Mills } 2296857cbf51SRichard Tran Mills } 2297857cbf51SRichard Tran Mills } 2298857cbf51SRichard Tran Mills for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2299857cbf51SRichard Tran Mills for (jb = 0; jb < bs; jb++) { 2300857cbf51SRichard Tran Mills for (ib = 0; ib < bs; ib++) { 2301857cbf51SRichard Tran Mills work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2302857cbf51SRichard Tran Mills b_val++; 2303857cbf51SRichard Tran Mills } 2304857cbf51SRichard Tran Mills } 2305857cbf51SRichard Tran Mills } 2306857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2307857cbf51SRichard Tran Mills if (type == NORM_INFINITY) { 23081c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 230947f7623dSRémi Lacroix } else { 
23101c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 231147f7623dSRémi Lacroix } 23129566063dSJacob Faibussowitsch PetscCall(PetscFree(work)); 2313857cbf51SRichard Tran Mills if (type == NORM_2) { 2314a873a8cdSSam Reynolds for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2315857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2316a873a8cdSSam Reynolds for (i = 0; i < N; i++) reductions[i] /= m; 231747f7623dSRémi Lacroix } 231847f7623dSRémi Lacroix PetscFunctionReturn(0); 231947f7623dSRémi Lacroix } 232047f7623dSRémi Lacroix 23219371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) { 2322bbead8a2SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2323bbead8a2SBarry Smith 2324bbead8a2SBarry Smith PetscFunctionBegin; 23259566063dSJacob Faibussowitsch PetscCall(MatInvertBlockDiagonal(a->A, values)); 23267b6c816cSBarry Smith A->factorerrortype = a->A->factorerrortype; 23277b6c816cSBarry Smith A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 23287b6c816cSBarry Smith A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2329bbead8a2SBarry Smith PetscFunctionReturn(0); 2330bbead8a2SBarry Smith } 2331bbead8a2SBarry Smith 23329371c9d4SSatish Balay PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) { 23337d68702bSBarry Smith Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 23346f33a894SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 23357d68702bSBarry Smith 23367d68702bSBarry Smith PetscFunctionBegin; 23376f33a894SBarry Smith if (!Y->preallocated) { 23389566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 23396f33a894SBarry Smith } else if (!aij->nz) { 2340b83222d8SBarry Smith PetscInt nonew = aij->nonew; 23419566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(maij->A, 
Y->rmap->bs, 1, NULL)); 2342b83222d8SBarry Smith aij->nonew = nonew; 23437d68702bSBarry Smith } 23449566063dSJacob Faibussowitsch PetscCall(MatShift_Basic(Y, a)); 23457d68702bSBarry Smith PetscFunctionReturn(0); 23467d68702bSBarry Smith } 23478c7482ecSBarry Smith 23489371c9d4SSatish Balay PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) { 23493b49f96aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 23503b49f96aSBarry Smith 23513b49f96aSBarry Smith PetscFunctionBegin; 23525f80ce2aSJacob Faibussowitsch PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 23539566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(a->A, missing, d)); 23543b49f96aSBarry Smith if (d) { 23553b49f96aSBarry Smith PetscInt rstart; 23569566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 23573b49f96aSBarry Smith *d += rstart / A->rmap->bs; 23583b49f96aSBarry Smith } 23593b49f96aSBarry Smith PetscFunctionReturn(0); 23603b49f96aSBarry Smith } 23613b49f96aSBarry Smith 23629371c9d4SSatish Balay PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) { 2363a5b7ff6bSBarry Smith PetscFunctionBegin; 2364a5b7ff6bSBarry Smith *a = ((Mat_MPIBAIJ *)A->data)->A; 2365a5b7ff6bSBarry Smith PetscFunctionReturn(0); 2366a5b7ff6bSBarry Smith } 2367a5b7ff6bSBarry Smith 236879bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 23693964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2370cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 2371cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 2372cc2dc46cSBarry Smith MatMult_MPIBAIJ, 237397304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 23747c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 23757c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 2376f4259b30SLisandro Dalcin NULL, 2377f4259b30SLisandro Dalcin NULL, 2378f4259b30SLisandro Dalcin NULL, 2379f4259b30SLisandro Dalcin /*10*/ NULL, 
2380f4259b30SLisandro Dalcin NULL, 2381f4259b30SLisandro Dalcin NULL, 2382b1a666ecSBarry Smith MatSOR_MPIBAIJ, 2383cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 238497304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 23857fc3c18eSBarry Smith MatEqual_MPIBAIJ, 2386cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 2387cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 2388cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 238997304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 2390cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 2391cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 2392cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 2393d519adbfSMatthew Knepley /*24*/ MatZeroRows_MPIBAIJ, 2394f4259b30SLisandro Dalcin NULL, 2395f4259b30SLisandro Dalcin NULL, 2396f4259b30SLisandro Dalcin NULL, 2397f4259b30SLisandro Dalcin NULL, 23984994cf47SJed Brown /*29*/ MatSetUp_MPIBAIJ, 2399f4259b30SLisandro Dalcin NULL, 2400f4259b30SLisandro Dalcin NULL, 2401a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPIBAIJ, 2402f4259b30SLisandro Dalcin NULL, 2403d519adbfSMatthew Knepley /*34*/ MatDuplicate_MPIBAIJ, 2404f4259b30SLisandro Dalcin NULL, 2405f4259b30SLisandro Dalcin NULL, 2406f4259b30SLisandro Dalcin NULL, 2407f4259b30SLisandro Dalcin NULL, 2408d519adbfSMatthew Knepley /*39*/ MatAXPY_MPIBAIJ, 24097dae84e0SHong Zhang MatCreateSubMatrices_MPIBAIJ, 2410cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 2411cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 24123c896bc6SHong Zhang MatCopy_MPIBAIJ, 2413f4259b30SLisandro Dalcin /*44*/ NULL, 2414cc2dc46cSBarry Smith MatScale_MPIBAIJ, 24157d68702bSBarry Smith MatShift_MPIBAIJ, 2416f4259b30SLisandro Dalcin NULL, 24176f0a72daSMatthew G. 
Knepley MatZeroRowsColumns_MPIBAIJ, 2418f4259b30SLisandro Dalcin /*49*/ NULL, 2419f4259b30SLisandro Dalcin NULL, 2420f4259b30SLisandro Dalcin NULL, 2421f4259b30SLisandro Dalcin NULL, 2422f4259b30SLisandro Dalcin NULL, 242393dfae19SHong Zhang /*54*/ MatFDColoringCreate_MPIXAIJ, 2424f4259b30SLisandro Dalcin NULL, 2425cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 242682094794SBarry Smith MatPermute_MPIBAIJ, 2427cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 24287dae84e0SHong Zhang /*59*/ MatCreateSubMatrix_MPIBAIJ, 2429f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 2430f14a1c24SBarry Smith MatView_MPIBAIJ, 2431f4259b30SLisandro Dalcin NULL, 2432f4259b30SLisandro Dalcin NULL, 2433f4259b30SLisandro Dalcin /*64*/ NULL, 2434f4259b30SLisandro Dalcin NULL, 2435f4259b30SLisandro Dalcin NULL, 2436f4259b30SLisandro Dalcin NULL, 2437f4259b30SLisandro Dalcin NULL, 2438d519adbfSMatthew Knepley /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2439f4259b30SLisandro Dalcin NULL, 2440f4259b30SLisandro Dalcin NULL, 2441f4259b30SLisandro Dalcin NULL, 2442f4259b30SLisandro Dalcin NULL, 2443f4259b30SLisandro Dalcin /*74*/ NULL, 2444f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 2445f4259b30SLisandro Dalcin NULL, 2446f4259b30SLisandro Dalcin NULL, 2447f4259b30SLisandro Dalcin NULL, 2448f4259b30SLisandro Dalcin /*79*/ NULL, 2449f4259b30SLisandro Dalcin NULL, 2450f4259b30SLisandro Dalcin NULL, 2451f4259b30SLisandro Dalcin NULL, 24525bba2384SShri Abhyankar MatLoad_MPIBAIJ, 2453f4259b30SLisandro Dalcin /*84*/ NULL, 2454f4259b30SLisandro Dalcin NULL, 2455f4259b30SLisandro Dalcin NULL, 2456f4259b30SLisandro Dalcin NULL, 2457f4259b30SLisandro Dalcin NULL, 2458f4259b30SLisandro Dalcin /*89*/ NULL, 2459f4259b30SLisandro Dalcin NULL, 2460f4259b30SLisandro Dalcin NULL, 2461f4259b30SLisandro Dalcin NULL, 2462f4259b30SLisandro Dalcin NULL, 2463f4259b30SLisandro Dalcin /*94*/ NULL, 2464f4259b30SLisandro Dalcin NULL, 2465f4259b30SLisandro Dalcin NULL, 2466f4259b30SLisandro Dalcin NULL, 2467f4259b30SLisandro Dalcin NULL, 
2468f4259b30SLisandro Dalcin /*99*/ NULL, 2469f4259b30SLisandro Dalcin NULL, 2470f4259b30SLisandro Dalcin NULL, 24712726fb6dSPierre Jolivet MatConjugate_MPIBAIJ, 2472f4259b30SLisandro Dalcin NULL, 2473f4259b30SLisandro Dalcin /*104*/ NULL, 247499cafbc1SBarry Smith MatRealPart_MPIBAIJ, 24758c7482ecSBarry Smith MatImaginaryPart_MPIBAIJ, 2476f4259b30SLisandro Dalcin NULL, 2477f4259b30SLisandro Dalcin NULL, 2478f4259b30SLisandro Dalcin /*109*/ NULL, 2479f4259b30SLisandro Dalcin NULL, 2480f4259b30SLisandro Dalcin NULL, 2481f4259b30SLisandro Dalcin NULL, 24823b49f96aSBarry Smith MatMissingDiagonal_MPIBAIJ, 2483d1adec66SJed Brown /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2484f4259b30SLisandro Dalcin NULL, 24854683f7a4SShri Abhyankar MatGetGhosts_MPIBAIJ, 2486f4259b30SLisandro Dalcin NULL, 2487f4259b30SLisandro Dalcin NULL, 2488f4259b30SLisandro Dalcin /*119*/ NULL, 2489f4259b30SLisandro Dalcin NULL, 2490f4259b30SLisandro Dalcin NULL, 2491f4259b30SLisandro Dalcin NULL, 2492e8271787SHong Zhang MatGetMultiProcBlock_MPIBAIJ, 2493f4259b30SLisandro Dalcin /*124*/ NULL, 2494a873a8cdSSam Reynolds MatGetColumnReductions_MPIBAIJ, 24953964eb88SJed Brown MatInvertBlockDiagonal_MPIBAIJ, 2496f4259b30SLisandro Dalcin NULL, 2497f4259b30SLisandro Dalcin NULL, 2498f4259b30SLisandro Dalcin /*129*/ NULL, 2499f4259b30SLisandro Dalcin NULL, 2500f4259b30SLisandro Dalcin NULL, 2501f4259b30SLisandro Dalcin NULL, 2502f4259b30SLisandro Dalcin NULL, 2503f4259b30SLisandro Dalcin /*134*/ NULL, 2504f4259b30SLisandro Dalcin NULL, 2505f4259b30SLisandro Dalcin NULL, 2506f4259b30SLisandro Dalcin NULL, 2507f4259b30SLisandro Dalcin NULL, 250846533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 2509f4259b30SLisandro Dalcin NULL, 2510f4259b30SLisandro Dalcin NULL, 2511bdf6f3fcSHong Zhang MatFDColoringSetUp_MPIXAIJ, 2512f4259b30SLisandro Dalcin NULL, 2513d70f29a3SPierre Jolivet /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2514d70f29a3SPierre Jolivet NULL, 2515d70f29a3SPierre Jolivet NULL, 
251699a7f59eSMark Adams NULL, 251799a7f59eSMark Adams NULL, 25187fb60732SBarry Smith NULL, 25199371c9d4SSatish Balay /*150*/ NULL}; 252079bdfe76SSatish Balay 2521cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2522c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2523d94109b8SHong Zhang 25249371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) { 2525b8d659d7SLisandro Dalcin PetscInt m, rstart, cstart, cend; 2526f4259b30SLisandro Dalcin PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2527f4259b30SLisandro Dalcin const PetscInt *JJ = NULL; 2528f4259b30SLisandro Dalcin PetscScalar *values = NULL; 2529d47bf9aaSJed Brown PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 25303bd0feecSPierre Jolivet PetscBool nooffprocentries; 2531aac34f13SBarry Smith 2532aac34f13SBarry Smith PetscFunctionBegin; 25339566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 25349566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 25359566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 25369566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 25379566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2538d0f46423SBarry Smith m = B->rmap->n / bs; 2539d0f46423SBarry Smith rstart = B->rmap->rstart / bs; 2540d0f46423SBarry Smith cstart = B->cmap->rstart / bs; 2541d0f46423SBarry Smith cend = B->cmap->rend / bs; 2542b8d659d7SLisandro Dalcin 25435f80ce2aSJacob Faibussowitsch PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 25449566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2545aac34f13SBarry Smith for (i = 0; i < m; i++) { 2546cf12db73SBarry Smith nz 
= ii[i + 1] - ii[i]; 25475f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2548b8d659d7SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 254937cd3c0dSBarry Smith dlen = 0; 255037cd3c0dSBarry Smith olen = 0; 2551cf12db73SBarry Smith JJ = jj + ii[i]; 2552b8d659d7SLisandro Dalcin for (j = 0; j < nz; j++) { 255337cd3c0dSBarry Smith if (*JJ < cstart || *JJ >= cend) olen++; 255437cd3c0dSBarry Smith else dlen++; 2555aac34f13SBarry Smith JJ++; 2556aac34f13SBarry Smith } 255737cd3c0dSBarry Smith d_nnz[i] = dlen; 255837cd3c0dSBarry Smith o_nnz[i] = olen; 2559aac34f13SBarry Smith } 25609566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 25619566063dSJacob Faibussowitsch PetscCall(PetscFree2(d_nnz, o_nnz)); 2562aac34f13SBarry Smith 2563b8d659d7SLisandro Dalcin values = (PetscScalar *)V; 256448a46eb9SPierre Jolivet if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2565b8d659d7SLisandro Dalcin for (i = 0; i < m; i++) { 2566b8d659d7SLisandro Dalcin PetscInt row = i + rstart; 2567cf12db73SBarry Smith PetscInt ncols = ii[i + 1] - ii[i]; 2568cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 2569bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2570cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 25719566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 25723adadaf3SJed Brown } else { /* block ordering does not match so we can only insert one block at a time. */ 25733adadaf3SJed Brown PetscInt j; 25743adadaf3SJed Brown for (j = 0; j < ncols; j++) { 25753adadaf3SJed Brown const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 25769566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 25773adadaf3SJed Brown } 25783adadaf3SJed Brown } 2579aac34f13SBarry Smith } 2580aac34f13SBarry Smith 25819566063dSJacob Faibussowitsch if (!V) PetscCall(PetscFree(values)); 25823bd0feecSPierre Jolivet nooffprocentries = B->nooffprocentries; 25833bd0feecSPierre Jolivet B->nooffprocentries = PETSC_TRUE; 25849566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 25859566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 25863bd0feecSPierre Jolivet B->nooffprocentries = nooffprocentries; 25873bd0feecSPierre Jolivet 25889566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2589aac34f13SBarry Smith PetscFunctionReturn(0); 2590aac34f13SBarry Smith } 2591aac34f13SBarry Smith 2592aac34f13SBarry Smith /*@C 259311a5261eSBarry Smith MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2594aac34f13SBarry Smith 2595d083f849SBarry Smith Collective 2596aac34f13SBarry Smith 2597aac34f13SBarry Smith Input Parameters: 25981c4f3114SJed Brown + B - the matrix 2599dfb205c3SBarry Smith . bs - the block size 2600aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2601aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2602aac34f13SBarry Smith - v - optional values in the matrix 2603aac34f13SBarry Smith 2604664954b6SBarry Smith Level: advanced 2605aac34f13SBarry Smith 260695452b02SPatrick Sanan Notes: 260711a5261eSBarry Smith The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. 
For example, C programs 260811a5261eSBarry Smith may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 26093adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 261011a5261eSBarry Smith `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 26113adadaf3SJed Brown block column and the second index is over columns within a block. 26123adadaf3SJed Brown 2613664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2614664954b6SBarry Smith 2615db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2616aac34f13SBarry Smith @*/ 26179371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) { 2618aac34f13SBarry Smith PetscFunctionBegin; 26196ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 26206ba663aaSJed Brown PetscValidType(B, 1); 26216ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 2622cac4c232SBarry Smith PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2623aac34f13SBarry Smith PetscFunctionReturn(0); 2624aac34f13SBarry Smith } 2625aac34f13SBarry Smith 26269371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) { 2627a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2628535b19f3SBarry Smith PetscInt i; 26295d2a9ed1SStefano Zampini PetscMPIInt size; 2630a23d5eceSKris Buschelman 
2631a23d5eceSKris Buschelman PetscFunctionBegin; 26329566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 26339566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->rmap)); 26349566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(B->cmap)); 26359566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2636899cda47SBarry Smith 2637a23d5eceSKris Buschelman if (d_nnz) { 2638ad540459SPierre Jolivet for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2639a23d5eceSKris Buschelman } 2640a23d5eceSKris Buschelman if (o_nnz) { 2641ad540459SPierre Jolivet for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2642a23d5eceSKris Buschelman } 2643a23d5eceSKris Buschelman 2644a23d5eceSKris Buschelman b = (Mat_MPIBAIJ *)B->data; 2645a23d5eceSKris Buschelman b->bs2 = bs * bs; 2646d0f46423SBarry Smith b->mbs = B->rmap->n / bs; 2647d0f46423SBarry Smith b->nbs = B->cmap->n / bs; 2648d0f46423SBarry Smith b->Mbs = B->rmap->N / bs; 2649d0f46423SBarry Smith b->Nbs = B->cmap->N / bs; 2650a23d5eceSKris Buschelman 2651ad540459SPierre Jolivet for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2652d0f46423SBarry Smith b->rstartbs = B->rmap->rstart / bs; 2653d0f46423SBarry Smith b->rendbs = B->rmap->rend / bs; 2654d0f46423SBarry Smith b->cstartbs = B->cmap->rstart / bs; 2655d0f46423SBarry Smith b->cendbs = B->cmap->rend / bs; 2656a23d5eceSKris Buschelman 2657cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 26589566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&b->colmap)); 2659cb7b82ddSBarry Smith #else 26609566063dSJacob Faibussowitsch PetscCall(PetscFree(b->colmap)); 2661cb7b82ddSBarry Smith #endif 26629566063dSJacob 
Faibussowitsch PetscCall(PetscFree(b->garray)); 26639566063dSJacob Faibussowitsch PetscCall(VecDestroy(&b->lvec)); 26649566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&b->Mvctx)); 2665cb7b82ddSBarry Smith 2666cb7b82ddSBarry Smith /* Because the B will have been resized we simply destroy it and create a new one each time */ 26679566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 26689566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->B)); 26699566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 26709566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 26719566063dSJacob Faibussowitsch PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2672cb7b82ddSBarry Smith 2673526dfc15SBarry Smith if (!B->preallocated) { 26749566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 26759566063dSJacob Faibussowitsch PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 26769566063dSJacob Faibussowitsch PetscCall(MatSetType(b->A, MATSEQBAIJ)); 26779566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2678526dfc15SBarry Smith } 2679a23d5eceSKris Buschelman 26809566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 26819566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2682526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2683cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 2684cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 2685a23d5eceSKris Buschelman PetscFunctionReturn(0); 2686a23d5eceSKris Buschelman } 2687a23d5eceSKris Buschelman 26887087cfbeSBarry Smith extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 26897087cfbeSBarry Smith extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 26905bf65638SKris Buschelman 
26919371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) { 269282094794SBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 269382094794SBarry Smith Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 269482094794SBarry Smith PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 269582094794SBarry Smith const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 269682094794SBarry Smith 269782094794SBarry Smith PetscFunctionBegin; 26989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(M + 1, &ii)); 269982094794SBarry Smith ii[0] = 0; 270082094794SBarry Smith for (i = 0; i < M; i++) { 270108401ef6SPierre Jolivet PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 270208401ef6SPierre Jolivet PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 270382094794SBarry Smith ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 27045ee9ba1cSJed Brown /* remove one from count of matrix has diagonal */ 27055ee9ba1cSJed Brown for (j = id[i]; j < id[i + 1]; j++) { 27069371c9d4SSatish Balay if (jd[j] == i) { 27079371c9d4SSatish Balay ii[i + 1]--; 27089371c9d4SSatish Balay break; 27099371c9d4SSatish Balay } 27105ee9ba1cSJed Brown } 271182094794SBarry Smith } 27129566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ii[M], &jj)); 271382094794SBarry Smith cnt = 0; 271482094794SBarry Smith for (i = 0; i < M; i++) { 271582094794SBarry Smith for (j = io[i]; j < io[i + 1]; j++) { 271682094794SBarry Smith if (garray[jo[j]] > rstart) break; 271782094794SBarry Smith jj[cnt++] = garray[jo[j]]; 271882094794SBarry Smith } 271982094794SBarry Smith for (k = id[i]; k < id[i + 1]; k++) { 2720ad540459SPierre 
Jolivet if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 272182094794SBarry Smith } 2722ad540459SPierre Jolivet for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 272382094794SBarry Smith } 27249566063dSJacob Faibussowitsch PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 272582094794SBarry Smith PetscFunctionReturn(0); 272682094794SBarry Smith } 272782094794SBarry Smith 2728c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 272962471d69SBarry Smith 2730cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2731b2573a8aSBarry Smith 27329371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 273362471d69SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 273485a69837SSatish Balay Mat_MPIAIJ *b; 27355f80ce2aSJacob Faibussowitsch Mat B; 273662471d69SBarry Smith 273762471d69SBarry Smith PetscFunctionBegin; 27385f80ce2aSJacob Faibussowitsch PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 273962471d69SBarry Smith 27400f6d62edSLisandro Dalcin if (reuse == MAT_REUSE_MATRIX) { 27410f6d62edSLisandro Dalcin B = *newmat; 27420f6d62edSLisandro Dalcin } else { 27439566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 27449566063dSJacob Faibussowitsch PetscCall(MatSetType(B, MATMPIAIJ)); 27459566063dSJacob Faibussowitsch PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 27469566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 27479566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 27489566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 27490f6d62edSLisandro Dalcin } 275062471d69SBarry Smith b = (Mat_MPIAIJ *)B->data; 275162471d69SBarry Smith 27520f6d62edSLisandro Dalcin if 
(reuse == MAT_REUSE_MATRIX) { 27539566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 27549566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 27550f6d62edSLisandro Dalcin } else { 27569566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->A)); 27579566063dSJacob Faibussowitsch PetscCall(MatDestroy(&b->B)); 27589566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPIBAIJ(A)); 27599566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 27609566063dSJacob Faibussowitsch PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 27619566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 27629566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 27630f6d62edSLisandro Dalcin } 27649566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 27659566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 27660f6d62edSLisandro Dalcin 2767511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 27689566063dSJacob Faibussowitsch PetscCall(MatHeaderReplace(A, &B)); 276962471d69SBarry Smith } else { 277062471d69SBarry Smith *newmat = B; 277162471d69SBarry Smith } 277262471d69SBarry Smith PetscFunctionReturn(0); 277362471d69SBarry Smith } 277462471d69SBarry Smith 27750bad9183SKris Buschelman /*MC 2776fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 27770bad9183SKris Buschelman 27780bad9183SKris Buschelman Options Database Keys: 277911a5261eSBarry Smith + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 27808c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix 27816679dcc1SBarry Smith . 
-mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 278267b8a455SSatish Balay - -mat_use_hash_table <fact> - set hash table factor 27830bad9183SKris Buschelman 27840bad9183SKris Buschelman Level: beginner 27850cd7f59aSBarry Smith 278611a5261eSBarry Smith Note: 278711a5261eSBarry Smith `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 278811a5261eSBarry Smith space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 27890bad9183SKris Buschelman 279011a5261eSBarry Smith .seealso: MATBAIJ`, MATSEQBAIJ`, `MatCreateBAIJ` 27910bad9183SKris Buschelman M*/ 27920bad9183SKris Buschelman 2793cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2794c0cdd4a1SDahai Guo 27959371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) { 2796273d9f13SBarry Smith Mat_MPIBAIJ *b; 279794ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2798273d9f13SBarry Smith 2799273d9f13SBarry Smith PetscFunctionBegin; 2800*4dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&b)); 280182502324SSatish Balay B->data = (void *)b; 280282502324SSatish Balay 28039566063dSJacob Faibussowitsch PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2804273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2805273d9f13SBarry Smith 2806273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 28079566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 28089566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2809273d9f13SBarry Smith 2810273d9f13SBarry Smith /* build local table of row and column ownerships */ 28119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2812273d9f13SBarry Smith 2813273d9f13SBarry Smith /* build cache for off array entries formed */ 
28149566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 281526fbe8dcSKarl Rupp 2816273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 28170298fd71SBarry Smith b->colmap = NULL; 28180298fd71SBarry Smith b->garray = NULL; 2819273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2820273d9f13SBarry Smith 2821273d9f13SBarry Smith /* stuff used in block assembly */ 2822f4259b30SLisandro Dalcin b->barray = NULL; 2823273d9f13SBarry Smith 2824273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2825f4259b30SLisandro Dalcin b->lvec = NULL; 2826f4259b30SLisandro Dalcin b->Mvctx = NULL; 2827273d9f13SBarry Smith 2828273d9f13SBarry Smith /* stuff for MatGetRow() */ 2829f4259b30SLisandro Dalcin b->rowindices = NULL; 2830f4259b30SLisandro Dalcin b->rowvalues = NULL; 2831273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2832273d9f13SBarry Smith 2833273d9f13SBarry Smith /* hash table stuff */ 2834f4259b30SLisandro Dalcin b->ht = NULL; 2835f4259b30SLisandro Dalcin b->hd = NULL; 2836273d9f13SBarry Smith b->ht_size = 0; 2837273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2838273d9f13SBarry Smith b->ht_fact = 0; 2839273d9f13SBarry Smith b->ht_total_ct = 0; 2840273d9f13SBarry Smith b->ht_insert_ct = 0; 2841273d9f13SBarry Smith 28427dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 28437a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 28447a868f3eSHong Zhang 28459566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 28469566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 28479566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 28487ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 28499566063dSJacob Faibussowitsch 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 28507ea3e4caSstefano_zampini #endif 28519566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 28529566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 28539566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 28549566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 28559566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 28569566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 28579566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 28589566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 285994ae4db5SBarry Smith 2860d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 28619566063dSJacob Faibussowitsch PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 286294ae4db5SBarry Smith if (flg) { 286394ae4db5SBarry Smith PetscReal fact = 1.39; 28649566063dSJacob Faibussowitsch PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 28659566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 286694ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 28679566063dSJacob 
Faibussowitsch PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 28689566063dSJacob Faibussowitsch PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 286994ae4db5SBarry Smith } 2870d0609cedSBarry Smith PetscOptionsEnd(); 2871273d9f13SBarry Smith PetscFunctionReturn(0); 2872273d9f13SBarry Smith } 2873273d9f13SBarry Smith 2874209238afSKris Buschelman /*MC 2875002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2876209238afSKris Buschelman 287711a5261eSBarry Smith This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 287811a5261eSBarry Smith and `MATMPIBAIJ` otherwise. 2879209238afSKris Buschelman 2880209238afSKris Buschelman Options Database Keys: 288111a5261eSBarry Smith . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2882209238afSKris Buschelman 2883209238afSKris Buschelman Level: beginner 2884209238afSKris Buschelman 2885c2e3fba1SPatrick Sanan .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2886209238afSKris Buschelman M*/ 2887209238afSKris Buschelman 2888273d9f13SBarry Smith /*@C 288911a5261eSBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2890273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 2891273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 2892273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2893273d9f13SBarry Smith performance can be increased by more than a factor of 50. 2894273d9f13SBarry Smith 2895273d9f13SBarry Smith Collective on Mat 2896273d9f13SBarry Smith 2897273d9f13SBarry Smith Input Parameters: 28981c4f3114SJed Brown + B - the matrix 289911a5261eSBarry Smith . bs - size of block, the blocks are ALWAYS square. 
One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2900bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2901273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 2902273d9f13SBarry Smith submatrix (same for all local rows) 2903273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 2904273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 29050298fd71SBarry Smith row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 290695742e49SBarry Smith set it even if it is zero. 2907273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2908273d9f13SBarry Smith submatrix (same for all local rows). 2909273d9f13SBarry Smith - o_nnz - array containing the number of nonzeros in the various block rows of the 2910273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 29110298fd71SBarry Smith each block row) or NULL. 2912273d9f13SBarry Smith 291349a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 2914273d9f13SBarry Smith 2915273d9f13SBarry Smith Options Database Keys: 29168c07d4e3SBarry Smith + -mat_block_size - size of the blocks to use 291767b8a455SSatish Balay - -mat_use_hash_table <fact> - set hash table factor 2918273d9f13SBarry Smith 2919273d9f13SBarry Smith Notes: 292011a5261eSBarry Smith If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2921273d9f13SBarry Smith than it must be used on all processors that share the object for that argument. 
2922273d9f13SBarry Smith 2923273d9f13SBarry Smith Storage Information: 2924273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 2925273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 2926273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 2927273d9f13SBarry Smith local matrix (a rectangular submatrix). 2928273d9f13SBarry Smith 2929273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 2930273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 29310298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2932273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 2933273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2934273d9f13SBarry Smith 2935273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2936273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 2937273d9f13SBarry Smith 2938273d9f13SBarry Smith .vb 2939273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 2940a4b1a0f6SJed Brown -------------------------- 2941273d9f13SBarry Smith row 3 |o o o d d d o o o o o o 2942273d9f13SBarry Smith row 4 |o o o d d d o o o o o o 2943273d9f13SBarry Smith row 5 |o o o d d d o o o o o o 2944a4b1a0f6SJed Brown -------------------------- 2945273d9f13SBarry Smith .ve 2946273d9f13SBarry Smith 2947273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 2948273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 2949273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 295011a5261eSBarry Smith stored simply in the `MATSEQBAIJ` format for compressed row storage. 
2951273d9f13SBarry Smith 2952273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2953273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 2954273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 2955273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2956273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 2957273d9f13SBarry Smith matrices. 2958273d9f13SBarry Smith 295911a5261eSBarry Smith You can call `MatGetInfo()` to get information on how effective the preallocation was; 2960aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2961aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 2962aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 2963aa95bbe8SBarry Smith 2964273d9f13SBarry Smith Level: intermediate 2965273d9f13SBarry Smith 296611a5261eSBarry Smith .seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 2967273d9f13SBarry Smith @*/ 29689371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) { 2969273d9f13SBarry Smith PetscFunctionBegin; 29706ba663aaSJed Brown PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 29716ba663aaSJed Brown PetscValidType(B, 1); 29726ba663aaSJed Brown PetscValidLogicalCollectiveInt(B, bs, 2); 2973cac4c232SBarry Smith PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2974273d9f13SBarry Smith PetscFunctionReturn(0); 2975273d9f13SBarry Smith } 2976273d9f13SBarry Smith 297779bdfe76SSatish Balay /*@C 
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
306779bdfe76SSatish Balay 3068027ccd11SLois Curfman McInnes Level: intermediate 3069027ccd11SLois Curfman McInnes 3070db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 307179bdfe76SSatish Balay @*/ 30729371c9d4SSatish Balay PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) { 3073b24ad042SBarry Smith PetscMPIInt size; 307479bdfe76SSatish Balay 3075d64ed03dSBarry Smith PetscFunctionBegin; 30769566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, A)); 30779566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, m, n, M, N)); 30789566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 3079273d9f13SBarry Smith if (size > 1) { 30809566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATMPIBAIJ)); 30819566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3082273d9f13SBarry Smith } else { 30839566063dSJacob Faibussowitsch PetscCall(MatSetType(*A, MATSEQBAIJ)); 30849566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 30853914022bSBarry Smith } 30863a40ed3dSBarry Smith PetscFunctionReturn(0); 308779bdfe76SSatish Balay } 3088026e39d0SSatish Balay 30899371c9d4SSatish Balay static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) { 30900ac07820SSatish Balay Mat mat; 30910ac07820SSatish Balay Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3092b24ad042SBarry Smith PetscInt len = 0; 30930ac07820SSatish Balay 3094d64ed03dSBarry Smith PetscFunctionBegin; 3095f4259b30SLisandro Dalcin *newmat = NULL; 30969566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 30979566063dSJacob Faibussowitsch PetscCall(MatSetSizes(mat, matin->rmap->n, 
matin->cmap->n, matin->rmap->N, matin->cmap->N)); 30989566063dSJacob Faibussowitsch PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 30997fff6886SHong Zhang 3100d5f3da31SBarry Smith mat->factortype = matin->factortype; 3101273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 31020ac07820SSatish Balay mat->assembled = PETSC_TRUE; 31037fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 31047fff6886SHong Zhang 3105273d9f13SBarry Smith a = (Mat_MPIBAIJ *)mat->data; 3106d0f46423SBarry Smith mat->rmap->bs = matin->rmap->bs; 31070ac07820SSatish Balay a->bs2 = oldmat->bs2; 31080ac07820SSatish Balay a->mbs = oldmat->mbs; 31090ac07820SSatish Balay a->nbs = oldmat->nbs; 31100ac07820SSatish Balay a->Mbs = oldmat->Mbs; 31110ac07820SSatish Balay a->Nbs = oldmat->Nbs; 31120ac07820SSatish Balay 31139566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 31149566063dSJacob Faibussowitsch PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3115899cda47SBarry Smith 31160ac07820SSatish Balay a->size = oldmat->size; 31170ac07820SSatish Balay a->rank = oldmat->rank; 3118aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 3119aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 3120f4259b30SLisandro Dalcin a->rowindices = NULL; 3121f4259b30SLisandro Dalcin a->rowvalues = NULL; 31220ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 3123f4259b30SLisandro Dalcin a->barray = NULL; 3124899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 3125899cda47SBarry Smith a->rendbs = oldmat->rendbs; 3126899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 3127899cda47SBarry Smith a->cendbs = oldmat->cendbs; 31280ac07820SSatish Balay 3129133cdb44SSatish Balay /* hash table stuff */ 3130f4259b30SLisandro Dalcin a->ht = NULL; 3131f4259b30SLisandro Dalcin a->hd = NULL; 3132133cdb44SSatish Balay a->ht_size = 0; 3133133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 313425fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 3135133cdb44SSatish 
Balay a->ht_total_ct = 0; 3136133cdb44SSatish Balay a->ht_insert_ct = 0; 3137133cdb44SSatish Balay 31389566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 31390ac07820SSatish Balay if (oldmat->colmap) { 3140aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 31419566063dSJacob Faibussowitsch PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap)); 314248e59246SSatish Balay #else 31439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 31449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 314548e59246SSatish Balay #endif 3146f4259b30SLisandro Dalcin } else a->colmap = NULL; 31474beb1cfeSHong Zhang 31480ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 31499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(len, &a->garray)); 31509566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3151f4259b30SLisandro Dalcin } else a->garray = NULL; 31520ac07820SSatish Balay 31539566063dSJacob Faibussowitsch PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 31549566063dSJacob Faibussowitsch PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 31559566063dSJacob Faibussowitsch PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 31567fff6886SHong Zhang 31579566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 31589566063dSJacob Faibussowitsch PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 31599566063dSJacob Faibussowitsch PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 31600ac07820SSatish Balay *newmat = mat; 31613a40ed3dSBarry Smith PetscFunctionReturn(0); 31620ac07820SSatish Balay } 316357b952d6SSatish Balay 3164618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 31659371c9d4SSatish Balay PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, 
PetscViewer viewer) { 3166b51a4376SLisandro Dalcin PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3167b51a4376SLisandro Dalcin PetscInt *rowidxs, *colidxs, rs, cs, ce; 3168b51a4376SLisandro Dalcin PetscScalar *matvals; 3169b51a4376SLisandro Dalcin 3170b51a4376SLisandro Dalcin PetscFunctionBegin; 31719566063dSJacob Faibussowitsch PetscCall(PetscViewerSetUp(viewer)); 3172b51a4376SLisandro Dalcin 3173b51a4376SLisandro Dalcin /* read in matrix header */ 31749566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 31755f80ce2aSJacob Faibussowitsch PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 31769371c9d4SSatish Balay M = header[1]; 31779371c9d4SSatish Balay N = header[2]; 31789371c9d4SSatish Balay nz = header[3]; 31795f80ce2aSJacob Faibussowitsch PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 31805f80ce2aSJacob Faibussowitsch PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 31815f80ce2aSJacob Faibussowitsch PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3182b51a4376SLisandro Dalcin 3183b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 31849566063dSJacob Faibussowitsch PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3185618cc2edSLisandro Dalcin /* set local sizes if not set already */ 3186618cc2edSLisandro Dalcin if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3187618cc2edSLisandro Dalcin if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3188b51a4376SLisandro Dalcin /* set global sizes if not set already */ 3189b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 
3190b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 31919566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->rmap)); 31929566063dSJacob Faibussowitsch PetscCall(PetscLayoutSetUp(mat->cmap)); 3193b51a4376SLisandro Dalcin 3194b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 31959566063dSJacob Faibussowitsch PetscCall(MatGetSize(mat, &rows, &cols)); 31965f80ce2aSJacob Faibussowitsch PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 31979566063dSJacob Faibussowitsch PetscCall(MatGetBlockSize(mat, &bs)); 31989566063dSJacob Faibussowitsch PetscCall(MatGetLocalSize(mat, &m, &n)); 31999566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 32009566063dSJacob Faibussowitsch PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 32019371c9d4SSatish Balay mbs = m / bs; 32029371c9d4SSatish Balay nbs = n / bs; 3203b51a4376SLisandro Dalcin 3204b51a4376SLisandro Dalcin /* read in row lengths and build row indices */ 32059566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(m + 1, &rowidxs)); 32069566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 32079371c9d4SSatish Balay rowidxs[0] = 0; 32089371c9d4SSatish Balay for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 32091c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 32105f80ce2aSJacob Faibussowitsch PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3211b51a4376SLisandro Dalcin 3212b51a4376SLisandro Dalcin /* read in column indices and matrix values */ 32139566063dSJacob Faibussowitsch 
PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 32149566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 32159566063dSJacob Faibussowitsch PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3216b51a4376SLisandro Dalcin 3217b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3218b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3219b51a4376SLisandro Dalcin PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3220618cc2edSLisandro Dalcin PetscBool sbaij, done; 3221b51a4376SLisandro Dalcin PetscInt *d_nnz, *o_nnz; 3222b51a4376SLisandro Dalcin 32239566063dSJacob Faibussowitsch PetscCall(PetscBTCreate(nbs, &bt)); 32249566063dSJacob Faibussowitsch PetscCall(PetscHSetICreate(&ht)); 32259566063dSJacob Faibussowitsch PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 32269566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3227b51a4376SLisandro Dalcin for (i = 0; i < mbs; i++) { 32289566063dSJacob Faibussowitsch PetscCall(PetscBTMemzero(nbs, bt)); 32299566063dSJacob Faibussowitsch PetscCall(PetscHSetIClear(ht)); 3230618cc2edSLisandro Dalcin for (k = 0; k < bs; k++) { 3231618cc2edSLisandro Dalcin PetscInt row = bs * i + k; 3232618cc2edSLisandro Dalcin for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3233618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3234618cc2edSLisandro Dalcin if (!sbaij || col >= row) { 3235618cc2edSLisandro Dalcin if (col >= cs && col < ce) { 3236618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3237b51a4376SLisandro Dalcin } else { 32389566063dSJacob Faibussowitsch PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3239b51a4376SLisandro Dalcin if (done) o_nnz[i]++; 3240b51a4376SLisandro Dalcin } 3241b51a4376SLisandro Dalcin } 
3242618cc2edSLisandro Dalcin } 3243618cc2edSLisandro Dalcin } 3244618cc2edSLisandro Dalcin } 32459566063dSJacob Faibussowitsch PetscCall(PetscBTDestroy(&bt)); 32469566063dSJacob Faibussowitsch PetscCall(PetscHSetIDestroy(&ht)); 32479566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 32489566063dSJacob Faibussowitsch PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 32499566063dSJacob Faibussowitsch PetscCall(PetscFree2(d_nnz, o_nnz)); 3250b51a4376SLisandro Dalcin } 3251b51a4376SLisandro Dalcin 3252b51a4376SLisandro Dalcin /* store matrix values */ 3253b51a4376SLisandro Dalcin for (i = 0; i < m; i++) { 3254b51a4376SLisandro Dalcin PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 32559566063dSJacob Faibussowitsch PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3256b51a4376SLisandro Dalcin } 3257b51a4376SLisandro Dalcin 32589566063dSJacob Faibussowitsch PetscCall(PetscFree(rowidxs)); 32599566063dSJacob Faibussowitsch PetscCall(PetscFree2(colidxs, matvals)); 32609566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 32619566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3262b51a4376SLisandro Dalcin PetscFunctionReturn(0); 3263b51a4376SLisandro Dalcin } 3264b51a4376SLisandro Dalcin 32659371c9d4SSatish Balay PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) { 32667f489da9SVaclav Hapla PetscBool isbinary; 32674683f7a4SShri Abhyankar 32684683f7a4SShri Abhyankar PetscFunctionBegin; 32699566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 32705f80ce2aSJacob Faibussowitsch PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 32719566063dSJacob Faibussowitsch 
PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 32724683f7a4SShri Abhyankar PetscFunctionReturn(0); 32734683f7a4SShri Abhyankar } 32744683f7a4SShri Abhyankar 3275133cdb44SSatish Balay /*@ 327611a5261eSBarry Smith MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3277133cdb44SSatish Balay 3278133cdb44SSatish Balay Input Parameters: 3279a2b725a8SWilliam Gropp + mat - the matrix 3280a2b725a8SWilliam Gropp - fact - factor 3281133cdb44SSatish Balay 328211a5261eSBarry Smith Options Database Key: 328311a5261eSBarry Smith . -mat_use_hash_table <fact> - provide the factor 3284fee21e36SBarry Smith 32858c890885SBarry Smith Level: advanced 32868c890885SBarry Smith 328711a5261eSBarry Smith .seealso: `MATMPIBAIJ`, `MatSetOption()` 3288133cdb44SSatish Balay @*/ 32899371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) { 32905bf65638SKris Buschelman PetscFunctionBegin; 3291cac4c232SBarry Smith PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 32925bf65638SKris Buschelman PetscFunctionReturn(0); 32935bf65638SKris Buschelman } 32945bf65638SKris Buschelman 32959371c9d4SSatish Balay PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) { 329625fdafccSSatish Balay Mat_MPIBAIJ *baij; 3297133cdb44SSatish Balay 3298133cdb44SSatish Balay PetscFunctionBegin; 3299133cdb44SSatish Balay baij = (Mat_MPIBAIJ *)mat->data; 3300133cdb44SSatish Balay baij->ht_fact = fact; 3301133cdb44SSatish Balay PetscFunctionReturn(0); 3302133cdb44SSatish Balay } 3303f2a5309cSSatish Balay 33049371c9d4SSatish Balay PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) { 3305f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3306ab4d48faSStefano Zampini PetscBool flg; 33075fd66863SKarl Rupp 3308f2a5309cSSatish Balay PetscFunctionBegin; 33099566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 
33105f80ce2aSJacob Faibussowitsch PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 331121e72a00SBarry Smith if (Ad) *Ad = a->A; 331221e72a00SBarry Smith if (Ao) *Ao = a->B; 331321e72a00SBarry Smith if (colmap) *colmap = a->garray; 3314f2a5309cSSatish Balay PetscFunctionReturn(0); 3315f2a5309cSSatish Balay } 331685535b8eSBarry Smith 331785535b8eSBarry Smith /* 331885535b8eSBarry Smith Special version for direct calls from Fortran (to eliminate two function call overheads 331985535b8eSBarry Smith */ 332085535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 332185535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 332285535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 332385535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 332485535b8eSBarry Smith #endif 332585535b8eSBarry Smith 332685535b8eSBarry Smith /*@C 332711a5261eSBarry Smith MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 332885535b8eSBarry Smith 332985535b8eSBarry Smith Collective on Mat 333085535b8eSBarry Smith 333185535b8eSBarry Smith Input Parameters: 333285535b8eSBarry Smith + mat - the matrix 333385535b8eSBarry Smith . min - number of input rows 333485535b8eSBarry Smith . im - input rows 333585535b8eSBarry Smith . nin - number of input columns 333685535b8eSBarry Smith . in - input columns 333785535b8eSBarry Smith . v - numerical values input 333811a5261eSBarry Smith - addvin - `INSERT_VALUES` or `ADD_VALUES` 333985535b8eSBarry Smith 334011a5261eSBarry Smith Developer Note: 334111a5261eSBarry Smith This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 
334285535b8eSBarry Smith 334385535b8eSBarry Smith Level: advanced 334485535b8eSBarry Smith 3345db781477SPatrick Sanan .seealso: `MatSetValuesBlocked()` 334685535b8eSBarry Smith @*/ 33479371c9d4SSatish Balay PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) { 334885535b8eSBarry Smith /* convert input arguments to C version */ 334985535b8eSBarry Smith Mat mat = *matin; 335085535b8eSBarry Smith PetscInt m = *min, n = *nin; 335185535b8eSBarry Smith InsertMode addv = *addvin; 335285535b8eSBarry Smith 335385535b8eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 335485535b8eSBarry Smith const MatScalar *value; 335585535b8eSBarry Smith MatScalar *barray = baij->barray; 3356ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 335785535b8eSBarry Smith PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 335885535b8eSBarry Smith PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3359d0f46423SBarry Smith PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 336085535b8eSBarry Smith 336185535b8eSBarry Smith PetscFunctionBegin; 336285535b8eSBarry Smith /* tasks normally handled by MatSetValuesBlocked() */ 336326fbe8dcSKarl Rupp if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 33645f80ce2aSJacob Faibussowitsch else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 33655f80ce2aSJacob Faibussowitsch PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 336685535b8eSBarry Smith if (mat->assembled) { 336785535b8eSBarry Smith mat->was_assembled = PETSC_TRUE; 336885535b8eSBarry Smith mat->assembled = PETSC_FALSE; 336985535b8eSBarry Smith } 33709566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 337185535b8eSBarry Smith 337285535b8eSBarry Smith if 
(!barray) { 33739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs2, &barray)); 337485535b8eSBarry Smith baij->barray = barray; 337585535b8eSBarry Smith } 337685535b8eSBarry Smith 337726fbe8dcSKarl Rupp if (roworiented) stepval = (n - 1) * bs; 337826fbe8dcSKarl Rupp else stepval = (m - 1) * bs; 337926fbe8dcSKarl Rupp 338085535b8eSBarry Smith for (i = 0; i < m; i++) { 338185535b8eSBarry Smith if (im[i] < 0) continue; 33826bdcaf15SBarry Smith PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 338385535b8eSBarry Smith if (im[i] >= rstart && im[i] < rend) { 338485535b8eSBarry Smith row = im[i] - rstart; 338585535b8eSBarry Smith for (j = 0; j < n; j++) { 338685535b8eSBarry Smith /* If NumCol = 1 then a copy is not required */ 338785535b8eSBarry Smith if ((roworiented) && (n == 1)) { 338885535b8eSBarry Smith barray = (MatScalar *)v + i * bs2; 338985535b8eSBarry Smith } else if ((!roworiented) && (m == 1)) { 339085535b8eSBarry Smith barray = (MatScalar *)v + j * bs2; 339185535b8eSBarry Smith } else { /* Here a copy is required */ 339285535b8eSBarry Smith if (roworiented) { 339385535b8eSBarry Smith value = v + i * (stepval + bs) * bs + j * bs; 339485535b8eSBarry Smith } else { 339585535b8eSBarry Smith value = v + j * (stepval + bs) * bs + i * bs; 339685535b8eSBarry Smith } 339785535b8eSBarry Smith for (ii = 0; ii < bs; ii++, value += stepval) { 3398ad540459SPierre Jolivet for (jj = 0; jj < bs; jj++) *barray++ = *value++; 339985535b8eSBarry Smith } 340085535b8eSBarry Smith barray -= bs2; 340185535b8eSBarry Smith } 340285535b8eSBarry Smith 340385535b8eSBarry Smith if (in[j] >= cstart && in[j] < cend) { 340485535b8eSBarry Smith col = in[j] - cstart; 34059566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3406f7d195e4SLawrence Mitchell } else if (in[j] < 0) { 3407f7d195e4SLawrence Mitchell 
continue; 3408f7d195e4SLawrence Mitchell } else { 3409f7d195e4SLawrence Mitchell PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 341085535b8eSBarry Smith if (mat->was_assembled) { 341148a46eb9SPierre Jolivet if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 341285535b8eSBarry Smith 341385535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 341485535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 34159371c9d4SSatish Balay { 34169371c9d4SSatish Balay PetscInt data; 34179566063dSJacob Faibussowitsch PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data)); 341808401ef6SPierre Jolivet PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 341985535b8eSBarry Smith } 342085535b8eSBarry Smith #else 342108401ef6SPierre Jolivet PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 342285535b8eSBarry Smith #endif 342385535b8eSBarry Smith #endif 342485535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 34259566063dSJacob Faibussowitsch PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col)); 342685535b8eSBarry Smith col = (col - 1) / bs; 342785535b8eSBarry Smith #else 342885535b8eSBarry Smith col = (baij->colmap[in[j]] - 1) / bs; 342985535b8eSBarry Smith #endif 343085535b8eSBarry Smith if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 34319566063dSJacob Faibussowitsch PetscCall(MatDisAssemble_MPIBAIJ(mat)); 343285535b8eSBarry Smith col = in[j]; 343385535b8eSBarry Smith } 343426fbe8dcSKarl Rupp } else col = in[j]; 34359566063dSJacob Faibussowitsch PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 343685535b8eSBarry Smith } 343785535b8eSBarry Smith } 343885535b8eSBarry Smith } else { 343985535b8eSBarry Smith if (!baij->donotstash) { 344085535b8eSBarry Smith if (roworiented) { 34419566063dSJacob Faibussowitsch 
PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 344285535b8eSBarry Smith } else { 34439566063dSJacob Faibussowitsch PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 344485535b8eSBarry Smith } 344585535b8eSBarry Smith } 344685535b8eSBarry Smith } 344785535b8eSBarry Smith } 344885535b8eSBarry Smith 344985535b8eSBarry Smith /* task normally handled by MatSetValuesBlocked() */ 34509566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 345185535b8eSBarry Smith PetscFunctionReturn(0); 345285535b8eSBarry Smith } 3453dfb205c3SBarry Smith 3454dfb205c3SBarry Smith /*@ 345511a5261eSBarry Smith MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block 3456dfb205c3SBarry Smith CSR format the local rows. 3457dfb205c3SBarry Smith 3458d083f849SBarry Smith Collective 3459dfb205c3SBarry Smith 3460dfb205c3SBarry Smith Input Parameters: 3461dfb205c3SBarry Smith + comm - MPI communicator 3462dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 346311a5261eSBarry Smith . m - number of local rows (Cannot be `PETSC_DECIDE`) 3464dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 346511a5261eSBarry Smith x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 3466dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 346711a5261eSBarry Smith . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 346811a5261eSBarry Smith . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3469483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3470dfb205c3SBarry Smith . 
j - column indices 3471dfb205c3SBarry Smith - a - matrix values 3472dfb205c3SBarry Smith 3473dfb205c3SBarry Smith Output Parameter: 3474dfb205c3SBarry Smith . mat - the matrix 3475dfb205c3SBarry Smith 3476dfb205c3SBarry Smith Level: intermediate 3477dfb205c3SBarry Smith 3478dfb205c3SBarry Smith Notes: 3479dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3480dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 348111a5261eSBarry Smith called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3482dfb205c3SBarry Smith 34833adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 34843adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 34853adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 34863adadaf3SJed Brown with column-major ordering within blocks. 34873adadaf3SJed Brown 3488dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
3489dfb205c3SBarry Smith 3490db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3491db781477SPatrick Sanan `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3492dfb205c3SBarry Smith @*/ 34939371c9d4SSatish Balay PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) { 3494dfb205c3SBarry Smith PetscFunctionBegin; 34955f80ce2aSJacob Faibussowitsch PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 34965f80ce2aSJacob Faibussowitsch PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 34979566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, mat)); 34989566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*mat, m, n, M, N)); 34999566063dSJacob Faibussowitsch PetscCall(MatSetType(*mat, MATMPIBAIJ)); 35009566063dSJacob Faibussowitsch PetscCall(MatSetBlockSize(*mat, bs)); 35019566063dSJacob Faibussowitsch PetscCall(MatSetUp(*mat)); 35029566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 35039566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 35049566063dSJacob Faibussowitsch PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3505dfb205c3SBarry Smith PetscFunctionReturn(0); 3506dfb205c3SBarry Smith } 3507e561ad89SHong Zhang 35089371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 3509bd153df0SHong Zhang PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3510bd153df0SHong Zhang PetscInt *indx; 3511bd153df0SHong Zhang PetscScalar *values; 3512e561ad89SHong Zhang 3513e561ad89SHong Zhang PetscFunctionBegin; 35149566063dSJacob 
Faibussowitsch PetscCall(MatGetSize(inmat, &m, &N)); 3515bd153df0SHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3516bd153df0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 35172c6ba4edSHong Zhang PetscInt *dnz, *onz, mbs, Nbs, nbs; 3518bd153df0SHong Zhang PetscInt *bindx, rmax = a->rmax, j; 351977f764caSHong Zhang PetscMPIInt rank, size; 3520e561ad89SHong Zhang 35219566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 35229371c9d4SSatish Balay mbs = m / bs; 35239371c9d4SSatish Balay Nbs = N / cbs; 352448a46eb9SPierre Jolivet if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3525da91a574SPierre Jolivet nbs = n / cbs; 3526e561ad89SHong Zhang 35279566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(rmax, &bindx)); 3528d0609cedSBarry Smith MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 352977f764caSHong Zhang 35309566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 35319566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &size)); 353277f764caSHong Zhang if (rank == size - 1) { 353377f764caSHong Zhang /* Check sum(nbs) = Nbs */ 35345f80ce2aSJacob Faibussowitsch PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 353577f764caSHong Zhang } 353677f764caSHong Zhang 3537d0609cedSBarry Smith rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3538bd153df0SHong Zhang for (i = 0; i < mbs; i++) { 35399566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3540647a6520SHong Zhang nnz = nnz / bs; 3541647a6520SHong Zhang for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 35429566063dSJacob Faibussowitsch PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 35439566063dSJacob 
Faibussowitsch PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3544e561ad89SHong Zhang } 35459566063dSJacob Faibussowitsch PetscCall(PetscFree(bindx)); 3546e561ad89SHong Zhang 35479566063dSJacob Faibussowitsch PetscCall(MatCreate(comm, outmat)); 35489566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 35499566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 35509566063dSJacob Faibussowitsch PetscCall(MatSetType(*outmat, MATBAIJ)); 35519566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 35529566063dSJacob Faibussowitsch PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3553d0609cedSBarry Smith MatPreallocateEnd(dnz, onz); 35549566063dSJacob Faibussowitsch PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3555e561ad89SHong Zhang } 3556e561ad89SHong Zhang 3557bd153df0SHong Zhang /* numeric phase */ 35589566063dSJacob Faibussowitsch PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 35599566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3560e561ad89SHong Zhang 3561e561ad89SHong Zhang for (i = 0; i < m; i++) { 35629566063dSJacob Faibussowitsch PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3563e561ad89SHong Zhang Ii = i + rstart; 35649566063dSJacob Faibussowitsch PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 35659566063dSJacob Faibussowitsch PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3566e561ad89SHong Zhang } 35679566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 35689566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3569e561ad89SHong Zhang PetscFunctionReturn(0); 3570e561ad89SHong Zhang } 3571