/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
  PetscInt     i, j, *aj = B->j, ec = 0, *garray;
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscTableCreate(B->mbs, baij->Nbs + 1, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
      if (!data) {
        /* one based table */
        PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscTableDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));

  PetscCall(PetscMalloc1(ec, &stmp));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));
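  /* at this point "from" holds the global block indices of the needed ghost columns (garray) and "to" the contiguous local positions 0..ec-1 they scatter into */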

  /* create temporary global vector to generate scatter context */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  baij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}

/*
   Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij  = (Mat_MPIBAIJ *)A->data;
  Mat          B     = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz, m = A->rmap->n;
  MatScalar   *a = Bbaij->a;
  MatScalar   *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  baij->lvec = NULL;
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  baij->Mvctx = NULL;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */
    ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;
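  /* copy the stored blocks into Bnew, translating each compacted local block column back to its global block column through garray */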

  for (i = 0; i < mbs; i++) {
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j * bs2;
      PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
    }
  }
  PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));

  PetscCall(PetscFree(nz));
  PetscCall(PetscFree(baij->garray));
  PetscCall(MatDestroy(&B));

  baij->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn someday we should fix this up */

static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /* access private part of matrix */
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
  PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
  PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));

  PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
  for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal() */

  PetscFunctionBegin;
  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  PetscFunctionReturn(0);
}
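
/*
   Applies "scale", given in the local ordering of inA's ISLocalToGlobalMapping, as a column scaling of
   both pieces of the local matrix: entries for owned columns scale a->A, the rest scale a->B
*/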
PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(uglydd, &n));
  PetscCall(VecGetArray(uglydd, &d));
  for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
  PetscCall(VecRestoreArray(uglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, uglydd));

  PetscCall(VecGetLocalSize(uglyoo, &n));
  PetscCall(VecGetArray(uglyoo, &o));
  for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(uglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
  PetscFunctionReturn(0);
}
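
/*
   A minimal usage sketch (hypothetical caller, not part of this file), assuming A is an assembled
   MPIBAIJ matrix carrying an ISLocalToGlobalMapping and nlocal/bs are the number of mapped local
   block rows and the block size:

     Vec scale;
     PetscCall(VecCreateSeq(PETSC_COMM_SELF, nlocal * bs, &scale));  // one entry per local point (assumed size)
     // ... fill scale with the per-point factors ...
     PetscCall(MatMPIBAIJDiagonalScaleLocal(A, scale));              // column scales both local pieces
     PetscCall(VecDestroy(&scale));
*/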