1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(baij->B->data); 11 PetscInt i, j, *aj = B->j, ec = 0, *garray; 12 PetscInt bs = mat->rmap->bs, *stmp; 13 IS from, to; 14 Vec gvec; 15 #if defined(PETSC_USE_CTABLE) 16 PetscTable gid1_lid1; 17 PetscTablePosition tpos; 18 PetscInt gid, lid; 19 #else 20 PetscInt Nbs = baij->Nbs, *indices; 21 #endif 22 23 PetscFunctionBegin; 24 #if defined(PETSC_USE_CTABLE) 25 /* use a table - Mark Adams */ 26 PetscCall(PetscTableCreate(B->mbs, baij->Nbs + 1, &gid1_lid1)); 27 for (i = 0; i < B->mbs; i++) { 28 for (j = 0; j < B->ilen[i]; j++) { 29 PetscInt data, gid1 = aj[B->i[i] + j] + 1; 30 PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 31 if (!data) { 32 /* one based table */ 33 PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 34 } 35 } 36 } 37 /* form array of columns we need */ 38 PetscCall(PetscMalloc1(ec, &garray)); 39 PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 40 while (tpos) { 41 PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 42 gid--; 43 lid--; 44 garray[lid] = gid; 45 } 46 PetscCall(PetscSortInt(ec, garray)); 47 PetscCall(PetscTableRemoveAll(gid1_lid1)); 48 for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 49 /* compact out the extra columns in B */ 50 for (i = 0; i < B->mbs; i++) { 51 for (j = 0; j < B->ilen[i]; j++) { 52 PetscInt gid1 = aj[B->i[i] + j] + 1; 53 PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 54 lid--; 55 aj[B->i[i] + j] = lid; 56 } 57 } 58 B->nbs = ec; 59 PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 60 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 61 PetscCall(PetscTableDestroy(&gid1_lid1)); 62 #else 63 /* Make an array as long as the number of columns */ 64 /* mark those columns that are in baij->B */ 65 PetscCall(PetscCalloc1(Nbs, &indices)); 66 for (i = 0; i < B->mbs; i++) { 67 for (j = 0; j < B->ilen[i]; j++) { 68 if (!indices[aj[B->i[i] + j]]) ec++; 69 indices[aj[B->i[i] + j]] = 1; 70 } 71 } 72 73 /* form array of columns we need */ 74 PetscCall(PetscMalloc1(ec, &garray)); 75 ec = 0; 76 for (i = 0; i < Nbs; i++) { 77 if (indices[i]) garray[ec++] = i; 78 } 79 80 /* make indices now point into garray */ 81 for (i = 0; i < ec; i++) indices[garray[i]] = i; 82 83 /* compact out the extra columns in B */ 84 for (i = 0; i < B->mbs; i++) { 85 for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 86 } 87 B->nbs = ec; 88 PetscCall(PetscLayoutDestroy(&baij->B->cmap)); 89 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap)); 90 PetscCall(PetscFree(indices)); 91 #endif 92 93 /* create local vector that is used to scatter into */ 94 PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec)); 95 96 /* create two temporary index sets for building scatter-gather */ 97 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from)); 98 99 PetscCall(PetscMalloc1(ec, &stmp)); 100 for (i = 0; i < ec; i++) stmp[i] = i; 101 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to)); 102 103 /* create temporary global vector to generate scatter context */ 104 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 105 106 PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx)); 107 PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 108 109 baij->garray = garray; 110 111 PetscCall(ISDestroy(&from)); 112 PetscCall(ISDestroy(&to)); 113 PetscCall(VecDestroy(&gvec)); 114 PetscFunctionReturn(0); 115 } 116 117 /* 118 Takes the local part of an already assembled MPIBAIJ matrix 119 and disassembles it. This is to allow new nonzeros into the matrix 120 that require more communication in the matrix vector multiply. 121 Thus certain data-structures must be rebuilt. 122 123 Kind of slow! But that's what application programmers get when 124 they are sloppy. 125 */ 126 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) { 127 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 128 Mat B = baij->B, Bnew; 129 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data; 130 PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray; 131 PetscInt bs2 = baij->bs2, *nz, m = A->rmap->n; 132 MatScalar *a = Bbaij->a; 133 MatScalar *atmp; 134 135 PetscFunctionBegin; 136 /* free stuff related to matrix-vec multiply */ 137 PetscCall(VecDestroy(&baij->lvec)); 138 baij->lvec = NULL; 139 PetscCall(VecScatterDestroy(&baij->Mvctx)); 140 baij->Mvctx = NULL; 141 if (baij->colmap) { 142 #if defined(PETSC_USE_CTABLE) 143 PetscCall(PetscTableDestroy(&baij->colmap)); 144 #else 145 PetscCall(PetscFree(baij->colmap)); 146 #endif 147 } 148 149 /* make sure that B is assembled so we can access its values */ 150 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 151 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 152 153 /* invent new B and copy stuff over */ 154 PetscCall(PetscMalloc1(mbs, &nz)); 155 for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i]; 156 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew)); 157 PetscCall(MatSetSizes(Bnew, m, n, m, n)); 158 PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 159 PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz)); 160 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 161 ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew; 162 } 163 164 PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE)); 165 /* 166 Ensure that B's nonzerostate is monotonically increasing. 167 Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 168 */ 169 Bnew->nonzerostate = B->nonzerostate; 170 171 for (i = 0; i < mbs; i++) { 172 for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) { 173 col = garray[Bbaij->j[j]]; 174 atmp = a + j * bs2; 175 PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode)); 176 } 177 } 178 PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE)); 179 180 PetscCall(PetscFree(nz)); 181 PetscCall(PetscFree(baij->garray)); 182 PetscCall(MatDestroy(&B)); 183 184 baij->B = Bnew; 185 A->was_assembled = PETSC_FALSE; 186 A->assembled = PETSC_FALSE; 187 PetscFunctionReturn(0); 188 } 189 190 /* ugly stuff added for Glenn someday we should fix this up */ 191 192 static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 193 static Vec uglydd = NULL, uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 194 195 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) { 196 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /*access private part of matrix */ 197 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)ina->B->data; 198 PetscInt bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices; 199 PetscInt *r_rmapd, *r_rmapo; 200 201 PetscFunctionBegin; 202 PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 203 PetscCall(MatGetSize(ina->A, NULL, &n)); 204 PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 205 nt = 0; 206 for (i = 0; i < inA->rmap->mapping->n; i++) { 207 if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) { 208 nt++; 209 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 210 } 211 } 212 PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n); 213 PetscCall(PetscMalloc1(n + 1, &uglyrmapd)); 214 for (i = 0; i < inA->rmap->mapping->n; i++) { 215 if (r_rmapd[i]) { 216 for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j; 217 } 218 } 219 PetscCall(PetscFree(r_rmapd)); 220 PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd)); 221 222 PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices)); 223 for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1; 224 no = inA->rmap->mapping->n - nt; 225 PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 226 nt = 0; 227 for (i = 0; i < inA->rmap->mapping->n; i++) { 228 if (lindices[inA->rmap->mapping->indices[i]]) { 229 nt++; 230 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 231 } 232 } 233 PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 234 PetscCall(PetscFree(lindices)); 235 PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo)); 236 for (i = 0; i < inA->rmap->mapping->n; i++) { 237 if (r_rmapo[i]) { 238 for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j; 239 } 240 } 241 PetscCall(PetscFree(r_rmapo)); 242 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo)); 243 PetscFunctionReturn(0); 244 } 245 246 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale) { 247 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 248 249 PetscFunctionBegin; 250 PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale)); 251 PetscFunctionReturn(0); 252 } 253 254 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale) { 255 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; /*access private part of matrix */ 256 PetscInt n, i; 257 PetscScalar *d, *o; 258 const PetscScalar *s; 259 260 PetscFunctionBegin; 261 if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale)); 262 263 PetscCall(VecGetArrayRead(scale, &s)); 264 265 PetscCall(VecGetLocalSize(uglydd, &n)); 266 PetscCall(VecGetArray(uglydd, &d)); 267 for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 268 PetscCall(VecRestoreArray(uglydd, &d)); 269 /* column scale "diagonal" portion of local matrix */ 270 PetscCall(MatDiagonalScale(a->A, NULL, uglydd)); 271 272 PetscCall(VecGetLocalSize(uglyoo, &n)); 273 PetscCall(VecGetArray(uglyoo, &o)); 274 for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 275 PetscCall(VecRestoreArrayRead(scale, &s)); 276 PetscCall(VecRestoreArray(uglyoo, &o)); 277 /* column scale "off-diagonal" portion of local matrix */ 278 PetscCall(MatDiagonalScale(a->B, NULL, uglyoo)); 279 PetscFunctionReturn(0); 280 } 281