1 /* 2 Support for the parallel SELL matrix vector multiply 3 */ 4 #include <../src/mat/impls/sell/mpi/mpisell.h> 5 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 6 7 /* 8 Takes the local part of an already assembled MPISELL matrix 9 and disassembles it. This is to allow new nonzeros into the matrix 10 that require more communication in the matrix vector multiply. 11 Thus certain data-structures must be rebuilt. 12 13 Kind of slow! But that's what application programmers get when 14 they are sloppy. 15 */ 16 PetscErrorCode MatDisAssemble_MPISELL(Mat A) { 17 Mat_MPISELL *sell = (Mat_MPISELL *)A->data; 18 Mat B = sell->B, Bnew; 19 Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data; 20 PetscInt i, j, totalslices, N = A->cmap->N, row; 21 PetscBool isnonzero; 22 23 PetscFunctionBegin; 24 /* free stuff related to matrix-vec multiply */ 25 PetscCall(VecDestroy(&sell->lvec)); 26 PetscCall(VecScatterDestroy(&sell->Mvctx)); 27 if (sell->colmap) { 28 #if defined(PETSC_USE_CTABLE) 29 PetscCall(PetscTableDestroy(&sell->colmap)); 30 #else 31 PetscCall(PetscFree(sell->colmap)); 32 #endif 33 } 34 35 /* make sure that B is assembled so we can access its values */ 36 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 37 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 38 39 /* invent new B and copy stuff over */ 40 PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 41 PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N)); 42 PetscCall(MatSetBlockSizesFromMats(Bnew, A, A)); 43 PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 44 PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen)); 45 if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */ 46 ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew; 47 } 48 49 /* 50 Ensure that B's nonzerostate is monotonically increasing. 51 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 52 */ 53 Bnew->nonzerostate = B->nonzerostate; 54 55 totalslices = B->rmap->n / 8 + ((B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */ 56 for (i = 0; i < totalslices; i++) { /* loop over slices */ 57 for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) { 58 isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / 8 < Bsell->rlen[8 * i + row]); 59 if (isnonzero) PetscCall(MatSetValue(Bnew, 8 * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode)); 60 } 61 } 62 63 PetscCall(PetscFree(sell->garray)); 64 PetscCall(MatDestroy(&B)); 65 66 sell->B = Bnew; 67 A->was_assembled = PETSC_FALSE; 68 A->assembled = PETSC_FALSE; 69 PetscFunctionReturn(0); 70 } 71 72 PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat) { 73 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 74 Mat_SeqSELL *B = (Mat_SeqSELL *)(sell->B->data); 75 PetscInt i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices; 76 IS from, to; 77 Vec gvec; 78 PetscBool isnonzero; 79 #if defined(PETSC_USE_CTABLE) 80 PetscTable gid1_lid1; 81 PetscTablePosition tpos; 82 PetscInt gid, lid; 83 #else 84 PetscInt N = mat->cmap->N, *indices; 85 #endif 86 87 PetscFunctionBegin; 88 totalslices = sell->B->rmap->n / 8 + ((sell->B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */ 89 90 /* ec counts the number of columns that contain nonzeros */ 91 #if defined(PETSC_USE_CTABLE) 92 /* use a table */ 93 PetscCall(PetscTableCreate(sell->B->rmap->n, mat->cmap->N + 1, &gid1_lid1)); 94 for (i = 0; i < totalslices; i++) { /* loop over slices */ 95 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 96 isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 97 if (isnonzero) { /* check the mask bit */ 98 PetscInt data, gid1 = bcolidx[j] + 1; 99 PetscCall(PetscTableFind(gid1_lid1, gid1, &data)); 100 if (!data) { 101 /* one based table */ 102 PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 103 } 104 } 105 } 106 } 107 108 /* form array of columns we need */ 109 PetscCall(PetscMalloc1(ec, &garray)); 110 PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos)); 111 while (tpos) { 112 PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid)); 113 gid--; 114 lid--; 115 garray[lid] = gid; 116 } 117 PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */ 118 PetscCall(PetscTableRemoveAll(gid1_lid1)); 119 for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 120 121 /* compact out the extra columns in B */ 122 for (i = 0; i < totalslices; i++) { /* loop over slices */ 123 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 124 isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 125 if (isnonzero) { 126 PetscInt gid1 = bcolidx[j] + 1; 127 PetscCall(PetscTableFind(gid1_lid1, gid1, &lid)); 128 lid--; 129 bcolidx[j] = lid; 130 } 131 } 132 } 133 PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 134 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 135 PetscCall(PetscTableDestroy(&gid1_lid1)); 136 #else 137 /* Make an array as long as the number of columns */ 138 PetscCall(PetscCalloc1(N, &indices)); 139 /* mark those columns that are in sell->B */ 140 for (i = 0; i < totalslices; i++) { /* loop over slices */ 141 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 142 isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 143 if (isnonzero) { 144 if (!indices[bcolidx[j]]) ec++; 145 indices[bcolidx[j]] = 1; 146 } 147 } 148 } 149 150 /* form array of columns we need */ 151 PetscCall(PetscMalloc1(ec, &garray)); 152 ec = 0; 153 for (i = 0; i < N; i++) { 154 if (indices[i]) garray[ec++] = i; 155 } 156 157 /* make indices now point into garray */ 158 for (i = 0; i < ec; i++) indices[garray[i]] = i; 159 160 /* compact out the extra columns in B */ 161 for (i = 0; i < totalslices; i++) { /* loop over slices */ 162 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 163 isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 164 if (isnonzero) bcolidx[j] = indices[bcolidx[j]]; 165 } 166 } 167 PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 168 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 169 PetscCall(PetscFree(indices)); 170 #endif 171 /* create local vector that is used to scatter into */ 172 PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec)); 173 /* create two temporary Index sets for build scatter gather */ 174 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from)); 175 PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to)); 176 177 /* create temporary global vector to generate scatter context */ 178 /* This does not allocate the array's memory so is efficient */ 179 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 180 181 /* generate the scatter context */ 182 PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx)); 183 PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 184 185 sell->garray = garray; 186 187 PetscCall(ISDestroy(&from)); 188 PetscCall(ISDestroy(&to)); 189 PetscCall(VecDestroy(&gvec)); 190 PetscFunctionReturn(0); 191 } 192 193 /* ugly stuff added for Glenn someday we should fix this up */ 194 static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 195 static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 196 197 PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale) { 198 Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /*access private part of matrix */ 199 PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices; 200 PetscInt *r_rmapd, *r_rmapo; 201 202 PetscFunctionBegin; 203 PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 204 PetscCall(MatGetSize(ina->A, NULL, &n)); 205 PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 206 nt = 0; 207 for (i = 0; i < inA->rmap->mapping->n; i++) { 208 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 209 nt++; 210 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 211 } 212 } 213 PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n); 214 PetscCall(PetscMalloc1(n + 1, &auglyrmapd)); 215 for (i = 0; i < inA->rmap->mapping->n; i++) { 216 if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; 217 } 218 PetscCall(PetscFree(r_rmapd)); 219 PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd)); 220 PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices)); 221 for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1; 222 no = inA->rmap->mapping->n - nt; 223 PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 224 nt = 0; 225 for (i = 0; i < inA->rmap->mapping->n; i++) { 226 if (lindices[inA->rmap->mapping->indices[i]]) { 227 nt++; 228 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 229 } 230 } 231 PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 232 PetscCall(PetscFree(lindices)); 233 PetscCall(PetscMalloc1(nt + 1, &auglyrmapo)); 234 for (i = 0; i < inA->rmap->mapping->n; i++) { 235 if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i; 236 } 237 PetscCall(PetscFree(r_rmapo)); 238 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo)); 239 PetscFunctionReturn(0); 240 } 241 242 PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale) { 243 Mat_MPISELL *a = (Mat_MPISELL *)A->data; /*access private part of matrix */ 244 PetscInt n, i; 245 PetscScalar *d, *o; 246 const PetscScalar *s; 247 248 PetscFunctionBegin; 249 if (!auglyrmapd) PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale)); 250 PetscCall(VecGetArrayRead(scale, &s)); 251 PetscCall(VecGetLocalSize(auglydd, &n)); 252 PetscCall(VecGetArray(auglydd, &d)); 253 for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 254 PetscCall(VecRestoreArray(auglydd, &d)); 255 /* column scale "diagonal" portion of local matrix */ 256 PetscCall(MatDiagonalScale(a->A, NULL, auglydd)); 257 PetscCall(VecGetLocalSize(auglyoo, &n)); 258 PetscCall(VecGetArray(auglyoo, &o)); 259 for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 260 PetscCall(VecRestoreArrayRead(scale, &s)); 261 PetscCall(VecRestoreArray(auglyoo, &o)); 262 /* column scale "off-diagonal" portion of local matrix */ 263 PetscCall(MatDiagonalScale(a->B, NULL, auglyoo)); 264 PetscFunctionReturn(0); 265 } 266