1 /* 2 Support for the parallel SELL matrix vector multiply 3 */ 4 #include <../src/mat/impls/sell/mpi/mpisell.h> 5 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 6 7 /* 8 Takes the local part of an already assembled MPISELL matrix 9 and disassembles it. This is to allow new nonzeros into the matrix 10 that require more communication in the matrix vector multiply. 11 Thus certain data-structures must be rebuilt. 12 13 Kind of slow! But that's what application programmers get when 14 they are sloppy. 15 */ 16 PetscErrorCode MatDisAssemble_MPISELL(Mat A) 17 { 18 Mat_MPISELL *sell = (Mat_MPISELL *)A->data; 19 Mat B = sell->B, Bnew; 20 Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data; 21 PetscInt i, j, totalslices, N = A->cmap->N, row; 22 PetscBool isnonzero; 23 24 PetscFunctionBegin; 25 /* free stuff related to matrix-vec multiply */ 26 PetscCall(VecDestroy(&sell->lvec)); 27 PetscCall(VecScatterDestroy(&sell->Mvctx)); 28 if (sell->colmap) { 29 #if defined(PETSC_USE_CTABLE) 30 PetscCall(PetscHMapIDestroy(&sell->colmap)); 31 #else 32 PetscCall(PetscFree(sell->colmap)); 33 #endif 34 } 35 36 /* make sure that B is assembled so we can access its values */ 37 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 38 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 39 40 /* invent new B and copy stuff over */ 41 PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 42 PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N)); 43 PetscCall(MatSetBlockSizesFromMats(Bnew, A, A)); 44 PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 45 PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen)); 46 if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */ 47 ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew; 48 } 49 50 /* 51 Ensure that B's nonzerostate is monotonically increasing. 52 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 53 */ 54 Bnew->nonzerostate = B->nonzerostate; 55 56 totalslices = B->rmap->n / 8 + ((B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */ 57 for (i = 0; i < totalslices; i++) { /* loop over slices */ 58 for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) { 59 isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / 8 < Bsell->rlen[8 * i + row]); 60 if (isnonzero) PetscCall(MatSetValue(Bnew, 8 * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode)); 61 } 62 } 63 64 PetscCall(PetscFree(sell->garray)); 65 PetscCall(MatDestroy(&B)); 66 67 sell->B = Bnew; 68 A->was_assembled = PETSC_FALSE; 69 A->assembled = PETSC_FALSE; 70 PetscFunctionReturn(0); 71 } 72 73 PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat) 74 { 75 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 76 Mat_SeqSELL *B = (Mat_SeqSELL *)(sell->B->data); 77 PetscInt i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices; 78 IS from, to; 79 Vec gvec; 80 PetscBool isnonzero; 81 #if defined(PETSC_USE_CTABLE) 82 PetscHMapI gid1_lid1 = NULL; 83 PetscHashIter tpos; 84 PetscInt gid, lid; 85 #else 86 PetscInt N = mat->cmap->N, *indices; 87 #endif 88 89 PetscFunctionBegin; 90 totalslices = sell->B->rmap->n / 8 + ((sell->B->rmap->n & 0x07) ? 1 : 0); /* floor(n/8) */ 91 92 /* ec counts the number of columns that contain nonzeros */ 93 #if defined(PETSC_USE_CTABLE) 94 /* use a table */ 95 PetscCall(PetscHMapICreateWithSize(sell->B->rmap->n, &gid1_lid1)); 96 for (i = 0; i < totalslices; i++) { /* loop over slices */ 97 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 98 isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 99 if (isnonzero) { /* check the mask bit */ 100 PetscInt data, gid1 = bcolidx[j] + 1; 101 PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data)); 102 if (!data) { 103 /* one based table */ 104 PetscCall(PetscHMapISetWithMode(gid1_lid1, gid1, ++ec, INSERT_VALUES)); 105 } 106 } 107 } 108 } 109 110 /* form array of columns we need */ 111 PetscCall(PetscMalloc1(ec, &garray)); 112 PetscHashIterBegin(gid1_lid1, tpos); 113 while (!PetscHashIterAtEnd(gid1_lid1, tpos)) { 114 PetscHashIterGetKey(gid1_lid1, tpos, gid); 115 PetscHashIterGetVal(gid1_lid1, tpos, lid); 116 PetscHashIterNext(gid1_lid1, tpos); 117 gid--; 118 lid--; 119 garray[lid] = gid; 120 } 121 PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */ 122 PetscCall(PetscHMapIClear(gid1_lid1)); 123 for (i = 0; i < ec; i++) PetscCall(PetscHMapISetWithMode(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); 124 125 /* compact out the extra columns in B */ 126 for (i = 0; i < totalslices; i++) { /* loop over slices */ 127 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 128 isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 129 if (isnonzero) { 130 PetscInt gid1 = bcolidx[j] + 1; 131 PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid)); 132 lid--; 133 bcolidx[j] = lid; 134 } 135 } 136 } 137 PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 138 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 139 PetscCall(PetscHMapIDestroy(&gid1_lid1)); 140 #else 141 /* Make an array as long as the number of columns */ 142 PetscCall(PetscCalloc1(N, &indices)); 143 /* mark those columns that are in sell->B */ 144 for (i = 0; i < totalslices; i++) { /* loop over slices */ 145 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 146 isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 147 if (isnonzero) { 148 if (!indices[bcolidx[j]]) ec++; 149 indices[bcolidx[j]] = 1; 150 } 151 } 152 } 153 154 /* form array of columns we need */ 155 PetscCall(PetscMalloc1(ec, &garray)); 156 ec = 0; 157 for (i = 0; i < N; i++) { 158 if (indices[i]) garray[ec++] = i; 159 } 160 161 /* make indices now point into garray */ 162 for (i = 0; i < ec; i++) indices[garray[i]] = i; 163 164 /* compact out the extra columns in B */ 165 for (i = 0; i < totalslices; i++) { /* loop over slices */ 166 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 167 isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]); 168 if (isnonzero) bcolidx[j] = indices[bcolidx[j]]; 169 } 170 } 171 PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 172 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 173 PetscCall(PetscFree(indices)); 174 #endif 175 /* create local vector that is used to scatter into */ 176 PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec)); 177 /* create two temporary Index sets for build scatter gather */ 178 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from)); 179 PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to)); 180 181 /* create temporary global vector to generate scatter context */ 182 /* This does not allocate the array's memory so is efficient */ 183 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 184 185 /* generate the scatter context */ 186 PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx)); 187 PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 188 189 sell->garray = garray; 190 191 PetscCall(ISDestroy(&from)); 192 PetscCall(ISDestroy(&to)); 193 PetscCall(VecDestroy(&gvec)); 194 PetscFunctionReturn(0); 195 } 196 197 /* ugly stuff added for Glenn someday we should fix this up */ 198 static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 199 static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 200 201 PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale) 202 { 203 Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /*access private part of matrix */ 204 PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices; 205 PetscInt *r_rmapd, *r_rmapo; 206 207 PetscFunctionBegin; 208 PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 209 PetscCall(MatGetSize(ina->A, NULL, &n)); 210 PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd)); 211 nt = 0; 212 for (i = 0; i < inA->rmap->mapping->n; i++) { 213 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 214 nt++; 215 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 216 } 217 } 218 PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n); 219 PetscCall(PetscMalloc1(n + 1, &auglyrmapd)); 220 for (i = 0; i < inA->rmap->mapping->n; i++) { 221 if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; 222 } 223 PetscCall(PetscFree(r_rmapd)); 224 PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd)); 225 PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices)); 226 for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1; 227 no = inA->rmap->mapping->n - nt; 228 PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo)); 229 nt = 0; 230 for (i = 0; i < inA->rmap->mapping->n; i++) { 231 if (lindices[inA->rmap->mapping->indices[i]]) { 232 nt++; 233 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 234 } 235 } 236 PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 237 PetscCall(PetscFree(lindices)); 238 PetscCall(PetscMalloc1(nt + 1, &auglyrmapo)); 239 for (i = 0; i < inA->rmap->mapping->n; i++) { 240 if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i; 241 } 242 PetscCall(PetscFree(r_rmapo)); 243 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo)); 244 PetscFunctionReturn(0); 245 } 246 247 PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale) 248 { 249 Mat_MPISELL *a = (Mat_MPISELL *)A->data; /*access private part of matrix */ 250 PetscInt n, i; 251 PetscScalar *d, *o; 252 const PetscScalar *s; 253 254 PetscFunctionBegin; 255 if (!auglyrmapd) PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale)); 256 PetscCall(VecGetArrayRead(scale, &s)); 257 PetscCall(VecGetLocalSize(auglydd, &n)); 258 PetscCall(VecGetArray(auglydd, &d)); 259 for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ } 260 PetscCall(VecRestoreArray(auglydd, &d)); 261 /* column scale "diagonal" portion of local matrix */ 262 PetscCall(MatDiagonalScale(a->A, NULL, auglydd)); 263 PetscCall(VecGetLocalSize(auglyoo, &n)); 264 PetscCall(VecGetArray(auglyoo, &o)); 265 for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ } 266 PetscCall(VecRestoreArrayRead(scale, &s)); 267 PetscCall(VecRestoreArray(auglyoo, &o)); 268 /* column scale "off-diagonal" portion of local matrix */ 269 PetscCall(MatDiagonalScale(a->B, NULL, auglyoo)); 270 PetscFunctionReturn(0); 271 } 272