/*
   Support for the parallel SELL matrix-vector multiply
*/
#include <../src/mat/impls/sell/mpi/mpisell.h>
#include <petsc/private/isimpl.h> /* needed because this file accesses the ISLocalToGlobalMapping data structure directly */

/*
  Takes the local part of an already assembled MPISELL matrix
  and disassembles it. This is to allow new nonzeros into the matrix
  that require more communication in the matrix-vector multiply.
  Thus certain data structures must be rebuilt.

  Kind of slow! But that's what application programmers get when
  they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISELL(Mat A) {
  Mat_MPISELL *sell  = (Mat_MPISELL *)A->data;
  Mat          B     = sell->B, Bnew;
  Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data;
  PetscInt     i, j, totalslices, N = A->cmap->N, ec, row;
  PetscBool    isnonzero;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  PetscCall(VecGetSize(sell->lvec, &ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&sell->lvec));
  PetscCall(VecScatterDestroy(&sell->Mvctx));
  if (sell->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&sell->colmap));
#else
    PetscCall(PetscFree(sell->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A, -sell->B->cmap->n * sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N));
  PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen));
  if (Bsell->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */
    ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  totalslices = B->rmap->n / 8 + ((B->rmap->n & 0x07) ? 1 : 0); /* ceil(n/8) */
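  /*
     Layout reminder (illustrative only, derived from the loop below): slice i packs the 8 rows
     8*i .. 8*i+7 and stores their entries interleaved, so the slot at offset k = j - sliidx[i]
     belongs to row 8*i + (k & 0x07) and sits at within-row position k / 8. For example, if row 8*i
     has rlen[8*i] = 3 actual nonzeros, they occupy offsets 0, 8 and 16 of the slice; any slot whose
     within-row position is at or beyond rlen[] is padding. The isnonzero test below skips that
     padding, and each genuine entry is reinserted into Bnew with its global column
     sell->garray[colidx[j]].
  */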
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = ((row + 1) & 0x07)) {
      isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / 8 < Bsell->rlen[8 * i + row]);
      if (isnonzero) { PetscCall(MatSetValue(Bnew, 8 * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode)); }
    }
  }

  PetscCall(PetscFree(sell->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew));

  sell->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat) {
  Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
  Mat_SeqSELL *B    = (Mat_SeqSELL *)(sell->B->data);
  PetscInt     i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices;
  IS           from, to;
  Vec          gvec;
  PetscBool    isnonzero;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt N = mat->cmap->N, *indices;
#endif

  PetscFunctionBegin;
  totalslices = sell->B->rmap->n / 8 + ((sell->B->rmap->n & 0x07) ? 1 : 0); /* ceil(n/8) */

  /* ec counts the number of columns that contain nonzeros */
#if defined(PETSC_USE_CTABLE)
  /* use a table */
  PetscCall(PetscTableCreate(sell->B->rmap->n, mat->cmap->N + 1, &gid1_lid1));
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
      if (isnonzero) { /* skip the padding entries of the slice */
        PetscInt data, gid1 = bcolidx[j] + 1;
        PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
        if (!data) {
          /* one-based table */
          PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
        }
      }
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray)); /* sort, then rebuild the table with the sorted local numbering */
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i = 0; i < ec; i++) { PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES)); }

  /* compact out the extra columns in B */
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
      if (isnonzero) {
        PetscInt gid1 = bcolidx[j] + 1;
        PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
        lid--;
        bcolidx[j] = lid;
      }
    }
  }
  PetscCall(PetscLayoutDestroy(&sell->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
  PetscCall(PetscTableDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  PetscCall(PetscCalloc1(N, &indices));
  /* mark those columns that are in sell->B */
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
      if (isnonzero) {
        if (!indices[bcolidx[j]]) ec++;
        indices[bcolidx[j]] = 1;
      }
    }
  }
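  /*
     Illustrative example (not executed): suppose the marking loop above found that sell->B
     references global columns {3, 17, 42}. Then ec = 3, garray becomes {3, 17, 42}, and after
     "make indices now point into garray" we have indices[3] = 0, indices[17] = 1, indices[42] = 2.
     The compaction loop then rewrites every stored column index in place (17 -> 1, and so on), so B
     ends up an n x ec matrix whose local column j corresponds to global column garray[j]. The
     PETSC_USE_CTABLE branch above does the same thing with a hash table instead of a length-N array.
  */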

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) { indices[garray[i]] = i; }

  /* compact out the extra columns in B */
  for (i = 0; i < totalslices; i++) { /* loop over slices */
    for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) {
      isnonzero = (PetscBool)((j - B->sliidx[i]) / 8 < B->rlen[(i << 3) + (j & 0x07)]);
      if (isnonzero) bcolidx[j] = indices[bcolidx[j]];
    }
  }
  PetscCall(PetscLayoutDestroy(&sell->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap));
  PetscCall(PetscFree(indices));
#endif
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec));
  /* create two temporary index sets for building the scatter */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory, so it is efficient */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  /* generate the scatter context */
  PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx));
  PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sell->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)sell->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));

  sell->garray = garray;

  PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt)));
  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}
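
/*
   How the pieces built above fit together (an outline only; see the MatMult implementation for
   MPISELL for the real code): garray lists, in increasing order, the global columns touched by the
   off-diagonal block; lvec is a sequential vector of that length; and Mvctx scatters exactly those
   entries of the distributed input vector into lvec. A parallel multiply then looks roughly like

     VecScatterBegin(sell->Mvctx, xx, sell->lvec, INSERT_VALUES, SCATTER_FORWARD);
     (*sell->A->ops->mult)(sell->A, xx, yy);                  // diagonal block times owned part of xx
     VecScatterEnd(sell->Mvctx, xx, sell->lvec, INSERT_VALUES, SCATTER_FORWARD);
     (*sell->B->ops->multadd)(sell->B, sell->lvec, yy, yy);   // off-diagonal block times ghost values

   which is why B's column indices had to be compacted to the range 0..ec-1.
*/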
/* ugly stuff added for Glenn; someday we should fix this up */
static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale) {
  Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /* access private part of the matrix */
  PetscInt     i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
  PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) { auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));
  PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
  for (i = 0; i < ina->B->cmap->n; i++) { lindices[garray[i]] = i + 1; }
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) { auglyrmapo[(r_rmapo[i] - 1)] = i; }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale) {
  Mat_MPISELL       *a = (Mat_MPISELL *)A->data; /* access private part of the matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) { PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale)); }
  PetscCall(VecGetArrayRead(scale, &s));
  PetscCall(VecGetLocalSize(auglydd, &n));
  PetscCall(VecGetArray(auglydd, &d));
  for (i = 0; i < n; i++) { d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
  PetscCall(VecRestoreArray(auglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, auglydd));
  PetscCall(VecGetLocalSize(auglyoo, &n));
  PetscCall(VecGetArray(auglyoo, &o));
  for (i = 0; i < n; i++) { o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(auglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
  PetscFunctionReturn(0);
}
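
/*
   Usage sketch (illustrative only, not part of this file): these two routines assume inA carries a
   local-to-global mapping (e.g. set with MatSetLocalToGlobalMapping()) and that scale is given in
   that local ordering; they are normally reached through the MatDiagonalScaleLocal() interface.
   auglyrmapd[] and auglyrmapo[] translate the local ordering into, respectively, the columns of the
   "diagonal" block a->A and the columns of the "off-diagonal" block a->B (which are ordered as in
   garray), so the net effect is a column scaling of the whole local row block expressed in local
   numbering. Note that auglyrmapd, auglyrmapo, auglydd and auglyoo are file-scope statics set up
   only once, so this mechanism cannot be used with more than one matrix at a time.
*/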