/*
   Support for the parallel SELL matrix vector multiply
*/
#include <../src/mat/impls/sell/mpi/mpisell.h>
#include <petsc/private/isimpl.h> /* needed because we access the ISLocalToGlobalMapping data structure directly */

/*
   Takes the local part of an already assembled MPISELL matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISELL(Mat A)
{
  Mat_MPISELL *sell=(Mat_MPISELL*)A->data;
  Mat         B=sell->B,Bnew;
  Mat_SeqSELL *Bsell=(Mat_SeqSELL*)B->data;
  PetscInt    i,j,totalslices,N=A->cmap->N,ec,row;
  PetscBool   isnonzero;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecGetSize(sell->lvec,&ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&sell->lvec));
  PetscCall(VecScatterDestroy(&sell->Mvctx));
  if (sell->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&sell->colmap));
#else
    PetscCall(PetscFree(sell->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A,-sell->B->cmap->n*sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(MatCreate(PETSC_COMM_SELF,&Bnew));
  PetscCall(MatSetSizes(Bnew,B->rmap->n,N,B->rmap->n,N));
  PetscCall(MatSetBlockSizesFromMats(Bnew,A,A));
  PetscCall(MatSetType(Bnew,((PetscObject)B)->type_name));
  PetscCall(MatSeqSELLSetPreallocation(Bnew,0,Bsell->rlen));
  if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSELL*)Bnew->data)->nonew = Bsell->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* ceil(n/8) */
  for (i=0; i<totalslices; i++) { /* loop over slices */
    for (j=Bsell->sliidx[i],row=0; j<Bsell->sliidx[i+1]; j++,row=((row+1)&0x07)) {
      isnonzero = (PetscBool)((j-Bsell->sliidx[i])/8 < Bsell->rlen[8*i+row]);
      if (isnonzero) {
        PetscCall(MatSetValue(Bnew,8*i+row,sell->garray[Bsell->colidx[j]],Bsell->val[j],B->insertmode));
      }
    }
  }

  PetscCall(PetscFree(sell->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew));

  sell->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat)
{
  Mat_MPISELL *sell=(Mat_MPISELL*)mat->data;
  Mat_SeqSELL *B=(Mat_SeqSELL*)(sell->B->data);
  PetscInt    i,j,*bcolidx=B->colidx,ec=0,*garray,totalslices;
  IS          from,to;
  Vec         gvec;
  PetscBool   isnonzero;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
  totalslices = sell->B->rmap->n/8+((sell->B->rmap->n & 0x07)?1:0); /* ceil(n/8) */

  /* ec counts the number of columns that contain nonzeros */
#if defined(PETSC_USE_CTABLE)
  /* use a table */
  PetscCall(PetscTableCreate(sell->B->rmap->n,mat->cmap->N+1,&gid1_lid1));
  for (i=0; i<totalslices; i++) { /* loop over slices */
    for (j=B->sliidx[i]; j<B->sliidx[i+1]; j++) {
      isnonzero = (PetscBool)((j-B->sliidx[i])/8 < B->rlen[(i<<3)+(j&0x07)]);
      if (isnonzero) { /* check the mask bit */
        PetscInt data,gid1 = bcolidx[j] + 1;
        PetscCall(PetscTableFind(gid1_lid1,gid1,&data));
        if (!data) {
          /* one based table */
          PetscCall(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES));
        }
      }
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec,&garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1,&tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid));
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec,garray)); /* sort, and rebuild */
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i=0; i<ec; i++) {
    PetscCall(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES));
  }

  /* compact out the extra columns in B */
  for (i=0; i<totalslices; i++) { /* loop over slices */
    for (j=B->sliidx[i]; j<B->sliidx[i+1]; j++) {
      isnonzero = (PetscBool)((j-B->sliidx[i])/8 < B->rlen[(i<<3)+(j&0x07)]);
      if (isnonzero) {
        PetscInt gid1 = bcolidx[j] + 1;
        PetscCall(PetscTableFind(gid1_lid1,gid1,&lid));
        lid--;
        bcolidx[j] = lid;
      }
    }
  }
  PetscCall(PetscLayoutDestroy(&sell->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B),ec,ec,1,&sell->B->cmap));
  PetscCall(PetscTableDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  PetscCall(PetscCalloc1(N,&indices));
  /* mark those columns that are in sell->B */
  for (i=0; i<totalslices; i++) { /* loop over slices */
    for (j=B->sliidx[i]; j<B->sliidx[i+1]; j++) {
      isnonzero = (PetscBool)((j-B->sliidx[i])/8 < B->rlen[(i<<3)+(j&0x07)]);
      if (isnonzero) {
        if (!indices[bcolidx[j]]) ec++;
        indices[bcolidx[j]] = 1;
      }
    }
  }
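  /* at this point indices[] holds a 1 for every global column that appears in the off-diagonal
     part of the matrix, and ec is the number of distinct columns that were flagged */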
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec,&garray));
  ec = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<totalslices; i++) { /* loop over slices */
    for (j=B->sliidx[i]; j<B->sliidx[i+1]; j++) {
      isnonzero = (PetscBool)((j-B->sliidx[i])/8 < B->rlen[(i<<3)+(j&0x07)]);
      if (isnonzero) bcolidx[j] = indices[bcolidx[j]];
    }
  }
  PetscCall(PetscLayoutDestroy(&sell->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B),ec,ec,1,&sell->B->cmap));
  PetscCall(PetscFree(indices));
#endif
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,ec,&sell->lvec));
  /* create two temporary index sets used to build the scatter-gather */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ec,garray,PETSC_COPY_VALUES,&from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to));

  /* create temporary global vector to generate scatter context */
  /* this does not allocate the array's memory, so it is efficient */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec));

  /* generate the scatter context */
  PetscCall(VecScatterCreate(gvec,from,sell->lvec,to,&sell->Mvctx));
  PetscCall(VecScatterViewFromOptions(sell->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view"));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sell->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sell->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));

  sell->garray = garray;

  PetscCall(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt)));
  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */
static PetscInt *auglyrmapd = NULL,*auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd = NULL,auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPISELL *ina=(Mat_MPISELL*)inA->data; /* access private part of matrix */
  PetscInt    i,n,nt,cstart,cend,no,*garray=ina->garray,*lindices;
  PetscInt    *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA,&cstart,&cend));
  PetscCall(MatGetSize(ina->A,NULL,&n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd));
  nt = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheckFalse(nt != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT,nt,n);
  PetscCall(PetscMalloc1(n+1,&auglyrmapd));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,n,&auglydd));
  PetscCall(PetscCalloc1(inA->cmap->N+1,&lindices));
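  /* lindices[g] is (l+1) when global column g is the l-th ghost column of the off-diagonal part
     (i.e. garray[l] = g), and 0 when column g does not appear in ina->B */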
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo));
  nt = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheckFalse(nt > no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt+1,&auglyrmapo));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A,Vec scale)
{
  Mat_MPISELL       *a=(Mat_MPISELL*)A->data; /* access private part of matrix */
  PetscInt          n,i;
  PetscScalar       *d,*o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A,scale));
  }
  PetscCall(VecGetArrayRead(scale,&s));
  PetscCall(VecGetLocalSize(auglydd,&n));
  PetscCall(VecGetArray(auglydd,&d));
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  PetscCall(VecRestoreArray(auglydd,&d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A,NULL,auglydd));
  PetscCall(VecGetLocalSize(auglyoo,&n));
  PetscCall(VecGetArray(auglyoo,&o));
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  PetscCall(VecRestoreArrayRead(scale,&s));
  PetscCall(VecRestoreArray(auglyoo,&o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B,NULL,auglyoo));
  PetscFunctionReturn(0);
}
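
/*
   Usage sketch (illustrative comment only, not compiled): the local scaling code above is
   normally reached through the public MatDiagonalScaleLocal() interface on an MPISELL matrix
   that has a local-to-global mapping set; the names nloc and lscale below are hypothetical.

     Vec lscale;
     PetscCall(VecCreateSeq(PETSC_COMM_SELF,nloc,&lscale));  // nloc = length of the matrix's local-to-global row mapping (owned + ghost entries)
     // ... fill lscale with the scaling values in the local (mapped) ordering ...
     PetscCall(MatDiagonalScaleLocal(A,lscale));             // column-scales the "diagonal" block with the owned part of lscale and the "off-diagonal" block with the ghost part
     PetscCall(VecDestroy(&lscale));
*/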