1d4002b98SHong Zhang /* 2d4002b98SHong Zhang Support for the parallel SELL matrix vector multiply 3d4002b98SHong Zhang */ 4d4002b98SHong Zhang #include <../src/mat/impls/sell/mpi/mpisell.h> 5d4002b98SHong Zhang #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 6d4002b98SHong Zhang 7d4002b98SHong Zhang /* 8d4002b98SHong Zhang Takes the local part of an already assembled MPISELL matrix 9d4002b98SHong Zhang and disassembles it. This is to allow new nonzeros into the matrix 10d4002b98SHong Zhang that require more communication in the matrix vector multiply. 11d4002b98SHong Zhang Thus certain data-structures must be rebuilt. 12d4002b98SHong Zhang 13d4002b98SHong Zhang Kind of slow! But that's what application programmers get when 14d4002b98SHong Zhang they are sloppy. 15d4002b98SHong Zhang */ 16d4002b98SHong Zhang PetscErrorCode MatDisAssemble_MPISELL(Mat A) 17d4002b98SHong Zhang { 18d4002b98SHong Zhang Mat_MPISELL *sell=(Mat_MPISELL*)A->data; 19d4002b98SHong Zhang Mat B=sell->B,Bnew; 20d4002b98SHong Zhang Mat_SeqSELL *Bsell=(Mat_SeqSELL*)B->data; 21d4002b98SHong Zhang PetscInt i,j,totalslices,N=A->cmap->N,ec,row; 22d4002b98SHong Zhang PetscBool isnonzero; 23d4002b98SHong Zhang 24d4002b98SHong Zhang PetscFunctionBegin; 25d4002b98SHong Zhang /* free stuff related to matrix-vec multiply */ 269566063dSJacob Faibussowitsch PetscCall(VecGetSize(sell->lvec,&ec)); /* needed for PetscLogObjectMemory below */ 279566063dSJacob Faibussowitsch PetscCall(VecDestroy(&sell->lvec)); 289566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&sell->Mvctx)); 29d4002b98SHong Zhang if (sell->colmap) { 30d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE) 319566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&sell->colmap)); 32d4002b98SHong Zhang #else 339566063dSJacob Faibussowitsch PetscCall(PetscFree(sell->colmap)); 349566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A,-sell->B->cmap->n*sizeof(PetscInt))); 35d4002b98SHong Zhang #endif 36d4002b98SHong Zhang } 37d4002b98SHong Zhang 38d4002b98SHong Zhang /* make sure that B is assembled so we can access its values */ 399566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 409566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 41d4002b98SHong Zhang 42d4002b98SHong Zhang /* invent new B and copy stuff over */ 439566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_SELF,&Bnew)); 449566063dSJacob Faibussowitsch PetscCall(MatSetSizes(Bnew,B->rmap->n,N,B->rmap->n,N)); 459566063dSJacob Faibussowitsch PetscCall(MatSetBlockSizesFromMats(Bnew,A,A)); 469566063dSJacob Faibussowitsch PetscCall(MatSetType(Bnew,((PetscObject)B)->type_name)); 479566063dSJacob Faibussowitsch PetscCall(MatSeqSELLSetPreallocation(Bnew,0,Bsell->rlen)); 48b38c15b3SStefano Zampini if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */ 49b38c15b3SStefano Zampini ((Mat_SeqSELL*)Bnew->data)->nonew = Bsell->nonew; 50b38c15b3SStefano Zampini } 51d4002b98SHong Zhang 52d4002b98SHong Zhang /* 53d4002b98SHong Zhang Ensure that B's nonzerostate is monotonically increasing. 54d4002b98SHong Zhang Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 55d4002b98SHong Zhang */ 56d4002b98SHong Zhang Bnew->nonzerostate = B->nonzerostate; 57d4002b98SHong Zhang 58d4002b98SHong Zhang totalslices = B->rmap->n/8+((B->rmap->n & 0x07)?1:0); /* floor(n/8) */ 59d4002b98SHong Zhang for (i=0; i<totalslices; i++) { /* loop over slices */ 60d4002b98SHong Zhang for (j=Bsell->sliidx[i],row=0; j<Bsell->sliidx[i+1]; j++,row=((row+1)&0x07)) { 61d4002b98SHong Zhang isnonzero = (PetscBool)((j-Bsell->sliidx[i])/8 < Bsell->rlen[8*i+row]); 62d4002b98SHong Zhang if (isnonzero) { 639566063dSJacob Faibussowitsch PetscCall(MatSetValue(Bnew,8*i+row,sell->garray[Bsell->colidx[j]],Bsell->val[j],B->insertmode)); 64d4002b98SHong Zhang } 65d4002b98SHong Zhang } 66d4002b98SHong Zhang } 67d4002b98SHong Zhang 689566063dSJacob Faibussowitsch PetscCall(PetscFree(sell->garray)); 699566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt))); 709566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B)); 719566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew)); 72d4002b98SHong Zhang 73d4002b98SHong Zhang sell->B = Bnew; 74d4002b98SHong Zhang A->was_assembled = PETSC_FALSE; 75d4002b98SHong Zhang A->assembled = PETSC_FALSE; 76d4002b98SHong Zhang PetscFunctionReturn(0); 77d4002b98SHong Zhang } 78d4002b98SHong Zhang 79d4002b98SHong Zhang PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat) 80d4002b98SHong Zhang { 81d4002b98SHong Zhang Mat_MPISELL *sell=(Mat_MPISELL*)mat->data; 82d4002b98SHong Zhang Mat_SeqSELL *B=(Mat_SeqSELL*)(sell->B->data); 83d4002b98SHong Zhang PetscInt i,j,*bcolidx=B->colidx,ec=0,*garray,totalslices; 84d4002b98SHong Zhang IS from,to; 85d4002b98SHong Zhang Vec gvec; 86d4002b98SHong Zhang PetscBool isnonzero; 87d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE) 88d4002b98SHong Zhang PetscTable gid1_lid1; 89d4002b98SHong Zhang PetscTablePosition tpos; 90d4002b98SHong Zhang PetscInt gid,lid; 91d4002b98SHong Zhang #else 92d4002b98SHong Zhang PetscInt N = mat->cmap->N,*indices; 93d4002b98SHong Zhang #endif 94d4002b98SHong Zhang 95d4002b98SHong Zhang PetscFunctionBegin; 96d4002b98SHong Zhang totalslices = sell->B->rmap->n/8+((sell->B->rmap->n & 0x07)?1:0); /* floor(n/8) */ 97d4002b98SHong Zhang 98d4002b98SHong Zhang /* ec counts the number of columns that contain nonzeros */ 99d4002b98SHong Zhang #if defined(PETSC_USE_CTABLE) 100d4002b98SHong Zhang /* use a table */ 1019566063dSJacob Faibussowitsch PetscCall(PetscTableCreate(sell->B->rmap->n,mat->cmap->N+1,&gid1_lid1)); 102d4002b98SHong Zhang for (i=0; i<totalslices; i++) { /* loop over slices */ 103d4002b98SHong Zhang for (j=B->sliidx[i]; j<B->sliidx[i+1]; j++) { 104d4002b98SHong Zhang isnonzero = (PetscBool)((j-B->sliidx[i])/8 < B->rlen[(i<<3)+(j&0x07)]); 105d4002b98SHong Zhang if (isnonzero) { /* check the mask bit */ 106d4002b98SHong Zhang PetscInt data,gid1 = bcolidx[j] + 1; 1079566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1,gid1,&data)); 108d4002b98SHong Zhang if (!data) { 109d4002b98SHong Zhang /* one based table */ 1109566063dSJacob Faibussowitsch PetscCall(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES)); 111d4002b98SHong Zhang } 112d4002b98SHong Zhang } 113d4002b98SHong Zhang } 114d4002b98SHong Zhang } 115d4002b98SHong Zhang 116d4002b98SHong Zhang /* form array of columns we need */ 1179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec,&garray)); 1189566063dSJacob Faibussowitsch PetscCall(PetscTableGetHeadPosition(gid1_lid1,&tpos)); 119d4002b98SHong Zhang while (tpos) { 1209566063dSJacob Faibussowitsch PetscCall(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid)); 121d4002b98SHong Zhang gid--; 122d4002b98SHong Zhang lid--; 123d4002b98SHong Zhang garray[lid] = gid; 124d4002b98SHong Zhang } 1259566063dSJacob Faibussowitsch PetscCall(PetscSortInt(ec,garray)); /* sort, and rebuild */ 1269566063dSJacob Faibussowitsch PetscCall(PetscTableRemoveAll(gid1_lid1)); 127d4002b98SHong Zhang for (i=0; i<ec; i++) { 1289566063dSJacob Faibussowitsch PetscCall(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES)); 129d4002b98SHong Zhang } 130d4002b98SHong Zhang 131d4002b98SHong Zhang /* compact out the extra columns in B */ 132d4002b98SHong Zhang for (i=0; i<totalslices; i++) { /* loop over slices */ 133d4002b98SHong Zhang for (j=B->sliidx[i]; j<B->sliidx[i+1]; j++) { 134d4002b98SHong Zhang isnonzero = (PetscBool)((j-B->sliidx[i])/8 < B->rlen[(i<<3)+(j&0x07)]); 135d4002b98SHong Zhang if (isnonzero) { 136d4002b98SHong Zhang PetscInt gid1 = bcolidx[j] + 1; 1379566063dSJacob Faibussowitsch PetscCall(PetscTableFind(gid1_lid1,gid1,&lid)); 138d4002b98SHong Zhang lid--; 139d4002b98SHong Zhang bcolidx[j] = lid; 140d4002b98SHong Zhang } 141d4002b98SHong Zhang } 142d4002b98SHong Zhang } 1439566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 1449566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B),ec,ec,1,&sell->B->cmap)); 1459566063dSJacob Faibussowitsch PetscCall(PetscTableDestroy(&gid1_lid1)); 146d4002b98SHong Zhang #else 147d4002b98SHong Zhang /* Make an array as long as the number of columns */ 1489566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(N,&indices)); 149d4002b98SHong Zhang /* mark those columns that are in sell->B */ 150d4002b98SHong Zhang for (i=0; i<totalslices; i++) { /* loop over slices */ 151d4002b98SHong Zhang for (j=B->sliidx[i]; j<B->sliidx[i+1]; j++) { 152d4002b98SHong Zhang isnonzero = (PetscBool)((j-B->sliidx[i])/8 < B->rlen[(i<<3)+(j&0x07)]); 153d4002b98SHong Zhang if (isnonzero) { 154d4002b98SHong Zhang if (!indices[bcolidx[j]]) ec++; 155d4002b98SHong Zhang indices[bcolidx[j]] = 1; 156d4002b98SHong Zhang } 157d4002b98SHong Zhang } 158d4002b98SHong Zhang } 159d4002b98SHong Zhang 160d4002b98SHong Zhang /* form array of columns we need */ 1619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ec,&garray)); 162d4002b98SHong Zhang ec = 0; 163d4002b98SHong Zhang for (i=0; i<N; i++) { 164d4002b98SHong Zhang if (indices[i]) garray[ec++] = i; 165d4002b98SHong Zhang } 166d4002b98SHong Zhang 167d4002b98SHong Zhang /* make indices now point into garray */ 168d4002b98SHong Zhang for (i=0; i<ec; i++) { 169d4002b98SHong Zhang indices[garray[i]] = i; 170d4002b98SHong Zhang } 171d4002b98SHong Zhang 172d4002b98SHong Zhang /* compact out the extra columns in B */ 173d4002b98SHong Zhang for (i=0; i<totalslices; i++) { /* loop over slices */ 174d4002b98SHong Zhang for (j=B->sliidx[i]; j<B->sliidx[i+1]; j++) { 175d4002b98SHong Zhang isnonzero = (PetscBool)((j-B->sliidx[i])/8 < B->rlen[(i<<3)+(j&0x07)]); 176d4002b98SHong Zhang if (isnonzero) bcolidx[j] = indices[bcolidx[j]]; 177d4002b98SHong Zhang } 178d4002b98SHong Zhang } 1799566063dSJacob Faibussowitsch PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 1809566063dSJacob Faibussowitsch PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B),ec,ec,1,&sell->B->cmap)); 1819566063dSJacob Faibussowitsch PetscCall(PetscFree(indices)); 182d4002b98SHong Zhang #endif 183d4002b98SHong Zhang /* create local vector that is used to scatter into */ 1849566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF,ec,&sell->lvec)); 185d4002b98SHong Zhang /* create two temporary Index sets for build scatter gather */ 1869566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ec,garray,PETSC_COPY_VALUES,&from)); 1879566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to)); 188d4002b98SHong Zhang 189d4002b98SHong Zhang /* create temporary global vector to generate scatter context */ 190d4002b98SHong Zhang /* This does not allocate the array's memory so is efficient */ 1919566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec)); 192d4002b98SHong Zhang 193d4002b98SHong Zhang /* generate the scatter context */ 1949566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(gvec,from,sell->lvec,to,&sell->Mvctx)); 1959566063dSJacob Faibussowitsch PetscCall(VecScatterViewFromOptions(sell->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view")); 1969566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sell->Mvctx)); 1979566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)sell->lvec)); 1989566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)from)); 1999566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)to)); 200d4002b98SHong Zhang 201d4002b98SHong Zhang sell->garray = garray; 202d4002b98SHong Zhang 2039566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt))); 2049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&from)); 2059566063dSJacob Faibussowitsch PetscCall(ISDestroy(&to)); 2069566063dSJacob Faibussowitsch PetscCall(VecDestroy(&gvec)); 207d4002b98SHong Zhang PetscFunctionReturn(0); 208d4002b98SHong Zhang } 209d4002b98SHong Zhang 210d4002b98SHong Zhang /* ugly stuff added for Glenn someday we should fix this up */ 211f4259b30SLisandro Dalcin static PetscInt *auglyrmapd = NULL,*auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 212f4259b30SLisandro Dalcin static Vec auglydd = NULL,auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 213d4002b98SHong Zhang 214d4002b98SHong Zhang PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA,Vec scale) 215d4002b98SHong Zhang { 216d4002b98SHong Zhang Mat_MPISELL *ina=(Mat_MPISELL*)inA->data; /*access private part of matrix */ 217d4002b98SHong Zhang PetscInt i,n,nt,cstart,cend,no,*garray=ina->garray,*lindices; 218d4002b98SHong Zhang PetscInt *r_rmapd,*r_rmapo; 219d4002b98SHong Zhang 220d4002b98SHong Zhang PetscFunctionBegin; 2219566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(inA,&cstart,&cend)); 2229566063dSJacob Faibussowitsch PetscCall(MatGetSize(ina->A,NULL,&n)); 2239566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd)); 224d4002b98SHong Zhang nt = 0; 225d4002b98SHong Zhang for (i=0; i<inA->rmap->mapping->n; i++) { 226d4002b98SHong Zhang if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 227d4002b98SHong Zhang nt++; 228d4002b98SHong Zhang r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 229d4002b98SHong Zhang } 230d4002b98SHong Zhang } 231*08401ef6SPierre Jolivet PetscCheck(nt == n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT,nt,n); 2329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n+1,&auglyrmapd)); 233d4002b98SHong Zhang for (i=0; i<inA->rmap->mapping->n; i++) { 234d4002b98SHong Zhang if (r_rmapd[i]) { 235d4002b98SHong Zhang auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 236d4002b98SHong Zhang } 237d4002b98SHong Zhang } 2389566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapd)); 2399566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF,n,&auglydd)); 2409566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->cmap->N+1,&lindices)); 241d4002b98SHong Zhang for (i=0; i<ina->B->cmap->n; i++) { 242d4002b98SHong Zhang lindices[garray[i]] = i+1; 243d4002b98SHong Zhang } 244d4002b98SHong Zhang no = inA->rmap->mapping->n - nt; 2459566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo)); 246d4002b98SHong Zhang nt = 0; 247d4002b98SHong Zhang for (i=0; i<inA->rmap->mapping->n; i++) { 248d4002b98SHong Zhang if (lindices[inA->rmap->mapping->indices[i]]) { 249d4002b98SHong Zhang nt++; 250d4002b98SHong Zhang r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 251d4002b98SHong Zhang } 252d4002b98SHong Zhang } 253*08401ef6SPierre Jolivet PetscCheck(nt <= no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,n); 2549566063dSJacob Faibussowitsch PetscCall(PetscFree(lindices)); 2559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nt+1,&auglyrmapo)); 256d4002b98SHong Zhang for (i=0; i<inA->rmap->mapping->n; i++) { 257d4002b98SHong Zhang if (r_rmapo[i]) { 258d4002b98SHong Zhang auglyrmapo[(r_rmapo[i]-1)] = i; 259d4002b98SHong Zhang } 260d4002b98SHong Zhang } 2619566063dSJacob Faibussowitsch PetscCall(PetscFree(r_rmapo)); 2629566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo)); 263d4002b98SHong Zhang PetscFunctionReturn(0); 264d4002b98SHong Zhang } 265d4002b98SHong Zhang 266d4002b98SHong Zhang PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A,Vec scale) 267d4002b98SHong Zhang { 268d4002b98SHong Zhang Mat_MPISELL *a=(Mat_MPISELL*)A->data; /*access private part of matrix */ 269d4002b98SHong Zhang PetscInt n,i; 270d4002b98SHong Zhang PetscScalar *d,*o; 271d4002b98SHong Zhang const PetscScalar *s; 272d4002b98SHong Zhang 273d4002b98SHong Zhang PetscFunctionBegin; 274d4002b98SHong Zhang if (!auglyrmapd) { 2759566063dSJacob Faibussowitsch PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A,scale)); 276d4002b98SHong Zhang } 2779566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(scale,&s)); 2789566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglydd,&n)); 2799566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglydd,&d)); 280d4002b98SHong Zhang for (i=0; i<n; i++) { 281d4002b98SHong Zhang d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 282d4002b98SHong Zhang } 2839566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglydd,&d)); 284d4002b98SHong Zhang /* column scale "diagonal" portion of local matrix */ 2859566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->A,NULL,auglydd)); 2869566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(auglyoo,&n)); 2879566063dSJacob Faibussowitsch PetscCall(VecGetArray(auglyoo,&o)); 288d4002b98SHong Zhang for (i=0; i<n; i++) { 289d4002b98SHong Zhang o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 290d4002b98SHong Zhang } 2919566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(scale,&s)); 2929566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(auglyoo,&o)); 293d4002b98SHong Zhang /* column scale "off-diagonal" portion of local matrix */ 2949566063dSJacob Faibussowitsch PetscCall(MatDiagonalScale(a->B,NULL,auglyoo)); 295d4002b98SHong Zhang PetscFunctionReturn(0); 296d4002b98SHong Zhang } 297