1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 8 { 9 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 10 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 11 PetscInt Nbs = sbaij->Nbs,i,j,*aj = B->j,ec = 0,*garray,*sgarray; 12 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 13 IS from,to; 14 Vec gvec; 15 PetscMPIInt rank = sbaij->rank,lsize; 16 PetscInt *owners = sbaij->rangebs,*ec_owner,k; 17 const PetscInt *sowners; 18 PetscScalar *ptr; 19 #if defined(PETSC_USE_CTABLE) 20 PetscTable gid1_lid1; /* one-based gid to lid table */ 21 PetscTablePosition tpos; 22 PetscInt gid,lid; 23 #else 24 PetscInt *indices; 25 #endif 26 27 PetscFunctionBegin; 28 #if defined(PETSC_USE_CTABLE) 29 CHKERRQ(PetscTableCreate(mbs,Nbs+1,&gid1_lid1)); 30 for (i=0; i<B->mbs; i++) { 31 for (j=0; j<B->ilen[i]; j++) { 32 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 33 CHKERRQ(PetscTableFind(gid1_lid1,gid1,&data)); 34 if (!data) CHKERRQ(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES)); 35 } 36 } 37 /* form array of columns we need */ 38 CHKERRQ(PetscMalloc1(ec,&garray)); 39 CHKERRQ(PetscTableGetHeadPosition(gid1_lid1,&tpos)); 40 while (tpos) { 41 CHKERRQ(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid)); 42 gid--; lid--; 43 garray[lid] = gid; 44 } 45 CHKERRQ(PetscSortInt(ec,garray)); 46 CHKERRQ(PetscTableRemoveAll(gid1_lid1)); 47 for (i=0; i<ec; i++) CHKERRQ(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES)); 48 /* compact out the extra columns in B */ 49 for (i=0; i<B->mbs; i++) { 50 for (j=0; j<B->ilen[i]; j++) { 51 PetscInt gid1 = aj[B->i[i] + j] + 1; 52 CHKERRQ(PetscTableFind(gid1_lid1,gid1,&lid)); 53 lid--; 54 aj[B->i[i]+j] = lid; 55 } 56 } 57 CHKERRQ(PetscTableDestroy(&gid1_lid1)); 58 CHKERRQ(PetscMalloc2(2*ec,&sgarray,ec,&ec_owner)); 59 for (i=j=0; i<ec; i++) { 60 while (garray[i]>=owners[j+1]) j++; 61 ec_owner[i] = j; 62 } 63 #else 64 /* For the first stab we make an array as long as the number of columns */ 65 /* mark those columns that are in sbaij->B */ 66 CHKERRQ(PetscCalloc1(Nbs,&indices)); 67 for (i=0; i<mbs; i++) { 68 for (j=0; j<B->ilen[i]; j++) { 69 if (!indices[aj[B->i[i] + j]]) ec++; 70 indices[aj[B->i[i] + j]] = 1; 71 } 72 } 73 74 /* form arrays of columns we need */ 75 CHKERRQ(PetscMalloc1(ec,&garray)); 76 CHKERRQ(PetscMalloc2(2*ec,&sgarray,ec,&ec_owner)); 77 78 ec = 0; 79 for (j=0; j<sbaij->size; j++) { 80 for (i=owners[j]; i<owners[j+1]; i++) { 81 if (indices[i]) { 82 garray[ec] = i; 83 ec_owner[ec] = j; 84 ec++; 85 } 86 } 87 } 88 89 /* make indices now point into garray */ 90 for (i=0; i<ec; i++) indices[garray[i]] = i; 91 92 /* compact out the extra columns in B */ 93 for (i=0; i<mbs; i++) { 94 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 95 } 96 CHKERRQ(PetscFree(indices)); 97 #endif 98 B->nbs = ec; 99 CHKERRQ(PetscLayoutDestroy(&sbaij->B->cmap)); 100 CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap)); 101 102 CHKERRQ(VecScatterDestroy(&sbaij->sMvctx)); 103 /* create local vector that is used to scatter into */ 104 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec)); 105 106 /* create two temporary index sets for building scatter-gather */ 107 CHKERRQ(PetscMalloc1(2*ec,&stmp)); 108 CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from)); 109 for (i=0; i<ec; i++) stmp[i] = i; 110 CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to)); 111 112 /* generate the scatter context 113 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */ 114 CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec)); 115 CHKERRQ(VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx)); 116 CHKERRQ(VecDestroy(&gvec)); 117 118 sbaij->garray = garray; 119 120 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx)); 121 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec)); 122 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)from)); 123 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)to)); 124 125 CHKERRQ(ISDestroy(&from)); 126 CHKERRQ(ISDestroy(&to)); 127 128 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 129 lsize = (mbs + ec)*bs; 130 CHKERRQ(VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0)); 131 CHKERRQ(VecDuplicate(sbaij->slvec0,&sbaij->slvec1)); 132 CHKERRQ(VecGetSize(sbaij->slvec0,&vec_size)); 133 134 CHKERRQ(VecGetOwnershipRanges(sbaij->slvec0,&sowners)); 135 136 /* x index in the IS sfrom */ 137 for (i=0; i<ec; i++) { 138 j = ec_owner[i]; 139 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 140 } 141 /* b index in the IS sfrom */ 142 k = sowners[rank]/bs + mbs; 143 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 144 CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from)); 145 146 /* x index in the IS sto */ 147 k = sowners[rank]/bs + mbs; 148 for (i=0; i<ec; i++) stmp[i] = (k + i); 149 /* b index in the IS sto */ 150 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 151 152 CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to)); 153 154 CHKERRQ(VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx)); 155 CHKERRQ(VecScatterViewFromOptions(sbaij->sMvctx,(PetscObject)mat,"-matmult_vecscatter_view")); 156 157 CHKERRQ(VecGetLocalSize(sbaij->slvec1,&nt)); 158 CHKERRQ(VecGetArray(sbaij->slvec1,&ptr)); 159 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a)); 160 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b)); 161 CHKERRQ(VecRestoreArray(sbaij->slvec1,&ptr)); 162 163 CHKERRQ(VecGetArray(sbaij->slvec0,&ptr)); 164 CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b)); 165 CHKERRQ(VecRestoreArray(sbaij->slvec0,&ptr)); 166 167 CHKERRQ(PetscFree(stmp)); 168 169 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx)); 170 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0)); 171 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1)); 172 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b)); 173 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a)); 174 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b)); 175 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)from)); 176 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)to)); 177 178 CHKERRQ(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt))); 179 CHKERRQ(ISDestroy(&from)); 180 CHKERRQ(ISDestroy(&to)); 181 CHKERRQ(PetscFree2(sgarray,ec_owner)); 182 PetscFunctionReturn(0); 183 } 184 185 /* 186 Takes the local part of an already assembled MPISBAIJ matrix 187 and disassembles it. This is to allow new nonzeros into the matrix 188 that require more communication in the matrix vector multiply. 189 Thus certain data-structures must be rebuilt. 190 191 Kind of slow! But that's what application programmers get when 192 they are sloppy. 193 */ 194 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 195 { 196 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 197 Mat B = baij->B,Bnew; 198 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 199 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 200 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 201 MatScalar *a = Bbaij->a; 202 PetscScalar *atmp; 203 #if defined(PETSC_USE_REAL_MAT_SINGLE) 204 PetscInt l; 205 #endif 206 207 PetscFunctionBegin; 208 #if defined(PETSC_USE_REAL_MAT_SINGLE) 209 CHKERRQ(PetscMalloc1(A->rmap->bs,&atmp)); 210 #endif 211 /* free stuff related to matrix-vec multiply */ 212 CHKERRQ(VecGetSize(baij->lvec,&ec)); /* needed for PetscLogObjectMemory below */ 213 CHKERRQ(VecDestroy(&baij->lvec)); 214 CHKERRQ(VecScatterDestroy(&baij->Mvctx)); 215 216 CHKERRQ(VecDestroy(&baij->slvec0)); 217 CHKERRQ(VecDestroy(&baij->slvec0b)); 218 CHKERRQ(VecDestroy(&baij->slvec1)); 219 CHKERRQ(VecDestroy(&baij->slvec1a)); 220 CHKERRQ(VecDestroy(&baij->slvec1b)); 221 222 if (baij->colmap) { 223 #if defined(PETSC_USE_CTABLE) 224 CHKERRQ(PetscTableDestroy(&baij->colmap)); 225 #else 226 CHKERRQ(PetscFree(baij->colmap)); 227 CHKERRQ(PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt))); 228 #endif 229 } 230 231 /* make sure that B is assembled so we can access its values */ 232 CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 233 CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 234 235 /* invent new B and copy stuff over */ 236 CHKERRQ(PetscMalloc1(mbs,&nz)); 237 for (i=0; i<mbs; i++) { 238 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 239 } 240 CHKERRQ(MatCreate(PETSC_COMM_SELF,&Bnew)); 241 CHKERRQ(MatSetSizes(Bnew,m,n,m,n)); 242 CHKERRQ(MatSetType(Bnew,((PetscObject)B)->type_name)); 243 CHKERRQ(MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz)); 244 CHKERRQ(PetscFree(nz)); 245 246 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 247 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; 248 } 249 250 /* 251 Ensure that B's nonzerostate is monotonically increasing. 252 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 253 */ 254 Bnew->nonzerostate = B->nonzerostate; 255 CHKERRQ(PetscMalloc1(bs,&rvals)); 256 for (i=0; i<mbs; i++) { 257 rvals[0] = bs*i; 258 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 259 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 260 col = garray[Bbaij->j[j]]*bs; 261 for (k=0; k<bs; k++) { 262 #if defined(PETSC_USE_REAL_MAT_SINGLE) 263 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 264 #else 265 atmp = a+j*bs2 + k*bs; 266 #endif 267 CHKERRQ(MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode)); 268 col++; 269 } 270 } 271 } 272 #if defined(PETSC_USE_REAL_MAT_SINGLE) 273 CHKERRQ(PetscFree(atmp)); 274 #endif 275 CHKERRQ(PetscFree(baij->garray)); 276 277 baij->garray = NULL; 278 279 CHKERRQ(PetscFree(rvals)); 280 CHKERRQ(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt))); 281 CHKERRQ(MatDestroy(&B)); 282 CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew)); 283 284 baij->B = Bnew; 285 286 A->was_assembled = PETSC_FALSE; 287 PetscFunctionReturn(0); 288 } 289