1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 8 { 9 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 10 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 11 PetscErrorCode ierr; 12 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 13 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 14 IS from,to; 15 Vec gvec; 16 PetscMPIInt rank =sbaij->rank,lsize,size=sbaij->size; 17 PetscInt *owners=sbaij->rangebs,*ec_owner,k; 18 const PetscInt *sowners; 19 PetscScalar *ptr; 20 21 PetscFunctionBegin; 22 ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr); 23 24 /* For the first stab we make an array as long as the number of columns */ 25 /* mark those columns that are in sbaij->B */ 26 ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr); 27 for (i=0; i<mbs; i++) { 28 for (j=0; j<B->ilen[i]; j++) { 29 if (!indices[aj[B->i[i] + j]]) ec++; 30 indices[aj[B->i[i] + j]] = 1; 31 } 32 } 33 34 /* form arrays of columns we need */ 35 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 36 ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr); 37 38 ec = 0; 39 for (j=0; j<size; j++) { 40 for (i=owners[j]; i<owners[j+1]; i++) { 41 if (indices[i]) { 42 garray[ec] = i; 43 ec_owner[ec] = j; 44 ec++; 45 } 46 } 47 } 48 49 /* make indices now point into garray */ 50 for (i=0; i<ec; i++) indices[garray[i]] = i; 51 52 /* compact out the extra columns in B */ 53 for (i=0; i<mbs; i++) { 54 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 55 } 56 B->nbs = ec; 57 ierr = PetscLayoutDestroy(&sbaij->B->cmap);CHKERRQ(ierr); 58 ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap);CHKERRQ(ierr); 59 ierr = PetscFree(indices);CHKERRQ(ierr); 60 61 /* create local vector that is used to scatter into */ 62 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 63 64 /* create two temporary index sets for building scatter-gather */ 65 ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr); 66 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 67 for (i=0; i<ec; i++) stmp[i] = i; 68 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 69 70 /* generate the scatter context 71 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefull for some applications */ 72 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 73 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 74 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 75 76 sbaij->garray = garray; 77 78 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr); 79 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr); 80 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 81 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 82 83 ierr = ISDestroy(&from);CHKERRQ(ierr); 84 ierr = ISDestroy(&to);CHKERRQ(ierr); 85 86 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 87 lsize = (mbs + ec)*bs; 88 ierr = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 89 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 90 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 91 92 ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr); 93 94 /* x index in the IS sfrom */ 95 for (i=0; i<ec; i++) { 96 j = ec_owner[i]; 97 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 98 } 99 /* b index in the IS sfrom */ 100 k = sowners[rank]/bs + mbs; 101 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 102 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 103 104 /* x index in the IS sto */ 105 k = sowners[rank]/bs + mbs; 106 for (i=0; i<ec; i++) stmp[i] = (k + i); 107 /* b index in the IS sto */ 108 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 109 110 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 111 112 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 113 114 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 115 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 116 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 117 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 118 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 119 120 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 121 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 122 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 123 124 ierr = PetscFree(stmp);CHKERRQ(ierr); 125 ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 126 127 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr); 128 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr); 129 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr); 130 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr); 131 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr); 132 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr); 133 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 134 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 135 136 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 137 ierr = ISDestroy(&from);CHKERRQ(ierr); 138 ierr = ISDestroy(&to);CHKERRQ(ierr); 139 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 /* 144 Takes the local part of an already assembled MPISBAIJ matrix 145 and disassembles it. This is to allow new nonzeros into the matrix 146 that require more communication in the matrix vector multiply. 147 Thus certain data-structures must be rebuilt. 148 149 Kind of slow! But that's what application programmers get when 150 they are sloppy. 151 */ 152 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 153 { 154 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 155 Mat B = baij->B,Bnew; 156 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 157 PetscErrorCode ierr; 158 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 159 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 160 MatScalar *a = Bbaij->a; 161 PetscScalar *atmp; 162 #if defined(PETSC_USE_REAL_MAT_SINGLE) 163 PetscInt l; 164 #endif 165 166 PetscFunctionBegin; 167 #if defined(PETSC_USE_REAL_MAT_SINGLE) 168 ierr = PetscMalloc1(A->rmap->bs,&atmp); 169 #endif 170 /* free stuff related to matrix-vec multiply */ 171 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 172 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 173 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 174 175 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 176 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 177 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 178 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 179 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 180 181 if (baij->colmap) { 182 #if defined(PETSC_USE_CTABLE) 183 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 184 #else 185 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 186 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 187 #endif 188 } 189 190 /* make sure that B is assembled so we can access its values */ 191 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 192 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 193 194 /* invent new B and copy stuff over */ 195 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 196 for (i=0; i<mbs; i++) { 197 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 198 } 199 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 200 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 201 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 202 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 203 ierr = PetscFree(nz);CHKERRQ(ierr); 204 205 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 206 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; 207 } 208 209 /* 210 Ensure that B's nonzerostate is monotonically increasing. 211 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 212 */ 213 Bnew->nonzerostate = B->nonzerostate; 214 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 215 for (i=0; i<mbs; i++) { 216 rvals[0] = bs*i; 217 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 218 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 219 col = garray[Bbaij->j[j]]*bs; 220 for (k=0; k<bs; k++) { 221 #if defined(PETSC_USE_REAL_MAT_SINGLE) 222 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 223 #else 224 atmp = a+j*bs2 + k*bs; 225 #endif 226 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 227 col++; 228 } 229 } 230 } 231 #if defined(PETSC_USE_REAL_MAT_SINGLE) 232 ierr = PetscFree(atmp);CHKERRQ(ierr); 233 #endif 234 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 235 236 baij->garray = 0; 237 238 ierr = PetscFree(rvals);CHKERRQ(ierr); 239 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = MatDestroy(&B);CHKERRQ(ierr); 241 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 242 243 baij->B = Bnew; 244 245 A->was_assembled = PETSC_FALSE; 246 PetscFunctionReturn(0); 247 } 248