1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 8 { 9 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 10 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 11 PetscErrorCode ierr; 12 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 13 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 14 IS from,to; 15 Vec gvec; 16 PetscMPIInt rank =sbaij->rank,lsize,size=sbaij->size; 17 PetscInt *owners=sbaij->rangebs,*ec_owner,k; 18 const PetscInt *sowners; 19 PetscScalar *ptr; 20 21 PetscFunctionBegin; 22 ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr); 23 24 /* For the first stab we make an array as long as the number of columns */ 25 /* mark those columns that are in sbaij->B */ 26 ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr); 27 for (i=0; i<mbs; i++) { 28 for (j=0; j<B->ilen[i]; j++) { 29 if (!indices[aj[B->i[i] + j]]) ec++; 30 indices[aj[B->i[i] + j]] = 1; 31 } 32 } 33 34 /* form arrays of columns we need */ 35 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 36 ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr); 37 38 ec = 0; 39 for (j=0; j<size; j++) { 40 for (i=owners[j]; i<owners[j+1]; i++) { 41 if (indices[i]) { 42 garray[ec] = i; 43 ec_owner[ec] = j; 44 ec++; 45 } 46 } 47 } 48 49 /* make indices now point into garray */ 50 for (i=0; i<ec; i++) indices[garray[i]] = i; 51 52 /* compact out the extra columns in B */ 53 for (i=0; i<mbs; i++) { 54 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 55 } 56 B->nbs = ec; 57 ierr = PetscLayoutDestroy(&sbaij->B->cmap);CHKERRQ(ierr); 58 ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap);CHKERRQ(ierr); 59 ierr = PetscFree(indices);CHKERRQ(ierr); 60 61 /* create local vector that is used to scatter into */ 62 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 63 64 /* create two temporary index sets for building scatter-gather */ 65 ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr); 66 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 67 for (i=0; i<ec; i++) stmp[i] = i; 68 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 69 70 /* generate the scatter context 71 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefull for some applications */ 72 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 73 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 74 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 75 76 sbaij->garray = garray; 77 78 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr); 79 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr); 80 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 81 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 82 83 ierr = ISDestroy(&from);CHKERRQ(ierr); 84 ierr = ISDestroy(&to);CHKERRQ(ierr); 85 86 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 87 lsize = (mbs + ec)*bs; 88 ierr = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 89 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 90 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 91 92 ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr); 93 94 /* x index in the IS sfrom */ 95 for (i=0; i<ec; i++) { 96 j = ec_owner[i]; 97 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 98 } 99 /* b index in the IS sfrom */ 100 k = sowners[rank]/bs + mbs; 101 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 102 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 103 104 /* x index in the IS sto */ 105 k = sowners[rank]/bs + mbs; 106 for (i=0; i<ec; i++) stmp[i] = (k + i); 107 /* b index in the IS sto */ 108 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 109 110 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 111 112 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 113 114 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 115 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 116 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 117 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 118 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 119 120 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 121 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 122 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 123 124 ierr = PetscFree(stmp);CHKERRQ(ierr); 125 ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 126 127 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr); 128 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr); 129 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr); 130 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr); 131 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr); 132 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr); 133 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 134 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 135 136 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 137 ierr = ISDestroy(&from);CHKERRQ(ierr); 138 ierr = ISDestroy(&to);CHKERRQ(ierr); 139 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 144 { 145 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 147 PetscErrorCode ierr; 148 PetscInt i,j,*aj = B->j,ec = 0,*garray; 149 PetscInt bs = mat->rmap->bs,*stmp; 150 IS from,to; 151 Vec gvec; 152 #if defined(PETSC_USE_CTABLE) 153 PetscTable gid1_lid1; 154 PetscTablePosition tpos; 155 PetscInt gid,lid; 156 #else 157 PetscInt Nbs = baij->Nbs,*indices; 158 #endif 159 160 PetscFunctionBegin; 161 #if defined(PETSC_USE_CTABLE) 162 /* use a table - Mark Adams */ 163 PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1); 164 for (i=0; i<B->mbs; i++) { 165 for (j=0; j<B->ilen[i]; j++) { 166 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 167 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 168 if (!data) { 169 /* one based table */ 170 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 171 } 172 } 173 } 174 /* form array of columns we need */ 175 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 176 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 177 while (tpos) { 178 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 179 gid--; lid--; 180 garray[lid] = gid; 181 } 182 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 183 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 184 for (i=0; i<ec; i++) { 185 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 186 } 187 /* compact out the extra columns in B */ 188 for (i=0; i<B->mbs; i++) { 189 for (j=0; j<B->ilen[i]; j++) { 190 PetscInt gid1 = aj[B->i[i] + j] + 1; 191 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 192 lid--; 193 aj[B->i[i]+j] = lid; 194 } 195 } 196 B->nbs = ec; 197 ierr = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr); 198 ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr); 199 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 200 #else 201 /* For the first stab we make an array as long as the number of columns */ 202 /* mark those columns that are in baij->B */ 203 ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr); 204 for (i=0; i<B->mbs; i++) { 205 for (j=0; j<B->ilen[i]; j++) { 206 if (!indices[aj[B->i[i] + j]]) ec++; 207 indices[aj[B->i[i] + j]] = 1; 208 } 209 } 210 211 /* form array of columns we need */ 212 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 213 ec = 0; 214 for (i=0; i<Nbs; i++) { 215 if (indices[i]) { 216 garray[ec++] = i; 217 } 218 } 219 220 /* make indices now point into garray */ 221 for (i=0; i<ec; i++) indices[garray[i]] = i; 222 223 /* compact out the extra columns in B */ 224 for (i=0; i<B->mbs; i++) { 225 for (j=0; j<B->ilen[i]; j++) { 226 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 227 } 228 } 229 B->nbs = ec; 230 231 baij->B->cmap->n = ec*mat->rmap->bs; 232 233 ierr = PetscFree(indices);CHKERRQ(ierr); 234 #endif 235 236 /* create local vector that is used to scatter into */ 237 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 238 239 /* create two temporary index sets for building scatter-gather */ 240 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 241 242 ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr); 243 for (i=0; i<ec; i++) stmp[i] = i; 244 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 245 246 /* create temporary global vector to generate scatter context */ 247 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 248 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 249 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 250 251 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr); 252 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr); 253 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 254 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 255 256 baij->garray = garray; 257 258 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 259 ierr = ISDestroy(&from);CHKERRQ(ierr); 260 ierr = ISDestroy(&to);CHKERRQ(ierr); 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 Takes the local part of an already assembled MPISBAIJ matrix 266 and disassembles it. This is to allow new nonzeros into the matrix 267 that require more communication in the matrix vector multiply. 268 Thus certain data-structures must be rebuilt. 269 270 Kind of slow! But that's what application programmers get when 271 they are sloppy. 272 */ 273 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 274 { 275 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 276 Mat B = baij->B,Bnew; 277 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 278 PetscErrorCode ierr; 279 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 280 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 281 MatScalar *a = Bbaij->a; 282 PetscScalar *atmp; 283 #if defined(PETSC_USE_REAL_MAT_SINGLE) 284 PetscInt l; 285 #endif 286 287 PetscFunctionBegin; 288 #if defined(PETSC_USE_REAL_MAT_SINGLE) 289 ierr = PetscMalloc1(A->rmap->bs,&atmp); 290 #endif 291 /* free stuff related to matrix-vec multiply */ 292 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 293 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 294 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 295 296 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 297 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 298 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 299 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 300 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 301 302 if (baij->colmap) { 303 #if defined(PETSC_USE_CTABLE) 304 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 305 #else 306 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 307 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 308 #endif 309 } 310 311 /* make sure that B is assembled so we can access its values */ 312 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 313 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 314 315 /* invent new B and copy stuff over */ 316 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 317 for (i=0; i<mbs; i++) { 318 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 319 } 320 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 321 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 322 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 323 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 324 ierr = PetscFree(nz);CHKERRQ(ierr); 325 326 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 327 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; 328 } 329 330 /* 331 Ensure that B's nonzerostate is monotonically increasing. 332 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 333 */ 334 Bnew->nonzerostate = B->nonzerostate; 335 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 336 for (i=0; i<mbs; i++) { 337 rvals[0] = bs*i; 338 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 339 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 340 col = garray[Bbaij->j[j]]*bs; 341 for (k=0; k<bs; k++) { 342 #if defined(PETSC_USE_REAL_MAT_SINGLE) 343 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 344 #else 345 atmp = a+j*bs2 + k*bs; 346 #endif 347 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 348 col++; 349 } 350 } 351 } 352 #if defined(PETSC_USE_REAL_MAT_SINGLE) 353 ierr = PetscFree(atmp);CHKERRQ(ierr); 354 #endif 355 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 356 357 baij->garray = 0; 358 359 ierr = PetscFree(rvals);CHKERRQ(ierr); 360 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 361 ierr = MatDestroy(&B);CHKERRQ(ierr); 362 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 363 364 baij->B = Bnew; 365 366 A->was_assembled = PETSC_FALSE; 367 PetscFunctionReturn(0); 368 } 369