1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7 8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 13 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 14 { 15 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 16 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 17 PetscErrorCode ierr; 18 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 19 PetscInt bs = mat->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 20 IS from,to; 21 Vec gvec; 22 PetscMPIInt rank=sbaij->rank,lsize,size=sbaij->size; 23 PetscInt *owners=sbaij->rowners,*sowners,*ec_owner,k; 24 PetscScalar *ptr; 25 26 PetscFunctionBegin; 27 if (sbaij->sMvctx) { 28 /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */ 29 ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr); 30 sbaij->sMvctx = 0; 31 } 32 33 /* For the first stab we make an array as long as the number of columns */ 34 /* mark those columns that are in sbaij->B */ 35 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 36 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 37 for (i=0; i<mbs; i++) { 38 for (j=0; j<B->ilen[i]; j++) { 39 if (!indices[aj[B->i[i] + j]]) ec++; 40 indices[aj[B->i[i] + j] ] = 1; 41 } 42 } 43 44 /* form arrays of columns we need */ 45 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr); 47 ec_owner = sgarray + 2*ec; 48 49 ec = 0; 50 for (j=0; j<size; j++){ 51 for (i=owners[j]; i<owners[j+1]; i++){ 52 if (indices[i]) { 53 garray[ec] = i; 54 ec_owner[ec] = j; 55 ec++; 56 } 57 } 58 } 59 60 /* make indices now point into garray */ 61 for (i=0; i<ec; i++) indices[garray[i]] = i; 62 63 /* compact out the extra columns in B */ 64 for (i=0; i<mbs; i++) { 65 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 66 } 67 B->nbs = ec; 68 sbaij->B->n = ec*mat->bs; 69 ierr = PetscFree(indices);CHKERRQ(ierr); 70 71 /* create local vector that is used to scatter into */ 72 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 73 74 /* create two temporary index sets for building scatter-gather */ 75 ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 76 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 77 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 78 79 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 80 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 81 82 /* generate the scatter context 83 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 84 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 85 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 86 ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr); 87 88 sbaij->garray = garray; 89 ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr); 90 ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr); 91 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 92 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 93 94 ierr = ISDestroy(from);CHKERRQ(ierr); 95 ierr = ISDestroy(to);CHKERRQ(ierr); 96 97 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 98 lsize = (mbs + ec)*bs; 99 ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 100 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 101 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 102 103 sowners = sbaij->slvec0->map.range; 104 105 /* x index in the IS sfrom */ 106 for (i=0; i<ec; i++) { 107 j = ec_owner[i]; 108 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 109 } 110 /* b index in the IS sfrom */ 111 k = sowners[rank]/bs + mbs; 112 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 113 114 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 115 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 116 117 /* x index in the IS sto */ 118 k = sowners[rank]/bs + mbs; 119 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 120 /* b index in the IS sto */ 121 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 122 123 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 124 125 /* gnerate the SBAIJ scatter context */ 126 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 127 128 /* 129 Post the receives for the first matrix vector product. We sync-chronize after 130 this on the chance that the user immediately calls MatMult() after assemblying 131 the matrix. 132 */ 133 ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr); 134 135 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 136 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 137 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 138 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 139 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 140 141 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 142 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 143 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 144 145 ierr = PetscFree(stmp);CHKERRQ(ierr); 146 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 147 148 ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr); 149 ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr); 150 ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr); 151 ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr); 152 ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr); 153 ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr); 154 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 155 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 156 157 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 158 ierr = ISDestroy(from);CHKERRQ(ierr); 159 ierr = ISDestroy(to);CHKERRQ(ierr); 160 ierr = VecDestroy(gvec);CHKERRQ(ierr); 161 ierr = PetscFree(sgarray);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 #undef __FUNCT__ 166 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 167 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 168 { 169 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 170 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 171 PetscErrorCode ierr; 172 PetscInt i,j,*aj = B->j,ec = 0,*garray; 173 PetscInt bs = mat->bs,*stmp; 174 IS from,to; 175 Vec gvec; 176 #if defined (PETSC_USE_CTABLE) 177 PetscTable gid1_lid1; 178 PetscTablePosition tpos; 179 PetscInt gid,lid; 180 #else 181 PetscInt Nbs = baij->Nbs,*indices; 182 #endif 183 184 PetscFunctionBegin; 185 #if defined (PETSC_USE_CTABLE) 186 /* use a table - Mark Adams */ 187 PetscTableCreate(B->mbs,&gid1_lid1); 188 for (i=0; i<B->mbs; i++) { 189 for (j=0; j<B->ilen[i]; j++) { 190 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 191 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 192 if (!data) { 193 /* one based table */ 194 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 195 } 196 } 197 } 198 /* form array of columns we need */ 199 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 200 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 201 while (tpos) { 202 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 203 gid--; lid--; 204 garray[lid] = gid; 205 } 206 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 207 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 208 for (i=0; i<ec; i++) { 209 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 210 } 211 /* compact out the extra columns in B */ 212 for (i=0; i<B->mbs; i++) { 213 for (j=0; j<B->ilen[i]; j++) { 214 PetscInt gid1 = aj[B->i[i] + j] + 1; 215 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 216 lid --; 217 aj[B->i[i]+j] = lid; 218 } 219 } 220 B->nbs = ec; 221 baij->B->n = ec*mat->bs; 222 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 223 /* Mark Adams */ 224 #else 225 /* For the first stab we make an array as long as the number of columns */ 226 /* mark those columns that are in baij->B */ 227 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 228 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 229 for (i=0; i<B->mbs; i++) { 230 for (j=0; j<B->ilen[i]; j++) { 231 if (!indices[aj[B->i[i] + j]]) ec++; 232 indices[aj[B->i[i] + j] ] = 1; 233 } 234 } 235 236 /* form array of columns we need */ 237 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 238 ec = 0; 239 for (i=0; i<Nbs; i++) { 240 if (indices[i]) { 241 garray[ec++] = i; 242 } 243 } 244 245 /* make indices now point into garray */ 246 for (i=0; i<ec; i++) { 247 indices[garray[i]] = i; 248 } 249 250 /* compact out the extra columns in B */ 251 for (i=0; i<B->mbs; i++) { 252 for (j=0; j<B->ilen[i]; j++) { 253 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 254 } 255 } 256 B->nbs = ec; 257 baij->B->n = ec*mat->bs; 258 ierr = PetscFree(indices);CHKERRQ(ierr); 259 #endif 260 261 /* create local vector that is used to scatter into */ 262 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 263 264 /* create two temporary index sets for building scatter-gather */ 265 for (i=0; i<ec; i++) { 266 garray[i] = bs*garray[i]; 267 } 268 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 269 for (i=0; i<ec; i++) { 270 garray[i] = garray[i]/bs; 271 } 272 273 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 274 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 275 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 276 ierr = PetscFree(stmp);CHKERRQ(ierr); 277 278 /* create temporary global vector to generate scatter context */ 279 /* this is inefficient, but otherwise we must do either 280 1) save garray until the first actual scatter when the vector is known or 281 2) have another way of generating a scatter context without a vector.*/ 282 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 283 284 /* gnerate the scatter context */ 285 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 286 287 /* 288 Post the receives for the first matrix vector product. We sync-chronize after 289 this on the chance that the user immediately calls MatMult() after assemblying 290 the matrix. 291 */ 292 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 293 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 294 295 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 296 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 297 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 298 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 299 baij->garray = garray; 300 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 301 ierr = ISDestroy(from);CHKERRQ(ierr); 302 ierr = ISDestroy(to);CHKERRQ(ierr); 303 ierr = VecDestroy(gvec);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 /* 308 Takes the local part of an already assembled MPISBAIJ matrix 309 and disassembles it. This is to allow new nonzeros into the matrix 310 that require more communication in the matrix vector multiply. 311 Thus certain data-structures must be rebuilt. 312 313 Kind of slow! But that's what application programmers get when 314 they are sloppy. 315 */ 316 #undef __FUNCT__ 317 #define __FUNCT__ "DisAssemble_MPISBAIJ" 318 PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 319 { 320 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 321 Mat B = baij->B,Bnew; 322 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 323 PetscErrorCode ierr; 324 PetscInt i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 325 PetscInt k,bs=A->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 326 MatScalar *a = Bbaij->a; 327 PetscScalar *atmp; 328 #if defined(PETSC_USE_MAT_SINGLE) 329 PetscInt l; 330 #endif 331 332 PetscFunctionBegin; 333 #if defined(PETSC_USE_MAT_SINGLE) 334 ierr = PetscMalloc(A->bs*sizeof(PetscScalar),&atmp); 335 #endif 336 /* free stuff related to matrix-vec multiply */ 337 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 338 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); 339 baij->lvec = 0; 340 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); 341 baij->Mvctx = 0; 342 343 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 344 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 345 baij->slvec0 = 0; 346 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 347 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 348 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 349 baij->slvec1 = 0; 350 351 if (baij->colmap) { 352 #if defined (PETSC_USE_CTABLE) 353 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 354 #else 355 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 356 baij->colmap = 0; 357 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 358 #endif 359 } 360 361 /* make sure that B is assembled so we can access its values */ 362 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 363 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 364 365 /* invent new B and copy stuff over */ 366 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 367 for (i=0; i<mbs; i++) { 368 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 369 } 370 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 371 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 372 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 373 ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr); 374 ierr = PetscFree(nz);CHKERRQ(ierr); 375 376 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 377 for (i=0; i<mbs; i++) { 378 rvals[0] = bs*i; 379 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 380 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 381 col = garray[Bbaij->j[j]]*bs; 382 for (k=0; k<bs; k++) { 383 #if defined(PETSC_USE_MAT_SINGLE) 384 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 385 #else 386 atmp = a+j*bs2 + k*bs; 387 #endif 388 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 389 col++; 390 } 391 } 392 } 393 #if defined(PETSC_USE_MAT_SINGLE) 394 ierr = PetscFree(atmp);CHKERRQ(ierr); 395 #endif 396 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 397 baij->garray = 0; 398 ierr = PetscFree(rvals);CHKERRQ(ierr); 399 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 400 ierr = MatDestroy(B);CHKERRQ(ierr); 401 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 402 baij->B = Bnew; 403 A->was_assembled = PETSC_FALSE; 404 PetscFunctionReturn(0); 405 } 406 407 408 409