/*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);


#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication data structures used by
   the parallel SBAIJ matrix-vector product: the compacted column map (garray)
   for the off-diagonal block B, the sequential work vector lvec, the
   slvec0/slvec1 split work vectors, and the two VecScatter contexts
   (Mvctx and sMvctx).  Any previously built lvec/Mvctx are destroyed first,
   so this is safe to call again after new assembly.
*/
int MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ  *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
  int          Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
  int          bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS           from,to;
  Vec          gvec;
  int          rank=sbaij->rank,lsize,size=sbaij->size;
  int          *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscMap     vecmap;
  PetscScalar  *ptr;

  PetscFunctionBegin;
  /* discard any previously built scatter machinery (rebuild after reassembly) */
  if (sbaij->lvec) {
    ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
    sbaij->lvec = 0;
  }
  if (sbaij->Mvctx) {
    ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
    sbaij->Mvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B; ec counts the distinct ones */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need; sgarray holds 2*ec scatter indices plus
     ec owner ranks (ec_owner aliases its tail) */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec;

  /* walking the ownership ranges in rank order yields garray sorted and
     records, for each needed column, which process owns it */
  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B (renumber aj to local 0..ec-1) */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->n = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     stmp is sized 2*ec because it is reused below for the sMvctx index sets */
  ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context (gvec exists only to define the layout) */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  PetscLogObjectParent(mat,sbaij->Mvctx);
  PetscLogObjectParent(mat,sbaij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     local length covers the diagonal part (mbs) plus the needed ghost blocks (ec) */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom: shift each global block column from the
     mat layout (owners) into the slvec0 layout (sowners, in blocks) */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: the ghost section starts mbs blocks past
     this process's first slvec0 block */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are sequential views that share storage with
     slvec1 (slvec0): the first bs*mbs entries and the trailing ghost entries */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArrayFast(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArrayFast(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArrayFast(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArrayFast(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,sbaij->sMvctx);
  PetscLogObjectParent(mat,sbaij->slvec0);
  PetscLogObjectParent(mat,sbaij->slvec1);
  PetscLogObjectParent(mat,sbaij->slvec0b);
  PetscLogObjectParent(mat,sbaij->slvec1a);
  PetscLogObjectParent(mat,sbaij->slvec1b);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
   MatSetUpMultiply_MPISBAIJ_2comm - Older/alternate setup that builds only the
   compacted column map (garray), the local work vector lvec, and the single
   scatter context Mvctx; it does not build the slvec0/slvec1 machinery above.
   NOTE(review): the relationship between the two setup routines (which callers
   pick which) is not visible in this file — confirm against the callers.
*/
int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ  *B    = (Mat_SeqBAIJ*)(baij->B->data);
  int          Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
  int          bs = baij->bs,*stmp;
  IS           from,to;
  Vec          gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  int                gid,lid;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscTableCreate(B->mbs,&gid1_lid1);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort so local indices increase with global column, then rebuild the table
     with the sorted numbering */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* end of table code - Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     garray is scaled to point entries in place, then restored below */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/*
     Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

     Kind of slow! But that's what application programmers get when
   they are sloppy.
318 */ 319 #undef __FUNCT__ 320 #define __FUNCT__ "DisAssemble_MPISBAIJ" 321 int DisAssemble_MPISBAIJ(Mat A) 322 { 323 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 324 Mat B = baij->B,Bnew; 325 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 326 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 327 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 328 MatScalar *a = Bbaij->a; 329 PetscScalar *atmp; 330 #if defined(PETSC_USE_MAT_SINGLE) 331 int l; 332 #endif 333 334 PetscFunctionBegin; 335 #if defined(PETSC_USE_MAT_SINGLE) 336 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 337 #endif 338 /* free stuff related to matrix-vec multiply */ 339 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 340 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 341 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 342 if (baij->colmap) { 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 345 #else 346 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 347 baij->colmap = 0; 348 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 349 #endif 350 } 351 352 /* make sure that B is assembled so we can access its values */ 353 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 354 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 355 356 /* invent new B and copy stuff over */ 357 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 358 for (i=0; i<mbs; i++) { 359 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 360 } 361 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 362 ierr = PetscFree(nz);CHKERRQ(ierr); 363 364 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 365 for (i=0; i<mbs; i++) { 366 rvals[0] = bs*i; 367 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 368 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 369 col = garray[Bbaij->j[j]]*bs; 370 for (k=0; k<bs; k++) { 371 #if 
defined(PETSC_USE_MAT_SINGLE) 372 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 373 #else 374 atmp = a+j*bs2 + k*bs; 375 #endif 376 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 377 col++; 378 } 379 } 380 } 381 #if defined(PETSC_USE_MAT_SINGLE) 382 ierr = PetscFree(atmp);CHKERRQ(ierr); 383 #endif 384 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 385 baij->garray = 0; 386 ierr = PetscFree(rvals);CHKERRQ(ierr); 387 PetscLogObjectMemory(A,-ec*sizeof(int)); 388 ierr = MatDestroy(B);CHKERRQ(ierr); 389 PetscLogObjectParent(A,Bnew); 390 baij->B = Bnew; 391 A->was_assembled = PETSC_FALSE; 392 PetscFunctionReturn(0); 393 } 394 395 396 397