1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/ 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7 8 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 13 int MatSetUpMultiply_MPISBAIJ(Mat mat) 14 { 15 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 16 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 17 int Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray; 18 int bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 19 IS from,to; 20 Vec gvec; 21 int rank=sbaij->rank,lsize,size=sbaij->size; 22 int *owners=sbaij->rowners,*sowners,*ec_owner,k; 23 PetscMap vecmap; 24 PetscScalar *ptr; 25 26 PetscFunctionBegin; 27 if (sbaij->lvec) { 28 ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr); 29 sbaij->lvec = 0; 30 } 31 if (sbaij->Mvctx) { 32 ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr); 33 sbaij->Mvctx = 0; 34 } 35 36 /* For the first stab we make an array as long as the number of columns */ 37 /* mark those columns that are in sbaij->B */ 38 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 39 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 40 for (i=0; i<mbs; i++) { 41 for (j=0; j<B->ilen[i]; j++) { 42 if (!indices[aj[B->i[i] + j]]) ec++; 43 indices[aj[B->i[i] + j] ] = 1; 44 } 45 } 46 47 /* form arrays of columns we need */ 48 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 49 ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr); 50 ec_owner = sgarray + 2*ec; 51 52 ec = 0; 53 for (j=0; j<size; j++){ 54 for (i=owners[j]; i<owners[j+1]; i++){ 55 if (indices[i]) { 56 garray[ec] = i; 57 ec_owner[ec] = j; 58 ec++; 59 } 60 } 61 } 62 63 /* make indices now point into garray */ 64 for (i=0; i<ec; i++) indices[garray[i]] = i; 65 66 /* compact out the extra columns in B */ 67 for (i=0; i<mbs; i++) { 68 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 69 } 70 B->nbs = ec; 71 sbaij->B->n = ec*B->bs; 72 ierr = PetscFree(indices);CHKERRQ(ierr); 73 74 /* create local vector that is used to scatter into */ 75 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 76 77 /* create two temporary index sets for building scatter-gather */ 78 ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 79 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 80 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 81 82 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 83 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 84 85 /* generate the scatter context 86 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 87 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 88 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 89 ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr); 90 91 sbaij->garray = garray; 92 PetscLogObjectParent(mat,sbaij->Mvctx); 93 PetscLogObjectParent(mat,sbaij->lvec); 94 PetscLogObjectParent(mat,from); 95 PetscLogObjectParent(mat,to); 96 97 ierr = ISDestroy(from);CHKERRQ(ierr); 98 ierr = ISDestroy(to);CHKERRQ(ierr); 99 100 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 101 lsize = (mbs + ec)*bs; 102 ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 103 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 104 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 105 106 ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr); 107 ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr); 108 109 /* x index in the IS sfrom */ 110 for (i=0; i<ec; i++) { 111 j = ec_owner[i]; 112 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 113 } 114 /* b index in the IS sfrom */ 115 k = sowners[rank]/bs + mbs; 116 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 117 118 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 119 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 120 121 /* x index in the IS sto */ 122 k = sowners[rank]/bs + mbs; 123 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 124 /* b index in the IS sto */ 125 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 126 127 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 128 129 /* gnerate the SBAIJ scatter context */ 130 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 131 132 /* 133 Post the receives for the first matrix vector product. We sync-chronize after 134 this on the chance that the user immediately calls MatMult() after assemblying 135 the matrix. 136 */ 137 ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr); 138 139 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 140 ierr = VecGetArrayFast(sbaij->slvec1,&ptr);CHKERRQ(ierr); 141 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 142 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 143 ierr = VecRestoreArrayFast(sbaij->slvec1,&ptr);CHKERRQ(ierr); 144 145 ierr = VecGetArrayFast(sbaij->slvec0,&ptr);CHKERRQ(ierr); 146 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 147 ierr = VecRestoreArrayFast(sbaij->slvec0,&ptr);CHKERRQ(ierr); 148 149 ierr = PetscFree(stmp);CHKERRQ(ierr); 150 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 151 152 PetscLogObjectParent(mat,sbaij->sMvctx); 153 PetscLogObjectParent(mat,sbaij->slvec0); 154 PetscLogObjectParent(mat,sbaij->slvec1); 155 PetscLogObjectParent(mat,sbaij->slvec0b); 156 PetscLogObjectParent(mat,sbaij->slvec1a); 157 PetscLogObjectParent(mat,sbaij->slvec1b); 158 PetscLogObjectParent(mat,from); 159 PetscLogObjectParent(mat,to); 160 161 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 162 ierr = ISDestroy(from);CHKERRQ(ierr); 163 ierr = ISDestroy(to);CHKERRQ(ierr); 164 ierr = VecDestroy(gvec);CHKERRQ(ierr); 165 ierr = PetscFree(sgarray);CHKERRQ(ierr); 166 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 172 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 173 { 174 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 175 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 176 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 177 int bs = baij->bs,*stmp; 178 IS from,to; 179 Vec gvec; 180 #if defined (PETSC_USE_CTABLE) 181 PetscTable gid1_lid1; 182 PetscTablePosition tpos; 183 int gid,lid; 184 #endif 185 186 PetscFunctionBegin; 187 188 #if defined (PETSC_USE_CTABLE) 189 /* use a table - Mark Adams */ 190 PetscTableCreate(B->mbs,&gid1_lid1); 191 for (i=0; i<B->mbs; i++) { 192 for (j=0; j<B->ilen[i]; j++) { 193 int data,gid1 = aj[B->i[i]+j] + 1; 194 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 195 if (!data) { 196 /* one based table */ 197 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 198 } 199 } 200 } 201 /* form array of columns we need */ 202 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 203 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 204 while (tpos) { 205 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 206 gid--; lid--; 207 garray[lid] = gid; 208 } 209 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 210 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 211 for (i=0; i<ec; i++) { 212 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 213 } 214 /* compact out the extra columns in B */ 215 for (i=0; i<B->mbs; i++) { 216 for (j=0; j<B->ilen[i]; j++) { 217 int gid1 = aj[B->i[i] + j] + 1; 218 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 219 lid --; 220 aj[B->i[i]+j] = lid; 221 } 222 } 223 B->nbs = ec; 224 baij->B->n = ec*B->bs; 225 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 226 /* Mark Adams */ 227 #else 228 /* For the first stab we make an array as long as the number of columns */ 229 /* mark those columns that are in baij->B */ 230 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 231 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 232 for (i=0; i<B->mbs; i++) { 233 for (j=0; j<B->ilen[i]; j++) { 234 if (!indices[aj[B->i[i] + j]]) ec++; 235 indices[aj[B->i[i] + j] ] = 1; 236 } 237 } 238 239 /* form array of columns we need */ 240 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 241 ec = 0; 242 for (i=0; i<Nbs; i++) { 243 if (indices[i]) { 244 garray[ec++] = i; 245 } 246 } 247 248 /* make indices now point into garray */ 249 for (i=0; i<ec; i++) { 250 indices[garray[i]] = i; 251 } 252 253 /* compact out the extra columns in B */ 254 for (i=0; i<B->mbs; i++) { 255 for (j=0; j<B->ilen[i]; j++) { 256 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 257 } 258 } 259 B->nbs = ec; 260 baij->B->n = ec*B->bs; 261 ierr = PetscFree(indices);CHKERRQ(ierr); 262 #endif 263 264 /* create local vector that is used to scatter into */ 265 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 266 267 /* create two temporary index sets for building scatter-gather */ 268 for (i=0; i<ec; i++) { 269 garray[i] = bs*garray[i]; 270 } 271 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 272 for (i=0; i<ec; i++) { 273 garray[i] = garray[i]/bs; 274 } 275 276 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 277 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 278 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 279 ierr = PetscFree(stmp);CHKERRQ(ierr); 280 281 /* create temporary global vector to generate scatter context */ 282 /* this is inefficient, but otherwise we must do either 283 1) save garray until the first actual scatter when the vector is known or 284 2) have another way of generating a scatter context without a vector.*/ 285 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 286 287 /* gnerate the scatter context */ 288 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 289 290 /* 291 Post the receives for the first matrix vector product. We sync-chronize after 292 this on the chance that the user immediately calls MatMult() after assemblying 293 the matrix. 294 */ 295 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 296 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 297 298 PetscLogObjectParent(mat,baij->Mvctx); 299 PetscLogObjectParent(mat,baij->lvec); 300 PetscLogObjectParent(mat,from); 301 PetscLogObjectParent(mat,to); 302 baij->garray = garray; 303 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 304 ierr = ISDestroy(from);CHKERRQ(ierr); 305 ierr = ISDestroy(to);CHKERRQ(ierr); 306 ierr = VecDestroy(gvec);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 311 /* 312 Takes the local part of an already assembled MPIBAIJ matrix 313 and disassembles it. This is to allow new nonzeros into the matrix 314 that require more communication in the matrix vector multiply. 315 Thus certain data-structures must be rebuilt. 316 317 Kind of slow! But that's what application programmers get when 318 they are sloppy. 319 */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "DisAssemble_MPISBAIJ" 322 int DisAssemble_MPISBAIJ(Mat A) 323 { 324 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 325 Mat B = baij->B,Bnew; 326 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 327 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 328 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 329 MatScalar *a = Bbaij->a; 330 PetscScalar *atmp; 331 #if defined(PETSC_USE_MAT_SINGLE) 332 int l; 333 #endif 334 335 PetscFunctionBegin; 336 #if defined(PETSC_USE_MAT_SINGLE) 337 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 338 #endif 339 /* free stuff related to matrix-vec multiply */ 340 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 341 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 342 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 343 if (baij->colmap) { 344 #if defined (PETSC_USE_CTABLE) 345 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 346 #else 347 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 348 baij->colmap = 0; 349 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 350 #endif 351 } 352 353 /* make sure that B is assembled so we can access its values */ 354 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 355 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 356 357 /* invent new B and copy stuff over */ 358 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 359 for (i=0; i<mbs; i++) { 360 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 361 } 362 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 363 ierr = PetscFree(nz);CHKERRQ(ierr); 364 365 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 366 for (i=0; i<mbs; i++) { 367 rvals[0] = bs*i; 368 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 369 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 370 col = garray[Bbaij->j[j]]*bs; 371 for (k=0; k<bs; k++) { 372 #if defined(PETSC_USE_MAT_SINGLE) 373 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 374 #else 375 atmp = a+j*bs2 + k*bs; 376 #endif 377 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 378 col++; 379 } 380 } 381 } 382 #if defined(PETSC_USE_MAT_SINGLE) 383 ierr = PetscFree(atmp);CHKERRQ(ierr); 384 #endif 385 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 386 baij->garray = 0; 387 ierr = PetscFree(rvals);CHKERRQ(ierr); 388 PetscLogObjectMemory(A,-ec*sizeof(int)); 389 ierr = MatDestroy(B);CHKERRQ(ierr); 390 PetscLogObjectParent(A,Bnew); 391 baij->B = Bnew; 392 A->was_assembled = PETSC_FALSE; 393 PetscFunctionReturn(0); 394 } 395 396 397 398