1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/ 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7 8 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 13 int MatSetUpMultiply_MPISBAIJ(Mat mat) 14 { 15 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 16 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 17 int Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray; 18 int bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 19 IS from,to; 20 Vec gvec; 21 int rank=sbaij->rank,lsize,size=sbaij->size; 22 int *owners=sbaij->rowners,*sowners,*ec_owner,k; 23 PetscMap vecmap; 24 PetscScalar *ptr; 25 26 PetscFunctionBegin; 27 if (sbaij->lvec) { 28 ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr); 29 sbaij->lvec = 0; 30 } 31 if (sbaij->Mvctx) { 32 ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr); 33 sbaij->Mvctx = 0; 34 } 35 36 /* For the first stab we make an array as long as the number of columns */ 37 /* mark those columns that are in sbaij->B */ 38 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 39 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 40 for (i=0; i<mbs; i++) { 41 for (j=0; j<B->ilen[i]; j++) { 42 if (!indices[aj[B->i[i] + j]]) ec++; 43 indices[aj[B->i[i] + j] ] = 1; 44 } 45 } 46 47 /* form arrays of columns we need */ 48 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 49 ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr); 50 ec_owner = sgarray + 2*ec; 51 52 ec = 0; 53 for (j=0; j<size; j++){ 54 for (i=owners[j]; i<owners[j+1]; i++){ 55 if (indices[i]) { 56 garray[ec] = i; 57 ec_owner[ec] = j; 58 ec++; 59 } 60 } 61 } 62 63 /* make indices now point into garray */ 64 for (i=0; i<ec; i++) indices[garray[i]] = i; 65 66 /* compact out the extra columns in B */ 67 for (i=0; i<mbs; i++) { 68 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 69 } 70 B->nbs = ec; 71 sbaij->B->n = ec*B->bs; 72 ierr = PetscFree(indices);CHKERRQ(ierr); 73 74 /* create local vector that is used to scatter into */ 75 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 76 77 /* create two temporary index sets for building scatter-gather */ 78 ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 79 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 80 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 81 82 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 83 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 84 85 /* generate the scatter context 86 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 87 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 88 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 89 ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr); 90 91 sbaij->garray = garray; 92 PetscLogObjectParent(mat,sbaij->Mvctx); 93 PetscLogObjectParent(mat,sbaij->lvec); 94 PetscLogObjectParent(mat,from); 95 PetscLogObjectParent(mat,to); 96 97 ierr = ISDestroy(from);CHKERRQ(ierr); 98 ierr = ISDestroy(to);CHKERRQ(ierr); 99 100 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 101 lsize = (mbs + ec)*bs; 102 ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 103 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 104 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 105 106 ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr); 107 ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr); 108 109 /* x index in the IS sfrom */ 110 for (i=0; i<ec; i++) { 111 j = ec_owner[i]; 112 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 113 } 114 /* b index in the IS sfrom */ 115 k = sowners[rank]/bs + mbs; 116 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 117 118 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 119 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 120 121 /* x index in the IS sto */ 122 k = sowners[rank]/bs + mbs; 123 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 124 /* b index in the IS sto */ 125 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 126 127 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 128 129 /* gnerate the SBAIJ scatter context */ 130 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 131 132 /* 133 Post the receives for the first matrix vector product. We sync-chronize after 134 this on the chance that the user immediately calls MatMult() after assemblying 135 the matrix. 136 */ 137 ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr); 138 139 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 140 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 141 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 142 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 143 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 144 145 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 146 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 147 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 148 149 ierr = PetscFree(stmp);CHKERRQ(ierr); 150 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 151 152 PetscLogObjectParent(mat,sbaij->sMvctx); 153 PetscLogObjectParent(mat,sbaij->slvec0); 154 PetscLogObjectParent(mat,sbaij->slvec1); 155 PetscLogObjectParent(mat,sbaij->slvec0b); 156 PetscLogObjectParent(mat,sbaij->slvec1a); 157 PetscLogObjectParent(mat,sbaij->slvec1b); 158 PetscLogObjectParent(mat,from); 159 PetscLogObjectParent(mat,to); 160 161 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 162 ierr = ISDestroy(from);CHKERRQ(ierr); 163 ierr = ISDestroy(to);CHKERRQ(ierr); 164 ierr = VecDestroy(gvec);CHKERRQ(ierr); 165 ierr = PetscFree(sgarray);CHKERRQ(ierr); 166 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 172 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 173 { 174 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 175 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 176 int i,j,*aj = B->j,ierr,ec = 0,*garray; 177 int bs = baij->bs,*stmp; 178 IS from,to; 179 Vec gvec; 180 #if defined (PETSC_USE_CTABLE) 181 PetscTable gid1_lid1; 182 PetscTablePosition tpos; 183 int gid,lid; 184 #else 185 int Nbs = baij->Nbs,*indices; 186 #endif 187 188 PetscFunctionBegin; 189 190 #if defined (PETSC_USE_CTABLE) 191 /* use a table - Mark Adams */ 192 PetscTableCreate(B->mbs,&gid1_lid1); 193 for (i=0; i<B->mbs; i++) { 194 for (j=0; j<B->ilen[i]; j++) { 195 int data,gid1 = aj[B->i[i]+j] + 1; 196 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 197 if (!data) { 198 /* one based table */ 199 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 200 } 201 } 202 } 203 /* form array of columns we need */ 204 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 205 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 206 while (tpos) { 207 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 208 gid--; lid--; 209 garray[lid] = gid; 210 } 211 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 212 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 213 for (i=0; i<ec; i++) { 214 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 215 } 216 /* compact out the extra columns in B */ 217 for (i=0; i<B->mbs; i++) { 218 for (j=0; j<B->ilen[i]; j++) { 219 int gid1 = aj[B->i[i] + j] + 1; 220 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 221 lid --; 222 aj[B->i[i]+j] = lid; 223 } 224 } 225 B->nbs = ec; 226 baij->B->n = ec*B->bs; 227 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 228 /* Mark Adams */ 229 #else 230 /* For the first stab we make an array as long as the number of columns */ 231 /* mark those columns that are in baij->B */ 232 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 233 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 234 for (i=0; i<B->mbs; i++) { 235 for (j=0; j<B->ilen[i]; j++) { 236 if (!indices[aj[B->i[i] + j]]) ec++; 237 indices[aj[B->i[i] + j] ] = 1; 238 } 239 } 240 241 /* form array of columns we need */ 242 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 243 ec = 0; 244 for (i=0; i<Nbs; i++) { 245 if (indices[i]) { 246 garray[ec++] = i; 247 } 248 } 249 250 /* make indices now point into garray */ 251 for (i=0; i<ec; i++) { 252 indices[garray[i]] = i; 253 } 254 255 /* compact out the extra columns in B */ 256 for (i=0; i<B->mbs; i++) { 257 for (j=0; j<B->ilen[i]; j++) { 258 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 259 } 260 } 261 B->nbs = ec; 262 baij->B->n = ec*B->bs; 263 ierr = PetscFree(indices);CHKERRQ(ierr); 264 #endif 265 266 /* create local vector that is used to scatter into */ 267 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 268 269 /* create two temporary index sets for building scatter-gather */ 270 for (i=0; i<ec; i++) { 271 garray[i] = bs*garray[i]; 272 } 273 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 274 for (i=0; i<ec; i++) { 275 garray[i] = garray[i]/bs; 276 } 277 278 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 279 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 280 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 281 ierr = PetscFree(stmp);CHKERRQ(ierr); 282 283 /* create temporary global vector to generate scatter context */ 284 /* this is inefficient, but otherwise we must do either 285 1) save garray until the first actual scatter when the vector is known or 286 2) have another way of generating a scatter context without a vector.*/ 287 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 288 289 /* gnerate the scatter context */ 290 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 291 292 /* 293 Post the receives for the first matrix vector product. We sync-chronize after 294 this on the chance that the user immediately calls MatMult() after assemblying 295 the matrix. 296 */ 297 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 298 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 299 300 PetscLogObjectParent(mat,baij->Mvctx); 301 PetscLogObjectParent(mat,baij->lvec); 302 PetscLogObjectParent(mat,from); 303 PetscLogObjectParent(mat,to); 304 baij->garray = garray; 305 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 306 ierr = ISDestroy(from);CHKERRQ(ierr); 307 ierr = ISDestroy(to);CHKERRQ(ierr); 308 ierr = VecDestroy(gvec);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 313 /* 314 Takes the local part of an already assembled MPIBAIJ matrix 315 and disassembles it. This is to allow new nonzeros into the matrix 316 that require more communication in the matrix vector multiply. 317 Thus certain data-structures must be rebuilt. 318 319 Kind of slow! But that's what application programmers get when 320 they are sloppy. 321 */ 322 #undef __FUNCT__ 323 #define __FUNCT__ "DisAssemble_MPISBAIJ" 324 int DisAssemble_MPISBAIJ(Mat A) 325 { 326 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 327 Mat B = baij->B,Bnew; 328 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 329 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 330 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 331 MatScalar *a = Bbaij->a; 332 PetscScalar *atmp; 333 #if defined(PETSC_USE_MAT_SINGLE) 334 int l; 335 #endif 336 337 PetscFunctionBegin; 338 #if defined(PETSC_USE_MAT_SINGLE) 339 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 340 #endif 341 /* free stuff related to matrix-vec multiply */ 342 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 343 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 344 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 345 if (baij->colmap) { 346 #if defined (PETSC_USE_CTABLE) 347 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 348 #else 349 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 350 baij->colmap = 0; 351 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 352 #endif 353 } 354 355 /* make sure that B is assembled so we can access its values */ 356 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 357 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 359 /* invent new B and copy stuff over */ 360 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 361 for (i=0; i<mbs; i++) { 362 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 363 } 364 ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr); 365 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 366 ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr); 367 ierr = PetscFree(nz);CHKERRQ(ierr); 368 369 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 370 for (i=0; i<mbs; i++) { 371 rvals[0] = bs*i; 372 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 373 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 374 col = garray[Bbaij->j[j]]*bs; 375 for (k=0; k<bs; k++) { 376 #if defined(PETSC_USE_MAT_SINGLE) 377 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 378 #else 379 atmp = a+j*bs2 + k*bs; 380 #endif 381 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 382 col++; 383 } 384 } 385 } 386 #if defined(PETSC_USE_MAT_SINGLE) 387 ierr = PetscFree(atmp);CHKERRQ(ierr); 388 #endif 389 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 390 baij->garray = 0; 391 ierr = PetscFree(rvals);CHKERRQ(ierr); 392 PetscLogObjectMemory(A,-ec*sizeof(int)); 393 ierr = MatDestroy(B);CHKERRQ(ierr); 394 PetscLogObjectParent(A,Bnew); 395 baij->B = Bnew; 396 A->was_assembled = PETSC_FALSE; 397 PetscFunctionReturn(0); 398 } 399 400 401 402