/*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "src/vec/vecimpl.h"

extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);


#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication machinery needed by the
   parallel SBAIJ matrix-vector product.

   Input Parameter:
.  mat - the assembled MPISBAIJ matrix

   Notes:
   Compacts the off-diagonal block B so its block-column indices become local
   (0..ec-1), records the global block column of each local column in
   sbaij->garray, and creates two scatter contexts:
     - sbaij->Mvctx:  global vector -> sbaij->lvec (the usual BAIJ-style scatter)
     - sbaij->sMvctx: slvec0 -> slvec1, the combined x/b scatter that the
       symmetric multiply uses (both directions in a single communication).
   Also creates the "view" vectors slvec0b, slvec1a, slvec1b that alias pieces
   of slvec0/slvec1.
*/
int MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ  *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
  int          Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
  int          bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS           from,to;
  Vec          gvec;
  int          rank=sbaij->rank,lsize,size=sbaij->size;
  int          *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscMap     vecmap;
  PetscScalar  *ptr;

  PetscFunctionBegin;
  /* discard any machinery left over from a previous assembly */
  if (sbaij->lvec) {
    ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
    sbaij->lvec = 0;
  }
  if (sbaij->Mvctx) {
    ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
    sbaij->Mvctx = 0;
  }

  /* For the first stab we make an array as long as the number of block columns;
     mark those columns that appear in sbaij->B and count the distinct ones (ec) */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need; sgarray holds 2*ec scatter indices plus,
     in its last third (ec_owner), the owning rank of each needed column */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec;

  /* walking ranks in order keeps garray sorted by owner, then by column */
  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray (global block column -> local column) */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: rewrite aj in terms of local columns */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  sbaij->B->n = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     stmp is sized 2*ec because it is reused below for the sMvctx index sets */
  ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context; gvec only supplies the parallel layout */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  PetscLogObjectParent(mat,sbaij->Mvctx);
  PetscLogObjectParent(mat,sbaij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vectors slvec0/slvec1 that the SBAIJ multiply scatters
     from/into; each local piece holds the diagonal part (mbs blocks) followed
     by the off-process part (ec blocks) */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  /* sowners[] gives the global ranges of slvec0, needed to translate between
     the layout of a plain mat-sized vector (owners) and slvec0's layout */
  ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);

  /* x indices in the IS "from": each needed column, shifted into slvec0's layout */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b indices in the IS "from": our own off-process slots, consecutively */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x indices in the IS "to" */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b indices in the IS "to" */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are sequential vectors that alias the
     diagonal and off-process pieces of slvec1 (slvec0) without copying */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,sbaij->sMvctx);
  PetscLogObjectParent(mat,sbaij->slvec0);
  PetscLogObjectParent(mat,sbaij->slvec1);
  PetscLogObjectParent(mat,sbaij->slvec0b);
  PetscLogObjectParent(mat,sbaij->slvec1a);
  PetscLogObjectParent(mat,sbaij->slvec1b);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
   MatSetUpMultiply_MPISBAIJ_2comm - BAIJ-style (two-communication) variant of
   the multiply setup: builds only the garray/lvec/Mvctx machinery, without the
   combined slvec0/slvec1 scatter built by MatSetUpMultiply_MPISBAIJ.

   Input Parameter:
.  mat - the assembled MPISBAIJ matrix

   Notes:
   With PETSC_USE_CTABLE the global-to-local column map is kept in a hash
   table instead of a dense Nbs-long array (saves memory on wide matrices).
   NOTE(review): under PETSC_USE_CTABLE the locals Nbs and indices are unused;
   harmless, but may draw compiler warnings.
*/
int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ  *B    = (Mat_SeqBAIJ*)(baij->B->data);
  int          Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
  int          bs = baij->bs,*stmp;
  IS           from,to;
  Vec          gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  int                gid,lid;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscTableCreate(B->mbs,&gid1_lid1);
  /* assign a (one-based) local id to each distinct global block column of B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort, then rebuild the table so local ids follow the sorted order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  /* qsort(garray, ec, sizeof(int), intcomparcarc); */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     garray is temporarily scaled to point indices, then restored */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/*
   Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
320 */ 321 #undef __FUNCT__ 322 #define __FUNCT__ "DisAssemble_MPISBAIJ" 323 int DisAssemble_MPISBAIJ(Mat A) 324 { 325 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 326 Mat B = baij->B,Bnew; 327 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 328 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 329 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 330 MatScalar *a = Bbaij->a; 331 PetscScalar *atmp; 332 #if defined(PETSC_USE_MAT_SINGLE) 333 int l; 334 #endif 335 336 PetscFunctionBegin; 337 #if defined(PETSC_USE_MAT_SINGLE) 338 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 339 #endif 340 /* free stuff related to matrix-vec multiply */ 341 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 342 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 343 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 344 if (baij->colmap) { 345 #if defined (PETSC_USE_CTABLE) 346 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 347 #else 348 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 349 baij->colmap = 0; 350 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 351 #endif 352 } 353 354 /* make sure that B is assembled so we can access its values */ 355 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 356 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 357 358 /* invent new B and copy stuff over */ 359 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 360 for (i=0; i<mbs; i++) { 361 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 362 } 363 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 364 ierr = PetscFree(nz);CHKERRQ(ierr); 365 366 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 367 for (i=0; i<mbs; i++) { 368 rvals[0] = bs*i; 369 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 370 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 371 col = garray[Bbaij->j[j]]*bs; 372 for (k=0; k<bs; k++) { 373 #if 
defined(PETSC_USE_MAT_SINGLE) 374 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 375 #else 376 atmp = a+j*bs2 + k*bs; 377 #endif 378 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 379 col++; 380 } 381 } 382 } 383 #if defined(PETSC_USE_MAT_SINGLE) 384 ierr = PetscFree(atmp);CHKERRQ(ierr); 385 #endif 386 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 387 baij->garray = 0; 388 ierr = PetscFree(rvals);CHKERRQ(ierr); 389 PetscLogObjectMemory(A,-ec*sizeof(int)); 390 ierr = MatDestroy(B);CHKERRQ(ierr); 391 PetscLogObjectParent(A,Bnew); 392 baij->B = Bnew; 393 A->was_assembled = PETSC_FALSE; 394 PetscFunctionReturn(0); 395 } 396 397 398 399