1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/ 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7 #include "src/vec/vecimpl.h" 8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 int MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 int Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray; 17 int bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 18 IS from,to; 19 Vec gvec; 20 int rank=sbaij->rank,lsize,size=sbaij->size; 21 int *owners=sbaij->rowners,*sowners,*ec_owner,k; 22 PetscMap vecmap; 23 PetscScalar *ptr; 24 25 PetscFunctionBegin; 26 if (sbaij->lvec) { 27 ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr); 28 sbaij->lvec = 0; 29 } 30 if (sbaij->Mvctx) { 31 ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr); 32 sbaij->Mvctx = 0; 33 } 34 35 /* For the first stab we make an array as long as the number of columns */ 36 /* mark those columns that are in sbaij->B */ 37 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 38 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<mbs; i++) { 40 for (j=0; j<B->ilen[i]; j++) { 41 if (!indices[aj[B->i[i] + j]]) ec++; 42 indices[aj[B->i[i] + j] ] = 1; 43 } 44 } 45 46 /* form arrays of columns we need */ 47 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 48 ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr); 49 ec_owner = sgarray + 2*ec; 50 51 ec = 0; 52 for (j=0; j<size; j++){ 53 for (i=owners[j]; i<owners[j+1]; i++){ 54 if (indices[i]) { 55 garray[ec] = i; 56 ec_owner[ec] = j; 57 ec++; 58 } 59 } 60 } 61 62 /* make indices now point into garray */ 63 for (i=0; i<ec; i++) indices[garray[i]] = i; 64 65 /* compact out the extra columns in B */ 66 for (i=0; i<mbs; i++) { 67 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 68 } 69 B->nbs = ec; 70 sbaij->B->n = ec*B->bs; 71 ierr = PetscFree(indices);CHKERRQ(ierr); 72 73 /* create local vector that is used to scatter into */ 74 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 75 76 /* create two temporary index sets for building scatter-gather */ 77 ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 78 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 79 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 80 81 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 82 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 83 84 /* generate the scatter context */ 85 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 86 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 87 ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr); 88 89 sbaij->garray = garray; 90 PetscLogObjectParent(mat,sbaij->Mvctx); 91 PetscLogObjectParent(mat,sbaij->lvec); 92 PetscLogObjectParent(mat,from); 93 PetscLogObjectParent(mat,to); 94 95 ierr = ISDestroy(from);CHKERRQ(ierr); 96 ierr = ISDestroy(to);CHKERRQ(ierr); 97 98 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 99 lsize = (mbs + ec)*bs; 100 ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 101 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 102 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 103 104 ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr); 105 ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr); 106 107 /* x index in the IS sfrom */ 108 for (i=0; i<ec; i++) { 109 j = ec_owner[i]; 110 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 111 } 112 /* b index in the IS sfrom */ 113 k = sowners[rank]/bs + mbs; 114 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 115 116 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 117 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 118 119 /* x index in the IS sto */ 120 k = sowners[rank]/bs + mbs; 121 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 122 /* b index in the IS sto */ 123 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 124 125 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 126 127 /* gnerate the SBAIJ scatter context */ 128 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 129 130 /* 131 Post the receives for the first matrix vector product. We sync-chronize after 132 this on the chance that the user immediately calls MatMult() after assemblying 133 the matrix. 134 */ 135 ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr); 136 137 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 138 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 139 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 140 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 141 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 142 143 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 144 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 145 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 146 147 ierr = PetscFree(stmp);CHKERRQ(ierr); 148 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 149 150 PetscLogObjectParent(mat,sbaij->sMvctx); 151 PetscLogObjectParent(mat,sbaij->slvec0); 152 PetscLogObjectParent(mat,sbaij->slvec1); 153 PetscLogObjectParent(mat,sbaij->slvec0b); 154 PetscLogObjectParent(mat,sbaij->slvec1a); 155 PetscLogObjectParent(mat,sbaij->slvec1b); 156 PetscLogObjectParent(mat,from); 157 PetscLogObjectParent(mat,to); 158 159 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 160 ierr = ISDestroy(from);CHKERRQ(ierr); 161 ierr = ISDestroy(to);CHKERRQ(ierr); 162 ierr = VecDestroy(gvec);CHKERRQ(ierr); 163 ierr = PetscFree(sgarray);CHKERRQ(ierr); 164 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 170 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 171 { 172 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 173 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 174 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 175 int bs = baij->bs,*stmp; 176 IS from,to; 177 Vec gvec; 178 #if defined (PETSC_USE_CTABLE) 179 PetscTable gid1_lid1; 180 PetscTablePosition tpos; 181 int gid,lid; 182 #endif 183 184 PetscFunctionBegin; 185 186 #if defined (PETSC_USE_CTABLE) 187 /* use a table - Mark Adams */ 188 PetscTableCreate(B->mbs,&gid1_lid1); 189 for (i=0; i<B->mbs; i++) { 190 for (j=0; j<B->ilen[i]; j++) { 191 int data,gid1 = aj[B->i[i]+j] + 1; 192 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 193 if (!data) { 194 /* one based table */ 195 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 196 } 197 } 198 } 199 /* form array of columns we need */ 200 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 201 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 202 while (tpos) { 203 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 204 gid--; lid--; 205 garray[lid] = gid; 206 } 207 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 208 /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 209 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 210 for (i=0; i<ec; i++) { 211 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 212 } 213 /* compact out the extra columns in B */ 214 for (i=0; i<B->mbs; i++) { 215 for (j=0; j<B->ilen[i]; j++) { 216 int gid1 = aj[B->i[i] + j] + 1; 217 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 218 lid --; 219 aj[B->i[i]+j] = lid; 220 } 221 } 222 B->nbs = ec; 223 baij->B->n = ec*B->bs; 224 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 225 /* Mark Adams */ 226 #else 227 /* For the first stab we make an array as long as the number of columns */ 228 /* mark those columns that are in baij->B */ 229 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 230 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 231 for (i=0; i<B->mbs; i++) { 232 for (j=0; j<B->ilen[i]; j++) { 233 if (!indices[aj[B->i[i] + j]]) ec++; 234 indices[aj[B->i[i] + j] ] = 1; 235 } 236 } 237 238 /* form array of columns we need */ 239 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 240 ec = 0; 241 for (i=0; i<Nbs; i++) { 242 if (indices[i]) { 243 garray[ec++] = i; 244 } 245 } 246 247 /* make indices now point into garray */ 248 for (i=0; i<ec; i++) { 249 indices[garray[i]] = i; 250 } 251 252 /* compact out the extra columns in B */ 253 for (i=0; i<B->mbs; i++) { 254 for (j=0; j<B->ilen[i]; j++) { 255 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 256 } 257 } 258 B->nbs = ec; 259 baij->B->n = ec*B->bs; 260 ierr = PetscFree(indices);CHKERRQ(ierr); 261 #endif 262 263 /* create local vector that is used to scatter into */ 264 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 265 266 /* create two temporary index sets for building scatter-gather */ 267 for (i=0; i<ec; i++) { 268 garray[i] = bs*garray[i]; 269 } 270 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 271 for (i=0; i<ec; i++) { 272 garray[i] = garray[i]/bs; 273 } 274 275 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 276 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 277 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 278 ierr = PetscFree(stmp);CHKERRQ(ierr); 279 280 /* create temporary global vector to generate scatter context */ 281 /* this is inefficient, but otherwise we must do either 282 1) save garray until the first actual scatter when the vector is known or 283 2) have another way of generating a scatter context without a vector.*/ 284 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 285 286 /* gnerate the scatter context */ 287 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 288 289 /* 290 Post the receives for the first matrix vector product. We sync-chronize after 291 this on the chance that the user immediately calls MatMult() after assemblying 292 the matrix. 293 */ 294 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 295 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 296 297 PetscLogObjectParent(mat,baij->Mvctx); 298 PetscLogObjectParent(mat,baij->lvec); 299 PetscLogObjectParent(mat,from); 300 PetscLogObjectParent(mat,to); 301 baij->garray = garray; 302 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 303 ierr = ISDestroy(from);CHKERRQ(ierr); 304 ierr = ISDestroy(to);CHKERRQ(ierr); 305 ierr = VecDestroy(gvec);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 310 /* 311 Takes the local part of an already assembled MPIBAIJ matrix 312 and disassembles it. This is to allow new nonzeros into the matrix 313 that require more communication in the matrix vector multiply. 314 Thus certain data-structures must be rebuilt. 315 316 Kind of slow! But that's what application programmers get when 317 they are sloppy. 318 */ 319 #undef __FUNCT__ 320 #define __FUNCT__ "DisAssemble_MPISBAIJ" 321 int DisAssemble_MPISBAIJ(Mat A) 322 { 323 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 324 Mat B = baij->B,Bnew; 325 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 326 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 327 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 328 MatScalar *a = Bbaij->a; 329 PetscScalar *atmp; 330 #if defined(PETSC_USE_MAT_SINGLE) 331 int l; 332 #endif 333 334 PetscFunctionBegin; 335 #if defined(PETSC_USE_MAT_SINGLE) 336 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 337 #endif 338 /* free stuff related to matrix-vec multiply */ 339 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 340 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 341 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 342 if (baij->colmap) { 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 345 #else 346 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 347 baij->colmap = 0; 348 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 349 #endif 350 } 351 352 /* make sure that B is assembled so we can access its values */ 353 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 354 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 355 356 /* invent new B and copy stuff over */ 357 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 358 for (i=0; i<mbs; i++) { 359 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 360 } 361 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 362 ierr = PetscFree(nz);CHKERRQ(ierr); 363 364 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 365 for (i=0; i<mbs; i++) { 366 rvals[0] = bs*i; 367 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 368 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 369 col = garray[Bbaij->j[j]]*bs; 370 for (k=0; k<bs; k++) { 371 #if defined(PETSC_USE_MAT_SINGLE) 372 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 373 #else 374 atmp = a+j*bs2 + k*bs; 375 #endif 376 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 377 col++; 378 } 379 } 380 } 381 #if defined(PETSC_USE_MAT_SINGLE) 382 ierr = PetscFree(atmp);CHKERRQ(ierr); 383 #endif 384 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 385 baij->garray = 0; 386 ierr = PetscFree(rvals);CHKERRQ(ierr); 387 PetscLogObjectMemory(A,-ec*sizeof(int)); 388 ierr = MatDestroy(B);CHKERRQ(ierr); 389 PetscLogObjectParent(A,Bnew); 390 baij->B = Bnew; 391 A->was_assembled = PETSC_FALSE; 392 PetscFunctionReturn(0); 393 } 394 395 396 397