1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/ 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7 #include "src/vec/vecimpl.h" 8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 int MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 int Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray; 17 int bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 18 IS from,to; 19 Vec gvec; 20 int rank=sbaij->rank,lsize,size=sbaij->size; 21 int *owners=sbaij->rowners,*sowners,*ec_owner,prank=100,k; 22 PetscMap vecmap; 23 PetscScalar *ptr; 24 25 PetscFunctionBegin; 26 /* 27 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], MatSetUpMultiply_MPISBAIJ is called ...\n",rank); 28 PetscSynchronizedFlush(PETSC_COMM_WORLD); 29 */ 30 ierr = PetscOptionsGetInt(PETSC_NULL,"-prank",&prank,PETSC_NULL);CHKERRQ(ierr); 31 if (sbaij->lvec) { 32 ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr); 33 sbaij->lvec = 0; 34 } 35 if (sbaij->Mvctx) { 36 ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr); 37 sbaij->Mvctx = 0; 38 } 39 40 /* For the first stab we make an array as long as the number of columns */ 41 /* mark those columns that are in sbaij->B */ 42 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 43 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 44 for (i=0; i<mbs; i++) { 45 for (j=0; j<B->ilen[i]; j++) { 46 if (!indices[aj[B->i[i] + j]]) ec++; 47 indices[aj[B->i[i] + j] ] = 1; 48 } 49 } 50 51 /* form arrays of columns we need */ 52 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 53 ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr); 54 ec_owner = sgarray + 2*ec; 55 56 ec = 0; 57 for (j=0; j<size; j++){ 58 for (i=owners[j]; i<owners[j+1]; i++){ 59 if (indices[i]) { 60 garray[ec] = i; 61 ec_owner[ec] = j; 62 ec++; 63 } 64 } 65 } 66 67 if (rank == prank){ 68 printf("proc[%d]: \n",rank); 69 for (i=0; i<ec;i++) printf("[%d]: garray = %d, ec_owner = %d\n", i,garray[i], ec_owner[i]); 70 } 71 72 /* make indices now point into garray */ 73 for (i=0; i<ec; i++) { 74 indices[garray[i]] = i; 75 } 76 77 /* compact out the extra columns in B */ 78 for (i=0; i<mbs; i++) { 79 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 80 } 81 B->nbs = ec; 82 sbaij->B->n = ec*B->bs; 83 ierr = PetscFree(indices);CHKERRQ(ierr); 84 85 /* create local vector that is used to scatter into */ 86 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 87 88 /* create two temporary index sets for building scatter-gather */ 89 ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 90 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 91 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 92 93 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 94 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 95 96 if (rank == prank){ 97 ierr = PetscPrintf(PETSC_COMM_SELF," from: ");CHKERRQ(ierr); 98 ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 99 ierr = PetscPrintf(PETSC_COMM_SELF," to:");CHKERRQ(ierr); 100 ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 101 } 102 103 /* generate the scatter context */ 104 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 105 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 106 ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr); 107 108 sbaij->garray = garray; 109 PetscLogObjectParent(mat,sbaij->Mvctx); 110 PetscLogObjectParent(mat,sbaij->lvec); 111 PetscLogObjectParent(mat,from); 112 PetscLogObjectParent(mat,to); 113 114 ierr = ISDestroy(from);CHKERRQ(ierr); 115 ierr = ISDestroy(to);CHKERRQ(ierr); 116 117 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 118 lsize = (mbs + ec)*bs; 119 ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 120 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 121 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 122 123 ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr); 124 ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr); 125 126 /* x index in the IS sfrom */ 127 for (i=0; i<ec; i++) { 128 j = ec_owner[i]; 129 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 130 } 131 /* b index in the IS sfrom */ 132 k = sowners[rank]/bs + mbs; 133 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 134 135 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 136 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 137 138 /* x index in the IS sto */ 139 k = sowners[rank]/bs + mbs; 140 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 141 /* b index in the IS sto */ 142 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 143 144 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 145 146 if (rank == prank){ 147 ierr = PetscPrintf(PETSC_COMM_SELF," sfrom: ");CHKERRQ(ierr); 148 ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 149 ierr = PetscPrintf(PETSC_COMM_SELF," sto:");CHKERRQ(ierr); 150 ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 151 } 152 153 /* gnerate the SBAIJ scatter context */ 154 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 155 156 /* 157 Post the receives for the first matrix vector product. We sync-chronize after 158 this on the chance that the user immediately calls MatMult() after assemblying 159 the matrix. 160 */ 161 ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr); 162 163 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 164 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 165 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 166 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 167 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 168 169 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 170 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 171 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 172 173 ierr = PetscFree(stmp);CHKERRQ(ierr); 174 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 175 176 PetscLogObjectParent(mat,sbaij->sMvctx); 177 PetscLogObjectParent(mat,sbaij->slvec0); 178 PetscLogObjectParent(mat,sbaij->slvec1); 179 PetscLogObjectParent(mat,sbaij->slvec0b); 180 PetscLogObjectParent(mat,sbaij->slvec1a); 181 PetscLogObjectParent(mat,sbaij->slvec1b); 182 PetscLogObjectParent(mat,from); 183 PetscLogObjectParent(mat,to); 184 185 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 186 ierr = ISDestroy(from);CHKERRQ(ierr); 187 ierr = ISDestroy(to);CHKERRQ(ierr); 188 ierr = VecDestroy(gvec);CHKERRQ(ierr); 189 ierr = PetscFree(sgarray);CHKERRQ(ierr); 190 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 196 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 197 { 198 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 199 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 200 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 201 int bs = baij->bs,*stmp; 202 IS from,to; 203 Vec gvec; 204 #if defined (PETSC_USE_CTABLE) 205 PetscTable gid1_lid1; 206 PetscTablePosition tpos; 207 int gid,lid; 208 #endif 209 210 PetscFunctionBegin; 211 212 #if defined (PETSC_USE_CTABLE) 213 /* use a table - Mark Adams */ 214 PetscTableCreate(B->mbs,&gid1_lid1); 215 for (i=0; i<B->mbs; i++) { 216 for (j=0; j<B->ilen[i]; j++) { 217 int data,gid1 = aj[B->i[i]+j] + 1; 218 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 219 if (!data) { 220 /* one based table */ 221 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 222 } 223 } 224 } 225 /* form array of columns we need */ 226 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 227 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 228 while (tpos) { 229 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 230 gid--; lid--; 231 garray[lid] = gid; 232 } 233 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 234 /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 235 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 236 for (i=0; i<ec; i++) { 237 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 238 } 239 /* compact out the extra columns in B */ 240 for (i=0; i<B->mbs; i++) { 241 for (j=0; j<B->ilen[i]; j++) { 242 int gid1 = aj[B->i[i] + j] + 1; 243 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 244 lid --; 245 aj[B->i[i]+j] = lid; 246 } 247 } 248 B->nbs = ec; 249 baij->B->n = ec*B->bs; 250 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 251 /* Mark Adams */ 252 #else 253 /* For the first stab we make an array as long as the number of columns */ 254 /* mark those columns that are in baij->B */ 255 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 256 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 257 for (i=0; i<B->mbs; i++) { 258 for (j=0; j<B->ilen[i]; j++) { 259 if (!indices[aj[B->i[i] + j]]) ec++; 260 indices[aj[B->i[i] + j] ] = 1; 261 } 262 } 263 264 /* form array of columns we need */ 265 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 266 ec = 0; 267 for (i=0; i<Nbs; i++) { 268 if (indices[i]) { 269 garray[ec++] = i; 270 } 271 } 272 273 /* make indices now point into garray */ 274 for (i=0; i<ec; i++) { 275 indices[garray[i]] = i; 276 } 277 278 /* compact out the extra columns in B */ 279 for (i=0; i<B->mbs; i++) { 280 for (j=0; j<B->ilen[i]; j++) { 281 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 282 } 283 } 284 B->nbs = ec; 285 baij->B->n = ec*B->bs; 286 ierr = PetscFree(indices);CHKERRQ(ierr); 287 #endif 288 289 /* create local vector that is used to scatter into */ 290 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 291 292 /* create two temporary index sets for building scatter-gather */ 293 for (i=0; i<ec; i++) { 294 garray[i] = bs*garray[i]; 295 } 296 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 297 for (i=0; i<ec; i++) { 298 garray[i] = garray[i]/bs; 299 } 300 301 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 302 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 303 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 304 ierr = PetscFree(stmp);CHKERRQ(ierr); 305 306 /* create temporary global vector to generate scatter context */ 307 /* this is inefficient, but otherwise we must do either 308 1) save garray until the first actual scatter when the vector is known or 309 2) have another way of generating a scatter context without a vector.*/ 310 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 311 312 /* gnerate the scatter context */ 313 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 314 315 /* 316 Post the receives for the first matrix vector product. We sync-chronize after 317 this on the chance that the user immediately calls MatMult() after assemblying 318 the matrix. 319 */ 320 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 321 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 322 323 PetscLogObjectParent(mat,baij->Mvctx); 324 PetscLogObjectParent(mat,baij->lvec); 325 PetscLogObjectParent(mat,from); 326 PetscLogObjectParent(mat,to); 327 baij->garray = garray; 328 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 329 ierr = ISDestroy(from);CHKERRQ(ierr); 330 ierr = ISDestroy(to);CHKERRQ(ierr); 331 ierr = VecDestroy(gvec);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 336 /* 337 Takes the local part of an already assembled MPIBAIJ matrix 338 and disassembles it. This is to allow new nonzeros into the matrix 339 that require more communication in the matrix vector multiply. 340 Thus certain data-structures must be rebuilt. 341 342 Kind of slow! But that's what application programmers get when 343 they are sloppy. 344 */ 345 #undef __FUNCT__ 346 #define __FUNCT__ "DisAssemble_MPISBAIJ" 347 int DisAssemble_MPISBAIJ(Mat A) 348 { 349 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 350 Mat B = baij->B,Bnew; 351 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 352 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 353 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 354 MatScalar *a = Bbaij->a; 355 PetscScalar *atmp; 356 #if defined(PETSC_USE_MAT_SINGLE) 357 int l; 358 #endif 359 360 PetscFunctionBegin; 361 #if defined(PETSC_USE_MAT_SINGLE) 362 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 363 #endif 364 /* free stuff related to matrix-vec multiply */ 365 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 366 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 367 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 368 if (baij->colmap) { 369 #if defined (PETSC_USE_CTABLE) 370 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 371 #else 372 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 373 baij->colmap = 0; 374 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 375 #endif 376 } 377 378 /* make sure that B is assembled so we can access its values */ 379 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 380 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 381 382 /* invent new B and copy stuff over */ 383 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 384 for (i=0; i<mbs; i++) { 385 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 386 } 387 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 388 ierr = PetscFree(nz);CHKERRQ(ierr); 389 390 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 391 for (i=0; i<mbs; i++) { 392 rvals[0] = bs*i; 393 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 394 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 395 col = garray[Bbaij->j[j]]*bs; 396 for (k=0; k<bs; k++) { 397 #if defined(PETSC_USE_MAT_SINGLE) 398 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 399 #else 400 atmp = a+j*bs2 + k*bs; 401 #endif 402 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 403 col++; 404 } 405 } 406 } 407 #if defined(PETSC_USE_MAT_SINGLE) 408 ierr = PetscFree(atmp);CHKERRQ(ierr); 409 #endif 410 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 411 baij->garray = 0; 412 ierr = PetscFree(rvals);CHKERRQ(ierr); 413 PetscLogObjectMemory(A,-ec*sizeof(int)); 414 ierr = MatDestroy(B);CHKERRQ(ierr); 415 PetscLogObjectParent(A,Bnew); 416 baij->B = Bnew; 417 A->was_assembled = PETSC_FALSE; 418 PetscFunctionReturn(0); 419 } 420 421 422 423