1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/ 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7 #include "src/vec/vecimpl.h" 8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 int MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 int Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray; 17 int bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 18 IS from,to; 19 Vec gvec; 20 int rank=sbaij->rank,lsize,size=sbaij->size; 21 int *owners=sbaij->rowners,*sowners,*ec_owner,prank=100,k; 22 PetscMap vecmap; 23 PetscScalar *ptr; 24 25 PetscFunctionBegin; 26 /* 27 PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], MatSetUpMultiply_MPISBAIJ is called ...\n",rank); 28 PetscSynchronizedFlush(PETSC_COMM_WORLD); 29 */ 30 ierr = PetscOptionsGetInt(PETSC_NULL,"-prank",&prank,PETSC_NULL);CHKERRQ(ierr); 31 if (sbaij->lvec) { 32 ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr); 33 sbaij->lvec = 0; 34 } 35 if (sbaij->Mvctx) { 36 ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr); 37 sbaij->Mvctx = 0; 38 } 39 40 /* For the first stab we make an array as long as the number of columns */ 41 /* mark those columns that are in sbaij->B */ 42 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 43 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 44 for (i=0; i<mbs; i++) { 45 for (j=0; j<B->ilen[i]; j++) { 46 if (!indices[aj[B->i[i] + j]]) ec++; 47 indices[aj[B->i[i] + j] ] = 1; 48 } 49 } 50 51 /* form arrays of columns we need */ 52 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 53 ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr); 54 ec_owner = sgarray + 2*ec; 55 56 ec = 0; 57 for (j=0; j<size; j++){ 58 for (i=owners[j]; i<owners[j+1]; i++){ 59 if (indices[i]) { 60 garray[ec] = i; 61 ec_owner[ec] = j; 62 ec++; 63 } 64 } 65 } 66 67 if (rank == prank){ 68 printf("proc[%d]: \n",rank); 69 for (i=0; i<ec;i++) printf("[%d]: garray = %d, ec_owner = %d\n", i,garray[i], ec_owner[i]); 70 } 71 72 /* make indices now point into garray */ 73 for (i=0; i<ec; i++) { 74 indices[garray[i]] = i; 75 } 76 77 /* compact out the extra columns in B */ 78 for (i=0; i<mbs; i++) { 79 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 80 } 81 B->nbs = ec; 82 sbaij->B->n = ec*B->bs; 83 ierr = PetscFree(indices);CHKERRQ(ierr); 84 85 /* create local vector that is used to scatter into */ 86 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 87 88 /* create two temporary index sets for building scatter-gather */ 89 ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 90 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 91 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 92 93 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 94 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 95 96 if (rank == prank){ 97 ierr = PetscPrintf(PETSC_COMM_SELF," from: ");CHKERRQ(ierr); 98 ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 99 ierr = PetscPrintf(PETSC_COMM_SELF," to:");CHKERRQ(ierr); 100 ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 101 } 102 103 /* generate the scatter context */ 104 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 105 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 106 ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr); 107 108 sbaij->garray = garray; 109 PetscLogObjectParent(mat,sbaij->Mvctx); 110 PetscLogObjectParent(mat,sbaij->lvec); 111 PetscLogObjectParent(mat,from); 112 PetscLogObjectParent(mat,to); 113 114 ierr = ISDestroy(from);CHKERRQ(ierr); 115 ierr = ISDestroy(to);CHKERRQ(ierr); 116 117 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 118 lsize = (mbs + ec)*bs; 119 ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 120 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 121 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 122 123 ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr); 124 ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr); 125 /* for (i=0; i<=size;i++) sowners[i] /= bs; */ 126 127 if (rank==prank){ 128 printf("length of slvec0: \n", vec_size); 129 for (i=0; i<=size;i++) printf("[%d]: owners = %d, sowners/bs = %d\n", i,owners[i], sowners[i]/bs); 130 } 131 132 /* x index in the IS sfrom */ 133 for (i=0; i<ec; i++) { 134 j = ec_owner[i]; 135 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 136 } 137 /* b index in the IS sfrom */ 138 k = sowners[rank]/bs + mbs; 139 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 140 141 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 142 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 143 144 /* x index in the IS sto */ 145 k = sowners[rank]/bs + mbs; 146 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 147 /* b index in the IS sto */ 148 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 149 150 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 151 152 if (rank == prank){ 153 ierr = PetscPrintf(PETSC_COMM_SELF," sfrom: ");CHKERRQ(ierr); 154 ierr = ISView(from,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 155 ierr = PetscPrintf(PETSC_COMM_SELF," sto:");CHKERRQ(ierr); 156 ierr = ISView(to,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 157 } 158 159 /* gnerate the SBAIJ scatter context */ 160 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 161 162 /* 163 Post the receives for the first matrix vector product. We sync-chronize after 164 this on the chance that the user immediately calls MatMult() after assemblying 165 the matrix. 166 */ 167 ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr); 168 169 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 170 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 171 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 172 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 173 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 174 175 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 176 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 177 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 178 179 ierr = PetscFree(stmp);CHKERRQ(ierr); 180 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 181 182 PetscLogObjectParent(mat,sbaij->sMvctx); 183 PetscLogObjectParent(mat,sbaij->slvec0); 184 PetscLogObjectParent(mat,sbaij->slvec1); 185 PetscLogObjectParent(mat,sbaij->slvec0b); 186 PetscLogObjectParent(mat,sbaij->slvec1a); 187 PetscLogObjectParent(mat,sbaij->slvec1b); 188 PetscLogObjectParent(mat,from); 189 PetscLogObjectParent(mat,to); 190 191 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 192 ierr = ISDestroy(from);CHKERRQ(ierr); 193 ierr = ISDestroy(to);CHKERRQ(ierr); 194 ierr = VecDestroy(gvec);CHKERRQ(ierr); 195 ierr = PetscFree(sgarray);CHKERRQ(ierr); 196 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 202 int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 203 { 204 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 205 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 206 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 207 int bs = baij->bs,*stmp; 208 IS from,to; 209 Vec gvec; 210 #if defined (PETSC_USE_CTABLE) 211 PetscTable gid1_lid1; 212 PetscTablePosition tpos; 213 int gid,lid; 214 #endif 215 216 PetscFunctionBegin; 217 218 #if defined (PETSC_USE_CTABLE) 219 /* use a table - Mark Adams */ 220 PetscTableCreate(B->mbs,&gid1_lid1); 221 for (i=0; i<B->mbs; i++) { 222 for (j=0; j<B->ilen[i]; j++) { 223 int data,gid1 = aj[B->i[i]+j] + 1; 224 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 225 if (!data) { 226 /* one based table */ 227 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 228 } 229 } 230 } 231 /* form array of columns we need */ 232 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 233 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 234 while (tpos) { 235 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 236 gid--; lid--; 237 garray[lid] = gid; 238 } 239 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 240 /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 241 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 242 for (i=0; i<ec; i++) { 243 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 244 } 245 /* compact out the extra columns in B */ 246 for (i=0; i<B->mbs; i++) { 247 for (j=0; j<B->ilen[i]; j++) { 248 int gid1 = aj[B->i[i] + j] + 1; 249 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 250 lid --; 251 aj[B->i[i]+j] = lid; 252 } 253 } 254 B->nbs = ec; 255 baij->B->n = ec*B->bs; 256 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 257 /* Mark Adams */ 258 #else 259 /* For the first stab we make an array as long as the number of columns */ 260 /* mark those columns that are in baij->B */ 261 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 262 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 263 for (i=0; i<B->mbs; i++) { 264 for (j=0; j<B->ilen[i]; j++) { 265 if (!indices[aj[B->i[i] + j]]) ec++; 266 indices[aj[B->i[i] + j] ] = 1; 267 } 268 } 269 270 /* form array of columns we need */ 271 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 272 ec = 0; 273 for (i=0; i<Nbs; i++) { 274 if (indices[i]) { 275 garray[ec++] = i; 276 } 277 } 278 279 /* make indices now point into garray */ 280 for (i=0; i<ec; i++) { 281 indices[garray[i]] = i; 282 } 283 284 /* compact out the extra columns in B */ 285 for (i=0; i<B->mbs; i++) { 286 for (j=0; j<B->ilen[i]; j++) { 287 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 288 } 289 } 290 B->nbs = ec; 291 baij->B->n = ec*B->bs; 292 ierr = PetscFree(indices);CHKERRQ(ierr); 293 #endif 294 295 /* create local vector that is used to scatter into */ 296 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 297 298 /* create two temporary index sets for building scatter-gather */ 299 for (i=0; i<ec; i++) { 300 garray[i] = bs*garray[i]; 301 } 302 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 303 for (i=0; i<ec; i++) { 304 garray[i] = garray[i]/bs; 305 } 306 307 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 308 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 309 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 310 ierr = PetscFree(stmp);CHKERRQ(ierr); 311 312 /* create temporary global vector to generate scatter context */ 313 /* this is inefficient, but otherwise we must do either 314 1) save garray until the first actual scatter when the vector is known or 315 2) have another way of generating a scatter context without a vector.*/ 316 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 317 318 /* gnerate the scatter context */ 319 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 320 321 /* 322 Post the receives for the first matrix vector product. We sync-chronize after 323 this on the chance that the user immediately calls MatMult() after assemblying 324 the matrix. 325 */ 326 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 327 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 328 329 PetscLogObjectParent(mat,baij->Mvctx); 330 PetscLogObjectParent(mat,baij->lvec); 331 PetscLogObjectParent(mat,from); 332 PetscLogObjectParent(mat,to); 333 baij->garray = garray; 334 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 335 ierr = ISDestroy(from);CHKERRQ(ierr); 336 ierr = ISDestroy(to);CHKERRQ(ierr); 337 ierr = VecDestroy(gvec);CHKERRQ(ierr); 338 PetscFunctionReturn(0); 339 } 340 341 342 /* 343 Takes the local part of an already assembled MPIBAIJ matrix 344 and disassembles it. This is to allow new nonzeros into the matrix 345 that require more communication in the matrix vector multiply. 346 Thus certain data-structures must be rebuilt. 347 348 Kind of slow! But that's what application programmers get when 349 they are sloppy. 350 */ 351 #undef __FUNCT__ 352 #define __FUNCT__ "DisAssemble_MPISBAIJ" 353 int DisAssemble_MPISBAIJ(Mat A) 354 { 355 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 356 Mat B = baij->B,Bnew; 357 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 358 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 359 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 360 MatScalar *a = Bbaij->a; 361 PetscScalar *atmp; 362 #if defined(PETSC_USE_MAT_SINGLE) 363 int l; 364 #endif 365 366 PetscFunctionBegin; 367 #if defined(PETSC_USE_MAT_SINGLE) 368 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 369 #endif 370 /* free stuff related to matrix-vec multiply */ 371 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 372 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 373 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 374 if (baij->colmap) { 375 #if defined (PETSC_USE_CTABLE) 376 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 377 #else 378 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 379 baij->colmap = 0; 380 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 381 #endif 382 } 383 384 /* make sure that B is assembled so we can access its values */ 385 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 386 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 387 388 /* invent new B and copy stuff over */ 389 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 390 for (i=0; i<mbs; i++) { 391 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 392 } 393 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 394 ierr = PetscFree(nz);CHKERRQ(ierr); 395 396 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 397 for (i=0; i<mbs; i++) { 398 rvals[0] = bs*i; 399 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 400 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 401 col = garray[Bbaij->j[j]]*bs; 402 for (k=0; k<bs; k++) { 403 #if defined(PETSC_USE_MAT_SINGLE) 404 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 405 #else 406 atmp = a+j*bs2 + k*bs; 407 #endif 408 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 409 col++; 410 } 411 } 412 } 413 #if defined(PETSC_USE_MAT_SINGLE) 414 ierr = PetscFree(atmp);CHKERRQ(ierr); 415 #endif 416 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 417 baij->garray = 0; 418 ierr = PetscFree(rvals);CHKERRQ(ierr); 419 PetscLogObjectMemory(A,-ec*sizeof(int)); 420 ierr = MatDestroy(B);CHKERRQ(ierr); 421 PetscLogObjectParent(A,Bnew); 422 baij->B = Bnew; 423 A->was_assembled = PETSC_FALSE; 424 PetscFunctionReturn(0); 425 } 426 427 428 429