/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);


#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication structures needed for the
   parallel symmetric matrix-vector product: compacts the off-diagonal block B so
   it references only the block columns actually used, then creates the work
   vectors (lvec, slvec0/slvec1 and their sequential sub-views) and the scatter
   contexts (Mvctx, sMvctx).

   Input Parameter:
.  mat - the MPISBAIJ matrix (called after assembly)

   Note: collective on mat's communicator (creates MPI vectors, ends in a barrier).
*/
int MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ  *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  int          Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray;
  int          bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS           from,to;
  Vec          gvec;
  int          rank=sbaij->rank,lsize,size=sbaij->size;
  int          *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscMap     vecmap;
  PetscScalar  *ptr;

  PetscFunctionBegin;
  /* discard scatter data left over from a previous assembly */
  if (sbaij->lvec) {
    ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
    sbaij->lvec = 0;
  }
  if (sbaij->Mvctx) {
    ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
    sbaij->Mvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++; /* ec counts the distinct block columns used by B */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec; /* last third of sgarray holds the owning rank of each needed column */

  /* walk columns rank by rank so garray is sorted and each entry's owner is known */
  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;  /* global block-column number */
        ec_owner[ec] = j;  /* rank owning that block column */
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: renumber its column indices to local 0..ec-1 */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  sbaij->B->n = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  PetscLogObjectParent(mat,sbaij->Mvctx);
  PetscLogObjectParent(mat,sbaij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec)*bs; /* local (diagonal) segment followed by the ghost segment */
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    /* shift each global column from the matrix layout into slvec0's (mbs+ec)-per-rank layout */
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank]/bs + mbs; /* first block index of this rank's ghost segment in slvec0 */
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are sequential views that share storage with the
     local and ghost segments of slvec1 (slvec0); no data is copied */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,sbaij->sMvctx);
  PetscLogObjectParent(mat,sbaij->slvec0);
  PetscLogObjectParent(mat,sbaij->slvec1);
  PetscLogObjectParent(mat,sbaij->slvec0b);
  PetscLogObjectParent(mat,sbaij->slvec1a);
  PetscLogObjectParent(mat,sbaij->slvec1b);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);

  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
   MatSetUpMultiply_MPISBAIJ_2comm - Variant of MatSetUpMultiply_MPISBAIJ that
   builds only the classical BAIJ-style scatter (lvec/Mvctx): compacts the
   off-diagonal block B and creates the scatter context used in the
   matrix-vector product.

   Input Parameter:
.  mat - the MPISBAIJ matrix
*/
int MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ  *B = (Mat_SeqBAIJ*)(baij->B->data);
  int          i,j,*aj = B->j,ierr,ec = 0,*garray;
  int          bs = baij->bs,*stmp;
  IS           from,to;
  Vec          gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1; /* one-based map: global block column -> local index */
  PetscTablePosition tpos;
  int                gid,lid;
#else
  int                Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscTableCreate(B->mbs,&gid1_lid1);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--; /* table entries are one-based */
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  /* rebuild the table so each (sorted) global column maps to its final local index */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* garray is temporarily scaled to point indices, then restored right after */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/*
   Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
320 */ 321 #undef __FUNCT__ 322 #define __FUNCT__ "DisAssemble_MPISBAIJ" 323 int DisAssemble_MPISBAIJ(Mat A) 324 { 325 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 326 Mat B = baij->B,Bnew; 327 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 328 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 329 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 330 MatScalar *a = Bbaij->a; 331 PetscScalar *atmp; 332 #if defined(PETSC_USE_MAT_SINGLE) 333 int l; 334 #endif 335 336 PetscFunctionBegin; 337 #if defined(PETSC_USE_MAT_SINGLE) 338 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 339 #endif 340 /* free stuff related to matrix-vec multiply */ 341 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 342 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 343 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 344 if (baij->colmap) { 345 #if defined (PETSC_USE_CTABLE) 346 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 347 #else 348 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 349 baij->colmap = 0; 350 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 351 #endif 352 } 353 354 /* make sure that B is assembled so we can access its values */ 355 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 356 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 357 358 /* invent new B and copy stuff over */ 359 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 360 for (i=0; i<mbs; i++) { 361 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 362 } 363 ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr); 364 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 365 ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr); 366 ierr = PetscFree(nz);CHKERRQ(ierr); 367 368 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 369 for (i=0; i<mbs; i++) { 370 rvals[0] = bs*i; 371 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 372 for (j=Bbaij->i[i]; 
j<Bbaij->i[i+1]; j++) { 373 col = garray[Bbaij->j[j]]*bs; 374 for (k=0; k<bs; k++) { 375 #if defined(PETSC_USE_MAT_SINGLE) 376 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 377 #else 378 atmp = a+j*bs2 + k*bs; 379 #endif 380 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 381 col++; 382 } 383 } 384 } 385 #if defined(PETSC_USE_MAT_SINGLE) 386 ierr = PetscFree(atmp);CHKERRQ(ierr); 387 #endif 388 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 389 baij->garray = 0; 390 ierr = PetscFree(rvals);CHKERRQ(ierr); 391 PetscLogObjectMemory(A,-ec*sizeof(int)); 392 ierr = MatDestroy(B);CHKERRQ(ierr); 393 PetscLogObjectParent(A,Bnew); 394 baij->B = Bnew; 395 A->was_assembled = PETSC_FALSE; 396 PetscFunctionReturn(0); 397 } 398 399 400 401