1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 6 7 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 8 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 int Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray,*sgarray; 17 int bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 18 IS from,to; 19 Vec gvec; 20 int rank=sbaij->rank,lsize,size=sbaij->size; 21 int *owners=sbaij->rowners,*sowners,*ec_owner,k; 22 PetscMap vecmap; 23 PetscScalar *ptr; 24 25 PetscFunctionBegin; 26 if (sbaij->lvec) { 27 ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr); 28 sbaij->lvec = 0; 29 } 30 if (sbaij->Mvctx) { 31 ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr); 32 sbaij->Mvctx = 0; 33 } 34 35 /* For the first stab we make an array as long as the number of columns */ 36 /* mark those columns that are in sbaij->B */ 37 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 38 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<mbs; i++) { 40 for (j=0; j<B->ilen[i]; j++) { 41 if (!indices[aj[B->i[i] + j]]) ec++; 42 indices[aj[B->i[i] + j] ] = 1; 43 } 44 } 45 46 /* form arrays of columns we need */ 47 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 48 ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr); 49 ec_owner = sgarray + 2*ec; 50 51 ec = 0; 52 for (j=0; j<size; j++){ 53 for (i=owners[j]; i<owners[j+1]; i++){ 54 if (indices[i]) { 55 garray[ec] = i; 56 ec_owner[ec] = j; 57 ec++; 58 } 59 } 60 } 61 62 /* make indices now point into garray */ 63 for (i=0; i<ec; i++) indices[garray[i]] = i; 64 65 /* compact out the extra columns in B */ 66 for (i=0; i<mbs; i++) { 67 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 68 } 69 B->nbs = ec; 70 sbaij->B->n = ec*B->bs; 71 ierr = PetscFree(indices);CHKERRQ(ierr); 72 73 /* create local vector that is used to scatter into */ 74 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 75 76 /* create two temporary index sets for building scatter-gather */ 77 ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 78 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 79 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 80 81 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 82 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 83 84 /* generate the scatter context 85 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 86 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 87 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 88 ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr); 89 90 sbaij->garray = garray; 91 PetscLogObjectParent(mat,sbaij->Mvctx); 92 PetscLogObjectParent(mat,sbaij->lvec); 93 PetscLogObjectParent(mat,from); 94 PetscLogObjectParent(mat,to); 95 96 ierr = ISDestroy(from);CHKERRQ(ierr); 97 ierr = ISDestroy(to);CHKERRQ(ierr); 98 99 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 100 lsize = (mbs + ec)*bs; 101 ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 102 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 103 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 104 105 ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr); 106 ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr); 107 108 /* x index in the IS sfrom */ 109 for (i=0; i<ec; i++) { 110 j = ec_owner[i]; 111 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 112 } 113 /* b index in the IS sfrom */ 114 k = sowners[rank]/bs + mbs; 115 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 116 117 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 118 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 119 120 /* x index in the IS sto */ 121 k = sowners[rank]/bs + mbs; 122 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 123 /* b index in the IS sto */ 124 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 125 126 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 127 128 /* gnerate the SBAIJ scatter context */ 129 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 130 131 /* 132 Post the receives for the first matrix vector product. We sync-chronize after 133 this on the chance that the user immediately calls MatMult() after assemblying 134 the matrix. 135 */ 136 ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr); 137 138 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 139 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 140 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 141 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 142 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 143 144 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 145 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 146 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 147 148 ierr = PetscFree(stmp);CHKERRQ(ierr); 149 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 150 151 PetscLogObjectParent(mat,sbaij->sMvctx); 152 PetscLogObjectParent(mat,sbaij->slvec0); 153 PetscLogObjectParent(mat,sbaij->slvec1); 154 PetscLogObjectParent(mat,sbaij->slvec0b); 155 PetscLogObjectParent(mat,sbaij->slvec1a); 156 PetscLogObjectParent(mat,sbaij->slvec1b); 157 PetscLogObjectParent(mat,from); 158 PetscLogObjectParent(mat,to); 159 160 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 161 ierr = ISDestroy(from);CHKERRQ(ierr); 162 ierr = ISDestroy(to);CHKERRQ(ierr); 163 ierr = VecDestroy(gvec);CHKERRQ(ierr); 164 ierr = PetscFree(sgarray);CHKERRQ(ierr); 165 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 171 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 172 { 173 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 174 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 175 int i,j,*aj = B->j,ierr,ec = 0,*garray; 176 int bs = baij->bs,*stmp; 177 IS from,to; 178 Vec gvec; 179 #if defined (PETSC_USE_CTABLE) 180 PetscTable gid1_lid1; 181 PetscTablePosition tpos; 182 int gid,lid; 183 #else 184 int Nbs = baij->Nbs,*indices; 185 #endif 186 187 PetscFunctionBegin; 188 189 #if defined (PETSC_USE_CTABLE) 190 /* use a table - Mark Adams */ 191 PetscTableCreate(B->mbs,&gid1_lid1); 192 for (i=0; i<B->mbs; i++) { 193 for (j=0; j<B->ilen[i]; j++) { 194 int data,gid1 = aj[B->i[i]+j] + 1; 195 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 196 if (!data) { 197 /* one based table */ 198 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 199 } 200 } 201 } 202 /* form array of columns we need */ 203 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 204 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 205 while (tpos) { 206 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 207 gid--; lid--; 208 garray[lid] = gid; 209 } 210 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 211 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 212 for (i=0; i<ec; i++) { 213 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 214 } 215 /* compact out the extra columns in B */ 216 for (i=0; i<B->mbs; i++) { 217 for (j=0; j<B->ilen[i]; j++) { 218 int gid1 = aj[B->i[i] + j] + 1; 219 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 220 lid --; 221 aj[B->i[i]+j] = lid; 222 } 223 } 224 B->nbs = ec; 225 baij->B->n = ec*B->bs; 226 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 227 /* Mark Adams */ 228 #else 229 /* For the first stab we make an array as long as the number of columns */ 230 /* mark those columns that are in baij->B */ 231 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 232 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 233 for (i=0; i<B->mbs; i++) { 234 for (j=0; j<B->ilen[i]; j++) { 235 if (!indices[aj[B->i[i] + j]]) ec++; 236 indices[aj[B->i[i] + j] ] = 1; 237 } 238 } 239 240 /* form array of columns we need */ 241 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 242 ec = 0; 243 for (i=0; i<Nbs; i++) { 244 if (indices[i]) { 245 garray[ec++] = i; 246 } 247 } 248 249 /* make indices now point into garray */ 250 for (i=0; i<ec; i++) { 251 indices[garray[i]] = i; 252 } 253 254 /* compact out the extra columns in B */ 255 for (i=0; i<B->mbs; i++) { 256 for (j=0; j<B->ilen[i]; j++) { 257 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 258 } 259 } 260 B->nbs = ec; 261 baij->B->n = ec*B->bs; 262 ierr = PetscFree(indices);CHKERRQ(ierr); 263 #endif 264 265 /* create local vector that is used to scatter into */ 266 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 267 268 /* create two temporary index sets for building scatter-gather */ 269 for (i=0; i<ec; i++) { 270 garray[i] = bs*garray[i]; 271 } 272 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 273 for (i=0; i<ec; i++) { 274 garray[i] = garray[i]/bs; 275 } 276 277 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 278 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 279 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 280 ierr = PetscFree(stmp);CHKERRQ(ierr); 281 282 /* create temporary global vector to generate scatter context */ 283 /* this is inefficient, but otherwise we must do either 284 1) save garray until the first actual scatter when the vector is known or 285 2) have another way of generating a scatter context without a vector.*/ 286 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 287 288 /* gnerate the scatter context */ 289 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 290 291 /* 292 Post the receives for the first matrix vector product. We sync-chronize after 293 this on the chance that the user immediately calls MatMult() after assemblying 294 the matrix. 295 */ 296 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 297 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 298 299 PetscLogObjectParent(mat,baij->Mvctx); 300 PetscLogObjectParent(mat,baij->lvec); 301 PetscLogObjectParent(mat,from); 302 PetscLogObjectParent(mat,to); 303 baij->garray = garray; 304 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 305 ierr = ISDestroy(from);CHKERRQ(ierr); 306 ierr = ISDestroy(to);CHKERRQ(ierr); 307 ierr = VecDestroy(gvec);CHKERRQ(ierr); 308 PetscFunctionReturn(0); 309 } 310 311 312 /* 313 Takes the local part of an already assembled MPIBAIJ matrix 314 and disassembles it. This is to allow new nonzeros into the matrix 315 that require more communication in the matrix vector multiply. 316 Thus certain data-structures must be rebuilt. 317 318 Kind of slow! But that's what application programmers get when 319 they are sloppy. 320 */ 321 #undef __FUNCT__ 322 #define __FUNCT__ "DisAssemble_MPISBAIJ" 323 PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 324 { 325 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 326 Mat B = baij->B,Bnew; 327 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 328 PetscErrorCode ierr; 329 int i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 330 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 331 MatScalar *a = Bbaij->a; 332 PetscScalar *atmp; 333 #if defined(PETSC_USE_MAT_SINGLE) 334 int l; 335 #endif 336 337 PetscFunctionBegin; 338 #if defined(PETSC_USE_MAT_SINGLE) 339 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 340 #endif 341 /* free stuff related to matrix-vec multiply */ 342 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 343 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 344 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 345 if (baij->colmap) { 346 #if defined (PETSC_USE_CTABLE) 347 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 348 #else 349 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 350 baij->colmap = 0; 351 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 352 #endif 353 } 354 355 /* make sure that B is assembled so we can access its values */ 356 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 357 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 359 /* invent new B and copy stuff over */ 360 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 361 for (i=0; i<mbs; i++) { 362 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 363 } 364 ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr); 365 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 366 ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr); 367 ierr = PetscFree(nz);CHKERRQ(ierr); 368 369 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 370 for (i=0; i<mbs; i++) { 371 rvals[0] = bs*i; 372 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 373 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 374 col = garray[Bbaij->j[j]]*bs; 375 for (k=0; k<bs; k++) { 376 #if defined(PETSC_USE_MAT_SINGLE) 377 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 378 #else 379 atmp = a+j*bs2 + k*bs; 380 #endif 381 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 382 col++; 383 } 384 } 385 } 386 #if defined(PETSC_USE_MAT_SINGLE) 387 ierr = PetscFree(atmp);CHKERRQ(ierr); 388 #endif 389 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 390 baij->garray = 0; 391 ierr = PetscFree(rvals);CHKERRQ(ierr); 392 PetscLogObjectMemory(A,-ec*sizeof(int)); 393 ierr = MatDestroy(B);CHKERRQ(ierr); 394 PetscLogObjectParent(A,Bnew); 395 baij->B = Bnew; 396 A->was_assembled = PETSC_FALSE; 397 PetscFunctionReturn(0); 398 } 399 400 401 402