#define PETSCMAT_DLL

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);


#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
  MatSetUpMultiply_MPISBAIJ - Sets up the scatter structures needed by the
  symmetric MatMult: it compacts the off-diagonal block B to local column
  indices, records the needed global block columns in sbaij->garray, and
  builds both the conventional scatter (Mvctx/lvec) and the SBAIJ-specific
  two-sided scatter (sMvctx with slvec0/slvec1 and their sub-vector views).

  Input Parameter:
.  mat - the MPISBAIJ matrix (its off-diagonal part sbaij->B must be assembled)

  Note: all index arithmetic below is in units of blocks (bs = block size);
  point indices are obtained by multiplying block indices by bs.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->rmap.bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  if (sbaij->sMvctx) {
    /* These two lines belong in DisAssemble_MPISBAIJ(), but destroying the
       scatter there caused a crash (reason unknown), so it is done here */
    ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
    sbaij->sMvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;  /* ec counts distinct off-diagonal block columns */
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need: garray[] holds the needed global block
     columns, ec_owner[] the rank that owns each (sgarray is sized 3*ec so
     its last third can serve as ec_owner) */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec;

  ec = 0;
  /* sweep owner by owner so garray comes out sorted and ec_owner is correct */
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: rewrite B's column indices from
     global block numbering to local (0..ec-1) numbering */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  sbaij->B->cmap.n = sbaij->B->cmap.N = ec*mat->rmap.bs;
  ierr = PetscMapInitialize(sbaij->B->comm,&(sbaij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather
     (stmp is sized 2*ec because it is reused below for the sMvctx sets) */
  ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     each process holds mbs local blocks plus ec ghost blocks */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);  /* vec_size is otherwise unused */

  sowners = sbaij->slvec0->map.range;

  /* x index in the IS sfrom: shift each needed global block column by the
     gap between its owner's slvec0 range and its owner's block row range */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i]  = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank]/bs + mbs;  /* first ghost-block slot in this process's slvec0 */
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /* carve slvec1 into views of its local part (slvec1a) and ghost part
     (slvec1b); likewise slvec0b views the ghost part of slvec0 */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  ierr = PetscFree(sgarray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
  MatSetUpMultiply_MPISBAIJ_2comm - Variant of MatSetUpMultiply_MPISBAIJ that
  builds only the conventional (one-sided) scatter Mvctx/lvec, compacting the
  off-diagonal block B to local column indices along the way.  Uses a hash
  table instead of a length-Nbs work array when PETSC_USE_CTABLE is defined.

  Input Parameter:
.  mat - the MPISBAIJ matrix (its off-diagonal part baij->B must be assembled)
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap.bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* NOTE(review): return value of PetscTableCreate is not checked here */
  PetscTableCreate(B->mbs,&gid1_lid1);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;  /* table keys are one based */
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;  /* back to zero based */
    garray[lid] = gid;
  }
  /* garray must be sorted, so rebuild the table with the sorted ordering */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs           = ec;
  baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs;
  ierr = PetscMapInitialize(baij->B->comm,&(baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs       = ec;
  baij->B->cmap.n = ec*mat->rmap.bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather
     (garray is scaled to point indices in place, then scaled back) */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
299 */ 300 #undef __FUNCT__ 301 #define __FUNCT__ "DisAssemble_MPISBAIJ" 302 PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 303 { 304 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 305 Mat B = baij->B,Bnew; 306 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 307 PetscErrorCode ierr; 308 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap.n,col,*garray=baij->garray; 309 PetscInt k,bs=A->rmap.bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap.n; 310 MatScalar *a = Bbaij->a; 311 PetscScalar *atmp; 312 #if defined(PETSC_USE_MAT_SINGLE) 313 PetscInt l; 314 #endif 315 316 PetscFunctionBegin; 317 #if defined(PETSC_USE_MAT_SINGLE) 318 ierr = PetscMalloc(A->rmap.bs*sizeof(PetscScalar),&atmp); 319 #endif 320 /* free stuff related to matrix-vec multiply */ 321 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 322 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); 323 baij->lvec = 0; 324 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); 325 baij->Mvctx = 0; 326 327 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 328 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 329 baij->slvec0 = 0; 330 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 331 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 332 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 333 baij->slvec1 = 0; 334 335 if (baij->colmap) { 336 #if defined (PETSC_USE_CTABLE) 337 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 338 #else 339 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 340 baij->colmap = 0; 341 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 342 #endif 343 } 344 345 /* make sure that B is assembled so we can access its values */ 346 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 349 /* invent new B and copy stuff over */ 350 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 351 for (i=0; i<mbs; i++) { 352 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 353 } 354 ierr = 
MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 355 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 356 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 357 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr); 358 ierr = PetscFree(nz);CHKERRQ(ierr); 359 360 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 361 for (i=0; i<mbs; i++) { 362 rvals[0] = bs*i; 363 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 364 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 365 col = garray[Bbaij->j[j]]*bs; 366 for (k=0; k<bs; k++) { 367 #if defined(PETSC_USE_MAT_SINGLE) 368 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 369 #else 370 atmp = a+j*bs2 + k*bs; 371 #endif 372 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 373 col++; 374 } 375 } 376 } 377 #if defined(PETSC_USE_MAT_SINGLE) 378 ierr = PetscFree(atmp);CHKERRQ(ierr); 379 #endif 380 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 381 baij->garray = 0; 382 ierr = PetscFree(rvals);CHKERRQ(ierr); 383 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 384 ierr = MatDestroy(B);CHKERRQ(ierr); 385 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 386 baij->B = Bnew; 387 A->was_assembled = PETSC_FALSE; 388 PetscFunctionReturn(0); 389 } 390 391 392 393