1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 7 8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 13 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 14 { 15 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 16 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 17 PetscErrorCode ierr; 18 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 19 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 20 IS from,to; 21 Vec gvec; 22 PetscMPIInt rank=sbaij->rank,lsize,size=sbaij->size; 23 PetscInt *owners=sbaij->rangebs,*sowners,*ec_owner,k; 24 PetscScalar *ptr; 25 26 PetscFunctionBegin; 27 if (sbaij->sMvctx) { 28 /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */ 29 ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr); 30 sbaij->sMvctx = 0; 31 } 32 33 /* For the first stab we make an array as long as the number of columns */ 34 /* mark those columns that are in sbaij->B */ 35 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 36 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 37 for (i=0; i<mbs; i++) { 38 for (j=0; j<B->ilen[i]; j++) { 39 if (!indices[aj[B->i[i] + j]]) ec++; 40 indices[aj[B->i[i] + j] ] = 1; 41 } 42 } 43 44 /* form arrays of columns we need */ 45 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr); 47 48 ec = 0; 49 for (j=0; j<size; j++){ 50 for (i=owners[j]; i<owners[j+1]; i++){ 51 if (indices[i]) { 52 garray[ec] = i; 53 ec_owner[ec] = j; 54 ec++; 55 } 56 } 57 } 58 59 /* make indices now point into garray */ 60 for (i=0; i<ec; i++) indices[garray[i]] = i; 61 62 /* compact out the extra columns in B */ 63 for (i=0; i<mbs; i++) { 64 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 65 } 66 B->nbs = ec; 67 sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 68 ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); 69 ierr = PetscFree(indices);CHKERRQ(ierr); 70 71 /* create local vector that is used to scatter into */ 72 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 73 74 /* create two temporary index sets for building scatter-gather */ 75 ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 76 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 77 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 78 79 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 80 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 81 82 /* generate the scatter context 83 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 84 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 85 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 86 ierr = VecDestroy(gvec);CHKERRQ(ierr); 87 88 sbaij->garray = garray; 89 ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr); 90 ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr); 91 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 92 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 93 94 ierr = ISDestroy(from);CHKERRQ(ierr); 95 ierr = ISDestroy(to);CHKERRQ(ierr); 96 97 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 98 lsize = (mbs + ec)*bs; 99 ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 100 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 101 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 102 103 sowners = sbaij->slvec0->map->range; 104 105 /* x index in the IS sfrom */ 106 for (i=0; i<ec; i++) { 107 j = ec_owner[i]; 108 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 109 } 110 /* b index in the IS sfrom */ 111 k = sowners[rank]/bs + mbs; 112 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 113 114 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 115 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 116 117 /* x index in the IS sto */ 118 k = sowners[rank]/bs + mbs; 119 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 120 /* b index in the IS sto */ 121 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 122 123 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 124 125 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 126 127 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 128 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 129 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 130 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 131 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 132 133 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 134 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 135 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 136 137 ierr = PetscFree(stmp);CHKERRQ(ierr); 138 ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr); 139 140 ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr); 141 ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr); 142 ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr); 143 ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr); 144 ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr); 145 ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr); 146 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 147 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 148 149 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 150 ierr = ISDestroy(from);CHKERRQ(ierr); 151 ierr = ISDestroy(to);CHKERRQ(ierr); 152 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 158 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 159 { 160 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 161 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 162 PetscErrorCode ierr; 163 PetscInt i,j,*aj = B->j,ec = 0,*garray; 164 PetscInt bs = mat->rmap->bs,*stmp; 165 IS from,to; 166 Vec gvec; 167 #if defined (PETSC_USE_CTABLE) 168 PetscTable gid1_lid1; 169 PetscTablePosition tpos; 170 PetscInt gid,lid; 171 #else 172 PetscInt Nbs = baij->Nbs,*indices; 173 #endif 174 175 PetscFunctionBegin; 176 #if defined (PETSC_USE_CTABLE) 177 /* use a table - Mark Adams */ 178 PetscTableCreate(B->mbs,&gid1_lid1); 179 for (i=0; i<B->mbs; i++) { 180 for (j=0; j<B->ilen[i]; j++) { 181 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 182 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 183 if (!data) { 184 /* one based table */ 185 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 186 } 187 } 188 } 189 /* form array of columns we need */ 190 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 191 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 192 while (tpos) { 193 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 194 gid--; lid--; 195 garray[lid] = gid; 196 } 197 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 198 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 199 for (i=0; i<ec; i++) { 200 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 201 } 202 /* compact out the extra columns in B */ 203 for (i=0; i<B->mbs; i++) { 204 for (j=0; j<B->ilen[i]; j++) { 205 PetscInt gid1 = aj[B->i[i] + j] + 1; 206 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 207 lid --; 208 aj[B->i[i]+j] = lid; 209 } 210 } 211 B->nbs = ec; 212 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 213 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 214 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 215 /* Mark Adams */ 216 #else 217 /* For the first stab we make an array as long as the number of columns */ 218 /* mark those columns that are in baij->B */ 219 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 220 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 221 for (i=0; i<B->mbs; i++) { 222 for (j=0; j<B->ilen[i]; j++) { 223 if (!indices[aj[B->i[i] + j]]) ec++; 224 indices[aj[B->i[i] + j] ] = 1; 225 } 226 } 227 228 /* form array of columns we need */ 229 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 230 ec = 0; 231 for (i=0; i<Nbs; i++) { 232 if (indices[i]) { 233 garray[ec++] = i; 234 } 235 } 236 237 /* make indices now point into garray */ 238 for (i=0; i<ec; i++) { 239 indices[garray[i]] = i; 240 } 241 242 /* compact out the extra columns in B */ 243 for (i=0; i<B->mbs; i++) { 244 for (j=0; j<B->ilen[i]; j++) { 245 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 246 } 247 } 248 B->nbs = ec; 249 baij->B->cmap->n = ec*mat->rmap->bs; 250 ierr = PetscFree(indices);CHKERRQ(ierr); 251 #endif 252 253 /* create local vector that is used to scatter into */ 254 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 255 256 /* create two temporary index sets for building scatter-gather */ 257 for (i=0; i<ec; i++) { 258 garray[i] = bs*garray[i]; 259 } 260 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 261 for (i=0; i<ec; i++) { 262 garray[i] = garray[i]/bs; 263 } 264 265 ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 266 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 267 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 268 ierr = PetscFree(stmp);CHKERRQ(ierr); 269 270 /* create temporary global vector to generate scatter context */ 271 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 272 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 273 ierr = VecDestroy(gvec);CHKERRQ(ierr); 274 275 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 276 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 277 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 278 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 279 baij->garray = garray; 280 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 281 ierr = ISDestroy(from);CHKERRQ(ierr); 282 ierr = ISDestroy(to);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 /* 287 Takes the local part of an already assembled MPISBAIJ matrix 288 and disassembles it. This is to allow new nonzeros into the matrix 289 that require more communication in the matrix vector multiply. 290 Thus certain data-structures must be rebuilt. 291 292 Kind of slow! But that's what application programmers get when 293 they are sloppy. 294 */ 295 #undef __FUNCT__ 296 #define __FUNCT__ "DisAssemble_MPISBAIJ" 297 PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 298 { 299 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 300 Mat B = baij->B,Bnew; 301 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 302 PetscErrorCode ierr; 303 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 304 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 305 MatScalar *a = Bbaij->a; 306 PetscScalar *atmp; 307 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 308 PetscInt l; 309 #endif 310 311 PetscFunctionBegin; 312 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 313 ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp); 314 #endif 315 /* free stuff related to matrix-vec multiply */ 316 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 317 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); 318 baij->lvec = 0; 319 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); 320 baij->Mvctx = 0; 321 322 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 323 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 324 baij->slvec0 = 0; 325 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 326 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 327 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 328 baij->slvec1 = 0; 329 330 if (baij->colmap) { 331 #if defined (PETSC_USE_CTABLE) 332 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 333 #else 334 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 335 baij->colmap = 0; 336 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 337 #endif 338 } 339 340 /* make sure that B is assembled so we can access its values */ 341 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 344 /* invent new B and copy stuff over */ 345 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 346 for (i=0; i<mbs; i++) { 347 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 348 } 349 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 350 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 351 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 352 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 353 ierr = PetscFree(nz);CHKERRQ(ierr); 354 355 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 356 for (i=0; i<mbs; i++) { 357 rvals[0] = bs*i; 358 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 359 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 360 col = garray[Bbaij->j[j]]*bs; 361 for (k=0; k<bs; k++) { 362 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 363 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 364 #else 365 atmp = a+j*bs2 + k*bs; 366 #endif 367 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 368 col++; 369 } 370 } 371 } 372 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 373 ierr = PetscFree(atmp);CHKERRQ(ierr); 374 #endif 375 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 376 baij->garray = 0; 377 ierr = PetscFree(rvals);CHKERRQ(ierr); 378 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 379 ierr = MatDestroy(B);CHKERRQ(ierr); 380 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 381 baij->B = Bnew; 382 A->was_assembled = PETSC_FALSE; 383 PetscFunctionReturn(0); 384 } 385 386 387 388