#define PETSCMAT_DLL

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"

extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->rmap->bs,*stmp,mbs = sbaij->mbs,vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank = sbaij->rank,lsize,size = sbaij->size;
  PetscInt       *owners = sbaij->rangebs,*sowners,*ec_owner,k;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  if (sbaij->sMvctx) {
    /* These two lines should be in DisAssemble_MPISBAIJ(); it is not clear why they cause a crash there */
    ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr);
    sbaij->sMvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);

  ec = 0;
  for (j=0; j<size; j++) {
    for (i=owners[j]; i<owners[j+1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs            = ec;
  sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but are useful for some applications */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);

  sbaij->garray = garray;
  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
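  /*
     Descriptive note on the split work vectors built below (inferred from the slvec1a/slvec1b/slvec0b
     wrappers created further down): each process holds (mbs + ec)*bs local entries of slvec0/slvec1,
     the first mbs*bs corresponding to its own block rows and the trailing ec*bs to the needed
     off-process (ghost) blocks. The index sets are constructed so that the scatter sMvctx both
     gathers the ghost entries of x into the tail of slvec1 and adds the partial products accumulated
     in the tail of slvec0 back into the owning processes' portion of slvec1, which is what the
     symmetric (upper-triangle-only) matrix-vector product relies on.
  */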

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec)*bs;
  ierr  = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  sowners = sbaij->slvec0->map->range;

  /* x index in the IS sfrom */
  for (i=0; i<ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i<2*ec; i++,j++) sgarray[i] = k + j;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = k + i;
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
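
/*
   The variant below builds only the conventional lvec/Mvctx scatter (essentially the same
   setup used by the parallel BAIJ format); when PETSC_USE_CTABLE is defined it uses a
   PetscTable hash table so that no marker array of length Nbs is needed.
*/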

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs           = ec;
  baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs           = ec;
  baij->B->cmap->n = ec*mat->rmap->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
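
/*
   An illustration (row, newcol, and v are hypothetical names) of the kind of user code
   that can lead to this path: after a matrix A has been assembled, inserting a value in a
   block column that the owning process did not previously need to communicate with, e.g.

     ierr = MatSetValues(A,1,&row,1,&newcol,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/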
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPISBAIJ"
PetscErrorCode DisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
  MatScalar      *a = Bbaij->a;
  PetscScalar    *atmp;
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  PetscInt       l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
#endif
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);
  baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);
  baij->Mvctx = 0;

  ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  baij->slvec0 = 0;
  ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  baij->slvec1 = 0;

  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1] - Bbaij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  ierr = PetscFree(nz);CHKERRQ(ierr);

  ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*i;
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col = garray[Bbaij->j[j]]*bs;
      for (k=0; k<bs; k++) {
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
        for (l=0; l<bs; l++) atmp[l] = a[j*bs2 + k*bs + l];
#else
        atmp = a + j*bs2 + k*bs;
#endif
        ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
        col++;
      }
    }
  }
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}