1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 8 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 PetscErrorCode ierr; 17 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 18 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 19 IS from,to; 20 Vec gvec; 21 PetscMPIInt rank =sbaij->rank,lsize,size=sbaij->size; 22 PetscInt *owners=sbaij->rangebs,*ec_owner,k; 23 const PetscInt *sowners; 24 PetscScalar *ptr; 25 26 PetscFunctionBegin; 27 ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr); 28 29 /* For the first stab we make an array as long as the number of columns */ 30 /* mark those columns that are in sbaij->B */ 31 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 32 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 33 for (i=0; i<mbs; i++) { 34 for (j=0; j<B->ilen[i]; j++) { 35 if (!indices[aj[B->i[i] + j]]) ec++; 36 indices[aj[B->i[i] + j]] = 1; 37 } 38 } 39 40 /* form arrays of columns we need */ 41 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 42 ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr); 43 44 ec = 0; 45 for (j=0; j<size; j++) { 46 for (i=owners[j]; i<owners[j+1]; i++) { 47 if (indices[i]) { 48 garray[ec] = i; 49 ec_owner[ec] = j; 50 ec++; 51 } 52 } 53 } 54 55 /* make indices now point into garray */ 56 for (i=0; i<ec; i++) indices[garray[i]] = i; 57 58 /* compact out the extra columns in B */ 59 for (i=0; i<mbs; i++) { 60 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 61 } 62 B->nbs = ec; 63 64 sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 65 66 ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); 67 ierr = PetscFree(indices);CHKERRQ(ierr); 68 69 /* create local vector that is used to scatter into */ 70 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 71 72 /* create two temporary index sets for building scatter-gather */ 73 ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 74 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 75 for (i=0; i<ec; i++) stmp[i] = i; 76 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 77 78 /* generate the scatter context 79 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 80 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 81 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 82 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 83 84 sbaij->garray = garray; 85 86 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr); 87 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr); 88 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 89 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 90 91 ierr = ISDestroy(&from);CHKERRQ(ierr); 92 ierr = ISDestroy(&to);CHKERRQ(ierr); 93 94 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 95 lsize = (mbs + ec)*bs; 96 ierr = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 97 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 98 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 99 100 ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr); 101 102 /* x index in the IS sfrom */ 103 for (i=0; i<ec; i++) { 104 j = ec_owner[i]; 105 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 106 } 107 /* b index in the IS sfrom */ 108 k = sowners[rank]/bs + mbs; 109 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 110 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 111 112 /* x index in the IS sto */ 113 k = sowners[rank]/bs + mbs; 114 for (i=0; i<ec; i++) stmp[i] = (k + i); 115 /* b index in the IS sto */ 116 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 117 118 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 119 120 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 121 122 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 123 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 124 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 125 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 126 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 127 128 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 129 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 130 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 131 132 ierr = PetscFree(stmp);CHKERRQ(ierr); 133 ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 134 135 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr); 136 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr); 137 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr); 138 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr); 139 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr); 140 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr); 141 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 142 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 143 144 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 145 ierr = ISDestroy(&from);CHKERRQ(ierr); 146 ierr = ISDestroy(&to);CHKERRQ(ierr); 147 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 153 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 154 { 155 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 156 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 157 PetscErrorCode ierr; 158 PetscInt i,j,*aj = B->j,ec = 0,*garray; 159 PetscInt bs = mat->rmap->bs,*stmp; 160 IS from,to; 161 Vec gvec; 162 #if defined(PETSC_USE_CTABLE) 163 PetscTable gid1_lid1; 164 PetscTablePosition tpos; 165 PetscInt gid,lid; 166 #else 167 PetscInt Nbs = baij->Nbs,*indices; 168 #endif 169 170 PetscFunctionBegin; 171 #if defined(PETSC_USE_CTABLE) 172 /* use a table - Mark Adams */ 173 PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1); 174 for (i=0; i<B->mbs; i++) { 175 for (j=0; j<B->ilen[i]; j++) { 176 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 177 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 178 if (!data) { 179 /* one based table */ 180 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 181 } 182 } 183 } 184 /* form array of columns we need */ 185 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 186 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 187 while (tpos) { 188 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 189 gid--; lid--; 190 garray[lid] = gid; 191 } 192 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 193 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 194 for (i=0; i<ec; i++) { 195 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 196 } 197 /* compact out the extra columns in B */ 198 for (i=0; i<B->mbs; i++) { 199 for (j=0; j<B->ilen[i]; j++) { 200 PetscInt gid1 = aj[B->i[i] + j] + 1; 201 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 202 lid--; 203 aj[B->i[i]+j] = lid; 204 } 205 } 206 B->nbs = ec; 207 208 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 209 210 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 211 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 212 #else 213 /* For the first stab we make an array as long as the number of columns */ 214 /* mark those columns that are in baij->B */ 215 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 216 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 217 for (i=0; i<B->mbs; i++) { 218 for (j=0; j<B->ilen[i]; j++) { 219 if (!indices[aj[B->i[i] + j]]) ec++; 220 indices[aj[B->i[i] + j]] = 1; 221 } 222 } 223 224 /* form array of columns we need */ 225 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 226 ec = 0; 227 for (i=0; i<Nbs; i++) { 228 if (indices[i]) { 229 garray[ec++] = i; 230 } 231 } 232 233 /* make indices now point into garray */ 234 for (i=0; i<ec; i++) indices[garray[i]] = i; 235 236 /* compact out the extra columns in B */ 237 for (i=0; i<B->mbs; i++) { 238 for (j=0; j<B->ilen[i]; j++) { 239 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 240 } 241 } 242 B->nbs = ec; 243 244 baij->B->cmap->n = ec*mat->rmap->bs; 245 246 ierr = PetscFree(indices);CHKERRQ(ierr); 247 #endif 248 249 /* create local vector that is used to scatter into */ 250 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 251 252 /* create two temporary index sets for building scatter-gather */ 253 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 254 255 ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 256 for (i=0; i<ec; i++) stmp[i] = i; 257 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 258 259 /* create temporary global vector to generate scatter context */ 260 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 261 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 262 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 263 264 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr); 265 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr); 266 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 267 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 268 269 baij->garray = garray; 270 271 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 272 ierr = ISDestroy(&from);CHKERRQ(ierr); 273 ierr = ISDestroy(&to);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 /* 278 Takes the local part of an already assembled MPISBAIJ matrix 279 and disassembles it. This is to allow new nonzeros into the matrix 280 that require more communication in the matrix vector multiply. 281 Thus certain data-structures must be rebuilt. 282 283 Kind of slow! But that's what application programmers get when 284 they are sloppy. 285 */ 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatDisAssemble_MPISBAIJ" 288 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 289 { 290 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 291 Mat B = baij->B,Bnew; 292 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 293 PetscErrorCode ierr; 294 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 295 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 296 MatScalar *a = Bbaij->a; 297 PetscScalar *atmp; 298 #if defined(PETSC_USE_REAL_MAT_SINGLE) 299 PetscInt l; 300 #endif 301 302 PetscFunctionBegin; 303 #if defined(PETSC_USE_REAL_MAT_SINGLE) 304 ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp); 305 #endif 306 /* free stuff related to matrix-vec multiply */ 307 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 308 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 309 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 310 311 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 312 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 313 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 314 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 315 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 316 317 if (baij->colmap) { 318 #if defined(PETSC_USE_CTABLE) 319 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 320 #else 321 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 322 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 323 #endif 324 } 325 326 /* make sure that B is assembled so we can access its values */ 327 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 328 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 329 330 /* invent new B and copy stuff over */ 331 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 332 for (i=0; i<mbs; i++) { 333 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 334 } 335 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 336 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 337 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 338 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 339 340 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 341 342 ierr = PetscFree(nz);CHKERRQ(ierr); 343 344 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 345 for (i=0; i<mbs; i++) { 346 rvals[0] = bs*i; 347 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 348 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 349 col = garray[Bbaij->j[j]]*bs; 350 for (k=0; k<bs; k++) { 351 #if defined(PETSC_USE_REAL_MAT_SINGLE) 352 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 353 #else 354 atmp = a+j*bs2 + k*bs; 355 #endif 356 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 357 col++; 358 } 359 } 360 } 361 #if defined(PETSC_USE_REAL_MAT_SINGLE) 362 ierr = PetscFree(atmp);CHKERRQ(ierr); 363 #endif 364 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 365 366 baij->garray = 0; 367 368 ierr = PetscFree(rvals);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 370 ierr = MatDestroy(&B);CHKERRQ(ierr); 371 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 372 373 baij->B = Bnew; 374 375 A->was_assembled = PETSC_FALSE; 376 PetscFunctionReturn(0); 377 } 378 379 380 381