1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 8 9 10 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 11 { 12 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 13 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 14 PetscErrorCode ierr; 15 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 16 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 17 IS from,to; 18 Vec gvec; 19 PetscMPIInt rank =sbaij->rank,lsize,size=sbaij->size; 20 PetscInt *owners=sbaij->rangebs,*ec_owner,k; 21 const PetscInt *sowners; 22 PetscScalar *ptr; 23 24 PetscFunctionBegin; 25 ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr); 26 27 /* For the first stab we make an array as long as the number of columns */ 28 /* mark those columns that are in sbaij->B */ 29 ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr); 30 for (i=0; i<mbs; i++) { 31 for (j=0; j<B->ilen[i]; j++) { 32 if (!indices[aj[B->i[i] + j]]) ec++; 33 indices[aj[B->i[i] + j]] = 1; 34 } 35 } 36 37 /* form arrays of columns we need */ 38 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 39 ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr); 40 41 ec = 0; 42 for (j=0; j<size; j++) { 43 for (i=owners[j]; i<owners[j+1]; i++) { 44 if (indices[i]) { 45 garray[ec] = i; 46 ec_owner[ec] = j; 47 ec++; 48 } 49 } 50 } 51 52 /* make indices now point into garray */ 53 for (i=0; i<ec; i++) indices[garray[i]] = i; 54 55 /* compact out the extra columns in B */ 56 for (i=0; i<mbs; i++) { 57 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 58 } 59 B->nbs = ec; 60 61 sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 62 63 ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); 64 ierr = PetscFree(indices);CHKERRQ(ierr); 65 66 /* create local vector that is used to scatter into */ 67 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 68 69 /* create two temporary index sets for building scatter-gather */ 70 ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr); 71 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 72 for (i=0; i<ec; i++) stmp[i] = i; 73 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 74 75 /* generate the scatter context 76 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 77 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 78 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 79 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 80 81 sbaij->garray = garray; 82 83 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr); 84 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr); 85 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 86 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 87 88 ierr = ISDestroy(&from);CHKERRQ(ierr); 89 ierr = ISDestroy(&to);CHKERRQ(ierr); 90 91 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 92 lsize = (mbs + ec)*bs; 93 ierr = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 94 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 95 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 96 97 ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr); 98 99 /* x index in the IS sfrom */ 100 for (i=0; i<ec; i++) { 101 j = ec_owner[i]; 102 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 103 } 104 /* b index in the IS sfrom */ 105 k = sowners[rank]/bs + mbs; 106 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 107 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 108 109 /* x index in the IS sto */ 110 k = sowners[rank]/bs + mbs; 111 for (i=0; i<ec; i++) stmp[i] = (k + i); 112 /* b index in the IS sto */ 113 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 114 115 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 116 117 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 118 119 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 120 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 121 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 122 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 123 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 124 125 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 126 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 127 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 128 129 ierr = PetscFree(stmp);CHKERRQ(ierr); 130 ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 131 132 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr); 133 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr); 134 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr); 135 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr); 136 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr); 137 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr); 138 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 139 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 140 141 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 142 ierr = ISDestroy(&from);CHKERRQ(ierr); 143 ierr = ISDestroy(&to);CHKERRQ(ierr); 144 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 149 { 150 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 151 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 152 PetscErrorCode ierr; 153 PetscInt i,j,*aj = B->j,ec = 0,*garray; 154 PetscInt bs = mat->rmap->bs,*stmp; 155 IS from,to; 156 Vec gvec; 157 #if defined(PETSC_USE_CTABLE) 158 PetscTable gid1_lid1; 159 PetscTablePosition tpos; 160 PetscInt gid,lid; 161 #else 162 PetscInt Nbs = baij->Nbs,*indices; 163 #endif 164 165 PetscFunctionBegin; 166 #if defined(PETSC_USE_CTABLE) 167 /* use a table - Mark Adams */ 168 PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1); 169 for (i=0; i<B->mbs; i++) { 170 for (j=0; j<B->ilen[i]; j++) { 171 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 172 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 173 if (!data) { 174 /* one based table */ 175 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 176 } 177 } 178 } 179 /* form array of columns we need */ 180 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 181 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 182 while (tpos) { 183 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 184 gid--; lid--; 185 garray[lid] = gid; 186 } 187 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 188 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 189 for (i=0; i<ec; i++) { 190 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 191 } 192 /* compact out the extra columns in B */ 193 for (i=0; i<B->mbs; i++) { 194 for (j=0; j<B->ilen[i]; j++) { 195 PetscInt gid1 = aj[B->i[i] + j] + 1; 196 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 197 lid--; 198 aj[B->i[i]+j] = lid; 199 } 200 } 201 B->nbs = ec; 202 203 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 204 205 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 206 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 207 #else 208 /* For the first stab we make an array as long as the number of columns */ 209 /* mark those columns that are in baij->B */ 210 ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr); 211 for (i=0; i<B->mbs; i++) { 212 for (j=0; j<B->ilen[i]; j++) { 213 if (!indices[aj[B->i[i] + j]]) ec++; 214 indices[aj[B->i[i] + j]] = 1; 215 } 216 } 217 218 /* form array of columns we need */ 219 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 220 ec = 0; 221 for (i=0; i<Nbs; i++) { 222 if (indices[i]) { 223 garray[ec++] = i; 224 } 225 } 226 227 /* make indices now point into garray */ 228 for (i=0; i<ec; i++) indices[garray[i]] = i; 229 230 /* compact out the extra columns in B */ 231 for (i=0; i<B->mbs; i++) { 232 for (j=0; j<B->ilen[i]; j++) { 233 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 234 } 235 } 236 B->nbs = ec; 237 238 baij->B->cmap->n = ec*mat->rmap->bs; 239 240 ierr = PetscFree(indices);CHKERRQ(ierr); 241 #endif 242 243 /* create local vector that is used to scatter into */ 244 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 245 246 /* create two temporary index sets for building scatter-gather */ 247 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 248 249 ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr); 250 for (i=0; i<ec; i++) stmp[i] = i; 251 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 252 253 /* create temporary global vector to generate scatter context */ 254 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 255 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 256 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 257 258 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr); 259 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr); 260 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 261 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 262 263 baij->garray = garray; 264 265 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 266 ierr = ISDestroy(&from);CHKERRQ(ierr); 267 ierr = ISDestroy(&to);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 /* 272 Takes the local part of an already assembled MPISBAIJ matrix 273 and disassembles it. This is to allow new nonzeros into the matrix 274 that require more communication in the matrix vector multiply. 275 Thus certain data-structures must be rebuilt. 276 277 Kind of slow! But that's what application programmers get when 278 they are sloppy. 279 */ 280 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 281 { 282 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 283 Mat B = baij->B,Bnew; 284 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 285 PetscErrorCode ierr; 286 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 287 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 288 MatScalar *a = Bbaij->a; 289 PetscScalar *atmp; 290 #if defined(PETSC_USE_REAL_MAT_SINGLE) 291 PetscInt l; 292 #endif 293 294 PetscFunctionBegin; 295 #if defined(PETSC_USE_REAL_MAT_SINGLE) 296 ierr = PetscMalloc1(A->rmap->bs,&atmp); 297 #endif 298 /* free stuff related to matrix-vec multiply */ 299 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 300 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 301 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 302 303 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 304 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 305 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 306 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 307 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 308 309 if (baij->colmap) { 310 #if defined(PETSC_USE_CTABLE) 311 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 312 #else 313 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 314 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 315 #endif 316 } 317 318 /* make sure that B is assembled so we can access its values */ 319 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 322 /* invent new B and copy stuff over */ 323 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 324 for (i=0; i<mbs; i++) { 325 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 326 } 327 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 328 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 329 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 330 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 331 ierr = PetscFree(nz);CHKERRQ(ierr); 332 333 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 334 /* 335 Ensure that B's nonzerostate is monotonically increasing. 336 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 337 */ 338 Bnew->nonzerostate = B->nonzerostate; 339 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 340 for (i=0; i<mbs; i++) { 341 rvals[0] = bs*i; 342 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 343 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 344 col = garray[Bbaij->j[j]]*bs; 345 for (k=0; k<bs; k++) { 346 #if defined(PETSC_USE_REAL_MAT_SINGLE) 347 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 348 #else 349 atmp = a+j*bs2 + k*bs; 350 #endif 351 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 352 col++; 353 } 354 } 355 } 356 #if defined(PETSC_USE_REAL_MAT_SINGLE) 357 ierr = PetscFree(atmp);CHKERRQ(ierr); 358 #endif 359 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 360 361 baij->garray = 0; 362 363 ierr = PetscFree(rvals);CHKERRQ(ierr); 364 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 365 ierr = MatDestroy(&B);CHKERRQ(ierr); 366 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 367 368 baij->B = Bnew; 369 370 A->was_assembled = PETSC_FALSE; 371 PetscFunctionReturn(0); 372 } 373