1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 8 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 PetscErrorCode ierr; 17 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 18 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 19 IS from,to; 20 Vec gvec; 21 PetscMPIInt rank =sbaij->rank,lsize,size=sbaij->size; 22 PetscInt *owners=sbaij->rangebs,*sowners,*ec_owner,k; 23 PetscScalar *ptr; 24 25 PetscFunctionBegin; 26 ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr); 27 28 /* For the first stab we make an array as long as the number of columns */ 29 /* mark those columns that are in sbaij->B */ 30 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 31 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 32 for (i=0; i<mbs; i++) { 33 for (j=0; j<B->ilen[i]; j++) { 34 if (!indices[aj[B->i[i] + j]]) ec++; 35 indices[aj[B->i[i] + j]] = 1; 36 } 37 } 38 39 /* form arrays of columns we need */ 40 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 41 ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr); 42 43 ec = 0; 44 for (j=0; j<size; j++) { 45 for (i=owners[j]; i<owners[j+1]; i++) { 46 if (indices[i]) { 47 garray[ec] = i; 48 ec_owner[ec] = j; 49 ec++; 50 } 51 } 52 } 53 54 /* make indices now point into garray */ 55 for (i=0; i<ec; i++) indices[garray[i]] = i; 56 57 /* compact out the extra columns in B */ 58 for (i=0; i<mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 60 } 61 B->nbs = ec; 62 63 sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 64 65 ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); 66 ierr = PetscFree(indices);CHKERRQ(ierr); 67 68 /* create local vector that is used to scatter into */ 69 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 70 71 /* create two temporary index sets for building scatter-gather */ 72 ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 73 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 74 for (i=0; i<ec; i++) stmp[i] = i; 75 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 76 77 /* generate the scatter context 78 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 79 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 80 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 81 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 82 83 sbaij->garray = garray; 84 85 ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr); 86 ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr); 87 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 88 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 89 90 ierr = ISDestroy(&from);CHKERRQ(ierr); 91 ierr = ISDestroy(&to);CHKERRQ(ierr); 92 93 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 94 lsize = (mbs + ec)*bs; 95 ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 96 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 97 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 98 99 sowners = sbaij->slvec0->map->range; 100 101 /* x index in the IS sfrom */ 102 for (i=0; i<ec; i++) { 103 j = ec_owner[i]; 104 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 105 } 106 /* b index in the IS sfrom */ 107 k = sowners[rank]/bs + mbs; 108 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 109 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 110 111 /* x index in the IS sto */ 112 k = sowners[rank]/bs + mbs; 113 for (i=0; i<ec; i++) stmp[i] = (k + i); 114 /* b index in the IS sto */ 115 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 116 117 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 118 119 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 120 121 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 122 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 123 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 124 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 125 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 126 127 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 128 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 129 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 130 131 ierr = PetscFree(stmp);CHKERRQ(ierr); 132 ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr); 133 134 ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr); 135 ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr); 136 ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr); 137 ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr); 138 ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr); 139 ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr); 140 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 141 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 142 143 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 144 ierr = ISDestroy(&from);CHKERRQ(ierr); 145 ierr = ISDestroy(&to);CHKERRQ(ierr); 146 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 152 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 153 { 154 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 155 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 156 PetscErrorCode ierr; 157 PetscInt i,j,*aj = B->j,ec = 0,*garray; 158 PetscInt bs = mat->rmap->bs,*stmp; 159 IS from,to; 160 Vec gvec; 161 #if defined(PETSC_USE_CTABLE) 162 PetscTable gid1_lid1; 163 PetscTablePosition tpos; 164 PetscInt gid,lid; 165 #else 166 PetscInt Nbs = baij->Nbs,*indices; 167 #endif 168 169 PetscFunctionBegin; 170 #if defined(PETSC_USE_CTABLE) 171 /* use a table - Mark Adams */ 172 PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1); 173 for (i=0; i<B->mbs; i++) { 174 for (j=0; j<B->ilen[i]; j++) { 175 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 176 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 177 if (!data) { 178 /* one based table */ 179 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 180 } 181 } 182 } 183 /* form array of columns we need */ 184 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 185 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 186 while (tpos) { 187 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 188 gid--; lid--; 189 garray[lid] = gid; 190 } 191 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 192 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 193 for (i=0; i<ec; i++) { 194 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 195 } 196 /* compact out the extra columns in B */ 197 for (i=0; i<B->mbs; i++) { 198 for (j=0; j<B->ilen[i]; j++) { 199 PetscInt gid1 = aj[B->i[i] + j] + 1; 200 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 201 lid--; 202 aj[B->i[i]+j] = lid; 203 } 204 } 205 B->nbs = ec; 206 207 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 208 209 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 210 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 211 #else 212 /* For the first stab we make an array as long as the number of columns */ 213 /* mark those columns that are in baij->B */ 214 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 215 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 216 for (i=0; i<B->mbs; i++) { 217 for (j=0; j<B->ilen[i]; j++) { 218 if (!indices[aj[B->i[i] + j]]) ec++; 219 indices[aj[B->i[i] + j]] = 1; 220 } 221 } 222 223 /* form array of columns we need */ 224 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 225 ec = 0; 226 for (i=0; i<Nbs; i++) { 227 if (indices[i]) { 228 garray[ec++] = i; 229 } 230 } 231 232 /* make indices now point into garray */ 233 for (i=0; i<ec; i++) indices[garray[i]] = i; 234 235 /* compact out the extra columns in B */ 236 for (i=0; i<B->mbs; i++) { 237 for (j=0; j<B->ilen[i]; j++) { 238 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 239 } 240 } 241 B->nbs = ec; 242 243 baij->B->cmap->n = ec*mat->rmap->bs; 244 245 ierr = PetscFree(indices);CHKERRQ(ierr); 246 #endif 247 248 /* create local vector that is used to scatter into */ 249 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 250 251 /* create two temporary index sets for building scatter-gather */ 252 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 253 254 ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 255 for (i=0; i<ec; i++) stmp[i] = i; 256 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 257 258 /* create temporary global vector to generate scatter context */ 259 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 260 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 261 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 262 263 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 264 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 265 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 266 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 267 268 baij->garray = garray; 269 270 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 271 ierr = ISDestroy(&from);CHKERRQ(ierr); 272 ierr = ISDestroy(&to);CHKERRQ(ierr); 273 PetscFunctionReturn(0); 274 } 275 276 /* 277 Takes the local part of an already assembled MPISBAIJ matrix 278 and disassembles it. This is to allow new nonzeros into the matrix 279 that require more communication in the matrix vector multiply. 280 Thus certain data-structures must be rebuilt. 281 282 Kind of slow! But that's what application programmers get when 283 they are sloppy. 284 */ 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatDisAssemble_MPISBAIJ" 287 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 288 { 289 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 290 Mat B = baij->B,Bnew; 291 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 292 PetscErrorCode ierr; 293 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 294 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 295 MatScalar *a = Bbaij->a; 296 PetscScalar *atmp; 297 #if defined(PETSC_USE_REAL_MAT_SINGLE) 298 PetscInt l; 299 #endif 300 301 PetscFunctionBegin; 302 #if defined(PETSC_USE_REAL_MAT_SINGLE) 303 ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp); 304 #endif 305 /* free stuff related to matrix-vec multiply */ 306 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 307 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 308 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 309 310 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 311 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 312 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 313 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 314 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 315 316 if (baij->colmap) { 317 #if defined(PETSC_USE_CTABLE) 318 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 319 #else 320 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 321 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 322 #endif 323 } 324 325 /* make sure that B is assembled so we can access its values */ 326 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 327 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 328 329 /* invent new B and copy stuff over */ 330 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 331 for (i=0; i<mbs; i++) { 332 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 333 } 334 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 335 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 336 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 337 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 338 339 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 340 341 ierr = PetscFree(nz);CHKERRQ(ierr); 342 343 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 344 for (i=0; i<mbs; i++) { 345 rvals[0] = bs*i; 346 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 347 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 348 col = garray[Bbaij->j[j]]*bs; 349 for (k=0; k<bs; k++) { 350 #if defined(PETSC_USE_REAL_MAT_SINGLE) 351 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 352 #else 353 atmp = a+j*bs2 + k*bs; 354 #endif 355 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 356 col++; 357 } 358 } 359 } 360 #if defined(PETSC_USE_REAL_MAT_SINGLE) 361 ierr = PetscFree(atmp);CHKERRQ(ierr); 362 #endif 363 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 364 365 baij->garray = 0; 366 367 ierr = PetscFree(rvals);CHKERRQ(ierr); 368 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 369 ierr = MatDestroy(&B);CHKERRQ(ierr); 370 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 371 372 baij->B = Bnew; 373 374 A->was_assembled = PETSC_FALSE; 375 PetscFunctionReturn(0); 376 } 377 378 379 380