1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 8 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 PetscErrorCode ierr; 17 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 18 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 19 IS from,to; 20 Vec gvec; 21 PetscMPIInt rank=sbaij->rank,lsize,size=sbaij->size; 22 PetscInt *owners=sbaij->rangebs,*sowners,*ec_owner,k; 23 PetscScalar *ptr; 24 25 PetscFunctionBegin; 26 ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr); 27 28 /* For the first stab we make an array as long as the number of columns */ 29 /* mark those columns that are in sbaij->B */ 30 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 31 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 32 for (i=0; i<mbs; i++) { 33 for (j=0; j<B->ilen[i]; j++) { 34 if (!indices[aj[B->i[i] + j]]) ec++; 35 indices[aj[B->i[i] + j] ] = 1; 36 } 37 } 38 39 /* form arrays of columns we need */ 40 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 41 ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr); 42 43 ec = 0; 44 for (j=0; j<size; j++){ 45 for (i=owners[j]; i<owners[j+1]; i++){ 46 if (indices[i]) { 47 garray[ec] = i; 48 ec_owner[ec] = j; 49 ec++; 50 } 51 } 52 } 53 54 /* make indices now point into garray */ 55 for (i=0; i<ec; i++) indices[garray[i]] = i; 56 57 /* compact out the extra columns in B */ 58 for (i=0; i<mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 60 } 61 B->nbs = ec; 62 sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 63 ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); 64 ierr = PetscFree(indices);CHKERRQ(ierr); 65 66 /* create local vector that is used to scatter into */ 67 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 68 69 /* create two temporary index sets for building scatter-gather */ 70 ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 71 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 72 for (i=0; i<ec; i++) { stmp[i] = i; } 73 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 74 75 /* generate the scatter context 76 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 77 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 78 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 79 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 80 81 sbaij->garray = garray; 82 ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr); 83 ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr); 84 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 85 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 86 87 ierr = ISDestroy(&from);CHKERRQ(ierr); 88 ierr = ISDestroy(&to);CHKERRQ(ierr); 89 90 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 91 lsize = (mbs + ec)*bs; 92 ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 93 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 94 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 95 96 sowners = sbaij->slvec0->map->range; 97 98 /* x index in the IS sfrom */ 99 for (i=0; i<ec; i++) { 100 j = ec_owner[i]; 101 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 102 } 103 /* b index in the IS sfrom */ 104 k = sowners[rank]/bs + mbs; 105 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 106 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 107 108 /* x index in the IS sto */ 109 k = sowners[rank]/bs + mbs; 110 for (i=0; i<ec; i++) stmp[i] = (k + i); 111 /* b index in the IS sto */ 112 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 113 114 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 115 116 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 117 118 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 119 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 120 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 121 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 122 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 123 124 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 125 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 126 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 127 128 ierr = PetscFree(stmp);CHKERRQ(ierr); 129 ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr); 130 131 ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr); 132 ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr); 133 ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr); 134 ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr); 135 ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr); 136 ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr); 137 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 138 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 139 140 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 141 ierr = ISDestroy(&from);CHKERRQ(ierr); 142 ierr = ISDestroy(&to);CHKERRQ(ierr); 143 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 149 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 150 { 151 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 152 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 153 PetscErrorCode ierr; 154 PetscInt i,j,*aj = B->j,ec = 0,*garray; 155 PetscInt bs = mat->rmap->bs,*stmp; 156 IS from,to; 157 Vec gvec; 158 #if defined (PETSC_USE_CTABLE) 159 PetscTable gid1_lid1; 160 PetscTablePosition tpos; 161 PetscInt gid,lid; 162 #else 163 PetscInt Nbs = baij->Nbs,*indices; 164 #endif 165 166 PetscFunctionBegin; 167 #if defined (PETSC_USE_CTABLE) 168 /* use a table - Mark Adams */ 169 PetscTableCreate(B->mbs,&gid1_lid1); 170 for (i=0; i<B->mbs; i++) { 171 for (j=0; j<B->ilen[i]; j++) { 172 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 173 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 174 if (!data) { 175 /* one based table */ 176 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 177 } 178 } 179 } 180 /* form array of columns we need */ 181 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 182 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 183 while (tpos) { 184 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 185 gid--; lid--; 186 garray[lid] = gid; 187 } 188 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 189 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 190 for (i=0; i<ec; i++) { 191 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 192 } 193 /* compact out the extra columns in B */ 194 for (i=0; i<B->mbs; i++) { 195 for (j=0; j<B->ilen[i]; j++) { 196 PetscInt gid1 = aj[B->i[i] + j] + 1; 197 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 198 lid --; 199 aj[B->i[i]+j] = lid; 200 } 201 } 202 B->nbs = ec; 203 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 204 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 205 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 206 /* Mark Adams */ 207 #else 208 /* For the first stab we make an array as long as the number of columns */ 209 /* mark those columns that are in baij->B */ 210 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 211 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 212 for (i=0; i<B->mbs; i++) { 213 for (j=0; j<B->ilen[i]; j++) { 214 if (!indices[aj[B->i[i] + j]]) ec++; 215 indices[aj[B->i[i] + j] ] = 1; 216 } 217 } 218 219 /* form array of columns we need */ 220 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 221 ec = 0; 222 for (i=0; i<Nbs; i++) { 223 if (indices[i]) { 224 garray[ec++] = i; 225 } 226 } 227 228 /* make indices now point into garray */ 229 for (i=0; i<ec; i++) { 230 indices[garray[i]] = i; 231 } 232 233 /* compact out the extra columns in B */ 234 for (i=0; i<B->mbs; i++) { 235 for (j=0; j<B->ilen[i]; j++) { 236 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 237 } 238 } 239 B->nbs = ec; 240 baij->B->cmap->n = ec*mat->rmap->bs; 241 ierr = PetscFree(indices);CHKERRQ(ierr); 242 #endif 243 244 /* create local vector that is used to scatter into */ 245 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 246 247 /* create two temporary index sets for building scatter-gather */ 248 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 249 250 ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 251 for (i=0; i<ec; i++) { stmp[i] = i; } 252 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 253 254 /* create temporary global vector to generate scatter context */ 255 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 256 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 257 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 258 259 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 260 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 261 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 262 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 263 baij->garray = garray; 264 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = ISDestroy(&from);CHKERRQ(ierr); 266 ierr = ISDestroy(&to);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 /* 271 Takes the local part of an already assembled MPISBAIJ matrix 272 and disassembles it. This is to allow new nonzeros into the matrix 273 that require more communication in the matrix vector multiply. 274 Thus certain data-structures must be rebuilt. 275 276 Kind of slow! But that's what application programmers get when 277 they are sloppy. 278 */ 279 #undef __FUNCT__ 280 #define __FUNCT__ "DisAssemble_MPISBAIJ" 281 PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 282 { 283 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 284 Mat B = baij->B,Bnew; 285 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 286 PetscErrorCode ierr; 287 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 288 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 289 MatScalar *a = Bbaij->a; 290 PetscScalar *atmp; 291 #if defined(PETSC_USE_REAL_MAT_SINGLE) 292 PetscInt l; 293 #endif 294 295 PetscFunctionBegin; 296 #if defined(PETSC_USE_REAL_MAT_SINGLE) 297 ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp); 298 #endif 299 /* free stuff related to matrix-vec multiply */ 300 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 301 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 302 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 303 304 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 305 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 306 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 307 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 308 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 309 310 if (baij->colmap) { 311 #if defined (PETSC_USE_CTABLE) 312 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 313 #else 314 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 315 baij->colmap = 0; 316 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 317 #endif 318 } 319 320 /* make sure that B is assembled so we can access its values */ 321 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 323 324 /* invent new B and copy stuff over */ 325 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 326 for (i=0; i<mbs; i++) { 327 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 328 } 329 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 330 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 331 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 332 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 333 ierr = PetscFree(nz);CHKERRQ(ierr); 334 335 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 336 for (i=0; i<mbs; i++) { 337 rvals[0] = bs*i; 338 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 339 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 340 col = garray[Bbaij->j[j]]*bs; 341 for (k=0; k<bs; k++) { 342 #if defined(PETSC_USE_REAL_MAT_SINGLE) 343 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 344 #else 345 atmp = a+j*bs2 + k*bs; 346 #endif 347 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 348 col++; 349 } 350 } 351 } 352 #if defined(PETSC_USE_REAL_MAT_SINGLE) 353 ierr = PetscFree(atmp);CHKERRQ(ierr); 354 #endif 355 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 356 baij->garray = 0; 357 ierr = PetscFree(rvals);CHKERRQ(ierr); 358 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 359 ierr = MatDestroy(&B);CHKERRQ(ierr); 360 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 361 baij->B = Bnew; 362 A->was_assembled = PETSC_FALSE; 363 PetscFunctionReturn(0); 364 } 365 366 367 368