1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 6 7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 8 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 PetscErrorCode ierr; 17 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 18 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 19 IS from,to; 20 Vec gvec; 21 PetscMPIInt rank=sbaij->rank,lsize,size=sbaij->size; 22 PetscInt *owners=sbaij->rangebs,*sowners,*ec_owner,k; 23 PetscScalar *ptr; 24 25 PetscFunctionBegin; 26 if (sbaij->sMvctx) { 27 /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */ 28 ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr); 29 sbaij->sMvctx = 0; 30 } 31 32 /* For the first stab we make an array as long as the number of columns */ 33 /* mark those columns that are in sbaij->B */ 34 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 35 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 36 for (i=0; i<mbs; i++) { 37 for (j=0; j<B->ilen[i]; j++) { 38 if (!indices[aj[B->i[i] + j]]) ec++; 39 indices[aj[B->i[i] + j] ] = 1; 40 } 41 } 42 43 /* form arrays of columns we need */ 44 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 45 ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr); 46 47 ec = 0; 48 for (j=0; j<size; j++){ 49 for (i=owners[j]; i<owners[j+1]; i++){ 50 if (indices[i]) { 51 garray[ec] = i; 52 ec_owner[ec] = j; 53 ec++; 54 } 55 } 56 } 57 58 /* make indices now point into garray */ 59 for (i=0; i<ec; i++) indices[garray[i]] = i; 60 61 /* compact out the extra columns in B */ 62 for (i=0; i<mbs; i++) { 63 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 64 } 65 B->nbs = ec; 66 sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 67 ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); 68 ierr = PetscFree(indices);CHKERRQ(ierr); 69 70 /* create local vector that is used to scatter into */ 71 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 72 73 /* create two temporary index sets for building scatter-gather */ 74 ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 75 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 76 for (i=0; i<ec; i++) { stmp[i] = i; } 77 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 78 79 /* generate the scatter context 80 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 81 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 82 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 83 ierr = VecDestroy(gvec);CHKERRQ(ierr); 84 85 sbaij->garray = garray; 86 ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr); 87 ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr); 88 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 89 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 90 91 ierr = ISDestroy(from);CHKERRQ(ierr); 92 ierr = ISDestroy(to);CHKERRQ(ierr); 93 94 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 95 lsize = (mbs + ec)*bs; 96 ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 97 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 98 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 99 100 sowners = sbaij->slvec0->map->range; 101 102 /* x index in the IS sfrom */ 103 for (i=0; i<ec; i++) { 104 j = ec_owner[i]; 105 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 106 } 107 /* b index in the IS sfrom */ 108 k = sowners[rank]/bs + mbs; 109 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 110 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 111 112 /* x index in the IS sto */ 113 k = sowners[rank]/bs + mbs; 114 for (i=0; i<ec; i++) stmp[i] = (k + i); 115 /* b index in the IS sto */ 116 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 117 118 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 119 120 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 121 122 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 123 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 124 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 125 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 126 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 127 128 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 129 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 130 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 131 132 ierr = PetscFree(stmp);CHKERRQ(ierr); 133 ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr); 134 135 ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr); 136 ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr); 137 ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr); 138 ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr); 139 ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr); 140 ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr); 141 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 142 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 143 144 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 145 ierr = ISDestroy(from);CHKERRQ(ierr); 146 ierr = ISDestroy(to);CHKERRQ(ierr); 147 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 153 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 154 { 155 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 156 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 157 PetscErrorCode ierr; 158 PetscInt i,j,*aj = B->j,ec = 0,*garray; 159 PetscInt bs = mat->rmap->bs,*stmp; 160 IS from,to; 161 Vec gvec; 162 #if defined (PETSC_USE_CTABLE) 163 PetscTable gid1_lid1; 164 PetscTablePosition tpos; 165 PetscInt gid,lid; 166 #else 167 PetscInt Nbs = baij->Nbs,*indices; 168 #endif 169 170 PetscFunctionBegin; 171 #if defined (PETSC_USE_CTABLE) 172 /* use a table - Mark Adams */ 173 PetscTableCreate(B->mbs,&gid1_lid1); 174 for (i=0; i<B->mbs; i++) { 175 for (j=0; j<B->ilen[i]; j++) { 176 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 177 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 178 if (!data) { 179 /* one based table */ 180 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 181 } 182 } 183 } 184 /* form array of columns we need */ 185 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 186 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 187 while (tpos) { 188 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 189 gid--; lid--; 190 garray[lid] = gid; 191 } 192 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 193 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 194 for (i=0; i<ec; i++) { 195 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 196 } 197 /* compact out the extra columns in B */ 198 for (i=0; i<B->mbs; i++) { 199 for (j=0; j<B->ilen[i]; j++) { 200 PetscInt gid1 = aj[B->i[i] + j] + 1; 201 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 202 lid --; 203 aj[B->i[i]+j] = lid; 204 } 205 } 206 B->nbs = ec; 207 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 208 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 209 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 210 /* Mark Adams */ 211 #else 212 /* For the first stab we make an array as long as the number of columns */ 213 /* mark those columns that are in baij->B */ 214 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 215 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 216 for (i=0; i<B->mbs; i++) { 217 for (j=0; j<B->ilen[i]; j++) { 218 if (!indices[aj[B->i[i] + j]]) ec++; 219 indices[aj[B->i[i] + j] ] = 1; 220 } 221 } 222 223 /* form array of columns we need */ 224 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 225 ec = 0; 226 for (i=0; i<Nbs; i++) { 227 if (indices[i]) { 228 garray[ec++] = i; 229 } 230 } 231 232 /* make indices now point into garray */ 233 for (i=0; i<ec; i++) { 234 indices[garray[i]] = i; 235 } 236 237 /* compact out the extra columns in B */ 238 for (i=0; i<B->mbs; i++) { 239 for (j=0; j<B->ilen[i]; j++) { 240 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 241 } 242 } 243 B->nbs = ec; 244 baij->B->cmap->n = ec*mat->rmap->bs; 245 ierr = PetscFree(indices);CHKERRQ(ierr); 246 #endif 247 248 /* create local vector that is used to scatter into */ 249 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 250 251 /* create two temporary index sets for building scatter-gather */ 252 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 253 254 ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 255 for (i=0; i<ec; i++) { stmp[i] = i; } 256 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 257 258 /* create temporary global vector to generate scatter context */ 259 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 260 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 261 ierr = VecDestroy(gvec);CHKERRQ(ierr); 262 263 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 264 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 265 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 266 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 267 baij->garray = garray; 268 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 269 ierr = ISDestroy(from);CHKERRQ(ierr); 270 ierr = ISDestroy(to);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 274 /* 275 Takes the local part of an already assembled MPISBAIJ matrix 276 and disassembles it. This is to allow new nonzeros into the matrix 277 that require more communication in the matrix vector multiply. 278 Thus certain data-structures must be rebuilt. 279 280 Kind of slow! But that's what application programmers get when 281 they are sloppy. 282 */ 283 #undef __FUNCT__ 284 #define __FUNCT__ "DisAssemble_MPISBAIJ" 285 PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 286 { 287 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 288 Mat B = baij->B,Bnew; 289 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 290 PetscErrorCode ierr; 291 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 292 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 293 MatScalar *a = Bbaij->a; 294 PetscScalar *atmp; 295 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 296 PetscInt l; 297 #endif 298 299 PetscFunctionBegin; 300 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 301 ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp); 302 #endif 303 /* free stuff related to matrix-vec multiply */ 304 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 305 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); 306 baij->lvec = 0; 307 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); 308 baij->Mvctx = 0; 309 310 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 311 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 312 baij->slvec0 = 0; 313 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 314 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 315 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 316 baij->slvec1 = 0; 317 318 if (baij->colmap) { 319 #if defined (PETSC_USE_CTABLE) 320 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 321 #else 322 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 323 baij->colmap = 0; 324 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 325 #endif 326 } 327 328 /* make sure that B is assembled so we can access its values */ 329 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 332 /* invent new B and copy stuff over */ 333 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 334 for (i=0; i<mbs; i++) { 335 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 336 } 337 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 338 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 339 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 340 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 341 ierr = PetscFree(nz);CHKERRQ(ierr); 342 343 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 344 for (i=0; i<mbs; i++) { 345 rvals[0] = bs*i; 346 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 347 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 348 col = garray[Bbaij->j[j]]*bs; 349 for (k=0; k<bs; k++) { 350 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 351 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 352 #else 353 atmp = a+j*bs2 + k*bs; 354 #endif 355 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 356 col++; 357 } 358 } 359 } 360 #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 361 ierr = PetscFree(atmp);CHKERRQ(ierr); 362 #endif 363 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 364 baij->garray = 0; 365 ierr = PetscFree(rvals);CHKERRQ(ierr); 366 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 367 ierr = MatDestroy(B);CHKERRQ(ierr); 368 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 369 baij->B = Bnew; 370 A->was_assembled = PETSC_FALSE; 371 PetscFunctionReturn(0); 372 } 373 374 375 376