
#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
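
/*
  Usage sketch (generic PETSc interfaces, not defined in this file): once the
  nonzero pattern is frozen, MatStoreValues()/MatRetrieveValues() snapshot and
  restore the numerical values of both local blocks via the two routines above:

      ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(mat);CHKERRQ(ierr);      ...snapshot A and B...
      ...overwrite entries, e.g. apply a shift...
      ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   ...restore the snapshot...
*/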
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. This is done in a non-scalable way, since the
  length of colmap equals the global number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value; \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
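
/*
  Illustration of the block layout assumed by the A and B macros here: each
  bs x bs block is stored contiguously in column-major order, so for bs = 2 a
  block

      [a b]
      [c d]

  occupies ap[bs2*_i..bs2*_i+3] as {a, c, b, d}, and the scalar entry
  (ridx,cidx) lives at ap[bs2*_i + bs*cidx + ridx], which is the address both
  macros compute.
*/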

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value; \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
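
/*
  Usage sketch (standard public interface; grow/gcol are placeholder global
  indices): MatSetValues() takes global scalar indices even for BAIJ matrices.
  Entries in locally owned rows go directly into A or B above; everything else
  is stashed until assembly:

      ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/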

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
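
/*
  Note on orientation in the routine above: the stored block is column-major,
  so with row-oriented input the bs consecutive input values forming row ii
  are scattered with stride bs (bap[ii], bap[ii+bs], ...), while
  column-oriented input is copied contiguously bs values at a time.
*/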

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If the input is a single block row (row-oriented) or a single block column (column-oriented) it is already contiguous and a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
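
/*
  Worked example of the multiplicative hash above: with size = 100 and
  key = 7, key*HASH_KEY = 4.32623..., whose fractional part is 0.32623...,
  so HASH(100,7,tmp) = (PetscInt)(100*0.32623...) = 32.  HASH_KEY is the
  golden-ratio constant conventionally used for multiplicative hashing.
*/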
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry was found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
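
/*
  The lookup above is open addressing with linear probing: probing starts at
  h1 = HASH(size,key,tmp) and scans forward, wrapping around to the start of
  the table, until the slot holding key = (row/bs)*Nbs + (col/bs) + 1 is
  found (keys are offset by 1 so that 0 can mean "empty").  Since the table
  is built once from the assembled nonzero pattern, a missing key is an
  error rather than an insertion.
*/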

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
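
/*
  Usage sketch (public interface; grow/gcol are placeholder global scalar
  indices): MatGetValues() can only read rows owned by the calling process,
  as enforced by the "Only local values currently supported" error above:

      PetscScalar val;
      ierr = MatGetValues(mat,1,&grow,1,&gcol,&val);CHKERRQ(ierr);
*/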

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
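
/*
  For reference, the norms computed above are
      NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2)  (local partial sums reduced with MPIU_SUM)
      NORM_1:         max_j sum_i |a_ij|     (max column sum)
      NORM_INFINITY:  max_i sum_j |a_ij|     (max row sum)
  each accumulated from the local A (diagonal) and B (off-diagonal) parts.
*/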

/*
  Creates and fills the hash table.
  The table is created only once; if new entries need to be added to the
  matrix, then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for the Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
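
/*
  Standard calling sequence that drives this pair of routines
  (MatAssemblyBegin_MPIBAIJ above starts scattering the scalar and block
  stashes; MatAssemblyEnd_MPIBAIJ below drains them and assembles the local
  parts):

      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/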

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags and restore the originals after
       MatSetValuesBlocked() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
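
/*
  Usage sketch: the viewer-specific routines in this file are reached through
  the generic interface, e.g.

      ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  which lands in MatView_MPIBAIJ() below and dispatches on the viewer type.
*/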

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Indices are written for every row, not just once per
     block row, since process 0 does not have the information needed to expand them per row
     from a block row. This requires more communication, but still no more than the
     communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
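
/*
  For reference, the binary layout written by MatView_MPIBAIJ_Binary() above
  is: a four-entry header (MAT_FILE_CLASSID, global rows, global columns,
  global nonzero count), then the per-row nonzero counts, then all column
  indices, then all numerical values, with process 0 collecting each piece
  from the other ranks under flow control.
*/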

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
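
/*
  The pattern above (repeated in the variants below) overlaps communication
  with computation: start scattering the ghost entries of xx into a->lvec,
  multiply by the diagonal block while the messages are in flight, finish
  the scatter, then add the off-diagonal contribution, i.e.
      yy = a->A*xx + a->B*lvec.
*/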

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but perhaps is not always true. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1536 */ 1537 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1538 PetscInt max = 1,mbs = mat->mbs,tmp; 1539 for (i=0; i<mbs; i++) { 1540 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1541 if (max < tmp) max = tmp; 1542 } 1543 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1544 } 1545 lrow = row - brstart; 1546 1547 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1548 if (!v) {pvA = 0; pvB = 0;} 1549 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1550 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1551 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1552 nztot = nzA + nzB; 1553 1554 cmap = mat->garray; 1555 if (v || idx) { 1556 if (nztot) { 1557 /* Sort by increasing column numbers, assuming A and B already sorted */ 1558 PetscInt imark = -1; 1559 if (v) { 1560 *v = v_p = mat->rowvalues; 1561 for (i=0; i<nzB; i++) { 1562 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1563 else break; 1564 } 1565 imark = i; 1566 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1567 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1568 } 1569 if (idx) { 1570 *idx = idx_p = mat->rowindices; 1571 if (imark > -1) { 1572 for (i=0; i<imark; i++) { 1573 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1574 } 1575 } else { 1576 for (i=0; i<nzB; i++) { 1577 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1578 else break; 1579 } 1580 imark = i; 1581 } 1582 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1583 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1584 } 1585 } else { 1586 if (idx) *idx = 0; 1587 if (v) *v = 0; 1588 } 1589 } 1590 *nz = nztot; 1591 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1592 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1593 PetscFunctionReturn(0); 1594 } 1595 1596 #undef __FUNCT__ 1597 #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1598 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1599 { 1600 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1601 1602 PetscFunctionBegin; 1603 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1604 baij->getrowactive = PETSC_FALSE; 1605 PetscFunctionReturn(0); 1606 } 1607 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatZeroEntries_MPIBAIJ" 1610 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1611 { 1612 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1613 PetscErrorCode ierr; 1614 1615 PetscFunctionBegin; 1616 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1617 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1618 PetscFunctionReturn(0); 1619 } 1620 1621 #undef __FUNCT__ 1622 #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1623 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1624 { 1625 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1626 Mat A = a->A,B = a->B; 1627 PetscErrorCode ierr; 1628 PetscReal isend[5],irecv[5]; 1629 1630 PetscFunctionBegin; 1631 info->block_size = (PetscReal)matin->rmap->bs; 1632 1633 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1634 1635 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1636 isend[3] = info->memory; isend[4] = info->mallocs; 1637 1638 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1639 1640 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1641 isend[3] += info->memory; 
isend[4] += info->mallocs; 1642 1643 if (flag == MAT_LOCAL) { 1644 info->nz_used = isend[0]; 1645 info->nz_allocated = isend[1]; 1646 info->nz_unneeded = isend[2]; 1647 info->memory = isend[3]; 1648 info->mallocs = isend[4]; 1649 } else if (flag == MAT_GLOBAL_MAX) { 1650 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1651 1652 info->nz_used = irecv[0]; 1653 info->nz_allocated = irecv[1]; 1654 info->nz_unneeded = irecv[2]; 1655 info->memory = irecv[3]; 1656 info->mallocs = irecv[4]; 1657 } else if (flag == MAT_GLOBAL_SUM) { 1658 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1659 1660 info->nz_used = irecv[0]; 1661 info->nz_allocated = irecv[1]; 1662 info->nz_unneeded = irecv[2]; 1663 info->memory = irecv[3]; 1664 info->mallocs = irecv[4]; 1665 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1666 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1667 info->fill_ratio_needed = 0; 1668 info->factor_mallocs = 0; 1669 PetscFunctionReturn(0); 1670 } 1671 1672 #undef __FUNCT__ 1673 #define __FUNCT__ "MatSetOption_MPIBAIJ" 1674 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1675 { 1676 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1677 PetscErrorCode ierr; 1678 1679 PetscFunctionBegin; 1680 switch (op) { 1681 case MAT_NEW_NONZERO_LOCATIONS: 1682 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1683 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1684 case MAT_KEEP_NONZERO_PATTERN: 1685 case MAT_NEW_NONZERO_LOCATION_ERR: 1686 MatCheckPreallocated(A,1); 1687 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1688 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1689 break; 1690 case MAT_ROW_ORIENTED: 1691 MatCheckPreallocated(A,1); 1692 a->roworiented = flg; 1693 1694 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1695 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1696 break; 1697 case MAT_NEW_DIAGONALS: 1698 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1699 break; 1700 case MAT_IGNORE_OFF_PROC_ENTRIES: 1701 a->donotstash = flg; 1702 break; 1703 case MAT_USE_HASH_TABLE: 1704 a->ht_flag = flg; 1705 a->ht_fact = 1.39; 1706 break; 1707 case MAT_SYMMETRIC: 1708 case MAT_STRUCTURALLY_SYMMETRIC: 1709 case MAT_HERMITIAN: 1710 case MAT_SUBMAT_SINGLEIS: 1711 case MAT_SYMMETRY_ETERNAL: 1712 MatCheckPreallocated(A,1); 1713 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1714 break; 1715 default: 1716 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatTranspose_MPIBAIJ" 1723 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1724 { 1725 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1726 Mat_SeqBAIJ *Aloc; 1727 Mat B; 1728 PetscErrorCode ierr; 1729 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1730 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1731 MatScalar *a; 1732 1733 PetscFunctionBegin; 1734 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1735 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1736 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1737 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1738 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1739 /* Do not know preallocation 
information, but must set block size */ 1740 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1741 } else { 1742 B = *matout; 1743 } 1744 1745 /* copy over the A part */ 1746 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1747 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1748 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1749 1750 for (i=0; i<mbs; i++) { 1751 rvals[0] = bs*(baij->rstartbs + i); 1752 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1753 for (j=ai[i]; j<ai[i+1]; j++) { 1754 col = (baij->cstartbs+aj[j])*bs; 1755 for (k=0; k<bs; k++) { 1756 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1757 1758 col++; a += bs; 1759 } 1760 } 1761 } 1762 /* copy over the B part */ 1763 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1764 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1765 for (i=0; i<mbs; i++) { 1766 rvals[0] = bs*(baij->rstartbs + i); 1767 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1768 for (j=ai[i]; j<ai[i+1]; j++) { 1769 col = baij->garray[aj[j]]*bs; 1770 for (k=0; k<bs; k++) { 1771 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1772 col++; 1773 a += bs; 1774 } 1775 } 1776 } 1777 ierr = PetscFree(rvals);CHKERRQ(ierr); 1778 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1779 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1780 1781 if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 1782 else { 1783 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1784 } 1785 PetscFunctionReturn(0); 1786 } 1787 1788 #undef __FUNCT__ 1789 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1790 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1791 { 1792 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1793 Mat a = baij->A,b = baij->B; 1794 PetscErrorCode ierr; 1795 PetscInt s1,s2,s3; 1796 1797 PetscFunctionBegin; 1798 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1799 if (rr) { 1800 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1801 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1802 /* Overlap communication with computation. 
*/
1803 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1804 }
1805 if (ll) {
1806 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1807 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1808 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1809 }
1810 /* scale the diagonal block */
1811 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1812
1813 if (rr) {
1814 /* Do a scatter end and then right scale the off-diagonal block */
1815 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1816 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1817 }
1818 PetscFunctionReturn(0);
1819 }
1820
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1823 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1824 {
1825 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1826 PetscInt *lrows;
1827 PetscInt r, len;
1828 PetscErrorCode ierr;
1829
1830 PetscFunctionBegin;
1831 /* get locally owned rows */
1832 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1833 /* fix right hand side if needed */
1834 if (x && b) {
1835 const PetscScalar *xx;
1836 PetscScalar *bb;
1837
1838 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1839 ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1840 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1841 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1842 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1843 }
1844
1845 /* actually zap the local rows */
1846 /*
1847 Zero the required rows. If the "diagonal block" of the matrix
1848 is square and the user wishes to set the diagonal we use separate
1849 code so that MatSetValues() is not called for each diagonal entry,
1850 which would allocate new memory with many mallocs and slow things down.
1851 1852 */ 1853 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1854 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1855 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 1856 PetscBool cong; 1857 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 1858 if (cong) A->congruentlayouts = 1; 1859 else A->congruentlayouts = 0; 1860 } 1861 if ((diag != 0.0) && A->congruentlayouts) { 1862 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1863 } else if (diag != 0.0) { 1864 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1865 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1866 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1867 for (r = 0; r < len; ++r) { 1868 const PetscInt row = lrows[r] + A->rmap->rstart; 1869 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1870 } 1871 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1872 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1873 } else { 1874 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1875 } 1876 ierr = PetscFree(lrows);CHKERRQ(ierr); 1877 1878 /* only change matrix nonzero state if pattern was allowed to be changed */ 1879 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1880 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1881 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1882 } 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 1888 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1889 { 1890 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1891 PetscErrorCode ierr; 1892 PetscMPIInt n = A->rmap->n; 1893 PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 1894 PetscInt *lrows,*owners = A->rmap->range; 1895 PetscSFNode *rrows; 1896 PetscSF sf; 1897 const PetscScalar *xx; 1898 PetscScalar *bb,*mask; 1899 Vec xmask,lmask; 1900 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1901 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1902 PetscScalar *aa; 1903 1904 PetscFunctionBegin; 1905 /* Create SF where leaves are input rows and roots are owned rows */ 1906 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1907 for (r = 0; r < n; ++r) lrows[r] = -1; 1908 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1909 for (r = 0; r < N; ++r) { 1910 const PetscInt idx = rows[r]; 1911 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1912 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1913 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1914 } 1915 rrows[r].rank = p; 1916 rrows[r].index = rows[r] - owners[p]; 1917 } 1918 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1919 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1920 /* Collect flags for rows to be zeroed */ 1921 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1922 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1923 ierr = 
PetscSFDestroy(&sf);CHKERRQ(ierr); 1924 /* Compress and put in row numbers */ 1925 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1926 /* zero diagonal part of matrix */ 1927 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1928 /* handle off diagonal part of matrix */ 1929 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1930 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1931 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1932 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1933 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1934 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1935 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1936 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1937 if (x) { 1938 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1939 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1940 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1941 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1942 } 1943 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1944 /* remove zeroed rows of off diagonal matrix */ 1945 for (i = 0; i < len; ++i) { 1946 row = lrows[i]; 1947 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1948 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1949 for (k = 0; k < count; ++k) { 1950 aa[0] = 0.0; 1951 aa += bs; 1952 } 1953 } 1954 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1955 for (i = 0; i < l->B->rmap->N; ++i) { 1956 row = i/bs; 1957 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1958 for (k = 0; k < bs; ++k) { 1959 col = bs*baij->j[j] + k; 1960 if (PetscAbsScalar(mask[col])) { 1961 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1962 if (x) bb[i] -= aa[0]*xx[col]; 1963 aa[0] = 0.0; 1964 } 1965 } 1966 } 1967 } 1968 if (x) { 1969 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1970 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1971 } 1972 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1973 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1974 ierr = PetscFree(lrows);CHKERRQ(ierr); 1975 1976 /* only change matrix nonzero state if pattern was allowed to be changed */ 1977 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1978 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1979 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1980 } 1981 PetscFunctionReturn(0); 1982 } 1983 1984 #undef __FUNCT__ 1985 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1986 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1987 { 1988 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1989 PetscErrorCode ierr; 1990 1991 PetscFunctionBegin; 1992 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatEqual_MPIBAIJ" 2000 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 2001 { 2002 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 2003 Mat a,b,c,d; 2004 PetscBool flg; 2005 PetscErrorCode ierr; 2006 2007 PetscFunctionBegin; 2008 a = matA->A; b = matA->B; 2009 c = matB->A; d = matB->B; 2010 2011 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2012 if (flg) { 2013 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2014 } 2015 ierr = 
MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatCopy_MPIBAIJ" 2021 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 2022 { 2023 PetscErrorCode ierr; 2024 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2025 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2026 2027 PetscFunctionBegin; 2028 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2029 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2030 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2031 } else { 2032 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2033 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "MatSetUp_MPIBAIJ" 2040 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 2041 { 2042 PetscErrorCode ierr; 2043 2044 PetscFunctionBegin; 2045 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2046 PetscFunctionReturn(0); 2047 } 2048 2049 #undef __FUNCT__ 2050 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 2051 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2052 { 2053 PetscErrorCode ierr; 2054 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 2055 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2056 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2057 2058 PetscFunctionBegin; 2059 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 #undef __FUNCT__ 2064 #define __FUNCT__ "MatAXPY_MPIBAIJ" 2065 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2066 { 2067 PetscErrorCode ierr; 2068 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 2069 PetscBLASInt bnz,one=1; 2070 Mat_SeqBAIJ *x,*y; 2071 2072 PetscFunctionBegin; 2073 if (str == SAME_NONZERO_PATTERN) { 2074 PetscScalar alpha = a; 2075 x = (Mat_SeqBAIJ*)xx->A->data; 2076 y = (Mat_SeqBAIJ*)yy->A->data; 2077 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2078 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2079 x = (Mat_SeqBAIJ*)xx->B->data; 2080 y = (Mat_SeqBAIJ*)yy->B->data; 2081 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2082 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2083 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2084 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2085 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2086 } else { 2087 Mat B; 2088 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 2089 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2090 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2091 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2092 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2093 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2094 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2095 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 2096 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2097 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2098 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2099 /* MatAXPY_BasicWithPreallocation() for BAIJ 
matrix is much slower than AIJ, even for bs=1 ! */
2100 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2101 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2102 ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2103 ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2104 }
2105 PetscFunctionReturn(0);
2106 }
2107
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2110 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2111 {
2112 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2113 PetscErrorCode ierr;
2114
2115 PetscFunctionBegin;
2116 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2117 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2118 PetscFunctionReturn(0);
2119 }
2120
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2123 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2124 {
2125 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
2126 PetscErrorCode ierr;
2127
2128 PetscFunctionBegin;
2129 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2130 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2131 PetscFunctionReturn(0);
2132 }
2133
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2136 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2137 {
2138 PetscErrorCode ierr;
2139 IS iscol_local;
2140 PetscInt csize;
2141
2142 PetscFunctionBegin;
2143 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2144 if (call == MAT_REUSE_MATRIX) {
2145 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2146 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2147 } else {
2148 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2149 }
2150 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2151 if (call == MAT_INITIAL_MATRIX) {
2152 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2153 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2154 }
2155 PetscFunctionReturn(0);
2156 }
2157 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2160 /*
2161 Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2162 and then the final result obtained by concatenating the local matrices.
2163 Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2164 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2165 */
2166 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2167 {
2168 PetscErrorCode ierr;
2169 PetscMPIInt rank,size;
2170 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2171 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2172 Mat M,Mreuse;
2173 MatScalar *vwork,*aa;
2174 MPI_Comm comm;
2175 IS isrow_new, iscol_new;
2176 PetscBool idflag,allrows, allcols;
2177 Mat_SeqBAIJ *aij;
2178
2179 PetscFunctionBegin;
2180 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2181 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2182 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2183 /* The compression and expansion should be avoided. It doesn't point
2184 out errors and might change the indices, hence it is buggy. */
2185 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2186 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2187
2188 /* Check for special case: each processor gets entire matrix columns */
2189 ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2190 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2191 if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2192 else allcols = PETSC_FALSE;
2193
2194 ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2195 ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2196 if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2197 else allrows = PETSC_FALSE;
2198
2199 if (call == MAT_REUSE_MATRIX) {
2200 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2201 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2202 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2203 } else {
2204 ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2205 }
2206 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2207 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2208 /*
2209 m - number of local rows
2210 n - number of columns (same on all processors)
2211 rstart - first row in new global matrix generated
2212 */
2213 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2214 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2215 m = m/bs;
2216 n = n/bs;
2217
2218 if (call == MAT_INITIAL_MATRIX) {
2219 aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2220 ii = aij->i;
2221 jj = aij->j;
2222
2223 /*
2224 Determine the number of non-zeros in the diagonal and off-diagonal
2225 portions of the matrix in order to do correct preallocation
2226 */
2227
2228 /* first get start and end of "diagonal" columns */
2229 if (csize == PETSC_DECIDE) {
2230 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2231 if (mglobal == n*bs) { /* square matrix */
2232 nlocal = m;
2233 } else {
2234 nlocal = n/size + ((n % size) > rank);
2235 }
2236 } else {
2237 nlocal = csize/bs;
2238 }
2239 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2240 rstart = rend - nlocal;
2241 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2242
2243 /* next, compute all the lengths */
2244 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2245 for (i=0; i<m; i++) {
2246 jend = ii[i+1] - ii[i];
2247 olen = 0;
2248 dlen = 0;
2249 for (j=0; j<jend; j++) {
2250 if (*jj < rstart || *jj >= rend) olen++;
2251 else dlen++;
2252 jj++;
2253 }
2254 olens[i] = olen;
2255 dlens[i] = dlen;
2256 }
2257 ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2258 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2259 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2260 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2261 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2262 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2263 } else {
2264 PetscInt ml,nl;
2265
2266 M = *newmat;
2267 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2268 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same
size/layout as request"); 2269 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2270 /* 2271 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2272 rather than the slower MatSetValues(). 2273 */ 2274 M->was_assembled = PETSC_TRUE; 2275 M->assembled = PETSC_FALSE; 2276 } 2277 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2278 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2279 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2280 ii = aij->i; 2281 jj = aij->j; 2282 aa = aij->a; 2283 for (i=0; i<m; i++) { 2284 row = rstart/bs + i; 2285 nz = ii[i+1] - ii[i]; 2286 cwork = jj; jj += nz; 2287 vwork = aa; aa += nz*bs*bs; 2288 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2289 } 2290 2291 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2292 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2293 *newmat = M; 2294 2295 /* save submatrix used in processor for next request */ 2296 if (call == MAT_INITIAL_MATRIX) { 2297 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2298 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2299 } 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatPermute_MPIBAIJ" 2305 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2306 { 2307 MPI_Comm comm,pcomm; 2308 PetscInt clocal_size,nrows; 2309 const PetscInt *rows; 2310 PetscMPIInt size; 2311 IS crowp,lcolp; 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2316 /* make a collective version of 'rowp' */ 2317 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2318 if (pcomm==comm) { 2319 crowp = rowp; 2320 } else { 2321 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2322 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2323 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2324 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2325 } 2326 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2327 /* make a local version of 'colp' */ 2328 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2329 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2330 if (size==1) { 2331 lcolp = colp; 2332 } else { 2333 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2334 } 2335 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2336 /* now we just get the submatrix */ 2337 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2338 ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2339 /* clean up */ 2340 if (pcomm!=comm) { 2341 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2342 } 2343 if (size>1) { 2344 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2345 } 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 2351 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2352 { 2353 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2354 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2355 2356 PetscFunctionBegin; 2357 if (nghosts) *nghosts = B->nbs; 2358 if (ghosts) *ghosts = baij->garray; 2359 PetscFunctionReturn(0); 2360 } 2361 2362 #undef __FUNCT__ 2363 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2364 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2365 { 2366 Mat B; 2367 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2368 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = 
(Mat_SeqBAIJ*)a->B->data; 2369 Mat_SeqAIJ *b; 2370 PetscErrorCode ierr; 2371 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2372 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2373 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2374 2375 PetscFunctionBegin; 2376 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2377 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2378 2379 /* ---------------------------------------------------------------- 2380 Tell every processor the number of nonzeros per row 2381 */ 2382 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2383 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2384 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2385 } 2386 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2387 displs = recvcounts + size; 2388 for (i=0; i<size; i++) { 2389 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2390 displs[i] = A->rmap->range[i]/bs; 2391 } 2392 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2393 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2394 #else 2395 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2396 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2397 #endif 2398 /* --------------------------------------------------------------- 2399 Create the sequential matrix of the same type as the local block diagonal 2400 */ 2401 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2402 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2403 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2404 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2405 b = (Mat_SeqAIJ*)B->data; 2406 2407 /*-------------------------------------------------------------------- 2408 Copy my part of matrix column indices over 2409 */ 2410 sendcount = ad->nz + bd->nz; 2411 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2412 a_jsendbuf = ad->j; 2413 b_jsendbuf = bd->j; 2414 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2415 cnt = 0; 2416 for (i=0; i<n; i++) { 2417 2418 /* put in lower diagonal portion */ 2419 m = bd->i[i+1] - bd->i[i]; 2420 while (m > 0) { 2421 /* is it above diagonal (in bd (compressed) numbering) */ 2422 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2423 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2424 m--; 2425 } 2426 2427 /* put in diagonal portion */ 2428 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2429 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2430 } 2431 2432 /* put in upper diagonal portion */ 2433 while (m-- > 0) { 2434 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2435 } 2436 } 2437 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2438 2439 /*-------------------------------------------------------------------- 2440 Gather all column indices to all processors 2441 */ 2442 for (i=0; i<size; i++) { 2443 recvcounts[i] = 0; 2444 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2445 recvcounts[i] += lens[j]; 2446 } 2447 } 2448 displs[0] = 0; 2449 for (i=1; i<size; i++) { 2450 displs[i] = displs[i-1] + recvcounts[i-1]; 2451 } 2452 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2453 ierr = 
MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2454 #else 2455 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2456 #endif 2457 /*-------------------------------------------------------------------- 2458 Assemble the matrix into useable form (note numerical values not yet set) 2459 */ 2460 /* set the b->ilen (length of each row) values */ 2461 ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2462 /* set the b->i indices */ 2463 b->i[0] = 0; 2464 for (i=1; i<=A->rmap->N/bs; i++) { 2465 b->i[i] = b->i[i-1] + lens[i-1]; 2466 } 2467 ierr = PetscFree(lens);CHKERRQ(ierr); 2468 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2469 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2470 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2471 2472 if (A->symmetric) { 2473 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2474 } else if (A->hermitian) { 2475 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2476 } else if (A->structurally_symmetric) { 2477 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2478 } 2479 *newmat = B; 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatSOR_MPIBAIJ" 2485 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2486 { 2487 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2488 PetscErrorCode ierr; 2489 Vec bb1 = 0; 2490 2491 PetscFunctionBegin; 2492 if (flag == SOR_APPLY_UPPER) { 2493 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2494 PetscFunctionReturn(0); 2495 } 2496 2497 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2498 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2499 } 2500 2501 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2502 if (flag & SOR_ZERO_INITIAL_GUESS) { 2503 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2504 its--; 2505 } 2506 2507 while (its--) { 2508 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2509 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 2511 /* update rhs: bb1 = bb - B*x */ 2512 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2513 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2514 2515 /* local sweep */ 2516 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2517 } 2518 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2519 if (flag & SOR_ZERO_INITIAL_GUESS) { 2520 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2521 its--; 2522 } 2523 while (its--) { 2524 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2525 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2526 2527 /* update rhs: bb1 = bb - B*x */ 2528 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2529 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2530 2531 /* local sweep */ 2532 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2533 } 2534 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2535 if (flag & SOR_ZERO_INITIAL_GUESS) { 2536 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2537 its--; 2538 } 2539 while (its--) { 2540 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2541 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2542 2543 /* update rhs: bb1 = bb - B*x */ 2544 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2545 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2546 2547 /* local sweep */ 2548 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2549 } 2550 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2551 2552 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2553 PetscFunctionReturn(0); 2554 } 2555 2556 #undef __FUNCT__ 2557 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 2558 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2559 { 2560 PetscErrorCode ierr; 2561 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2562 PetscInt N,i,*garray = aij->garray; 2563 PetscInt ib,jb,bs = A->rmap->bs; 2564 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2565 MatScalar *a_val = a_aij->a; 2566 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2567 MatScalar *b_val = b_aij->a; 2568 PetscReal *work; 2569 2570 PetscFunctionBegin; 2571 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2572 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2573 if (type == NORM_2) { 2574 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2575 for (jb=0; jb<bs; jb++) { 2576 for (ib=0; ib<bs; ib++) { 2577 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2578 a_val++; 2579 } 2580 } 2581 } 2582 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2583 for (jb=0; jb<bs; jb++) { 2584 for (ib=0; ib<bs; ib++) { 2585 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2586 b_val++; 2587 } 2588 } 2589 } 2590 } else if (type == NORM_1) { 2591 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2592 for (jb=0; jb<bs; jb++) { 2593 for (ib=0; ib<bs; ib++) { 2594 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2595 a_val++; 2596 } 2597 } 2598 } 2599 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2600 for (jb=0; jb<bs; jb++) { 2601 for (ib=0; ib<bs; ib++) { 2602 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2603 b_val++; 2604 } 2605 } 2606 } 2607 } else if (type == NORM_INFINITY) { 2608 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2609 for (jb=0; jb<bs; jb++) { 2610 for (ib=0; ib<bs; ib++) { 2611 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2612 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2613 a_val++; 2614 } 2615 } 2616 } 2617 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2618 for (jb=0; jb<bs; jb++) { 2619 for (ib=0; ib<bs; ib++) { 2620 int col = garray[b_aij->j[i]] * bs + jb; 2621 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2622 b_val++; 2623 } 2624 } 2625 } 2626 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2627 if (type == NORM_INFINITY) { 2628 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2629 } else { 2630 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2631 } 2632 ierr = PetscFree(work);CHKERRQ(ierr); 2633 if (type == NORM_2) { 2634 for (i=0; i<N; i++) norms[i] = 
PetscSqrtReal(norms[i]); 2635 } 2636 PetscFunctionReturn(0); 2637 } 2638 2639 #undef __FUNCT__ 2640 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2641 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2642 { 2643 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2644 PetscErrorCode ierr; 2645 2646 PetscFunctionBegin; 2647 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2648 A->factorerrortype = a->A->factorerrortype; 2649 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2650 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2651 PetscFunctionReturn(0); 2652 } 2653 2654 #undef __FUNCT__ 2655 #define __FUNCT__ "MatShift_MPIBAIJ" 2656 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2657 { 2658 PetscErrorCode ierr; 2659 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2660 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2661 2662 PetscFunctionBegin; 2663 if (!Y->preallocated) { 2664 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2665 } else if (!aij->nz) { 2666 PetscInt nonew = aij->nonew; 2667 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2668 aij->nonew = nonew; 2669 } 2670 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2671 PetscFunctionReturn(0); 2672 } 2673 2674 #undef __FUNCT__ 2675 #define __FUNCT__ "MatMissingDiagonal_MPIBAIJ" 2676 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2677 { 2678 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2679 PetscErrorCode ierr; 2680 2681 PetscFunctionBegin; 2682 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2683 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2684 if (d) { 2685 PetscInt rstart; 2686 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2687 *d += rstart/A->rmap->bs; 2688 2689 } 2690 PetscFunctionReturn(0); 2691 } 2692 2693 #undef __FUNCT__ 2694 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 2695 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2696 { 2697 PetscFunctionBegin; 2698 *a = ((Mat_MPIBAIJ*)A->data)->A; 2699 PetscFunctionReturn(0); 2700 } 2701 2702 /* -------------------------------------------------------------------*/ 2703 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2704 MatGetRow_MPIBAIJ, 2705 MatRestoreRow_MPIBAIJ, 2706 MatMult_MPIBAIJ, 2707 /* 4*/ MatMultAdd_MPIBAIJ, 2708 MatMultTranspose_MPIBAIJ, 2709 MatMultTransposeAdd_MPIBAIJ, 2710 0, 2711 0, 2712 0, 2713 /*10*/ 0, 2714 0, 2715 0, 2716 MatSOR_MPIBAIJ, 2717 MatTranspose_MPIBAIJ, 2718 /*15*/ MatGetInfo_MPIBAIJ, 2719 MatEqual_MPIBAIJ, 2720 MatGetDiagonal_MPIBAIJ, 2721 MatDiagonalScale_MPIBAIJ, 2722 MatNorm_MPIBAIJ, 2723 /*20*/ MatAssemblyBegin_MPIBAIJ, 2724 MatAssemblyEnd_MPIBAIJ, 2725 MatSetOption_MPIBAIJ, 2726 MatZeroEntries_MPIBAIJ, 2727 /*24*/ MatZeroRows_MPIBAIJ, 2728 0, 2729 0, 2730 0, 2731 0, 2732 /*29*/ MatSetUp_MPIBAIJ, 2733 0, 2734 0, 2735 MatGetDiagonalBlock_MPIBAIJ, 2736 0, 2737 /*34*/ MatDuplicate_MPIBAIJ, 2738 0, 2739 0, 2740 0, 2741 0, 2742 /*39*/ MatAXPY_MPIBAIJ, 2743 MatGetSubMatrices_MPIBAIJ, 2744 MatIncreaseOverlap_MPIBAIJ, 2745 MatGetValues_MPIBAIJ, 2746 MatCopy_MPIBAIJ, 2747 /*44*/ 0, 2748 MatScale_MPIBAIJ, 2749 MatShift_MPIBAIJ, 2750 0, 2751 MatZeroRowsColumns_MPIBAIJ, 2752 /*49*/ 0, 2753 0, 2754 0, 2755 0, 2756 0, 2757 /*54*/ MatFDColoringCreate_MPIXAIJ, 2758 0, 2759 MatSetUnfactored_MPIBAIJ, 2760 MatPermute_MPIBAIJ, 2761 MatSetValuesBlocked_MPIBAIJ, 2762 /*59*/ MatGetSubMatrix_MPIBAIJ, 2763 
MatDestroy_MPIBAIJ, 2764 MatView_MPIBAIJ, 2765 0, 2766 0, 2767 /*64*/ 0, 2768 0, 2769 0, 2770 0, 2771 0, 2772 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2773 0, 2774 0, 2775 0, 2776 0, 2777 /*74*/ 0, 2778 MatFDColoringApply_BAIJ, 2779 0, 2780 0, 2781 0, 2782 /*79*/ 0, 2783 0, 2784 0, 2785 0, 2786 MatLoad_MPIBAIJ, 2787 /*84*/ 0, 2788 0, 2789 0, 2790 0, 2791 0, 2792 /*89*/ 0, 2793 0, 2794 0, 2795 0, 2796 0, 2797 /*94*/ 0, 2798 0, 2799 0, 2800 0, 2801 0, 2802 /*99*/ 0, 2803 0, 2804 0, 2805 0, 2806 0, 2807 /*104*/0, 2808 MatRealPart_MPIBAIJ, 2809 MatImaginaryPart_MPIBAIJ, 2810 0, 2811 0, 2812 /*109*/0, 2813 0, 2814 0, 2815 0, 2816 MatMissingDiagonal_MPIBAIJ, 2817 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2818 0, 2819 MatGetGhosts_MPIBAIJ, 2820 0, 2821 0, 2822 /*119*/0, 2823 0, 2824 0, 2825 0, 2826 MatGetMultiProcBlock_MPIBAIJ, 2827 /*124*/0, 2828 MatGetColumnNorms_MPIBAIJ, 2829 MatInvertBlockDiagonal_MPIBAIJ, 2830 0, 2831 0, 2832 /*129*/ 0, 2833 0, 2834 0, 2835 0, 2836 0, 2837 /*134*/ 0, 2838 0, 2839 0, 2840 0, 2841 0, 2842 /*139*/ MatSetBlockSizes_Default, 2843 0, 2844 0, 2845 MatFDColoringSetUp_MPIXAIJ, 2846 0, 2847 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2848 }; 2849 2850 2851 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2852 2853 #undef __FUNCT__ 2854 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2855 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2856 { 2857 PetscInt m,rstart,cstart,cend; 2858 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2859 const PetscInt *JJ =0; 2860 PetscScalar *values=0; 2861 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2862 PetscErrorCode ierr; 2863 2864 PetscFunctionBegin; 2865 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2866 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2867 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2868 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2869 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2870 m = B->rmap->n/bs; 2871 rstart = B->rmap->rstart/bs; 2872 cstart = B->cmap->rstart/bs; 2873 cend = B->cmap->rend/bs; 2874 2875 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2876 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2877 for (i=0; i<m; i++) { 2878 nz = ii[i+1] - ii[i]; 2879 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2880 nz_max = PetscMax(nz_max,nz); 2881 JJ = jj + ii[i]; 2882 for (j=0; j<nz; j++) { 2883 if (*JJ >= cstart) break; 2884 JJ++; 2885 } 2886 d = 0; 2887 for (; j<nz; j++) { 2888 if (*JJ++ >= cend) break; 2889 d++; 2890 } 2891 d_nnz[i] = d; 2892 o_nnz[i] = nz - d; 2893 } 2894 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2895 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2896 2897 values = (PetscScalar*)V; 2898 if (!values) { 2899 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2900 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2901 } 2902 for (i=0; i<m; i++) { 2903 PetscInt row = i + rstart; 2904 PetscInt ncols = ii[i+1] - ii[i]; 2905 const PetscInt *icols = jj + ii[i]; 2906 if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2907 const PetscScalar *svals = values + (V ? 
(bs*bs*ii[i]) : 0);
2908 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2909 } else { /* block ordering does not match so we can only insert one block at a time. */
2910 PetscInt j;
2911 for (j=0; j<ncols; j++) {
2912 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2913 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2914 }
2915 }
2916 }
2917
2918 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2919 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2920 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2921 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2922 PetscFunctionReturn(0);
2923 }
2924
2925 #undef __FUNCT__
2926 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2927 /*@C
2928 MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2929 (block compressed row).
2930
2931 Collective on MPI_Comm
2932
2933 Input Parameters:
2934 + B - the matrix
2935 . bs - the block size
2936 . i - the indices into j for the start of each local row (starts with zero)
2937 . j - the column indices for each local row (starts with zero); these must be sorted for each row
2938 - v - optional values in the matrix
2939
2940 Level: developer
2941
2942 Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
2943 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2944 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
2945 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2946 block column and the second index is over columns within a block.
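   Example usage, a minimal sketch with hypothetical sizes: one local block row with two block nonzeros, passing NULL for v so the values start at zero and can be set later with MatSetValuesBlocked():

      PetscInt bs  = 2;
      PetscInt i[] = {0,2};
      PetscInt j[] = {0,5};
      ierr = MatMPIBAIJSetPreallocationCSR(B,bs,i,j,NULL);CHKERRQ(ierr);

   Here block row 0 has block entries in global block columns 0 and 5, sorted within the row as required.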
2947 2948 .keywords: matrix, aij, compressed row, sparse, parallel 2949 2950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2951 @*/ 2952 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2953 { 2954 PetscErrorCode ierr; 2955 2956 PetscFunctionBegin; 2957 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2958 PetscValidType(B,1); 2959 PetscValidLogicalCollectiveInt(B,bs,2); 2960 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2961 PetscFunctionReturn(0); 2962 } 2963 2964 #undef __FUNCT__ 2965 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2966 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2967 { 2968 Mat_MPIBAIJ *b; 2969 PetscErrorCode ierr; 2970 PetscInt i; 2971 2972 PetscFunctionBegin; 2973 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2974 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2975 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2976 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2977 2978 if (d_nnz) { 2979 for (i=0; i<B->rmap->n/bs; i++) { 2980 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2981 } 2982 } 2983 if (o_nnz) { 2984 for (i=0; i<B->rmap->n/bs; i++) { 2985 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2986 } 2987 } 2988 2989 b = (Mat_MPIBAIJ*)B->data; 2990 b->bs2 = bs*bs; 2991 b->mbs = B->rmap->n/bs; 2992 b->nbs = B->cmap->n/bs; 2993 b->Mbs = B->rmap->N/bs; 2994 b->Nbs = B->cmap->N/bs; 2995 2996 for (i=0; i<=b->size; i++) { 2997 b->rangebs[i] = B->rmap->range[i]/bs; 2998 } 2999 b->rstartbs = B->rmap->rstart/bs; 3000 b->rendbs = B->rmap->rend/bs; 3001 b->cstartbs = B->cmap->rstart/bs; 3002 b->cendbs = B->cmap->rend/bs; 3003 3004 #if defined(PETSC_USE_CTABLE) 3005 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 3006 #else 3007 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 3008 #endif 3009 ierr = PetscFree(b->garray);CHKERRQ(ierr); 3010 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 3011 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 3012 3013 /* Because the B will have been resized we simply destroy it and create a new one each time */ 3014 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3015 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3016 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3017 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 3018 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3019 3020 if (!B->preallocated) { 3021 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3022 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3023 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 3024 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3025 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 3026 } 3027 3028 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3029 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 3030 B->preallocated = PETSC_TRUE; 3031 B->was_assembled = PETSC_FALSE; 
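/* (re)preallocation discards any previous assembly, so the matrix must be assembled again before use */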
3032 B->assembled = PETSC_FALSE; 3033 PetscFunctionReturn(0); 3034 } 3035 3036 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 3037 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 3038 3039 #undef __FUNCT__ 3040 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 3041 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 3042 { 3043 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 3044 PetscErrorCode ierr; 3045 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 3046 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 3047 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 3048 3049 PetscFunctionBegin; 3050 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 3051 ii[0] = 0; 3052 for (i=0; i<M; i++) { 3053 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 3054 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 3055 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 3056 /* remove one from count of matrix has diagonal */ 3057 for (j=id[i]; j<id[i+1]; j++) { 3058 if (jd[j] == i) {ii[i+1]--;break;} 3059 } 3060 } 3061 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 3062 cnt = 0; 3063 for (i=0; i<M; i++) { 3064 for (j=io[i]; j<io[i+1]; j++) { 3065 if (garray[jo[j]] > rstart) break; 3066 jj[cnt++] = garray[jo[j]]; 3067 } 3068 for (k=id[i]; k<id[i+1]; k++) { 3069 if (jd[k] != i) { 3070 jj[cnt++] = rstart + jd[k]; 3071 } 3072 } 3073 for (; j<io[i+1]; j++) { 3074 jj[cnt++] = garray[jo[j]]; 3075 } 3076 } 3077 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 3078 PetscFunctionReturn(0); 3079 } 3080 3081 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3082 3083 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 3084 3085 #undef __FUNCT__ 3086 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 3087 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 3088 { 3089 PetscErrorCode ierr; 3090 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3091 Mat B; 3092 Mat_MPIAIJ *b; 3093 3094 PetscFunctionBegin; 3095 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 3096 3097 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 3098 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 3099 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 3100 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3101 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3102 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 3103 b = (Mat_MPIAIJ*) B->data; 3104 3105 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 3106 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 3107 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 3108 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 3109 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 3110 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3111 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3112 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3113 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3114 if (reuse == MAT_INPLACE_MATRIX) { 3115 ierr = 
MatHeaderReplace(A,&B);CHKERRQ(ierr); 3116 } else { 3117 *newmat = B; 3118 } 3119 PetscFunctionReturn(0); 3120 } 3121 3122 /*MC 3123 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 3124 3125 Options Database Keys: 3126 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 3127 . -mat_block_size <bs> - set the blocksize used to store the matrix 3128 - -mat_use_hash_table <fact> - set the hash table factor used during matrix assembly 3129 3130 Level: beginner 3131 3132 .seealso: MatCreateMPIBAIJ() 3133 M*/ 3134 3135 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 3136 3137 #undef __FUNCT__ 3138 #define __FUNCT__ "MatCreate_MPIBAIJ" 3139 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3140 { 3141 Mat_MPIBAIJ *b; 3142 PetscErrorCode ierr; 3143 PetscBool flg = PETSC_FALSE; 3144 3145 PetscFunctionBegin; 3146 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3147 B->data = (void*)b; 3148 3149 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3150 B->assembled = PETSC_FALSE; 3151 3152 B->insertmode = NOT_SET_VALUES; 3153 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3154 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3155 3156 /* build local table of row and column ownerships */ 3157 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3158 3159 /* build cache for off-processor entries formed during assembly */ 3160 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 3161 3162 b->donotstash = PETSC_FALSE; 3163 b->colmap = NULL; 3164 b->garray = NULL; 3165 b->roworiented = PETSC_TRUE; 3166 3167 /* stuff used in block assembly */ 3168 b->barray = 0; 3169 3170 /* stuff used for matrix vector multiply */ 3171 b->lvec = 0; 3172 b->Mvctx = 0; 3173 3174 /* stuff for MatGetRow() */ 3175 b->rowindices = 0; 3176 b->rowvalues = 0; 3177 b->getrowactive = PETSC_FALSE; 3178 3179 /* hash table stuff */ 3180 b->ht = 0; 3181 b->hd = 0; 3182 b->ht_size = 0; 3183 b->ht_flag = PETSC_FALSE; 3184 b->ht_fact = 0; 3185 b->ht_total_ct = 0; 3186 b->ht_insert_ct = 0; 3187 3188 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 3189 b->ijonly = PETSC_FALSE; 3190 3191 3192 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3193 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3194 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3195 #if defined(PETSC_HAVE_HYPRE) 3196 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3197 #endif 3198 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3199 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3200 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3201 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3202 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3203 ierr =
PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3204 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3205 3206 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3207 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr); 3208 if (flg) { 3209 PetscReal fact = 1.39; 3210 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 3211 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3212 if (fact <= 1.0) fact = 1.39; 3213 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 3214 ierr = PetscInfo1(B,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr); 3215 } 3216 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 3220 /*MC 3221 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3222 3223 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3224 and MATMPIBAIJ otherwise. 3225 3226 Options Database Keys: 3227 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3228 3229 Level: beginner 3230 3231 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3232 M*/ 3233 3234 #undef __FUNCT__ 3235 #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3236 /*@C 3237 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3238 (block compressed row). For good matrix assembly performance 3239 the user should preallocate the matrix storage by setting the parameters 3240 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3241 performance can be increased by more than a factor of 50. 3242 3243 Collective on Mat 3244 3245 Input Parameters: 3246 + B - the matrix 3247 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3248 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3249 . d_nz - number of block nonzeros per block row in diagonal portion of local 3250 submatrix (same for all local rows) 3251 . d_nnz - array containing the number of block nonzeros in the various block rows 3252 in the diagonal portion of the local submatrix (possibly different for each block 3253 row), or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3254 set it even if it is zero. 3255 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3256 submatrix (same for all local rows). 3257 - o_nnz - array containing the number of block nonzeros in the various block rows of the 3258 off-diagonal portion of the local submatrix (possibly different for 3259 each block row), or NULL. 3260 3261 If the *_nnz parameter is given then the *_nz parameter is ignored 3262 3263 Options Database Keys: 3264 + -mat_block_size - size of the blocks to use 3265 - -mat_use_hash_table <fact> - set the hash table factor used during matrix assembly 3266 3267 Notes: 3268 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3269 then it must be used on all processors that share the object for that argument.
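   Example of use (a minimal sketch only, not taken from a PETSc example; the communicator, sizes, block size, and nonzero counts below are illustrative assumptions):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   /* reserve at most 5 block nonzeros per block row in the diagonal part
      and 2 in the off-diagonal part, with block size 3 */
   ierr = MatMPIBAIJSetPreallocation(A,3,5,NULL,2,NULL);CHKERRQ(ierr);
   /* ... insert entries with MatSetValuesBlocked() or MatSetValues(), then assemble ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve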
3270 3271 Storage Information: 3272 For a square global matrix we define each processor's diagonal portion 3273 to be its local rows and the corresponding columns (a square submatrix); 3274 each processor's off-diagonal portion encompasses the remainder of the 3275 local matrix (a rectangular submatrix). 3276 3277 The user can specify preallocated storage for the diagonal part of 3278 the local submatrix with either d_nz or d_nnz (not both). Set 3279 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3280 memory allocation. Likewise, specify preallocated storage for the 3281 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3282 3283 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3284 the figure below we depict these three local rows and all columns (0-11). 3285 3286 .vb 3287 0 1 2 3 4 5 6 7 8 9 10 11 3288 -------------------------- 3289 row 3 |o o o d d d o o o o o o 3290 row 4 |o o o d d d o o o o o o 3291 row 5 |o o o d d d o o o o o o 3292 -------------------------- 3293 .ve 3294 3295 Thus, any entries in the d locations are stored in the d (diagonal) 3296 submatrix, and any entries in the o locations are stored in the 3297 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3298 stored simply in the MATSEQBAIJ format for compressed row storage. 3299 3300 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3301 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3302 In general, for PDE problems in which most nonzeros are near the diagonal, 3303 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3304 or you will get TERRIBLE performance; see the users' manual chapter on 3305 matrices. 3306 3307 You can call MatGetInfo() to get information on how effective the preallocation was; 3308 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 3309 You can also run with the option -info and look for messages with the string 3310 malloc in them to see if additional memory allocation was needed. 3311 3312 Level: intermediate 3313 3314 .keywords: matrix, block, aij, compressed row, sparse, parallel 3315 3316 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3317 @*/ 3318 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3319 { 3320 PetscErrorCode ierr; 3321 3322 PetscFunctionBegin; 3323 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3324 PetscValidType(B,1); 3325 PetscValidLogicalCollectiveInt(B,bs,2); 3326 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "MatCreateBAIJ" 3332 /*@C 3333 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3334 (block compressed row). For good matrix assembly performance 3335 the user should preallocate the matrix storage by setting the parameters 3336 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3337 performance can be increased by more than a factor of 50. 3338 3339 Collective on MPI_Comm 3340 3341 Input Parameters: 3342 + comm - MPI communicator 3343 . bs - size of block, the blocks are ALWAYS square.
One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3344 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3345 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3346 This value should be the same as the local size used in creating the 3347 y vector for the matrix-vector product y = Ax. 3348 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3349 This value should be the same as the local size used in creating the 3350 x vector for the matrix-vector product y = Ax. 3351 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3352 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3353 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3354 submatrix (same for all local rows) 3355 . d_nnz - array containing the number of nonzero blocks in the various block rows 3356 in the diagonal portion of the local submatrix (possibly different for each block 3357 row), or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3358 and set it even if it is zero. 3359 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3360 submatrix (same for all local rows). 3361 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3362 off-diagonal portion of the local submatrix (possibly different for 3363 each block row), or NULL. 3364 3365 Output Parameter: 3366 . A - the matrix 3367 3368 Options Database Keys: 3369 + -mat_block_size - size of the blocks to use 3370 - -mat_use_hash_table <fact> - set the hash table factor used during matrix assembly 3371 3372 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3373 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3374 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 3375 3376 Notes: 3377 If the *_nnz parameter is given then the *_nz parameter is ignored 3378 3379 A nonzero block is any block that has 1 or more nonzeros in it 3380 3381 The user MUST specify either the local or global matrix dimensions 3382 (possibly both). 3383 3384 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3385 then it must be used on all processors that share the object for that argument. 3386 3387 Storage Information: 3388 For a square global matrix we define each processor's diagonal portion 3389 to be its local rows and the corresponding columns (a square submatrix); 3390 each processor's off-diagonal portion encompasses the remainder of the 3391 local matrix (a rectangular submatrix). 3392 3393 The user can specify preallocated storage for the diagonal part of 3394 the local submatrix with either d_nz or d_nnz (not both). Set 3395 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3396 memory allocation. Likewise, specify preallocated storage for the 3397 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3398 3399 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3400 the figure below we depict these three local rows and all columns (0-11).
3401 3402 .vb 3403 0 1 2 3 4 5 6 7 8 9 10 11 3404 -------------------------- 3405 row 3 |o o o d d d o o o o o o 3406 row 4 |o o o d d d o o o o o o 3407 row 5 |o o o d d d o o o o o o 3408 -------------------------- 3409 .ve 3410 3411 Thus, any entries in the d locations are stored in the d (diagonal) 3412 submatrix, and any entries in the o locations are stored in the 3413 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3414 stored simply in the MATSEQBAIJ format for compressed row storage. 3415 3416 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3417 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3418 In general, for PDE problems in which most nonzeros are near the diagonal, 3419 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3420 or you will get TERRIBLE performance; see the users' manual chapter on 3421 matrices. 3422 3423 Level: intermediate 3424 3425 .keywords: matrix, block, aij, compressed row, sparse, parallel 3426 3427 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3428 @*/ 3429 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3430 { 3431 PetscErrorCode ierr; 3432 PetscMPIInt size; 3433 3434 PetscFunctionBegin; 3435 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3436 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3437 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3438 if (size > 1) { 3439 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3440 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3441 } else { 3442 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3443 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3444 } 3445 PetscFunctionReturn(0); 3446 } 3447 3448 #undef __FUNCT__ 3449 #define __FUNCT__ "MatDuplicate_MPIBAIJ" 3450 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3451 { 3452 Mat mat; 3453 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3454 PetscErrorCode ierr; 3455 PetscInt len=0; 3456 3457 PetscFunctionBegin; 3458 *newmat = 0; 3459 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3460 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3461 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3462 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3463 3464 mat->factortype = matin->factortype; 3465 mat->preallocated = PETSC_TRUE; 3466 mat->assembled = PETSC_TRUE; 3467 mat->insertmode = NOT_SET_VALUES; 3468 3469 a = (Mat_MPIBAIJ*)mat->data; 3470 mat->rmap->bs = matin->rmap->bs; 3471 a->bs2 = oldmat->bs2; 3472 a->mbs = oldmat->mbs; 3473 a->nbs = oldmat->nbs; 3474 a->Mbs = oldmat->Mbs; 3475 a->Nbs = oldmat->Nbs; 3476 3477 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3478 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3479 3480 a->size = oldmat->size; 3481 a->rank = oldmat->rank; 3482 a->donotstash = oldmat->donotstash; 3483 a->roworiented = oldmat->roworiented; 3484 a->rowindices = 0; 3485 a->rowvalues = 0; 3486 a->getrowactive = PETSC_FALSE; 3487 a->barray = 0; 3488 a->rstartbs = oldmat->rstartbs; 3489 a->rendbs = oldmat->rendbs; 3490 a->cstartbs = oldmat->cstartbs; 3491 a->cendbs = 
oldmat->cendbs; 3492 3493 /* hash table stuff */ 3494 a->ht = 0; 3495 a->hd = 0; 3496 a->ht_size = 0; 3497 a->ht_flag = oldmat->ht_flag; 3498 a->ht_fact = oldmat->ht_fact; 3499 a->ht_total_ct = 0; 3500 a->ht_insert_ct = 0; 3501 3502 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 3503 if (oldmat->colmap) { 3504 #if defined(PETSC_USE_CTABLE) 3505 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3506 #else 3507 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3508 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3509 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3510 #endif 3511 } else a->colmap = 0; 3512 3513 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3514 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3515 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3516 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 3517 } else a->garray = 0; 3518 3519 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3520 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3521 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3522 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3523 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3524 3525 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3526 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3527 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3528 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3529 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3530 *newmat = mat; 3531 PetscFunctionReturn(0); 3532 } 3533 3534 #undef __FUNCT__ 3535 #define __FUNCT__ "MatLoad_MPIBAIJ" 3536 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 3537 { 3538 PetscErrorCode ierr; 3539 int fd; 3540 PetscInt i,nz,j,rstart,rend; 3541 PetscScalar *vals,*buf; 3542 MPI_Comm comm; 3543 MPI_Status status; 3544 PetscMPIInt rank,size,maxnz; 3545 PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 3546 PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 3547 PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 3548 PetscMPIInt tag = ((PetscObject)viewer)->tag; 3549 PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 3550 PetscInt dcount,kmax,k,nzcount,tmp,mend; 3551 3552 PetscFunctionBegin; 3553 /* force binary viewer to load .info file if it has not yet done so */ 3554 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3555 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3556 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 3557 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3558 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3559 if (bs < 0) bs = 1; 3560 3561 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3562 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3563 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3564 if (!rank) { 3565 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3566 if (header[0] != 
MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3567 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3568 } 3569 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3570 M = header[1]; N = header[2]; 3571 3572 /* If global sizes are set, check if they are consistent with those given in the file */ 3573 if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M); 3574 if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N); 3575 3576 if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 3577 3578 /* 3579 This code adds extra rows to make sure the number of rows is 3580 divisible by the blocksize 3581 */ 3582 Mbs = M/bs; 3583 extra_rows = bs - M + bs*Mbs; 3584 if (extra_rows == bs) extra_rows = 0; 3585 else Mbs++; 3586 if (extra_rows && !rank) { 3587 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3588 } 3589 3590 /* determine ownership of all rows */ 3591 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 3592 mbs = Mbs/size + ((Mbs % size) > rank); 3593 m = mbs*bs; 3594 } else { /* User set */ 3595 m = newmat->rmap->n; 3596 mbs = m/bs; 3597 } 3598 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 3599 ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3600 3601 /* process 0 needs enough room for process with most rows */ 3602 if (!rank) { 3603 mmax = rowners[1]; 3604 for (i=2; i<=size; i++) { 3605 mmax = PetscMax(mmax,rowners[i]); 3606 } 3607 mmax*=bs; 3608 } else mmax = -1; /* unused, but compiler warns anyway */ 3609 3610 rowners[0] = 0; 3611 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 3612 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 3613 rstart = rowners[rank]; 3614 rend = rowners[rank+1]; 3615 3616 /* distribute row lengths to all processors */ 3617 ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 3618 if (!rank) { 3619 mend = m; 3620 if (size == 1) mend = mend - extra_rows; 3621 ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 3622 for (j=mend; j<m; j++) locrowlens[j] = 1; 3623 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3624 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3625 for (j=0; j<m; j++) { 3626 procsnz[0] += locrowlens[j]; 3627 } 3628 for (i=1; i<size; i++) { 3629 mend = browners[i+1] - browners[i]; 3630 if (i == size-1) mend = mend - extra_rows; 3631 ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 3632 for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 3633 /* calculate the number of nonzeros on each processor */ 3634 for (j=0; j<browners[i+1]-browners[i]; j++) { 3635 procsnz[i] += rowlengths[j]; 3636 } 3637 ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3638 } 3639 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3640 } else { 3641 ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3642 } 3643 3644 if (!rank) { 3645 /* determine max buffer needed and allocate it */ 3646 maxnz = procsnz[0]; 3647 for (i=1; i<size; i++) { 3648 maxnz = PetscMax(maxnz,procsnz[i]); 3649 } 3650 ierr =
PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3651 3652 /* read in my part of the matrix column indices */ 3653 nz = procsnz[0]; 3654 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3655 mycols = ibuf; 3656 if (size == 1) nz -= extra_rows; 3657 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3658 if (size == 1) { 3659 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 3660 } 3661 3662 /* read in the column indices for all other processes (except the last) and ship them off */ 3663 for (i=1; i<size-1; i++) { 3664 nz = procsnz[i]; 3665 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3666 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3667 } 3668 /* read in the stuff for the last proc */ 3669 if (size != 1) { 3670 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 3671 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3672 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 3673 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 3674 } 3675 ierr = PetscFree(cols);CHKERRQ(ierr); 3676 } else { 3677 /* determine buffer space needed for message */ 3678 nz = 0; 3679 for (i=0; i<m; i++) { 3680 nz += locrowlens[i]; 3681 } 3682 ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 3683 mycols = ibuf; 3684 /* receive message of column indices */ 3685 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 3686 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 3687 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 3688 } 3689 3690 /* loop over local rows, determining number of off diagonal entries */ 3691 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 3692 ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 3693 rowcount = 0; nzcount = 0; 3694 for (i=0; i<mbs; i++) { 3695 dcount = 0; 3696 odcount = 0; 3697 for (j=0; j<bs; j++) { 3698 kmax = locrowlens[rowcount]; 3699 for (k=0; k<kmax; k++) { 3700 tmp = mycols[nzcount++]/bs; 3701 if (!mask[tmp]) { 3702 mask[tmp] = 1; 3703 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 3704 else masked1[dcount++] = tmp; 3705 } 3706 } 3707 rowcount++; 3708 } 3709 3710 dlens[i] = dcount; 3711 odlens[i] = odcount; 3712 3713 /* zero out the mask elements we set */ 3714 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 3715 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 3716 } 3717 3718 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3719 ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 3720 3721 if (!rank) { 3722 ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr); 3723 /* read in my part of the matrix numerical values */ 3724 nz = procsnz[0]; 3725 vals = buf; 3726 mycols = ibuf; 3727 if (size == 1) nz -= extra_rows; 3728 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3729 if (size == 1) { 3730 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 3731 } 3732 3733 /* insert into matrix */ 3734 jj = rstart*bs; 3735 for (i=0; i<m; i++) { 3736 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3737 mycols += locrowlens[i]; 3738 vals += locrowlens[i]; 3739 jj++; 3740 } 3741 /* read in other processors (except the last one) and ship out */ 3742 for (i=1; i<size-1; i++) { 3743 nz = procsnz[i]; 3744 vals = buf; 3745 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3746 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3747 } 3748 /* the last proc */ 3749
if (size != 1) { 3750 nz = procsnz[i] - extra_rows; 3751 vals = buf; 3752 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3753 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3754 ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3755 } 3756 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3757 } else { 3758 /* receive numeric values */ 3759 ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr); 3760 3761 /* receive message of values */ 3762 vals = buf; 3763 mycols = ibuf; 3764 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 3765 3766 /* insert into matrix */ 3767 jj = rstart*bs; 3768 for (i=0; i<m; i++) { 3769 ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 3770 mycols += locrowlens[i]; 3771 vals += locrowlens[i]; 3772 jj++; 3773 } 3774 } 3775 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 3776 ierr = PetscFree(buf);CHKERRQ(ierr); 3777 ierr = PetscFree(ibuf);CHKERRQ(ierr); 3778 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 3779 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 3780 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 3781 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3782 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3783 PetscFunctionReturn(0); 3784 } 3785 3786 #undef __FUNCT__ 3787 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3788 /*@ 3789 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3790 3791 Input Parameters: 3792 + mat - the matrix 3793 - fact - factor 3794 3795 Not Collective, each process can use a different factor 3796 3797 Level: advanced 3798 3799 Notes: 3800 This can also be set by the command line option: -mat_use_hash_table <fact> 3801 3802 .keywords: matrix, hashtable, factor, HT 3803 3804 .seealso: MatSetOption() 3805 @*/ 3806 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3807 { 3808 PetscErrorCode ierr; 3809 3810 PetscFunctionBegin; 3811 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3812 PetscFunctionReturn(0); 3813 } 3814 3815 #undef __FUNCT__ 3816 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 3817 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3818 { 3819 Mat_MPIBAIJ *baij; 3820 3821 PetscFunctionBegin; 3822 baij = (Mat_MPIBAIJ*)mat->data; 3823 baij->ht_fact = fact; 3824 PetscFunctionReturn(0); 3825 } 3826 3827 #undef __FUNCT__ 3828 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 3829 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3830 { 3831 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3832 3833 PetscFunctionBegin; 3834 if (Ad) *Ad = a->A; 3835 if (Ao) *Ao = a->B; 3836 if (colmap) *colmap = a->garray; 3837 PetscFunctionReturn(0); 3838 } 3839 3840 /* 3841 Special version for direct calls from Fortran (to eliminate two function call overheads) 3842 */ 3843 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3844 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3845 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3846 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3847 #endif 3848 3849 #undef __FUNCT__ 3850 #define __FUNCT__ "matmpibaijsetvaluesblocked_" 3851 /*@C 3852 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3853 3854 Collective on Mat 3855 3856 Input Parameters: 3857 + mat - the matrix 3858 . min - number of input rows 3859 . 
im - input rows 3860 . nin - number of input columns 3861 . in - input columns 3862 . v - numerical values input 3863 - addvin - INSERT_VALUES or ADD_VALUES 3864 3865 Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3866 3867 Level: advanced 3868 3869 .seealso: MatSetValuesBlocked() 3870 @*/ 3871 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3872 { 3873 /* convert input arguments to C version */ 3874 Mat mat = *matin; 3875 PetscInt m = *min, n = *nin; 3876 InsertMode addv = *addvin; 3877 3878 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3879 const MatScalar *value; 3880 MatScalar *barray = baij->barray; 3881 PetscBool roworiented = baij->roworiented; 3882 PetscErrorCode ierr; 3883 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3884 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3885 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3886 3887 PetscFunctionBegin; 3888 /* tasks normally handled by MatSetValuesBlocked() */ 3889 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3890 #if defined(PETSC_USE_DEBUG) 3891 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3892 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3893 #endif 3894 if (mat->assembled) { 3895 mat->was_assembled = PETSC_TRUE; 3896 mat->assembled = PETSC_FALSE; 3897 } 3898 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3899 3900 3901 if (!barray) { 3902 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3903 baij->barray = barray; 3904 } 3905 3906 if (roworiented) stepval = (n-1)*bs; 3907 else stepval = (m-1)*bs; 3908 3909 for (i=0; i<m; i++) { 3910 if (im[i] < 0) continue; 3911 #if defined(PETSC_USE_DEBUG) 3912 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3913 #endif 3914 if (im[i] >= rstart && im[i] < rend) { 3915 row = im[i] - rstart; 3916 for (j=0; j<n; j++) { 3917 /* If NumCol = 1 then a copy is not required */ 3918 if ((roworiented) && (n == 1)) { 3919 barray = (MatScalar*)v + i*bs2; 3920 } else if ((!roworiented) && (m == 1)) { 3921 barray = (MatScalar*)v + j*bs2; 3922 } else { /* Here a copy is required */ 3923 if (roworiented) { 3924 value = v + i*(stepval+bs)*bs + j*bs; 3925 } else { 3926 value = v + j*(stepval+bs)*bs + i*bs; 3927 } 3928 for (ii=0; ii<bs; ii++,value+=stepval) { 3929 for (jj=0; jj<bs; jj++) { 3930 *barray++ = *value++; 3931 } 3932 } 3933 barray -=bs2; 3934 } 3935 3936 if (in[j] >= cstart && in[j] < cend) { 3937 col = in[j] - cstart; 3938 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3939 } else if (in[j] < 0) continue; 3940 #if defined(PETSC_USE_DEBUG) 3941 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3942 #endif 3943 else { 3944 if (mat->was_assembled) { 3945 if (!baij->colmap) { 3946 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3947 } 3948 3949 #if defined(PETSC_USE_DEBUG) 3950 #if defined(PETSC_USE_CTABLE) 3951 { PetscInt data; 3952 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3953 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3954 } 3955 #else 3956 if ((baij->colmap[in[j]] - 1) 
% bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3957 #endif 3958 #endif 3959 #if defined(PETSC_USE_CTABLE) 3960 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3961 col = (col - 1)/bs; 3962 #else 3963 col = (baij->colmap[in[j]] - 1)/bs; 3964 #endif 3965 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3966 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3967 col = in[j]; 3968 } 3969 } else col = in[j]; 3970 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3971 } 3972 } 3973 } else { 3974 if (!baij->donotstash) { 3975 if (roworiented) { 3976 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3977 } else { 3978 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3979 } 3980 } 3981 } 3982 } 3983 3984 /* task normally handled by MatSetValuesBlocked() */ 3985 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3986 PetscFunctionReturn(0); 3987 } 3988 3989 #undef __FUNCT__ 3990 #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3991 /*@ 3992 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows 3993 in standard CSR format. 3994 3995 Collective on MPI_Comm 3996 3997 Input Parameters: 3998 + comm - MPI communicator 3999 . bs - the block size, only a block size of 1 is supported 4000 . m - number of local rows (Cannot be PETSC_DECIDE) 4001 . n - This value should be the same as the local size used in creating the 4002 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4003 calculated if N is given) For square matrices n is almost always m. 4004 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4005 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4006 . i - row indices 4007 . j - column indices 4008 - a - matrix values 4009 4010 Output Parameter: 4011 . mat - the matrix 4012 4013 Level: intermediate 4014 4015 Notes: 4016 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4017 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4018 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4019 4020 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 4021 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 4022 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 4023 with column-major ordering within blocks. 4024 4025 The i and j indices are 0 based, and the i indices are offsets into the local j array.
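   As a small illustration (an assumed layout, with bs=1): a process that owns two rows of a matrix with four global columns, holding the local entries
.vb
   1 0 0 2
   0 3 4 0
.ve
   would pass
.vb
   i = {0,2,4}           /* i[r] is the offset of row r in j and a */
   j = {0,3,1,2}         /* global column indices of the nonzeros  */
   a = {1.0,2.0,3.0,4.0} /* the corresponding values               */
.ve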
4026 4027 .keywords: matrix, aij, compressed row, sparse, parallel 4028 4029 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4030 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4031 @*/ 4032 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4033 { 4034 PetscErrorCode ierr; 4035 4036 PetscFunctionBegin; 4037 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4038 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4039 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4040 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4041 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 4042 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4043 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 4044 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4045 PetscFunctionReturn(0); 4046 } 4047 4048 #undef __FUNCT__ 4049 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ" 4050 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4051 { 4052 PetscErrorCode ierr; 4053 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 4054 PetscInt *indx; 4055 PetscScalar *values; 4056 4057 PetscFunctionBegin; 4058 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4059 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4060 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 4061 PetscInt *dnz,*onz,sum,mbs,Nbs; 4062 PetscInt *bindx,rmax=a->rmax,j; 4063 4064 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4065 mbs = m/bs; Nbs = N/cbs; 4066 if (n == PETSC_DECIDE) { 4067 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 4068 } 4069 /* Check sum(n) = Nbs */ 4070 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4071 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 4072 4073 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4074 rstart -= mbs; 4075 4076 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 4077 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 4078 for (i=0; i<mbs; i++) { 4079 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 4080 nnz = nnz/bs; 4081 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 4082 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 4083 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 4084 } 4085 ierr = PetscFree(bindx);CHKERRQ(ierr); 4086 4087 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4088 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4089 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4090 ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr); 4091 ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 4092 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4093 } 4094 4095 /* numeric phase */ 4096 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4097 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4098 4099 for (i=0; i<m; i++) { 4100 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4101 Ii = i + rstart; 4102 ierr =
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4103 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4104 } 4105 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4106 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4107 PetscFunctionReturn(0); 4108 } 4109