#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
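
/*
  Illustrative sketch (not part of this source file): MatStoreValues()/MatRetrieveValues() above
  simply forward to the sequential diagonal (A) and off-diagonal (B) blocks.  A typical caller,
  with an assembled matrix C whose nonzero pattern will not change, saves and later restores the
  numerical values like this:

     ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(C);CHKERRQ(ierr);
     ... overwrite entries of C, keeping the same nonzero pattern ...
     ierr = MatRetrieveValues(C);CHKERRQ(ierr);

  The matrix name C is an assumption made only for this example.
*/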

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  This is done in a non-scalable way since the
  length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  }
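
/*
  Illustrative sketch (assumption, not library code): both macros above locate a block column in a
  row whose block-column indices rp[] are kept sorted.  They bisect until fewer than four
  candidates remain and then finish with a short linear scan; stripped of the BAIJ bookkeeping
  the search step is:

     PetscInt low = 0,high = nrow,t,_i;
     while (high-low > 3) {
       t = (low+high)/2;
       if (rp[t] > bcol) high = t;
       else              low  = t;
     }
     for (_i=low; _i<high; _i++) {
       if (rp[_i] == bcol) break;     existing block found at slot _i
       if (rp[_i] > bcol)  break;     block missing; _i is the insertion point
     }

  When the block is missing the macros either jump to the *_noinsert label, raise an error, or
  reallocate the row and shift the later entries up, exactly as written above.
*/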

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
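
/*
  Illustrative sketch (assumption, not library code): MatGetValues_MPIBAIJ() above only serves
  point rows that are owned locally, while any global column is allowed (missing off-diagonal
  entries come back as zero).  The matching user-level call pattern is:

     PetscInt    rstart,rend,row,cols[2];
     PetscScalar vals[2];
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row = rstart;  cols[0] = row;  cols[1] = row+1;
     ierr = MatGetValues(mat,1,&row,2,cols,vals);CHKERRQ(ierr);

  The variable names are made up for the example; a row outside [rstart,rend) triggers the
  "Only local values currently supported" error raised above.
*/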

PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
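
/*
  Illustrative note (not part of the library): for NORM_FROBENIUS the routine above accumulates
  |a_ij|^2 over the local diagonal and off-diagonal blocks and combines the ranks with one
  MPIU_Allreduce, i.e. ||A||_F = sqrt( sum over ranks of the local sum of |a_ij|^2 ).  A caller
  reaches it through the public interface:

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);

  The handle "mat" is assumed to be an assembled MATMPIBAIJ matrix.
*/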

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
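
/*
  Illustrative sketch (assumption, not part of the library): the binary viewer above writes the
  standard PETSc binary layout (header, per-point-row lengths, column indices, values), so a
  MATMPIBAIJ matrix can be dumped through the public viewer interface:

     PetscViewer vb;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&vb);CHKERRQ(ierr);
     ierr = MatView(mat,vb);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&vb);CHKERRQ(ierr);

  The file name "mat.dat" and the viewer variable are made up for the example; the block size is
  recorded in the companion .info file by MatView_Binary_BlockSizes() above.
*/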

PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
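
/*
  Illustrative note (assumption, not library code): the MatMult routines above hide communication
  behind computation.  VecScatterBegin() starts gathering the off-process entries of xx into
  a->lvec, the diagonal block a->A is applied while those messages are in flight, and only then
  does VecScatterEnd() complete the gather so the off-diagonal block a->B can be applied.  The
  same split-phase pattern at the user level, with an application-owned scatter ctx and local
  work vector xloc (names invented for this sketch), is:

     ierr = VecScatterBegin(ctx,x,xloc,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ... do any work that does not need xloc ...
     ierr = VecScatterEnd(ctx,x,xloc,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ... xloc now holds the gathered ghost values ...
*/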

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
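
/*
  Illustrative sketch (assumption, not part of the library): MatGetRow_MPIBAIJ() above merges the
  diagonal and off-diagonal parts of one locally owned point row into sorted global column order.
  The matching user-level pattern is:

     PetscInt          row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... use ncols, cols[], vals[] ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);

  Here "row" must lie in the caller's ownership range, and only one row may be held between
  MatGetRow() and MatRestoreRow(), which is what the getrowactive flag enforces.
*/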

PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }
1555 1556 /* copy over the A part */ 1557 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1558 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1559 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1560 1561 for (i=0; i<mbs; i++) { 1562 rvals[0] = bs*(baij->rstartbs + i); 1563 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1564 for (j=ai[i]; j<ai[i+1]; j++) { 1565 col = (baij->cstartbs+aj[j])*bs; 1566 for (k=0; k<bs; k++) { 1567 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1568 1569 col++; a += bs; 1570 } 1571 } 1572 } 1573 /* copy over the B part */ 1574 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1575 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1576 for (i=0; i<mbs; i++) { 1577 rvals[0] = bs*(baij->rstartbs + i); 1578 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1579 for (j=ai[i]; j<ai[i+1]; j++) { 1580 col = baij->garray[aj[j]]*bs; 1581 for (k=0; k<bs; k++) { 1582 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1583 col++; 1584 a += bs; 1585 } 1586 } 1587 } 1588 ierr = PetscFree(rvals);CHKERRQ(ierr); 1589 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1590 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1591 1592 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1593 else { 1594 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1595 } 1596 PetscFunctionReturn(0); 1597 } 1598 1599 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1600 { 1601 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1602 Mat a = baij->A,b = baij->B; 1603 PetscErrorCode ierr; 1604 PetscInt s1,s2,s3; 1605 1606 PetscFunctionBegin; 1607 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1608 if (rr) { 1609 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1610 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1611 /* Overlap communication with computation. */ 1612 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 } 1614 if (ll) { 1615 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1616 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1617 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1618 } 1619 /* scale the diagonal block */ 1620 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1621 1622 if (rr) { 1623 /* Do a scatter end and then right scale the off-diagonal block */ 1624 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1631 { 1632 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1633 PetscInt *lrows; 1634 PetscInt r, len; 1635 PetscBool cong; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 /* get locally owned rows */ 1640 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 1641 /* fix right hand side if needed */ 1642 if (x && b) { 1643 const PetscScalar *xx; 1644 PetscScalar *bb; 1645 1646 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1647 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1648 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1649 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1650 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1651 } 1652 1653 /* actually zap the local rows */ 1654 /* 1655 Zero the required rows. 
If the "diagonal block" of the matrix 1656 is square and the user wishes to set the diagonal we use separate 1657 code so that MatSetValues() is not called for each diagonal allocating 1658 new memory, thus calling lots of mallocs and slowing things down. 1659 1660 */ 1661 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1662 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1663 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1664 if ((diag != 0.0) && cong) { 1665 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1666 } else if (diag != 0.0) { 1667 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1668 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1669 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1670 for (r = 0; r < len; ++r) { 1671 const PetscInt row = lrows[r] + A->rmap->rstart; 1672 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1673 } 1674 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1675 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1676 } else { 1677 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1678 } 1679 ierr = PetscFree(lrows);CHKERRQ(ierr); 1680 1681 /* only change matrix nonzero state if pattern was allowed to be changed */ 1682 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1683 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1684 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1685 } 1686 PetscFunctionReturn(0); 1687 } 1688 1689 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1690 { 1691 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1692 PetscErrorCode ierr; 1693 PetscMPIInt n = A->rmap->n,p = 0; 1694 PetscInt i,j,k,r,len = 0,row,col,count; 1695 PetscInt *lrows,*owners = A->rmap->range; 1696 PetscSFNode *rrows; 1697 PetscSF sf; 1698 const PetscScalar *xx; 1699 PetscScalar *bb,*mask; 1700 Vec xmask,lmask; 1701 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1702 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1703 PetscScalar *aa; 1704 1705 PetscFunctionBegin; 1706 /* Create SF where leaves are input rows and roots are owned rows */ 1707 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1708 for (r = 0; r < n; ++r) lrows[r] = -1; 1709 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1710 for (r = 0; r < N; ++r) { 1711 const PetscInt idx = rows[r]; 1712 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1713 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1714 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1715 } 1716 rrows[r].rank = p; 1717 rrows[r].index = rows[r] - owners[p]; 1718 } 1719 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1720 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1721 /* Collect flags for rows to be zeroed */ 1722 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1723 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1724 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1725 /* Compress 
and put in row numbers */ 1726 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1727 /* zero diagonal part of matrix */ 1728 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1729 /* handle off diagonal part of matrix */ 1730 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1731 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1732 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1733 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1734 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1735 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1737 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1738 if (x) { 1739 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1740 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1741 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1742 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1743 } 1744 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1745 /* remove zeroed rows of off diagonal matrix */ 1746 for (i = 0; i < len; ++i) { 1747 row = lrows[i]; 1748 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1749 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1750 for (k = 0; k < count; ++k) { 1751 aa[0] = 0.0; 1752 aa += bs; 1753 } 1754 } 1755 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1756 for (i = 0; i < l->B->rmap->N; ++i) { 1757 row = i/bs; 1758 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1759 for (k = 0; k < bs; ++k) { 1760 col = bs*baij->j[j] + k; 1761 if (PetscAbsScalar(mask[col])) { 1762 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1763 if (x) bb[i] -= aa[0]*xx[col]; 1764 aa[0] = 0.0; 1765 } 1766 } 1767 } 1768 } 1769 if (x) { 1770 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1771 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1772 } 1773 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1774 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1775 ierr = PetscFree(lrows);CHKERRQ(ierr); 1776 1777 /* only change matrix nonzero state if pattern was allowed to be changed */ 1778 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1779 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1780 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1781 } 1782 PetscFunctionReturn(0); 1783 } 1784 1785 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1786 { 1787 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1788 PetscErrorCode ierr; 1789 1790 PetscFunctionBegin; 1791 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1792 PetscFunctionReturn(0); 1793 } 1794 1795 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1796 1797 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1798 { 1799 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1800 Mat a,b,c,d; 1801 PetscBool flg; 1802 PetscErrorCode ierr; 1803 1804 PetscFunctionBegin; 1805 a = matA->A; b = matA->B; 1806 c = matB->A; d = matB->B; 1807 1808 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1809 if (flg) { 1810 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1811 } 1812 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 1816 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1817 { 1818 PetscErrorCode ierr; 1819 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*)A->data; 1820 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1821 1822 PetscFunctionBegin; 1823 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1824 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1825 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1826 } else { 1827 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1828 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1829 } 1830 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1835 { 1836 PetscErrorCode ierr; 1837 1838 PetscFunctionBegin; 1839 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1844 { 1845 PetscErrorCode ierr; 1846 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1847 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1848 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1849 1850 PetscFunctionBegin; 1851 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1852 PetscFunctionReturn(0); 1853 } 1854 1855 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1856 { 1857 PetscErrorCode ierr; 1858 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1859 PetscBLASInt bnz,one=1; 1860 Mat_SeqBAIJ *x,*y; 1861 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1862 1863 PetscFunctionBegin; 1864 if (str == SAME_NONZERO_PATTERN) { 1865 PetscScalar alpha = a; 1866 x = (Mat_SeqBAIJ*)xx->A->data; 1867 y = (Mat_SeqBAIJ*)yy->A->data; 1868 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1869 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1870 x = (Mat_SeqBAIJ*)xx->B->data; 1871 y = (Mat_SeqBAIJ*)yy->B->data; 1872 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1873 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1874 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1875 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1876 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1877 } else { 1878 Mat B; 1879 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1880 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1881 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1882 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1883 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1884 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1885 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1886 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 1887 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1888 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1889 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1890 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1891 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1892 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1893 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1894 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1900 { 1901 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1906 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1911 { 1912 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1913 PetscErrorCode ierr; 1914 1915 PetscFunctionBegin; 1916 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1917 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1922 { 1923 PetscErrorCode ierr; 1924 IS iscol_local; 1925 PetscInt csize; 1926 1927 PetscFunctionBegin; 1928 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1929 if (call == MAT_REUSE_MATRIX) { 1930 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1931 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1932 } else { 1933 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1934 } 1935 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1936 if (call == MAT_INITIAL_MATRIX) { 1937 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1938 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1939 } 1940 PetscFunctionReturn(0); 1941 } 1942 1943 /* 1944 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1945 in local and then by concatenating the local matrices the end result. 1946 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1947 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1948 */ 1949 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1950 { 1951 PetscErrorCode ierr; 1952 PetscMPIInt rank,size; 1953 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1954 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1955 Mat M,Mreuse; 1956 MatScalar *vwork,*aa; 1957 MPI_Comm comm; 1958 IS isrow_new, iscol_new; 1959 Mat_SeqBAIJ *aij; 1960 1961 PetscFunctionBegin; 1962 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1963 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1964 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1965 /* The compression and expansion should be avoided. 
Doesn't point 1966 out errors, might change the indices, hence buggey */ 1967 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 1968 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 1969 1970 if (call == MAT_REUSE_MATRIX) { 1971 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 1972 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1973 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 1974 } else { 1975 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 1976 } 1977 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 1978 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 1979 /* 1980 m - number of local rows 1981 n - number of columns (same on all processors) 1982 rstart - first row in new global matrix generated 1983 */ 1984 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1985 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1986 m = m/bs; 1987 n = n/bs; 1988 1989 if (call == MAT_INITIAL_MATRIX) { 1990 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1991 ii = aij->i; 1992 jj = aij->j; 1993 1994 /* 1995 Determine the number of non-zeros in the diagonal and off-diagonal 1996 portions of the matrix in order to do correct preallocation 1997 */ 1998 1999 /* first get start and end of "diagonal" columns */ 2000 if (csize == PETSC_DECIDE) { 2001 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2002 if (mglobal == n*bs) { /* square matrix */ 2003 nlocal = m; 2004 } else { 2005 nlocal = n/size + ((n % size) > rank); 2006 } 2007 } else { 2008 nlocal = csize/bs; 2009 } 2010 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2011 rstart = rend - nlocal; 2012 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2013 2014 /* next, compute all the lengths */ 2015 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2016 for (i=0; i<m; i++) { 2017 jend = ii[i+1] - ii[i]; 2018 olen = 0; 2019 dlen = 0; 2020 for (j=0; j<jend; j++) { 2021 if (*jj < rstart || *jj >= rend) olen++; 2022 else dlen++; 2023 jj++; 2024 } 2025 olens[i] = olen; 2026 dlens[i] = dlen; 2027 } 2028 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2029 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2030 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2031 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2032 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2033 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2034 } else { 2035 PetscInt ml,nl; 2036 2037 M = *newmat; 2038 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2039 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2040 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2041 /* 2042 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2043 rather than the slower MatSetValues(). 
2044 */ 2045 M->was_assembled = PETSC_TRUE; 2046 M->assembled = PETSC_FALSE; 2047 } 2048 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2049 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2050 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2051 ii = aij->i; 2052 jj = aij->j; 2053 aa = aij->a; 2054 for (i=0; i<m; i++) { 2055 row = rstart/bs + i; 2056 nz = ii[i+1] - ii[i]; 2057 cwork = jj; jj += nz; 2058 vwork = aa; aa += nz*bs*bs; 2059 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2060 } 2061 2062 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2064 *newmat = M; 2065 2066 /* save submatrix used in processor for next request */ 2067 if (call == MAT_INITIAL_MATRIX) { 2068 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2069 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2075 { 2076 MPI_Comm comm,pcomm; 2077 PetscInt clocal_size,nrows; 2078 const PetscInt *rows; 2079 PetscMPIInt size; 2080 IS crowp,lcolp; 2081 PetscErrorCode ierr; 2082 2083 PetscFunctionBegin; 2084 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2085 /* make a collective version of 'rowp' */ 2086 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2087 if (pcomm==comm) { 2088 crowp = rowp; 2089 } else { 2090 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2091 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2092 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2093 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2094 } 2095 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2096 /* make a local version of 'colp' */ 2097 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2098 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2099 if (size==1) { 2100 lcolp = colp; 2101 } else { 2102 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2103 } 2104 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2105 /* now we just get the submatrix */ 2106 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2107 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2108 /* clean up */ 2109 if (pcomm!=comm) { 2110 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2111 } 2112 if (size>1) { 2113 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2119 { 2120 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2121 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2122 2123 PetscFunctionBegin; 2124 if (nghosts) *nghosts = B->nbs; 2125 if (ghosts) *ghosts = baij->garray; 2126 PetscFunctionReturn(0); 2127 } 2128 2129 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2130 { 2131 Mat B; 2132 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2133 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2134 Mat_SeqAIJ *b; 2135 PetscErrorCode ierr; 2136 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2137 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2138 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2139 2140 PetscFunctionBegin; 2141 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2142 ierr = 
MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2143 2144 /* ---------------------------------------------------------------- 2145 Tell every processor the number of nonzeros per row 2146 */ 2147 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2148 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2149 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2150 } 2151 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2152 displs = recvcounts + size; 2153 for (i=0; i<size; i++) { 2154 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2155 displs[i] = A->rmap->range[i]/bs; 2156 } 2157 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2158 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2159 #else 2160 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2161 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2162 #endif 2163 /* --------------------------------------------------------------- 2164 Create the sequential matrix of the same type as the local block diagonal 2165 */ 2166 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2167 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2168 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2169 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2170 b = (Mat_SeqAIJ*)B->data; 2171 2172 /*-------------------------------------------------------------------- 2173 Copy my part of matrix column indices over 2174 */ 2175 sendcount = ad->nz + bd->nz; 2176 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2177 a_jsendbuf = ad->j; 2178 b_jsendbuf = bd->j; 2179 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2180 cnt = 0; 2181 for (i=0; i<n; i++) { 2182 2183 /* put in lower diagonal portion */ 2184 m = bd->i[i+1] - bd->i[i]; 2185 while (m > 0) { 2186 /* is it above diagonal (in bd (compressed) numbering) */ 2187 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2188 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2189 m--; 2190 } 2191 2192 /* put in diagonal portion */ 2193 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2194 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2195 } 2196 2197 /* put in upper diagonal portion */ 2198 while (m-- > 0) { 2199 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2200 } 2201 } 2202 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2203 2204 /*-------------------------------------------------------------------- 2205 Gather all column indices to all processors 2206 */ 2207 for (i=0; i<size; i++) { 2208 recvcounts[i] = 0; 2209 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2210 recvcounts[i] += lens[j]; 2211 } 2212 } 2213 displs[0] = 0; 2214 for (i=1; i<size; i++) { 2215 displs[i] = displs[i-1] + recvcounts[i-1]; 2216 } 2217 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2218 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2219 #else 2220 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2221 #endif 2222 /*-------------------------------------------------------------------- 2223 Assemble the matrix into useable form (note numerical values not yet set) 2224 */ 
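  /* At this point every process holds the complete global set of block column indices in
     b->j. Only the nonzero pattern is needed (the numerical values of B are never filled
     in), so the remaining work is to rebuild the CSR row pointers b->i and the row
     lengths b->ilen from the gathered lens[] array and then assemble the sequential
     matrix. */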
2225 /* set the b->ilen (length of each row) values */ 2226 ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr); 2227 /* set the b->i indices */ 2228 b->i[0] = 0; 2229 for (i=1; i<=A->rmap->N/bs; i++) { 2230 b->i[i] = b->i[i-1] + lens[i-1]; 2231 } 2232 ierr = PetscFree(lens);CHKERRQ(ierr); 2233 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2234 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2235 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2236 2237 if (A->symmetric) { 2238 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2239 } else if (A->hermitian) { 2240 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2241 } else if (A->structurally_symmetric) { 2242 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2243 } 2244 *newmat = B; 2245 PetscFunctionReturn(0); 2246 } 2247 2248 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2249 { 2250 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2251 PetscErrorCode ierr; 2252 Vec bb1 = 0; 2253 2254 PetscFunctionBegin; 2255 if (flag == SOR_APPLY_UPPER) { 2256 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2257 PetscFunctionReturn(0); 2258 } 2259 2260 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2261 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2262 } 2263 2264 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2265 if (flag & SOR_ZERO_INITIAL_GUESS) { 2266 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2267 its--; 2268 } 2269 2270 while (its--) { 2271 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2272 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2273 2274 /* update rhs: bb1 = bb - B*x */ 2275 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2276 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2277 2278 /* local sweep */ 2279 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2280 } 2281 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2282 if (flag & SOR_ZERO_INITIAL_GUESS) { 2283 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2284 its--; 2285 } 2286 while (its--) { 2287 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2288 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2289 2290 /* update rhs: bb1 = bb - B*x */ 2291 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2292 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2293 2294 /* local sweep */ 2295 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2296 } 2297 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2298 if (flag & SOR_ZERO_INITIAL_GUESS) { 2299 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2300 its--; 2301 } 2302 while (its--) { 2303 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2304 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2305 2306 /* update rhs: bb1 = bb - B*x */ 2307 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2308 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2309 2310 /* local sweep */ 2311 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2312 } 2313 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2314 2315 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2320 { 2321 PetscErrorCode ierr; 2322 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2323 PetscInt N,i,*garray = aij->garray; 2324 PetscInt ib,jb,bs = A->rmap->bs; 2325 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2326 MatScalar *a_val = a_aij->a; 2327 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2328 MatScalar *b_val = b_aij->a; 2329 PetscReal *work; 2330 2331 PetscFunctionBegin; 2332 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2333 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2334 if (type == NORM_2) { 2335 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2336 for (jb=0; jb<bs; jb++) { 2337 for (ib=0; ib<bs; ib++) { 2338 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2339 a_val++; 2340 } 2341 } 2342 } 2343 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2344 for (jb=0; jb<bs; jb++) { 2345 for (ib=0; ib<bs; ib++) { 2346 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2347 b_val++; 2348 } 2349 } 2350 } 2351 } else if (type == NORM_1) { 2352 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2353 for (jb=0; jb<bs; jb++) { 2354 for (ib=0; ib<bs; ib++) { 2355 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2356 a_val++; 2357 } 2358 } 2359 } 2360 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2361 for (jb=0; jb<bs; jb++) { 2362 for (ib=0; ib<bs; ib++) { 2363 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2364 b_val++; 2365 } 2366 } 2367 } 2368 } else if (type == NORM_INFINITY) { 2369 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2370 for (jb=0; jb<bs; jb++) { 2371 for (ib=0; ib<bs; ib++) { 2372 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2373 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2374 a_val++; 2375 } 2376 } 2377 } 2378 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2379 for (jb=0; jb<bs; jb++) { 2380 for (ib=0; ib<bs; ib++) { 2381 int col = garray[b_aij->j[i]] * bs + jb; 2382 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2383 b_val++; 2384 } 2385 } 2386 } 2387 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2388 if (type == NORM_INFINITY) { 2389 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2390 } else { 2391 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2392 } 2393 ierr = PetscFree(work);CHKERRQ(ierr); 2394 if (type == NORM_2) { 2395 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2396 } 2397 PetscFunctionReturn(0); 2398 } 2399 2400 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2401 { 2402 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2403 PetscErrorCode ierr; 2404 2405 PetscFunctionBegin; 2406 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2407 A->factorerrortype = a->A->factorerrortype; 2408 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2409 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2410 PetscFunctionReturn(0); 2411 } 2412 2413 PetscErrorCode MatShift_MPIBAIJ(Mat 
Y,PetscScalar a) 2414 { 2415 PetscErrorCode ierr; 2416 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2417 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2418 2419 PetscFunctionBegin; 2420 if (!Y->preallocated) { 2421 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2422 } else if (!aij->nz) { 2423 PetscInt nonew = aij->nonew; 2424 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2425 aij->nonew = nonew; 2426 } 2427 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2432 { 2433 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2434 PetscErrorCode ierr; 2435 2436 PetscFunctionBegin; 2437 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2438 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2439 if (d) { 2440 PetscInt rstart; 2441 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2442 *d += rstart/A->rmap->bs; 2443 2444 } 2445 PetscFunctionReturn(0); 2446 } 2447 2448 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2449 { 2450 PetscFunctionBegin; 2451 *a = ((Mat_MPIBAIJ*)A->data)->A; 2452 PetscFunctionReturn(0); 2453 } 2454 2455 /* -------------------------------------------------------------------*/ 2456 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2457 MatGetRow_MPIBAIJ, 2458 MatRestoreRow_MPIBAIJ, 2459 MatMult_MPIBAIJ, 2460 /* 4*/ MatMultAdd_MPIBAIJ, 2461 MatMultTranspose_MPIBAIJ, 2462 MatMultTransposeAdd_MPIBAIJ, 2463 0, 2464 0, 2465 0, 2466 /*10*/ 0, 2467 0, 2468 0, 2469 MatSOR_MPIBAIJ, 2470 MatTranspose_MPIBAIJ, 2471 /*15*/ MatGetInfo_MPIBAIJ, 2472 MatEqual_MPIBAIJ, 2473 MatGetDiagonal_MPIBAIJ, 2474 MatDiagonalScale_MPIBAIJ, 2475 MatNorm_MPIBAIJ, 2476 /*20*/ MatAssemblyBegin_MPIBAIJ, 2477 MatAssemblyEnd_MPIBAIJ, 2478 MatSetOption_MPIBAIJ, 2479 MatZeroEntries_MPIBAIJ, 2480 /*24*/ MatZeroRows_MPIBAIJ, 2481 0, 2482 0, 2483 0, 2484 0, 2485 /*29*/ MatSetUp_MPIBAIJ, 2486 0, 2487 0, 2488 MatGetDiagonalBlock_MPIBAIJ, 2489 0, 2490 /*34*/ MatDuplicate_MPIBAIJ, 2491 0, 2492 0, 2493 0, 2494 0, 2495 /*39*/ MatAXPY_MPIBAIJ, 2496 MatCreateSubMatrices_MPIBAIJ, 2497 MatIncreaseOverlap_MPIBAIJ, 2498 MatGetValues_MPIBAIJ, 2499 MatCopy_MPIBAIJ, 2500 /*44*/ 0, 2501 MatScale_MPIBAIJ, 2502 MatShift_MPIBAIJ, 2503 0, 2504 MatZeroRowsColumns_MPIBAIJ, 2505 /*49*/ 0, 2506 0, 2507 0, 2508 0, 2509 0, 2510 /*54*/ MatFDColoringCreate_MPIXAIJ, 2511 0, 2512 MatSetUnfactored_MPIBAIJ, 2513 MatPermute_MPIBAIJ, 2514 MatSetValuesBlocked_MPIBAIJ, 2515 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2516 MatDestroy_MPIBAIJ, 2517 MatView_MPIBAIJ, 2518 0, 2519 0, 2520 /*64*/ 0, 2521 0, 2522 0, 2523 0, 2524 0, 2525 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2526 0, 2527 0, 2528 0, 2529 0, 2530 /*74*/ 0, 2531 MatFDColoringApply_BAIJ, 2532 0, 2533 0, 2534 0, 2535 /*79*/ 0, 2536 0, 2537 0, 2538 0, 2539 MatLoad_MPIBAIJ, 2540 /*84*/ 0, 2541 0, 2542 0, 2543 0, 2544 0, 2545 /*89*/ 0, 2546 0, 2547 0, 2548 0, 2549 0, 2550 /*94*/ 0, 2551 0, 2552 0, 2553 0, 2554 0, 2555 /*99*/ 0, 2556 0, 2557 0, 2558 0, 2559 0, 2560 /*104*/0, 2561 MatRealPart_MPIBAIJ, 2562 MatImaginaryPart_MPIBAIJ, 2563 0, 2564 0, 2565 /*109*/0, 2566 0, 2567 0, 2568 0, 2569 MatMissingDiagonal_MPIBAIJ, 2570 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2571 0, 2572 MatGetGhosts_MPIBAIJ, 2573 0, 2574 0, 2575 /*119*/0, 2576 0, 2577 0, 2578 0, 2579 MatGetMultiProcBlock_MPIBAIJ, 2580 /*124*/0, 2581 MatGetColumnNorms_MPIBAIJ, 2582 
MatInvertBlockDiagonal_MPIBAIJ, 2583 0, 2584 0, 2585 /*129*/ 0, 2586 0, 2587 0, 2588 0, 2589 0, 2590 /*134*/ 0, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*139*/ MatSetBlockSizes_Default, 2596 0, 2597 0, 2598 MatFDColoringSetUp_MPIXAIJ, 2599 0, 2600 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2601 }; 2602 2603 2604 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2605 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2606 2607 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2608 { 2609 PetscInt m,rstart,cstart,cend; 2610 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2611 const PetscInt *JJ =0; 2612 PetscScalar *values=0; 2613 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2614 PetscErrorCode ierr; 2615 PetscBool nooffprocentries; 2616 2617 PetscFunctionBegin; 2618 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2619 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2620 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2621 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2622 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2623 m = B->rmap->n/bs; 2624 rstart = B->rmap->rstart/bs; 2625 cstart = B->cmap->rstart/bs; 2626 cend = B->cmap->rend/bs; 2627 2628 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2629 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2630 for (i=0; i<m; i++) { 2631 nz = ii[i+1] - ii[i]; 2632 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2633 nz_max = PetscMax(nz_max,nz); 2634 dlen = 0; 2635 olen = 0; 2636 JJ = jj + ii[i]; 2637 for (j=0; j<nz; j++) { 2638 if (*JJ < cstart || *JJ >= cend) olen++; 2639 else dlen++; 2640 JJ++; 2641 } 2642 d_nnz[i] = dlen; 2643 o_nnz[i] = olen; 2644 } 2645 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2646 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2647 2648 values = (PetscScalar*)V; 2649 if (!values) { 2650 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2651 } 2652 for (i=0; i<m; i++) { 2653 PetscInt row = i + rstart; 2654 PetscInt ncols = ii[i+1] - ii[i]; 2655 const PetscInt *icols = jj + ii[i]; 2656 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2657 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2658 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2659 } else { /* block ordering does not match so we can only insert one block at a time. */ 2660 PetscInt j; 2661 for (j=0; j<ncols; j++) { 2662 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2663 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2664 } 2665 } 2666 } 2667 2668 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2669 nooffprocentries = B->nooffprocentries; 2670 B->nooffprocentries = PETSC_TRUE; 2671 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2672 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2673 B->nooffprocentries = nooffprocentries; 2674 2675 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2676 PetscFunctionReturn(0); 2677 } 2678 2679 /*@C 2680 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2681 2682 Collective 2683 2684 Input Parameters: 2685 + B - the matrix 2686 . bs - the block size 2687 . i - the indices into j for the start of each local row (starts with zero) 2688 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2689 - v - optional values in the matrix 2690 2691 Level: advanced 2692 2693 Notes: 2694 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2695 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2696 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2697 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2698 block column and the second index is over columns within a block. 2699 2700 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2701 2702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2703 @*/ 2704 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2705 { 2706 PetscErrorCode ierr; 2707 2708 PetscFunctionBegin; 2709 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2710 PetscValidType(B,1); 2711 PetscValidLogicalCollectiveInt(B,bs,2); 2712 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2717 { 2718 Mat_MPIBAIJ *b; 2719 PetscErrorCode ierr; 2720 PetscInt i; 2721 PetscMPIInt size; 2722 2723 PetscFunctionBegin; 2724 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2725 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2726 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2727 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2728 2729 if (d_nnz) { 2730 for (i=0; i<B->rmap->n/bs; i++) { 2731 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2732 } 2733 } 2734 if (o_nnz) { 2735 for (i=0; i<B->rmap->n/bs; i++) { 2736 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2737 } 2738 } 2739 2740 b = (Mat_MPIBAIJ*)B->data; 2741 b->bs2 = bs*bs; 2742 b->mbs = B->rmap->n/bs; 2743 b->nbs = B->cmap->n/bs; 2744 
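  /* The remaining fields cache the parallel layout in units of blocks: Mbs/Nbs are the
     global numbers of block rows/columns, and rangebs[], rstartbs/rendbs, cstartbs/cendbs
     are the block ownership ranges derived from the row and column PetscLayouts. */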
b->Mbs = B->rmap->N/bs; 2745 b->Nbs = B->cmap->N/bs; 2746 2747 for (i=0; i<=b->size; i++) { 2748 b->rangebs[i] = B->rmap->range[i]/bs; 2749 } 2750 b->rstartbs = B->rmap->rstart/bs; 2751 b->rendbs = B->rmap->rend/bs; 2752 b->cstartbs = B->cmap->rstart/bs; 2753 b->cendbs = B->cmap->rend/bs; 2754 2755 #if defined(PETSC_USE_CTABLE) 2756 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2757 #else 2758 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2759 #endif 2760 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2761 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2762 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2763 2764 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2765 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2766 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2767 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2768 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2769 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2770 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2771 2772 if (!B->preallocated) { 2773 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2774 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2775 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2776 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2777 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2778 } 2779 2780 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2781 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2782 B->preallocated = PETSC_TRUE; 2783 B->was_assembled = PETSC_FALSE; 2784 B->assembled = PETSC_FALSE; 2785 PetscFunctionReturn(0); 2786 } 2787 2788 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2789 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2790 2791 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2792 { 2793 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2794 PetscErrorCode ierr; 2795 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2796 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2797 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2798 2799 PetscFunctionBegin; 2800 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2801 ii[0] = 0; 2802 for (i=0; i<M; i++) { 2803 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2804 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2805 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2806 /* remove one from count of matrix has diagonal */ 2807 for (j=id[i]; j<id[i+1]; j++) { 2808 if (jd[j] == i) {ii[i+1]--;break;} 2809 } 2810 } 2811 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2812 cnt = 0; 2813 for (i=0; i<M; i++) { 2814 for (j=io[i]; j<io[i+1]; j++) { 2815 if (garray[jo[j]] > rstart) break; 2816 jj[cnt++] = garray[jo[j]]; 2817 } 2818 for (k=id[i]; k<id[i+1]; k++) { 2819 if (jd[k] != i) { 2820 jj[cnt++] = rstart + jd[k]; 2821 } 2822 } 2823 for (; j<io[i+1]; j++) { 2824 jj[cnt++] = garray[jo[j]]; 2825 } 2826 } 2827 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2828 
PetscFunctionReturn(0); 2829 } 2830 2831 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2832 2833 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2834 2835 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2836 { 2837 PetscErrorCode ierr; 2838 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2839 Mat B; 2840 Mat_MPIAIJ *b; 2841 2842 PetscFunctionBegin; 2843 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2844 2845 if (reuse == MAT_REUSE_MATRIX) { 2846 B = *newmat; 2847 } else { 2848 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2849 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2850 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2851 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2852 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2853 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2854 } 2855 b = (Mat_MPIAIJ*) B->data; 2856 2857 if (reuse == MAT_REUSE_MATRIX) { 2858 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 2859 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 2860 } else { 2861 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2862 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2863 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2864 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2865 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2866 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2867 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2868 } 2869 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2870 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2871 2872 if (reuse == MAT_INPLACE_MATRIX) { 2873 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 2874 } else { 2875 *newmat = B; 2876 } 2877 PetscFunctionReturn(0); 2878 } 2879 2880 /*MC 2881 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2882 2883 Options Database Keys: 2884 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2885 . -mat_block_size <bs> - set the blocksize used to store the matrix 2886 - -mat_use_hash_table <fact> 2887 2888 Level: beginner 2889 2890 Notes: 2891 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 2892 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2893 2894 .seealso: MatCreateBAIJ 2895 M*/ 2896 2897 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2898 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 2899 2900 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2901 { 2902 Mat_MPIBAIJ *b; 2903 PetscErrorCode ierr; 2904 PetscBool flg = PETSC_FALSE; 2905 2906 PetscFunctionBegin; 2907 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2908 B->data = (void*)b; 2909 2910 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2911 B->assembled = PETSC_FALSE; 2912 2913 B->insertmode = NOT_SET_VALUES; 2914 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2915 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2916 2917 /* build local table of row and column ownerships */ 2918 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 2919 2920 /* build cache for off array entries formed */ 2921 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2922 2923 b->donotstash = PETSC_FALSE; 2924 b->colmap = NULL; 2925 b->garray = NULL; 2926 b->roworiented = PETSC_TRUE; 2927 2928 /* stuff used in block assembly */ 2929 b->barray = 0; 2930 2931 /* stuff used for matrix vector multiply */ 2932 b->lvec = 0; 2933 b->Mvctx = 0; 2934 2935 /* stuff for MatGetRow() */ 2936 b->rowindices = 0; 2937 b->rowvalues = 0; 2938 b->getrowactive = PETSC_FALSE; 2939 2940 /* hash table stuff */ 2941 b->ht = 0; 2942 b->hd = 0; 2943 b->ht_size = 0; 2944 b->ht_flag = PETSC_FALSE; 2945 b->ht_fact = 0; 2946 b->ht_total_ct = 0; 2947 b->ht_insert_ct = 0; 2948 2949 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2950 b->ijonly = PETSC_FALSE; 2951 2952 2953 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2954 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2955 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 2956 #if defined(PETSC_HAVE_HYPRE) 2957 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 2958 #endif 2959 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2960 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2961 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2962 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2963 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2964 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2965 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 2966 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 2967 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2968 2969 ierr = 
PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2970 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr); 2971 if (flg) { 2972 PetscReal fact = 1.39; 2973 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2974 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2975 if (fact <= 1.0) fact = 1.39; 2976 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2977 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2978 } 2979 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 /*MC 2984 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2985 2986 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2987 and MATMPIBAIJ otherwise. 2988 2989 Options Database Keys: 2990 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2991 2992 Level: beginner 2993 2994 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2995 M*/ 2996 2997 /*@C 2998 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2999 (block compressed row). For good matrix assembly performance 3000 the user should preallocate the matrix storage by setting the parameters 3001 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3002 performance can be increased by more than a factor of 50. 3003 3004 Collective on Mat 3005 3006 Input Parameters: 3007 + B - the matrix 3008 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3009 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3010 . d_nz - number of block nonzeros per block row in diagonal portion of local 3011 submatrix (same for all local rows) 3012 . d_nnz - array containing the number of block nonzeros in the various block rows 3013 of the in diagonal portion of the local (possibly different for each block 3014 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3015 set it even if it is zero. 3016 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3017 submatrix (same for all local rows). 3018 - o_nnz - array containing the number of nonzeros in the various block rows of the 3019 off-diagonal portion of the local submatrix (possibly different for 3020 each block row) or NULL. 3021 3022 If the *_nnz parameter is given then the *_nz parameter is ignored 3023 3024 Options Database Keys: 3025 + -mat_block_size - size of the blocks to use 3026 - -mat_use_hash_table <fact> 3027 3028 Notes: 3029 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3030 than it must be used on all processors that share the object for that argument. 3031 3032 Storage Information: 3033 For a square global matrix we define each processor's diagonal portion 3034 to be its local rows and the corresponding columns (a square submatrix); 3035 each processor's off-diagonal portion encompasses the remainder of the 3036 local matrix (a rectangular submatrix). 
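   As a minimal sketch (the block size and the constant per-block-row counts below are
   purely illustrative; m, n, M, N stand for sizes chosen elsewhere, and the meaning of
   d_nz/d_nnz and o_nz/o_nnz is spelled out below):

.vb
   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,m,n,M,N);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(B,2,5,NULL,2,NULL); /* bs=2, 5 diagonal and 2 off-diagonal blocks per block row */
.ve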
3037 3038 The user can specify preallocated storage for the diagonal part of 3039 the local submatrix with either d_nz or d_nnz (not both). Set 3040 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3041 memory allocation. Likewise, specify preallocated storage for the 3042 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3043 3044 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3045 the figure below we depict these three local rows and all columns (0-11). 3046 3047 .vb 3048 0 1 2 3 4 5 6 7 8 9 10 11 3049 -------------------------- 3050 row 3 |o o o d d d o o o o o o 3051 row 4 |o o o d d d o o o o o o 3052 row 5 |o o o d d d o o o o o o 3053 -------------------------- 3054 .ve 3055 3056 Thus, any entries in the d locations are stored in the d (diagonal) 3057 submatrix, and any entries in the o locations are stored in the 3058 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3059 stored simply in the MATSEQBAIJ format for compressed row storage. 3060 3061 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3062 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3063 In general, for PDE problems in which most nonzeros are near the diagonal, 3064 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3065 or you will get TERRIBLE performance; see the users' manual chapter on 3066 matrices. 3067 3068 You can call MatGetInfo() to get information on how effective the preallocation was; 3069 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3070 You can also run with the option -info and look for messages with the string 3071 malloc in them to see if additional memory allocation was needed. 3072 3073 Level: intermediate 3074 3075 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3076 @*/ 3077 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3078 { 3079 PetscErrorCode ierr; 3080 3081 PetscFunctionBegin; 3082 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3083 PetscValidType(B,1); 3084 PetscValidLogicalCollectiveInt(B,bs,2); 3085 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3086 PetscFunctionReturn(0); 3087 } 3088 3089 /*@C 3090 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3091 (block compressed row). For good matrix assembly performance 3092 the user should preallocate the matrix storage by setting the parameters 3093 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3094 performance can be increased by more than a factor of 50. 3095 3096 Collective 3097 3098 Input Parameters: 3099 + comm - MPI communicator 3100 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3101 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3102 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3103 This value should be the same as the local size used in creating the 3104 y vector for the matrix-vector product y = Ax. 3105 . 
n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to speed up matrix assembly

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
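
   For example, a typical calling sequence is sketched below; the sizes, the block size bs, and
   the illustrative preallocation counts (here 3 diagonal and 2 off-diagonal block nonzeros per
   block row) must be adapted to the application, and the entries are set with MatSetValues() or
   MatSetValuesBlocked() before assembly.

.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,3,NULL,2,NULL);
   MatSetValuesBlocked(A,...);
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve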
3175 In general, for PDE problems in which most nonzeros are near the diagonal, 3176 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3177 or you will get TERRIBLE performance; see the users' manual chapter on 3178 matrices. 3179 3180 Level: intermediate 3181 3182 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3183 @*/ 3184 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3185 { 3186 PetscErrorCode ierr; 3187 PetscMPIInt size; 3188 3189 PetscFunctionBegin; 3190 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3191 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3192 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3193 if (size > 1) { 3194 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3195 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3196 } else { 3197 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3198 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3199 } 3200 PetscFunctionReturn(0); 3201 } 3202 3203 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3204 { 3205 Mat mat; 3206 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3207 PetscErrorCode ierr; 3208 PetscInt len=0; 3209 3210 PetscFunctionBegin; 3211 *newmat = 0; 3212 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3213 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3214 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3215 3216 mat->factortype = matin->factortype; 3217 mat->preallocated = PETSC_TRUE; 3218 mat->assembled = PETSC_TRUE; 3219 mat->insertmode = NOT_SET_VALUES; 3220 3221 a = (Mat_MPIBAIJ*)mat->data; 3222 mat->rmap->bs = matin->rmap->bs; 3223 a->bs2 = oldmat->bs2; 3224 a->mbs = oldmat->mbs; 3225 a->nbs = oldmat->nbs; 3226 a->Mbs = oldmat->Mbs; 3227 a->Nbs = oldmat->Nbs; 3228 3229 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3230 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3231 3232 a->size = oldmat->size; 3233 a->rank = oldmat->rank; 3234 a->donotstash = oldmat->donotstash; 3235 a->roworiented = oldmat->roworiented; 3236 a->rowindices = 0; 3237 a->rowvalues = 0; 3238 a->getrowactive = PETSC_FALSE; 3239 a->barray = 0; 3240 a->rstartbs = oldmat->rstartbs; 3241 a->rendbs = oldmat->rendbs; 3242 a->cstartbs = oldmat->cstartbs; 3243 a->cendbs = oldmat->cendbs; 3244 3245 /* hash table stuff */ 3246 a->ht = 0; 3247 a->hd = 0; 3248 a->ht_size = 0; 3249 a->ht_flag = oldmat->ht_flag; 3250 a->ht_fact = oldmat->ht_fact; 3251 a->ht_total_ct = 0; 3252 a->ht_insert_ct = 0; 3253 3254 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr); 3255 if (oldmat->colmap) { 3256 #if defined(PETSC_USE_CTABLE) 3257 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3258 #else 3259 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3260 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3261 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 3262 #endif 3263 } else a->colmap = 0; 3264 3265 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3266 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3267 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3268 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 3269 } else a->garray = 0; 3270 3271 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3272 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3273 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3274 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3275 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3276 3277 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3278 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3279 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3280 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3281 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3282 *newmat = mat; 3283 PetscFunctionReturn(0); 3284 } 3285 3286 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3287 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3288 { 3289 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3290 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3291 PetscScalar *matvals; 3292 PetscErrorCode ierr; 3293 3294 PetscFunctionBegin; 3295 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3296 3297 /* read in matrix header */ 3298 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3299 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3300 M = header[1]; N = header[2]; nz = header[3]; 3301 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3302 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3303 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3304 3305 /* set block sizes from the viewer's .info file */ 3306 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3307 /* set local sizes if not set already */ 3308 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3309 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3310 /* set global sizes if not set already */ 3311 if (mat->rmap->N < 0) mat->rmap->N = M; 3312 if (mat->cmap->N < 0) mat->cmap->N = N; 3313 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3314 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3315 3316 /* check if the matrix sizes are correct */ 3317 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3318 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3319 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3320 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3321 ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL); 3322 ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce); 3323 mbs = m/bs; nbs = n/bs; 3324 3325 /* read in row lengths and build row indices */ 3326 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3327 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3328 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3329 ierr = 
MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3330 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3331 3332 /* read in column indices and matrix values */ 3333 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3334 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3335 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3336 3337 { /* preallocate matrix storage */ 3338 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3339 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3340 PetscBool sbaij,done; 3341 PetscInt *d_nnz,*o_nnz; 3342 3343 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3344 ierr = PetscHSetICreate(&ht);CHKERRQ(ierr); 3345 ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr); 3346 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr); 3347 for (i=0; i<mbs; i++) { 3348 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3349 ierr = PetscHSetIClear(ht);CHKERRQ(ierr); 3350 for (k=0; k<bs; k++) { 3351 PetscInt row = bs*i + k; 3352 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3353 PetscInt col = colidxs[j]; 3354 if (!sbaij || col >= row) { 3355 if (col >= cs && col < ce) { 3356 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3357 } else { 3358 ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr); 3359 if (done) o_nnz[i]++; 3360 } 3361 } 3362 } 3363 } 3364 } 3365 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3366 ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr); 3367 ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3368 ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3369 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3370 } 3371 3372 /* store matrix values */ 3373 for (i=0; i<m; i++) { 3374 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3375 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3376 } 3377 3378 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3379 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3380 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3381 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3382 PetscFunctionReturn(0); 3383 } 3384 3385 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3386 { 3387 PetscErrorCode ierr; 3388 PetscBool isbinary; 3389 3390 PetscFunctionBegin; 3391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3392 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3393 ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3394 PetscFunctionReturn(0); 3395 } 3396 3397 /*@ 3398 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
3399 3400 Input Parameters: 3401 + mat - the matrix 3402 - fact - factor 3403 3404 Not Collective, each process can use a different factor 3405 3406 Level: advanced 3407 3408 Notes: 3409 This can also be set by the command line option: -mat_use_hash_table <fact> 3410 3411 .seealso: MatSetOption() 3412 @*/ 3413 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3414 { 3415 PetscErrorCode ierr; 3416 3417 PetscFunctionBegin; 3418 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3419 PetscFunctionReturn(0); 3420 } 3421 3422 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3423 { 3424 Mat_MPIBAIJ *baij; 3425 3426 PetscFunctionBegin; 3427 baij = (Mat_MPIBAIJ*)mat->data; 3428 baij->ht_fact = fact; 3429 PetscFunctionReturn(0); 3430 } 3431 3432 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3433 { 3434 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3435 PetscBool flg; 3436 PetscErrorCode ierr; 3437 3438 PetscFunctionBegin; 3439 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr); 3440 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3441 if (Ad) *Ad = a->A; 3442 if (Ao) *Ao = a->B; 3443 if (colmap) *colmap = a->garray; 3444 PetscFunctionReturn(0); 3445 } 3446 3447 /* 3448 Special version for direct calls from Fortran (to eliminate two function call overheads 3449 */ 3450 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3451 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3452 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3453 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3454 #endif 3455 3456 /*@C 3457 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3458 3459 Collective on Mat 3460 3461 Input Parameters: 3462 + mat - the matrix 3463 . min - number of input rows 3464 . im - input rows 3465 . nin - number of input columns 3466 . in - input columns 3467 . v - numerical values input 3468 - addvin - INSERT_VALUES or ADD_VALUES 3469 3470 Notes: 3471 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
3472 3473 Level: advanced 3474 3475 .seealso: MatSetValuesBlocked() 3476 @*/ 3477 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3478 { 3479 /* convert input arguments to C version */ 3480 Mat mat = *matin; 3481 PetscInt m = *min, n = *nin; 3482 InsertMode addv = *addvin; 3483 3484 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3485 const MatScalar *value; 3486 MatScalar *barray = baij->barray; 3487 PetscBool roworiented = baij->roworiented; 3488 PetscErrorCode ierr; 3489 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3490 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3491 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3492 3493 PetscFunctionBegin; 3494 /* tasks normally handled by MatSetValuesBlocked() */ 3495 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3496 #if defined(PETSC_USE_DEBUG) 3497 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3498 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3499 #endif 3500 if (mat->assembled) { 3501 mat->was_assembled = PETSC_TRUE; 3502 mat->assembled = PETSC_FALSE; 3503 } 3504 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3505 3506 3507 if (!barray) { 3508 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3509 baij->barray = barray; 3510 } 3511 3512 if (roworiented) stepval = (n-1)*bs; 3513 else stepval = (m-1)*bs; 3514 3515 for (i=0; i<m; i++) { 3516 if (im[i] < 0) continue; 3517 #if defined(PETSC_USE_DEBUG) 3518 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3519 #endif 3520 if (im[i] >= rstart && im[i] < rend) { 3521 row = im[i] - rstart; 3522 for (j=0; j<n; j++) { 3523 /* If NumCol = 1 then a copy is not required */ 3524 if ((roworiented) && (n == 1)) { 3525 barray = (MatScalar*)v + i*bs2; 3526 } else if ((!roworiented) && (m == 1)) { 3527 barray = (MatScalar*)v + j*bs2; 3528 } else { /* Here a copy is required */ 3529 if (roworiented) { 3530 value = v + i*(stepval+bs)*bs + j*bs; 3531 } else { 3532 value = v + j*(stepval+bs)*bs + i*bs; 3533 } 3534 for (ii=0; ii<bs; ii++,value+=stepval) { 3535 for (jj=0; jj<bs; jj++) { 3536 *barray++ = *value++; 3537 } 3538 } 3539 barray -=bs2; 3540 } 3541 3542 if (in[j] >= cstart && in[j] < cend) { 3543 col = in[j] - cstart; 3544 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3545 } else if (in[j] < 0) continue; 3546 #if defined(PETSC_USE_DEBUG) 3547 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3548 #endif 3549 else { 3550 if (mat->was_assembled) { 3551 if (!baij->colmap) { 3552 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3553 } 3554 3555 #if defined(PETSC_USE_DEBUG) 3556 #if defined(PETSC_USE_CTABLE) 3557 { PetscInt data; 3558 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3559 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3560 } 3561 #else 3562 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3563 #endif 3564 #endif 3565 #if defined(PETSC_USE_CTABLE) 3566 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3567 col = (col - 1)/bs; 3568 #else 3569 col = (baij->colmap[in[j]] - 
1)/bs; 3570 #endif 3571 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3572 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3573 col = in[j]; 3574 } 3575 } else col = in[j]; 3576 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3577 } 3578 } 3579 } else { 3580 if (!baij->donotstash) { 3581 if (roworiented) { 3582 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3583 } else { 3584 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3585 } 3586 } 3587 } 3588 } 3589 3590 /* task normally handled by MatSetValuesBlocked() */ 3591 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3592 PetscFunctionReturn(0); 3593 } 3594 3595 /*@ 3596 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3597 CSR format the local rows. 3598 3599 Collective 3600 3601 Input Parameters: 3602 + comm - MPI communicator 3603 . bs - the block size, only a block size of 1 is supported 3604 . m - number of local rows (Cannot be PETSC_DECIDE) 3605 . n - This value should be the same as the local size used in creating the 3606 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3607 calculated if N is given) For square matrices n is almost always m. 3608 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3609 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3610 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3611 . j - column indices 3612 - a - matrix values 3613 3614 Output Parameter: 3615 . mat - the matrix 3616 3617 Level: intermediate 3618 3619 Notes: 3620 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3621 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3622 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3623 3624 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3625 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3626 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3627 with column-major ordering within blocks. 3628 3629 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
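
   Example (an illustrative sketch with bs = 1): on each process the arrays below describe two
   local rows, the first with entries in global columns 0 and 5 and the second with a single
   entry in global column 5; the local column size and global row size are derived automatically.

.vb
   PetscInt    ii[] = {0,2,3};
   PetscInt    jj[] = {0,5,5};
   PetscScalar aa[] = {1.0,2.0,3.0};
   Mat         A;

   MatCreateMPIBAIJWithArrays(comm,1,2,PETSC_DECIDE,PETSC_DETERMINE,6,ii,jj,aa,&A);
.ve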
.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,mbs,Nbs,nbs;
    PetscInt    *bindx,rmax=a->rmax,j;
    PetscMPIInt rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
    }
    nbs = n/cbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr =
MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3706 3707 for (i=0; i<m; i++) { 3708 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3709 Ii = i + rstart; 3710 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3711 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3712 } 3713 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3714 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3715 PetscFunctionReturn(0); 3716 } 3717
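
/*
   Usage sketch (illustrative only): the hash table based assembly documented above can also be
   enabled directly from code before the matrix is assembled, which is equivalent to running
   with the option -mat_use_hash_table <fact>:

.vb
   MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(A,1.6);
.ve
*/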