1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14 PetscErrorCode ierr; 15 PetscInt i,*idxb = 0; 16 PetscScalar *va,*vb; 17 Vec vtmp; 18 19 PetscFunctionBegin; 20 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 21 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 22 if (idx) { 23 for (i=0; i<A->rmap->n; i++) { 24 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 25 } 26 } 27 28 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 29 if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 30 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 31 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 32 33 for (i=0; i<A->rmap->n; i++) { 34 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 35 va[i] = vb[i]; 36 if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 37 } 38 } 39 40 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 41 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 42 ierr = PetscFree(idxb);CHKERRQ(ierr); 43 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 44 PetscFunctionReturn(0); 45 } 46 47 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 48 { 49 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 54 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 59 { 60 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 61 PetscErrorCode ierr; 62 63 PetscFunctionBegin; 64 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 65 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 66 PetscFunctionReturn(0); 67 } 68 69 /* 70 Local utility routine that creates a mapping from the global column 71 number to the local number in the off-diagonal part of the local 72 storage of the matrix. This is done in a non scalable way since the 73 length of colmap equals the global matrix length. 74 */ 75 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 76 { 77 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 78 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 79 PetscErrorCode ierr; 80 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 81 82 PetscFunctionBegin; 83 #if defined(PETSC_USE_CTABLE) 84 ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 85 for (i=0; i<nbs; i++) { 86 ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 87 } 88 #else 89 ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 90 ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 91 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 92 #endif 93 PetscFunctionReturn(0); 94 } 95 96 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 97 { \ 98 brow = row/bs; \ 99 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 100 rmax = aimax[brow]; nrow = ailen[brow]; \ 101 bcol = col/bs; \ 102 ridx = row % bs; cidx = col % bs; \ 103 low = 0; high = nrow; \ 104 while (high-low > 3) { \ 105 t = (low+high)/2; \ 106 if (rp[t] > bcol) high = t; \ 107 else low = t; \ 108 } \ 109 for (_i=low; _i<high; _i++) { \ 110 if (rp[_i] > bcol) break; \ 111 if (rp[_i] == bcol) { \ 112 bap = ap + bs2*_i + bs*cidx + ridx; \ 113 if (addv == ADD_VALUES) *bap += value; \ 114 else *bap = value; \ 115 goto a_noinsert; \ 116 } \ 117 } \ 118 if (a->nonew == 1) goto a_noinsert; \ 119 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 120 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 121 N = nrow++ - 1; \ 122 /* shift up all the later entries in this row */ \ 123 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 124 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 125 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 126 rp[_i] = bcol; \ 127 ap[bs2*_i + bs*cidx + ridx] = value; \ 128 a_noinsert:; \ 129 ailen[brow] = nrow; \ 130 } 131 132 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 133 { \ 134 brow = row/bs; \ 135 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 136 rmax = bimax[brow]; nrow = bilen[brow]; \ 137 bcol = col/bs; \ 138 ridx = row % bs; cidx = col % bs; \ 139 low = 0; high = nrow; \ 140 while (high-low > 3) { \ 141 t = (low+high)/2; \ 142 if (rp[t] > bcol) high = t; \ 143 else low = t; \ 144 } \ 145 for (_i=low; _i<high; _i++) { \ 146 if (rp[_i] > bcol) break; \ 147 if (rp[_i] == bcol) { \ 148 bap = ap + bs2*_i + bs*cidx + ridx; \ 149 if (addv == ADD_VALUES) *bap += value; \ 150 else *bap = value; \ 151 goto b_noinsert; \ 152 } \ 153 } \ 154 if (b->nonew == 1) goto b_noinsert; \ 155 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 156 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 157 N = nrow++ - 1; \ 158 /* shift up all the later entries in this row */ \ 159 ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 160 ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\ 161 ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 162 rp[_i] = bcol; \ 163 ap[bs2*_i + bs*cidx + ridx] = value; \ 164 b_noinsert:; \ 165 bilen[brow] = nrow; \ 166 } 167 168 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 169 { 170 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 171 MatScalar value; 172 PetscBool roworiented = baij->roworiented; 173 PetscErrorCode ierr; 174 PetscInt i,j,row,col; 175 PetscInt rstart_orig=mat->rmap->rstart; 176 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 177 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 178 179 /* Some Variables required in the macro */ 180 Mat A = baij->A; 181 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 182 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 183 MatScalar *aa =a->a; 184 185 Mat B = baij->B; 186 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 187 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 188 MatScalar *ba =b->a; 189 190 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 191 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 192 MatScalar *ap,*bap; 193 194 PetscFunctionBegin; 195 for (i=0; i<m; i++) { 196 if (im[i] < 0) continue; 197 #if defined(PETSC_USE_DEBUG) 198 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 199 #endif 200 if (im[i] >= rstart_orig && im[i] < rend_orig) { 201 row = im[i] - rstart_orig; 202 for (j=0; j<n; j++) { 203 if (in[j] >= cstart_orig && in[j] < cend_orig) { 204 col = in[j] - cstart_orig; 205 if (roworiented) value = v[i*n+j]; 206 else value = v[i+j*m]; 207 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 208 } else if (in[j] < 0) continue; 209 #if defined(PETSC_USE_DEBUG) 210 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 211 #endif 212 else { 213 if (mat->was_assembled) { 214 if (!baij->colmap) { 215 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 216 } 217 #if defined(PETSC_USE_CTABLE) 218 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 219 col = col - 1; 220 #else 221 col = baij->colmap[in[j]/bs] - 1; 222 #endif 223 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 224 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 225 col = in[j]; 226 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 227 B = baij->B; 228 b = (Mat_SeqBAIJ*)(B)->data; 229 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 230 ba =b->a; 231 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 232 else col += in[j]%bs; 233 } else col = in[j]; 234 if (roworiented) value = v[i*n+j]; 235 else value = v[i+j*m]; 236 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 237 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 238 } 239 } 240 } else { 241 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 242 if (!baij->donotstash) { 243 mat->assembled = PETSC_FALSE; 244 if (roworiented) { 245 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 246 } else { 247 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 } 252 PetscFunctionReturn(0); 253 } 254 255 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 256 { 257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 258 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 259 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 260 PetscErrorCode ierr; 261 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 262 PetscBool roworiented=a->roworiented; 263 const PetscScalar *value = v; 264 MatScalar *ap,*aa = a->a,*bap; 265 266 PetscFunctionBegin; 267 rp = aj + ai[row]; 268 ap = aa + bs2*ai[row]; 269 rmax = imax[row]; 270 nrow = ailen[row]; 271 value = v; 272 low = 0; 273 high = nrow; 274 while (high-low > 7) { 275 t = (low+high)/2; 276 if (rp[t] > col) high = t; 277 else low = t; 278 } 279 for (i=low; i<high; i++) { 280 if (rp[i] > col) break; 281 if (rp[i] == col) { 282 bap = ap + bs2*i; 283 if (roworiented) { 284 if (is == ADD_VALUES) { 285 for (ii=0; ii<bs; ii++) { 286 for (jj=ii; jj<bs2; jj+=bs) { 287 bap[jj] += *value++; 288 } 289 } 290 } else { 291 for (ii=0; ii<bs; ii++) { 292 for (jj=ii; jj<bs2; jj+=bs) { 293 bap[jj] = *value++; 294 } 295 } 296 } 297 } else { 298 if (is == ADD_VALUES) { 299 for (ii=0; ii<bs; ii++,value+=bs) { 300 for (jj=0; jj<bs; jj++) { 301 bap[jj] += value[jj]; 302 } 303 bap += bs; 304 } 305 } else { 306 for (ii=0; ii<bs; ii++,value+=bs) { 307 for (jj=0; jj<bs; jj++) { 308 bap[jj] = value[jj]; 309 } 310 bap += bs; 311 } 312 } 313 } 314 goto noinsert2; 315 } 316 } 317 if (nonew == 1) goto noinsert2; 318 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 319 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 320 N = nrow++ - 1; high++; 321 /* shift up all the later entries in this row */ 322 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 323 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 324 rp[i] = col; 325 bap = ap + bs2*i; 326 if (roworiented) { 327 for (ii=0; ii<bs; ii++) { 328 for (jj=ii; jj<bs2; jj+=bs) { 329 bap[jj] = *value++; 330 } 331 } 332 } else { 333 for (ii=0; ii<bs; ii++) { 334 for (jj=0; jj<bs; jj++) { 335 *bap++ = *value++; 336 } 337 } 338 } 339 noinsert2:; 340 ailen[row] = nrow; 341 PetscFunctionReturn(0); 342 } 343 344 /* 345 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 346 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 347 */ 348 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 349 { 350 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 351 const PetscScalar *value; 352 MatScalar *barray = baij->barray; 353 PetscBool roworiented = baij->roworiented; 354 PetscErrorCode ierr; 355 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 356 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 357 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 358 359 PetscFunctionBegin; 360 if (!barray) { 361 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 362 baij->barray = barray; 363 } 364 365 if (roworiented) stepval = (n-1)*bs; 366 else stepval = (m-1)*bs; 367 368 for (i=0; i<m; i++) { 369 if (im[i] < 0) continue; 370 #if defined(PETSC_USE_DEBUG) 371 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 372 #endif 373 if (im[i] >= rstart && im[i] < rend) { 374 row = im[i] - rstart; 375 for (j=0; j<n; j++) { 376 /* If NumCol = 1 then a copy is not required */ 377 if ((roworiented) && (n == 1)) { 378 barray = (MatScalar*)v + i*bs2; 379 } else if ((!roworiented) && (m == 1)) { 380 barray = (MatScalar*)v + j*bs2; 381 } else { /* Here a copy is required */ 382 if (roworiented) { 383 value = v + (i*(stepval+bs) + j)*bs; 384 } else { 385 value = v + (j*(stepval+bs) + i)*bs; 386 } 387 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 388 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 389 barray += bs; 390 } 391 barray -= bs2; 392 } 393 394 if (in[j] >= cstart && in[j] < cend) { 395 col = in[j] - cstart; 396 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 397 } else if (in[j] < 0) continue; 398 #if defined(PETSC_USE_DEBUG) 399 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 400 #endif 401 else { 402 if (mat->was_assembled) { 403 if (!baij->colmap) { 404 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 405 } 406 407 #if defined(PETSC_USE_DEBUG) 408 #if defined(PETSC_USE_CTABLE) 409 { PetscInt data; 410 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 411 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 412 } 413 #else 414 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 415 #endif 416 #endif 417 #if defined(PETSC_USE_CTABLE) 418 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 419 col = (col - 1)/bs; 420 #else 421 col = (baij->colmap[in[j]] - 1)/bs; 422 #endif 423 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 424 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 425 col = in[j]; 426 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]); 427 } else col = in[j]; 428 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 429 } 430 } 431 } else { 432 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 433 if (!baij->donotstash) { 434 if (roworiented) { 435 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 436 } else { 437 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 438 } 439 } 440 } 441 } 442 PetscFunctionReturn(0); 443 } 444 445 #define HASH_KEY 0.6180339887 446 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 447 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 448 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 449 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 450 { 451 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 452 PetscBool roworiented = baij->roworiented; 453 PetscErrorCode ierr; 454 PetscInt i,j,row,col; 455 PetscInt rstart_orig=mat->rmap->rstart; 456 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 457 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 458 PetscReal tmp; 459 MatScalar **HD = baij->hd,value; 460 #if defined(PETSC_USE_DEBUG) 461 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 462 #endif 463 464 PetscFunctionBegin; 465 for (i=0; i<m; i++) { 466 #if defined(PETSC_USE_DEBUG) 467 if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 468 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 469 #endif 470 row = im[i]; 471 if (row >= rstart_orig && row < rend_orig) { 472 for (j=0; j<n; j++) { 473 col = in[j]; 474 if (roworiented) value = v[i*n+j]; 475 else value = v[i+j*m]; 476 /* Look up PetscInto the Hash Table */ 477 key = (row/bs)*Nbs+(col/bs)+1; 478 h1 = HASH(size,key,tmp); 479 480 481 idx = h1; 482 #if defined(PETSC_USE_DEBUG) 483 insert_ct++; 484 total_ct++; 485 if (HT[idx] != key) { 486 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 487 if (idx == size) { 488 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 489 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 490 } 491 } 492 #else 493 if (HT[idx] != key) { 494 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 495 if (idx == size) { 496 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 497 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 498 } 499 } 500 #endif 501 /* A HASH table entry is found, so insert the values at the correct address */ 502 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 503 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 504 } 505 } else if (!baij->donotstash) { 506 if (roworiented) { 507 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 508 } else { 509 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 510 } 511 } 512 } 513 #if defined(PETSC_USE_DEBUG) 514 baij->ht_total_ct += total_ct; 515 baij->ht_insert_ct += insert_ct; 516 #endif 517 PetscFunctionReturn(0); 518 } 519 520 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 521 { 522 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 523 PetscBool roworiented = baij->roworiented; 524 PetscErrorCode ierr; 525 PetscInt i,j,ii,jj,row,col; 526 PetscInt rstart=baij->rstartbs; 527 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 528 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 529 PetscReal tmp; 530 MatScalar **HD = baij->hd,*baij_a; 531 const PetscScalar *v_t,*value; 532 #if defined(PETSC_USE_DEBUG) 533 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 534 #endif 535 536 PetscFunctionBegin; 537 if (roworiented) stepval = (n-1)*bs; 538 else stepval = (m-1)*bs; 539 540 for (i=0; i<m; i++) { 541 #if defined(PETSC_USE_DEBUG) 542 if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 543 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 544 #endif 545 row = im[i]; 546 v_t = v + i*nbs2; 547 if (row >= rstart && row < rend) { 548 for (j=0; j<n; j++) { 549 col = in[j]; 550 551 /* Look up into the Hash Table */ 552 key = row*Nbs+col+1; 553 h1 = HASH(size,key,tmp); 554 555 idx = h1; 556 #if defined(PETSC_USE_DEBUG) 557 total_ct++; 558 insert_ct++; 559 if (HT[idx] != key) { 560 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 561 if (idx == size) { 562 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 563 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 564 } 565 } 566 #else 567 if (HT[idx] != key) { 568 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 569 if (idx == size) { 570 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 571 if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 572 } 573 } 574 #endif 575 baij_a = HD[idx]; 576 if (roworiented) { 577 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 578 /* value = v + (i*(stepval+bs)+j)*bs; */ 579 value = v_t; 580 v_t += bs; 581 if (addv == ADD_VALUES) { 582 for (ii=0; ii<bs; ii++,value+=stepval) { 583 for (jj=ii; jj<bs2; jj+=bs) { 584 baij_a[jj] += *value++; 585 } 586 } 587 } else { 588 for (ii=0; ii<bs; ii++,value+=stepval) { 589 for (jj=ii; jj<bs2; jj+=bs) { 590 baij_a[jj] = *value++; 591 } 592 } 593 } 594 } else { 595 value = v + j*(stepval+bs)*bs + i*bs; 596 if (addv == ADD_VALUES) { 597 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 598 for (jj=0; jj<bs; jj++) { 599 baij_a[jj] += *value++; 600 } 601 } 602 } else { 603 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 604 for (jj=0; jj<bs; jj++) { 605 baij_a[jj] = *value++; 606 } 607 } 608 } 609 } 610 } 611 } else { 612 if (!baij->donotstash) { 613 if (roworiented) { 614 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 615 } else { 616 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 617 } 618 } 619 } 620 } 621 #if defined(PETSC_USE_DEBUG) 622 baij->ht_total_ct += total_ct; 623 baij->ht_insert_ct += insert_ct; 624 #endif 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 633 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 638 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 640 row = idxm[i] - bsrstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 643 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= bscstart && idxn[j] < bscend) { 645 col = idxn[j] - bscstart; 646 ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 647 } else { 648 if (!baij->colmap) { 649 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 653 data--; 654 #else 655 data = baij->colmap[idxn[j]/bs]-1; 656 #endif 657 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 658 else { 659 col = data + idxn[j]%bs; 660 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 661 } 662 } 663 } 664 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 665 } 666 PetscFunctionReturn(0); 667 } 668 669 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 670 { 671 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 672 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 673 PetscErrorCode ierr; 674 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 675 PetscReal sum = 0.0; 676 MatScalar *v; 677 678 PetscFunctionBegin; 679 if (baij->size == 1) { 680 ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 681 } else { 682 if (type == NORM_FROBENIUS) { 683 v = amat->a; 684 nz = amat->nz*bs2; 685 for (i=0; i<nz; i++) { 686 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 687 } 688 v = bmat->a; 689 nz = bmat->nz*bs2; 690 for (i=0; i<nz; i++) { 691 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 692 } 693 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 694 *nrm = PetscSqrtReal(*nrm); 695 } else if (type == NORM_1) { /* max column sum */ 696 PetscReal *tmp,*tmp2; 697 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 698 ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr); 699 ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr); 700 v = amat->a; jj = amat->j; 701 for (i=0; i<amat->nz; i++) { 702 for (j=0; j<bs; j++) { 703 col = bs*(cstart + *jj) + j; /* column index */ 704 for (row=0; row<bs; row++) { 705 tmp[col] += PetscAbsScalar(*v); v++; 706 } 707 } 708 jj++; 709 } 710 v = bmat->a; jj = bmat->j; 711 for (i=0; i<bmat->nz; i++) { 712 for (j=0; j<bs; j++) { 713 col = bs*garray[*jj] + j; 714 for (row=0; row<bs; row++) { 715 tmp[col] += PetscAbsScalar(*v); v++; 716 } 717 } 718 jj++; 719 } 720 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 721 *nrm = 0.0; 722 for (j=0; j<mat->cmap->N; j++) { 723 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 724 } 725 ierr = PetscFree(tmp);CHKERRQ(ierr); 726 ierr = PetscFree(tmp2);CHKERRQ(ierr); 727 } else if (type == NORM_INFINITY) { /* max row sum */ 728 PetscReal *sums; 729 ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 730 sum = 0.0; 731 for (j=0; j<amat->mbs; j++) { 732 for (row=0; row<bs; row++) sums[row] = 0.0; 733 v = amat->a + bs2*amat->i[j]; 734 nz = amat->i[j+1]-amat->i[j]; 735 for (i=0; i<nz; i++) { 736 for (col=0; col<bs; col++) { 737 for (row=0; row<bs; row++) { 738 sums[row] += PetscAbsScalar(*v); v++; 739 } 740 } 741 } 742 v = bmat->a + bs2*bmat->i[j]; 743 nz = bmat->i[j+1]-bmat->i[j]; 744 for (i=0; i<nz; i++) { 745 for (col=0; col<bs; col++) { 746 for (row=0; row<bs; row++) { 747 sums[row] += PetscAbsScalar(*v); v++; 748 } 749 } 750 } 751 for (row=0; row<bs; row++) { 752 if (sums[row] > sum) sum = sums[row]; 753 } 754 } 755 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 756 ierr = PetscFree(sums);CHKERRQ(ierr); 757 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 758 } 759 PetscFunctionReturn(0); 760 } 761 762 /* 763 Creates the hash table, and sets the table 764 This table is created only once. 765 If new entried need to be added to the matrix 766 then the hash table has to be destroyed and 767 recreated. 768 */ 769 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 770 { 771 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 772 Mat A = baij->A,B=baij->B; 773 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 774 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 775 PetscErrorCode ierr; 776 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 777 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 778 PetscInt *HT,key; 779 MatScalar **HD; 780 PetscReal tmp; 781 #if defined(PETSC_USE_INFO) 782 PetscInt ct=0,max=0; 783 #endif 784 785 PetscFunctionBegin; 786 if (baij->ht) PetscFunctionReturn(0); 787 788 baij->ht_size = (PetscInt)(factor*nz); 789 ht_size = baij->ht_size; 790 791 /* Allocate Memory for Hash Table */ 792 ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 793 HD = baij->hd; 794 HT = baij->ht; 795 796 /* Loop Over A */ 797 for (i=0; i<a->mbs; i++) { 798 for (j=ai[i]; j<ai[i+1]; j++) { 799 row = i+rstart; 800 col = aj[j]+cstart; 801 802 key = row*Nbs + col + 1; 803 h1 = HASH(ht_size,key,tmp); 804 for (k=0; k<ht_size; k++) { 805 if (!HT[(h1+k)%ht_size]) { 806 HT[(h1+k)%ht_size] = key; 807 HD[(h1+k)%ht_size] = a->a + j*bs2; 808 break; 809 #if defined(PETSC_USE_INFO) 810 } else { 811 ct++; 812 #endif 813 } 814 } 815 #if defined(PETSC_USE_INFO) 816 if (k> max) max = k; 817 #endif 818 } 819 } 820 /* Loop Over B */ 821 for (i=0; i<b->mbs; i++) { 822 for (j=bi[i]; j<bi[i+1]; j++) { 823 row = i+rstart; 824 col = garray[bj[j]]; 825 key = row*Nbs + col + 1; 826 h1 = HASH(ht_size,key,tmp); 827 for (k=0; k<ht_size; k++) { 828 if (!HT[(h1+k)%ht_size]) { 829 HT[(h1+k)%ht_size] = key; 830 HD[(h1+k)%ht_size] = b->a + j*bs2; 831 break; 832 #if defined(PETSC_USE_INFO) 833 } else { 834 ct++; 835 #endif 836 } 837 } 838 #if defined(PETSC_USE_INFO) 839 if (k> max) max = k; 840 #endif 841 } 842 } 843 844 /* Print Summary */ 845 #if defined(PETSC_USE_INFO) 846 for (i=0,j=0; i<ht_size; i++) { 847 if (HT[i]) j++; 848 } 849 ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 850 #endif 851 PetscFunctionReturn(0); 852 } 853 854 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 855 { 856 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 857 PetscErrorCode ierr; 858 PetscInt nstash,reallocs; 859 860 PetscFunctionBegin; 861 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 862 863 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 864 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 865 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 866 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 867 ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 868 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 869 PetscFunctionReturn(0); 870 } 871 872 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 873 { 874 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 875 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 876 PetscErrorCode ierr; 877 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 878 PetscInt *row,*col; 879 PetscBool r1,r2,r3,other_disassembled; 880 MatScalar *val; 881 PetscMPIInt n; 882 883 PetscFunctionBegin; 884 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 885 if (!baij->donotstash && !mat->nooffprocentries) { 886 while (1) { 887 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 888 if (!flg) break; 889 890 for (i=0; i<n;) { 891 /* Now identify the consecutive vals belonging to the same row */ 892 for (j=i,rstart=row[j]; j<n; j++) { 893 if (row[j] != rstart) break; 894 } 895 if (j < n) ncols = j-i; 896 else ncols = n-i; 897 /* Now assemble all these values with a single function call */ 898 ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 899 i = j; 900 } 901 } 902 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 903 /* Now process the block-stash. Since the values are stashed column-oriented, 904 set the roworiented flag to column oriented, and after MatSetValues() 905 restore the original flags */ 906 r1 = baij->roworiented; 907 r2 = a->roworiented; 908 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 909 910 baij->roworiented = PETSC_FALSE; 911 a->roworiented = PETSC_FALSE; 912 913 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 914 while (1) { 915 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 916 if (!flg) break; 917 918 for (i=0; i<n;) { 919 /* Now identify the consecutive vals belonging to the same row */ 920 for (j=i,rstart=row[j]; j<n; j++) { 921 if (row[j] != rstart) break; 922 } 923 if (j < n) ncols = j-i; 924 else ncols = n-i; 925 ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 926 i = j; 927 } 928 } 929 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 930 931 baij->roworiented = r1; 932 a->roworiented = r2; 933 934 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 935 } 936 937 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 938 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 939 940 /* determine if any processor has disassembled, if so we must 941 also disassemble ourselfs, in order that we may reassemble. */ 942 /* 943 if nonzero structure of submatrix B cannot change then we know that 944 no processor disassembled thus we can skip this stuff 945 */ 946 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 947 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 948 if (mat->was_assembled && !other_disassembled) { 949 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 950 } 951 } 952 953 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 954 ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 955 } 956 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 957 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 958 959 #if defined(PETSC_USE_INFO) 960 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 961 ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 962 963 baij->ht_total_ct = 0; 964 baij->ht_insert_ct = 0; 965 } 966 #endif 967 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 968 ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 969 970 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 971 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 972 } 973 974 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 975 976 baij->rowvalues = 0; 977 978 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 979 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 980 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 981 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 982 } 983 PetscFunctionReturn(0); 984 } 985 986 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 987 #include <petscdraw.h> 988 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 989 { 990 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 991 PetscErrorCode ierr; 992 PetscMPIInt rank = baij->rank; 993 PetscInt bs = mat->rmap->bs; 994 PetscBool iascii,isdraw; 995 PetscViewer sviewer; 996 PetscViewerFormat format; 997 998 PetscFunctionBegin; 999 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1000 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1001 if (iascii) { 1002 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1003 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1004 MatInfo info; 1005 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1006 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1007 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1008 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n", 1009 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 1010 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1011 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1012 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1013 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1014 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1015 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1016 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1017 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1020 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1023 PetscFunctionReturn(0); 1024 } 1025 } 1026 1027 if (isdraw) { 1028 PetscDraw draw; 1029 PetscBool isnull; 1030 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1031 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1032 if (isnull) PetscFunctionReturn(0); 1033 } 1034 1035 { 1036 /* assemble the entire matrix onto first processor. */ 1037 Mat A; 1038 Mat_SeqBAIJ *Aloc; 1039 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1040 MatScalar *a; 1041 const char *matname; 1042 1043 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1044 /* Perhaps this should be the type of mat? */ 1045 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1046 if (!rank) { 1047 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1048 } else { 1049 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1050 } 1051 ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 1052 ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 1053 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1054 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1055 1056 /* copy over the A part */ 1057 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1058 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1059 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1060 1061 for (i=0; i<mbs; i++) { 1062 rvals[0] = bs*(baij->rstartbs + i); 1063 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1064 for (j=ai[i]; j<ai[i+1]; j++) { 1065 col = (baij->cstartbs+aj[j])*bs; 1066 for (k=0; k<bs; k++) { 1067 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1068 col++; a += bs; 1069 } 1070 } 1071 } 1072 /* copy over the B part */ 1073 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1074 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1075 for (i=0; i<mbs; i++) { 1076 rvals[0] = bs*(baij->rstartbs + i); 1077 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1078 for (j=ai[i]; j<ai[i+1]; j++) { 1079 col = baij->garray[aj[j]]*bs; 1080 for (k=0; k<bs; k++) { 1081 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1082 col++; a += bs; 1083 } 1084 } 1085 } 1086 ierr = PetscFree(rvals);CHKERRQ(ierr); 1087 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1088 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1089 /* 1090 Everyone has to call to draw the matrix since the graphics waits are 1091 synchronized across all processors that share the PetscDraw object 1092 */ 1093 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1094 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1095 if (!rank) { 1096 ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1097 ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1098 } 1099 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1100 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1101 ierr = MatDestroy(&A);CHKERRQ(ierr); 1102 } 1103 PetscFunctionReturn(0); 1104 } 1105 1106 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1107 { 1108 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1109 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1110 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1111 const PetscInt *garray = aij->garray; 1112 PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1113 PetscInt *rowlens,*colidxs; 1114 PetscScalar *matvals; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1119 1120 M = mat->rmap->N; 1121 N = mat->cmap->N; 1122 m = mat->rmap->n; 1123 rs = mat->rmap->rstart; 1124 cs = mat->cmap->rstart; 1125 bs = mat->rmap->bs; 1126 nz = bs*bs*(A->nz + B->nz); 1127 1128 /* write matrix header */ 1129 header[0] = MAT_FILE_CLASSID; 1130 header[1] = M; header[2] = N; header[3] = nz; 1131 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1132 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1133 1134 /* fill in and store row lengths */ 1135 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1136 for (cnt=0, i=0; i<A->mbs; i++) 1137 for (j=0; j<bs; j++) 1138 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1139 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1140 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1141 1142 /* fill in and store column indices */ 1143 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1144 for (cnt=0, i=0; i<A->mbs; i++) { 1145 for (k=0; k<bs; k++) { 1146 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1147 if (garray[B->j[jb]] > cs/bs) break; 1148 for (l=0; l<bs; l++) 1149 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1150 } 1151 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1152 for (l=0; l<bs; l++) 1153 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1154 for (; jb<B->i[i+1]; jb++) 1155 for (l=0; l<bs; l++) 1156 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1157 } 1158 } 1159 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1160 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr); 1161 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1162 1163 /* fill in and store nonzero values */ 1164 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1165 for (cnt=0, i=0; i<A->mbs; i++) { 1166 for (k=0; k<bs; k++) { 1167 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1168 if (garray[B->j[jb]] > cs/bs) break; 1169 for (l=0; l<bs; l++) 1170 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1171 } 1172 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1173 for (l=0; l<bs; l++) 1174 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1175 for (; jb<B->i[i+1]; jb++) 1176 for (l=0; l<bs; l++) 1177 matvals[cnt++] = B->a[bs*(bs*jb + l) + k];; 1178 } 1179 } 1180 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr); 1181 ierr = PetscFree(matvals);CHKERRQ(ierr); 1182 1183 /* write block size option to the viewer's .info file */ 1184 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1189 { 1190 PetscErrorCode ierr; 1191 PetscBool iascii,isdraw,issocket,isbinary; 1192 1193 PetscFunctionBegin; 1194 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1195 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1196 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1197 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1198 if (iascii || isdraw || issocket) { 1199 ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1200 } else if (isbinary) { 1201 ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1202 } 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1207 { 1208 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1209 PetscErrorCode ierr; 1210 1211 PetscFunctionBegin; 1212 #if defined(PETSC_USE_LOG) 1213 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 1214 #endif 1215 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1216 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1217 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 1218 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1219 #if defined(PETSC_USE_CTABLE) 1220 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 1221 #else 1222 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 1223 #endif 1224 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 1225 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 1226 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1227 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 1228 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1229 ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1230 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1231 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1232 1233 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1237 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1238 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1239 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 1242 #if defined(PETSC_HAVE_HYPRE) 1243 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr); 1244 #endif 1245 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1251 { 1252 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1253 PetscErrorCode ierr; 1254 PetscInt nt; 1255 1256 PetscFunctionBegin; 1257 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1258 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1259 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1260 if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1261 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1262 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1263 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1269 { 1270 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1271 PetscErrorCode ierr; 1272 1273 PetscFunctionBegin; 1274 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1275 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1276 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1277 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1282 { 1283 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1284 PetscErrorCode ierr; 1285 1286 PetscFunctionBegin; 1287 /* do nondiagonal part */ 1288 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1289 /* do local part */ 1290 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1291 /* add partial results together */ 1292 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1293 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1298 { 1299 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1300 PetscErrorCode ierr; 1301 1302 PetscFunctionBegin; 1303 /* do nondiagonal part */ 1304 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1305 /* do local part */ 1306 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1307 /* add partial results together */ 1308 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1309 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1310 PetscFunctionReturn(0); 1311 } 1312 1313 /* 1314 This only works correctly for square matrices where the subblock A->A is the 1315 diagonal block 1316 */ 1317 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1318 { 1319 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1324 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1329 { 1330 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1335 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1340 { 1341 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1342 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1343 PetscErrorCode ierr; 1344 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1345 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1346 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1347 1348 PetscFunctionBegin; 1349 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1350 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1351 mat->getrowactive = PETSC_TRUE; 1352 1353 if (!mat->rowvalues && (idx || v)) { 1354 /* 1355 allocate enough space to hold information from the longest row. 1356 */ 1357 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1358 PetscInt max = 1,mbs = mat->mbs,tmp; 1359 for (i=0; i<mbs; i++) { 1360 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1361 if (max < tmp) max = tmp; 1362 } 1363 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1364 } 1365 lrow = row - brstart; 1366 1367 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1368 if (!v) {pvA = 0; pvB = 0;} 1369 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1370 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1371 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1372 nztot = nzA + nzB; 1373 1374 cmap = mat->garray; 1375 if (v || idx) { 1376 if (nztot) { 1377 /* Sort by increasing column numbers, assuming A and B already sorted */ 1378 PetscInt imark = -1; 1379 if (v) { 1380 *v = v_p = mat->rowvalues; 1381 for (i=0; i<nzB; i++) { 1382 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1383 else break; 1384 } 1385 imark = i; 1386 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1387 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1388 } 1389 if (idx) { 1390 *idx = idx_p = mat->rowindices; 1391 if (imark > -1) { 1392 for (i=0; i<imark; i++) { 1393 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1394 } 1395 } else { 1396 for (i=0; i<nzB; i++) { 1397 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1398 else break; 1399 } 1400 imark = i; 1401 } 1402 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1403 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1404 } 1405 } else { 1406 if (idx) *idx = 0; 1407 if (v) *v = 0; 1408 } 1409 } 1410 *nz = nztot; 1411 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1412 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1417 { 1418 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1419 1420 PetscFunctionBegin; 1421 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1422 baij->getrowactive = PETSC_FALSE; 1423 PetscFunctionReturn(0); 1424 } 1425 1426 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1427 { 1428 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1429 PetscErrorCode ierr; 1430 1431 PetscFunctionBegin; 1432 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1433 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1438 { 1439 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1440 Mat A = a->A,B = a->B; 1441 PetscErrorCode ierr; 1442 PetscLogDouble isend[5],irecv[5]; 1443 1444 PetscFunctionBegin; 1445 info->block_size = (PetscReal)matin->rmap->bs; 1446 1447 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1448 1449 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1450 isend[3] = info->memory; isend[4] = info->mallocs; 1451 1452 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1453 1454 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1455 isend[3] += info->memory; isend[4] += info->mallocs; 1456 1457 if (flag == MAT_LOCAL) { 1458 info->nz_used = isend[0]; 1459 info->nz_allocated = isend[1]; 1460 info->nz_unneeded = isend[2]; 1461 info->memory = isend[3]; 1462 info->mallocs = isend[4]; 1463 } else if (flag == MAT_GLOBAL_MAX) { 1464 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1465 1466 info->nz_used = irecv[0]; 1467 info->nz_allocated = irecv[1]; 1468 info->nz_unneeded = irecv[2]; 1469 info->memory = irecv[3]; 1470 info->mallocs = irecv[4]; 1471 } else if (flag == MAT_GLOBAL_SUM) { 1472 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1473 1474 info->nz_used = irecv[0]; 1475 info->nz_allocated = irecv[1]; 1476 info->nz_unneeded = irecv[2]; 1477 info->memory = irecv[3]; 1478 info->mallocs = irecv[4]; 1479 } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1480 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1481 info->fill_ratio_needed = 0; 1482 info->factor_mallocs = 0; 1483 PetscFunctionReturn(0); 1484 } 1485 1486 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1487 { 1488 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1489 PetscErrorCode ierr; 1490 1491 PetscFunctionBegin; 1492 switch (op) { 1493 case MAT_NEW_NONZERO_LOCATIONS: 1494 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1495 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1496 case MAT_KEEP_NONZERO_PATTERN: 1497 case MAT_NEW_NONZERO_LOCATION_ERR: 1498 MatCheckPreallocated(A,1); 1499 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1500 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1501 break; 1502 case MAT_ROW_ORIENTED: 1503 MatCheckPreallocated(A,1); 1504 a->roworiented = flg; 1505 1506 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1507 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1508 break; 1509 case MAT_NEW_DIAGONALS: 1510 case MAT_SORTED_FULL: 1511 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1512 break; 1513 case MAT_IGNORE_OFF_PROC_ENTRIES: 1514 a->donotstash = flg; 1515 break; 1516 case MAT_USE_HASH_TABLE: 1517 a->ht_flag = flg; 1518 a->ht_fact = 1.39; 1519 break; 1520 case MAT_SYMMETRIC: 1521 case MAT_STRUCTURALLY_SYMMETRIC: 1522 case MAT_HERMITIAN: 1523 case MAT_SUBMAT_SINGLEIS: 1524 case MAT_SYMMETRY_ETERNAL: 1525 MatCheckPreallocated(A,1); 1526 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1527 break; 1528 default: 1529 SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1530 } 1531 PetscFunctionReturn(0); 1532 } 1533 1534 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1535 { 1536 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1537 Mat_SeqBAIJ *Aloc; 1538 Mat B; 1539 PetscErrorCode ierr; 1540 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1541 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1542 MatScalar *a; 1543 1544 PetscFunctionBegin; 1545 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1546 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1547 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1548 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1549 /* Do not know preallocation information, but must set block size */ 1550 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1551 } else { 1552 B = *matout; 1553 } 1554 1555 /* copy over the A part */ 1556 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1557 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1558 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 1559 1560 for (i=0; i<mbs; i++) { 1561 rvals[0] = bs*(baij->rstartbs + i); 1562 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1563 for (j=ai[i]; j<ai[i+1]; j++) { 1564 col = (baij->cstartbs+aj[j])*bs; 1565 for (k=0; k<bs; k++) { 1566 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1567 1568 col++; a += bs; 1569 } 1570 } 1571 } 1572 /* copy over the B part */ 1573 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1574 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1575 for (i=0; i<mbs; i++) { 1576 rvals[0] = bs*(baij->rstartbs + i); 1577 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1578 for (j=ai[i]; j<ai[i+1]; j++) { 1579 col = baij->garray[aj[j]]*bs; 1580 for (k=0; k<bs; k++) { 1581 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1582 col++; 1583 a += bs; 1584 } 1585 } 1586 } 1587 ierr = PetscFree(rvals);CHKERRQ(ierr); 1588 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1589 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1590 1591 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1592 else { 1593 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1599 { 1600 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1601 Mat a = baij->A,b = baij->B; 1602 PetscErrorCode ierr; 1603 PetscInt s1,s2,s3; 1604 1605 PetscFunctionBegin; 1606 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1607 if (rr) { 1608 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1609 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1610 /* Overlap communication with computation. */ 1611 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 } 1613 if (ll) { 1614 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1615 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1616 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1617 } 1618 /* scale the diagonal block */ 1619 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1620 1621 if (rr) { 1622 /* Do a scatter end and then right scale the off-diagonal block */ 1623 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1625 } 1626 PetscFunctionReturn(0); 1627 } 1628 1629 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1630 { 1631 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1632 PetscInt *lrows; 1633 PetscInt r, len; 1634 PetscBool cong; 1635 PetscErrorCode ierr; 1636 1637 PetscFunctionBegin; 1638 /* get locally owned rows */ 1639 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 1640 /* fix right hand side if needed */ 1641 if (x && b) { 1642 const PetscScalar *xx; 1643 PetscScalar *bb; 1644 1645 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 1646 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1647 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1648 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 1649 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1650 } 1651 1652 /* actually zap the local rows */ 1653 /* 1654 Zero the required rows. If the "diagonal block" of the matrix 1655 is square and the user wishes to set the diagonal we use separate 1656 code so that MatSetValues() is not called for each diagonal allocating 1657 new memory, thus calling lots of mallocs and slowing things down. 1658 1659 */ 1660 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1661 ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1662 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1663 if ((diag != 0.0) && cong) { 1664 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1665 } else if (diag != 0.0) { 1666 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1667 if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1668 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1669 for (r = 0; r < len; ++r) { 1670 const PetscInt row = lrows[r] + A->rmap->rstart; 1671 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1672 } 1673 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1674 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1675 } else { 1676 ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1677 } 1678 ierr = PetscFree(lrows);CHKERRQ(ierr); 1679 1680 /* only change matrix nonzero state if pattern was allowed to be changed */ 1681 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1682 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1683 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1689 { 1690 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 PetscMPIInt n = A->rmap->n,p = 0; 1693 PetscInt i,j,k,r,len = 0,row,col,count; 1694 PetscInt *lrows,*owners = A->rmap->range; 1695 PetscSFNode *rrows; 1696 PetscSF sf; 1697 const PetscScalar *xx; 1698 PetscScalar *bb,*mask; 1699 Vec xmask,lmask; 1700 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1701 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1702 PetscScalar *aa; 1703 1704 PetscFunctionBegin; 1705 /* Create SF where leaves are input rows and roots are owned rows */ 1706 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1707 for (r = 0; r < n; ++r) lrows[r] = -1; 1708 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1709 for (r = 0; r < N; ++r) { 1710 const PetscInt idx = rows[r]; 1711 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1712 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1713 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1714 } 1715 rrows[r].rank = p; 1716 rrows[r].index = rows[r] - owners[p]; 1717 } 1718 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1719 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1720 /* Collect flags for rows to be zeroed */ 1721 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1722 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1723 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1724 /* Compress and put in row numbers */ 1725 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1726 /* zero diagonal part of matrix */ 1727 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1728 /* handle off diagonal part of matrix */ 1729 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1730 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1731 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1732 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1733 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1734 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1735 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1736 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1737 if (x) { 1738 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1739 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1740 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1741 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1742 } 1743 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1744 /* remove zeroed rows of off diagonal matrix */ 1745 for (i = 0; i < len; ++i) { 1746 row = lrows[i]; 1747 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1748 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1749 for (k = 0; k < count; ++k) { 1750 aa[0] = 0.0; 1751 aa += bs; 1752 } 1753 } 1754 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1755 for (i = 0; i < l->B->rmap->N; ++i) { 1756 row = i/bs; 1757 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1758 for (k = 0; k < bs; ++k) { 1759 col = bs*baij->j[j] + k; 1760 if (PetscAbsScalar(mask[col])) { 1761 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1762 if (x) bb[i] -= aa[0]*xx[col]; 1763 aa[0] = 0.0; 1764 } 1765 } 1766 } 1767 } 1768 if (x) { 1769 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1770 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1771 } 1772 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1773 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1774 ierr = PetscFree(lrows);CHKERRQ(ierr); 1775 1776 /* only change matrix nonzero state if pattern was allowed to be changed */ 1777 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1778 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1779 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1785 { 1786 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1787 PetscErrorCode ierr; 1788 1789 PetscFunctionBegin; 1790 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1791 PetscFunctionReturn(0); 1792 } 1793 1794 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1795 1796 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1797 { 1798 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1799 Mat a,b,c,d; 1800 PetscBool flg; 1801 PetscErrorCode ierr; 1802 1803 PetscFunctionBegin; 1804 a = matA->A; b = matA->B; 1805 c = matB->A; d = matB->B; 1806 1807 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1808 if (flg) { 1809 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1810 } 1811 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1816 { 1817 PetscErrorCode ierr; 1818 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1819 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1820 1821 PetscFunctionBegin; 1822 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1823 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1824 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1825 } else { 1826 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1827 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1828 } 1829 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 1830 PetscFunctionReturn(0); 1831 } 1832 1833 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1834 { 1835 PetscErrorCode ierr; 1836 1837 PetscFunctionBegin; 1838 ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1843 { 1844 PetscErrorCode ierr; 1845 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1846 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1847 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1848 1849 PetscFunctionBegin; 1850 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1855 { 1856 PetscErrorCode ierr; 1857 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1858 PetscBLASInt bnz,one=1; 1859 Mat_SeqBAIJ *x,*y; 1860 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1861 1862 PetscFunctionBegin; 1863 if (str == SAME_NONZERO_PATTERN) { 1864 PetscScalar alpha = a; 1865 x = (Mat_SeqBAIJ*)xx->A->data; 1866 y = (Mat_SeqBAIJ*)yy->A->data; 1867 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1868 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1869 x = (Mat_SeqBAIJ*)xx->B->data; 1870 y = (Mat_SeqBAIJ*)yy->B->data; 1871 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 1872 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1873 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1874 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1875 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1876 } else { 1877 Mat B; 1878 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1879 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1880 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1881 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1882 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1883 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1884 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1885 ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 1886 ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1887 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1888 ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1889 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1890 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1891 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 1892 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1893 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 1898 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1899 { 1900 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1901 PetscErrorCode ierr; 1902 1903 PetscFunctionBegin; 1904 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1905 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1906 PetscFunctionReturn(0); 1907 } 1908 1909 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1910 { 1911 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1912 PetscErrorCode ierr; 1913 1914 PetscFunctionBegin; 1915 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1916 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1917 PetscFunctionReturn(0); 1918 } 1919 1920 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1921 { 1922 PetscErrorCode ierr; 1923 IS iscol_local; 1924 PetscInt csize; 1925 1926 PetscFunctionBegin; 1927 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1928 if (call == MAT_REUSE_MATRIX) { 1929 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1930 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1931 } else { 1932 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1933 } 1934 ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1935 if (call == MAT_INITIAL_MATRIX) { 1936 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1937 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 /* 1943 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1944 in local and then by concatenating the local matrices the end result. 1945 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1946 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1947 */ 1948 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1949 { 1950 PetscErrorCode ierr; 1951 PetscMPIInt rank,size; 1952 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1953 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1954 Mat M,Mreuse; 1955 MatScalar *vwork,*aa; 1956 MPI_Comm comm; 1957 IS isrow_new, iscol_new; 1958 Mat_SeqBAIJ *aij; 1959 1960 PetscFunctionBegin; 1961 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1962 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1963 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1964 /* The compression and expansion should be avoided. Doesn't point 1965 out errors, might change the indices, hence buggey */ 1966 ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 1967 ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 1968 1969 if (call == MAT_REUSE_MATRIX) { 1970 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 1971 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1972 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 1973 } else { 1974 ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 1975 } 1976 ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 1977 ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 1978 /* 1979 m - number of local rows 1980 n - number of columns (same on all processors) 1981 rstart - first row in new global matrix generated 1982 */ 1983 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 1984 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1985 m = m/bs; 1986 n = n/bs; 1987 1988 if (call == MAT_INITIAL_MATRIX) { 1989 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1990 ii = aij->i; 1991 jj = aij->j; 1992 1993 /* 1994 Determine the number of non-zeros in the diagonal and off-diagonal 1995 portions of the matrix in order to do correct preallocation 1996 */ 1997 1998 /* first get start and end of "diagonal" columns */ 1999 if (csize == PETSC_DECIDE) { 2000 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2001 if (mglobal == n*bs) { /* square matrix */ 2002 nlocal = m; 2003 } else { 2004 nlocal = n/size + ((n % size) > rank); 2005 } 2006 } else { 2007 nlocal = csize/bs; 2008 } 2009 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2010 rstart = rend - nlocal; 2011 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2012 2013 /* next, compute all the lengths */ 2014 ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 2015 for (i=0; i<m; i++) { 2016 jend = ii[i+1] - ii[i]; 2017 olen = 0; 2018 dlen = 0; 2019 for (j=0; j<jend; j++) { 2020 if (*jj < rstart || *jj >= rend) olen++; 2021 else dlen++; 2022 jj++; 2023 } 2024 olens[i] = olen; 2025 dlens[i] = dlen; 2026 } 2027 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2028 ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 2029 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 2030 ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2031 ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2032 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 2033 } else { 2034 PetscInt ml,nl; 2035 2036 M = *newmat; 2037 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2038 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2039 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2040 /* 2041 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2042 rather than the slower MatSetValues(). 2043 */ 2044 M->was_assembled = PETSC_TRUE; 2045 M->assembled = PETSC_FALSE; 2046 } 2047 ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2048 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2049 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2050 ii = aij->i; 2051 jj = aij->j; 2052 aa = aij->a; 2053 for (i=0; i<m; i++) { 2054 row = rstart/bs + i; 2055 nz = ii[i+1] - ii[i]; 2056 cwork = jj; jj += nz; 2057 vwork = aa; aa += nz*bs*bs; 2058 ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2059 } 2060 2061 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 *newmat = M; 2064 2065 /* save submatrix used in processor for next request */ 2066 if (call == MAT_INITIAL_MATRIX) { 2067 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2068 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2069 } 2070 PetscFunctionReturn(0); 2071 } 2072 2073 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2074 { 2075 MPI_Comm comm,pcomm; 2076 PetscInt clocal_size,nrows; 2077 const PetscInt *rows; 2078 PetscMPIInt size; 2079 IS crowp,lcolp; 2080 PetscErrorCode ierr; 2081 2082 PetscFunctionBegin; 2083 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2084 /* make a collective version of 'rowp' */ 2085 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 2086 if (pcomm==comm) { 2087 crowp = rowp; 2088 } else { 2089 ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 2090 ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 2091 ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 2092 ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 2093 } 2094 ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2095 /* make a local version of 'colp' */ 2096 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2097 ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2098 if (size==1) { 2099 lcolp = colp; 2100 } else { 2101 ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 2102 } 2103 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 2104 /* now we just get the submatrix */ 2105 ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2106 ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2107 /* clean up */ 2108 if (pcomm!=comm) { 2109 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2110 } 2111 if (size>1) { 2112 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 2113 } 2114 PetscFunctionReturn(0); 2115 } 2116 2117 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2118 { 2119 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2120 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2121 2122 PetscFunctionBegin; 2123 if (nghosts) *nghosts = B->nbs; 2124 if (ghosts) *ghosts = baij->garray; 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2129 { 2130 Mat B; 2131 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2132 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2133 Mat_SeqAIJ *b; 2134 PetscErrorCode ierr; 2135 PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2136 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2137 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2138 2139 PetscFunctionBegin; 2140 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2141 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2142 2143 /* ---------------------------------------------------------------- 2144 Tell every processor the number of nonzeros per row 2145 */ 2146 ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2147 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2148 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2149 } 2150 ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2151 displs = recvcounts + size; 2152 for (i=0; i<size; i++) { 2153 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2154 displs[i] = A->rmap->range[i]/bs; 2155 } 2156 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2157 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2158 #else 2159 sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2160 ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2161 #endif 2162 /* --------------------------------------------------------------- 2163 Create the sequential matrix of the same type as the local block diagonal 2164 */ 2165 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2166 ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2167 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2168 ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2169 b = (Mat_SeqAIJ*)B->data; 2170 2171 /*-------------------------------------------------------------------- 2172 Copy my part of matrix column indices over 2173 */ 2174 sendcount = ad->nz + bd->nz; 2175 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2176 a_jsendbuf = ad->j; 2177 b_jsendbuf = bd->j; 2178 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2179 cnt = 0; 2180 for (i=0; i<n; i++) { 2181 2182 /* put in lower diagonal portion */ 2183 m = bd->i[i+1] - bd->i[i]; 2184 while (m > 0) { 2185 /* is it above diagonal (in bd (compressed) numbering) */ 2186 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2187 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2188 m--; 2189 } 2190 2191 /* put in diagonal portion */ 2192 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2193 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2194 } 2195 2196 /* put in upper diagonal portion */ 2197 while (m-- > 0) { 2198 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2199 } 2200 } 2201 if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2202 2203 /*-------------------------------------------------------------------- 2204 Gather all column indices to all processors 2205 */ 2206 for (i=0; i<size; i++) { 2207 recvcounts[i] = 0; 2208 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2209 recvcounts[i] += lens[j]; 2210 } 2211 } 2212 displs[0] = 0; 2213 for (i=1; i<size; i++) { 2214 displs[i] = displs[i-1] + recvcounts[i-1]; 2215 } 2216 #if defined(PETSC_HAVE_MPI_IN_PLACE) 2217 ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2218 #else 2219 ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2220 #endif 2221 /*-------------------------------------------------------------------- 2222 Assemble the matrix into useable form (note numerical values not yet set) 2223 */ 2224 /* set the b->ilen (length of each row) values */ 2225 ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr); 2226 /* set the b->i indices */ 2227 b->i[0] = 0; 2228 for (i=1; i<=A->rmap->N/bs; i++) { 2229 b->i[i] = b->i[i-1] + lens[i-1]; 2230 } 2231 ierr = PetscFree(lens);CHKERRQ(ierr); 2232 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2233 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2234 ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2235 2236 if (A->symmetric) { 2237 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2238 } else if (A->hermitian) { 2239 ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2240 } else if (A->structurally_symmetric) { 2241 ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2242 } 2243 *newmat = B; 2244 PetscFunctionReturn(0); 2245 } 2246 2247 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2248 { 2249 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2250 PetscErrorCode ierr; 2251 Vec bb1 = 0; 2252 2253 PetscFunctionBegin; 2254 if (flag == SOR_APPLY_UPPER) { 2255 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2260 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2261 } 2262 2263 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2264 if (flag & SOR_ZERO_INITIAL_GUESS) { 2265 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2266 its--; 2267 } 2268 2269 while (its--) { 2270 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2271 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2272 2273 /* update rhs: bb1 = bb - B*x */ 2274 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2275 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2276 2277 /* local sweep */ 2278 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2279 } 2280 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2281 if (flag & SOR_ZERO_INITIAL_GUESS) { 2282 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2283 its--; 2284 } 2285 while (its--) { 2286 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2287 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2288 2289 /* update rhs: bb1 = bb - B*x */ 2290 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2291 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2292 2293 /* local sweep */ 2294 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2295 } 2296 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2297 if (flag & SOR_ZERO_INITIAL_GUESS) { 2298 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2299 its--; 2300 } 2301 while (its--) { 2302 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2303 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2304 2305 /* update rhs: bb1 = bb - B*x */ 2306 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2307 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2308 2309 /* local sweep */ 2310 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2311 } 2312 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2313 2314 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2315 PetscFunctionReturn(0); 2316 } 2317 2318 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 2319 { 2320 PetscErrorCode ierr; 2321 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2322 PetscInt N,i,*garray = aij->garray; 2323 PetscInt ib,jb,bs = A->rmap->bs; 2324 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2325 MatScalar *a_val = a_aij->a; 2326 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2327 MatScalar *b_val = b_aij->a; 2328 PetscReal *work; 2329 2330 PetscFunctionBegin; 2331 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 2332 ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2333 if (type == NORM_2) { 2334 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2335 for (jb=0; jb<bs; jb++) { 2336 for (ib=0; ib<bs; ib++) { 2337 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2338 a_val++; 2339 } 2340 } 2341 } 2342 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2343 for (jb=0; jb<bs; jb++) { 2344 for (ib=0; ib<bs; ib++) { 2345 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2346 b_val++; 2347 } 2348 } 2349 } 2350 } else if (type == NORM_1) { 2351 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2352 for (jb=0; jb<bs; jb++) { 2353 for (ib=0; ib<bs; ib++) { 2354 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2355 a_val++; 2356 } 2357 } 2358 } 2359 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2360 for (jb=0; jb<bs; jb++) { 2361 for (ib=0; ib<bs; ib++) { 2362 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2363 b_val++; 2364 } 2365 } 2366 } 2367 } else if (type == NORM_INFINITY) { 2368 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2369 for (jb=0; jb<bs; jb++) { 2370 for (ib=0; ib<bs; ib++) { 2371 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2372 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2373 a_val++; 2374 } 2375 } 2376 } 2377 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2378 for (jb=0; jb<bs; jb++) { 2379 for (ib=0; ib<bs; ib++) { 2380 int col = garray[b_aij->j[i]] * bs + jb; 2381 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2382 b_val++; 2383 } 2384 } 2385 } 2386 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 2387 if (type == NORM_INFINITY) { 2388 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2389 } else { 2390 ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2391 } 2392 ierr = PetscFree(work);CHKERRQ(ierr); 2393 if (type == NORM_2) { 2394 for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 2395 } 2396 PetscFunctionReturn(0); 2397 } 2398 2399 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2400 { 2401 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2402 PetscErrorCode ierr; 2403 2404 PetscFunctionBegin; 2405 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2406 A->factorerrortype = a->A->factorerrortype; 2407 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2408 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2409 PetscFunctionReturn(0); 2410 } 2411 2412 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2413 { 2414 PetscErrorCode ierr; 2415 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2416 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2417 2418 PetscFunctionBegin; 2419 if (!Y->preallocated) { 2420 ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 2421 } else if (!aij->nz) { 2422 PetscInt nonew = aij->nonew; 2423 ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2424 aij->nonew = nonew; 2425 } 2426 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2431 { 2432 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2433 PetscErrorCode ierr; 2434 2435 PetscFunctionBegin; 2436 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2437 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2438 if (d) { 2439 PetscInt rstart; 2440 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2441 *d += rstart/A->rmap->bs; 2442 2443 } 2444 PetscFunctionReturn(0); 2445 } 2446 2447 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2448 { 2449 PetscFunctionBegin; 2450 *a = ((Mat_MPIBAIJ*)A->data)->A; 2451 PetscFunctionReturn(0); 2452 } 2453 2454 /* -------------------------------------------------------------------*/ 2455 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2456 MatGetRow_MPIBAIJ, 2457 MatRestoreRow_MPIBAIJ, 2458 MatMult_MPIBAIJ, 2459 /* 4*/ MatMultAdd_MPIBAIJ, 2460 MatMultTranspose_MPIBAIJ, 2461 MatMultTransposeAdd_MPIBAIJ, 2462 0, 2463 0, 2464 0, 2465 /*10*/ 0, 2466 0, 2467 0, 2468 MatSOR_MPIBAIJ, 2469 MatTranspose_MPIBAIJ, 2470 /*15*/ MatGetInfo_MPIBAIJ, 2471 MatEqual_MPIBAIJ, 2472 MatGetDiagonal_MPIBAIJ, 2473 MatDiagonalScale_MPIBAIJ, 2474 MatNorm_MPIBAIJ, 2475 /*20*/ MatAssemblyBegin_MPIBAIJ, 2476 MatAssemblyEnd_MPIBAIJ, 2477 MatSetOption_MPIBAIJ, 2478 MatZeroEntries_MPIBAIJ, 2479 /*24*/ MatZeroRows_MPIBAIJ, 2480 0, 2481 0, 2482 0, 2483 0, 2484 /*29*/ MatSetUp_MPIBAIJ, 2485 0, 2486 0, 2487 MatGetDiagonalBlock_MPIBAIJ, 2488 0, 2489 /*34*/ MatDuplicate_MPIBAIJ, 2490 0, 2491 0, 2492 0, 2493 0, 2494 /*39*/ MatAXPY_MPIBAIJ, 2495 MatCreateSubMatrices_MPIBAIJ, 2496 MatIncreaseOverlap_MPIBAIJ, 2497 MatGetValues_MPIBAIJ, 2498 MatCopy_MPIBAIJ, 2499 /*44*/ 0, 2500 MatScale_MPIBAIJ, 2501 MatShift_MPIBAIJ, 2502 0, 2503 MatZeroRowsColumns_MPIBAIJ, 2504 /*49*/ 0, 2505 0, 2506 0, 2507 0, 2508 0, 2509 /*54*/ MatFDColoringCreate_MPIXAIJ, 2510 0, 2511 MatSetUnfactored_MPIBAIJ, 2512 MatPermute_MPIBAIJ, 2513 MatSetValuesBlocked_MPIBAIJ, 2514 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2515 MatDestroy_MPIBAIJ, 2516 MatView_MPIBAIJ, 2517 0, 2518 0, 2519 /*64*/ 0, 2520 0, 2521 0, 2522 0, 2523 0, 2524 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2525 0, 2526 0, 2527 0, 2528 0, 2529 /*74*/ 0, 2530 MatFDColoringApply_BAIJ, 2531 0, 2532 0, 2533 0, 2534 /*79*/ 0, 2535 0, 2536 0, 2537 0, 2538 MatLoad_MPIBAIJ, 2539 /*84*/ 0, 2540 0, 2541 0, 2542 0, 2543 0, 2544 /*89*/ 0, 2545 0, 2546 0, 2547 0, 2548 0, 2549 /*94*/ 0, 2550 0, 2551 0, 2552 0, 2553 0, 2554 /*99*/ 0, 2555 0, 2556 0, 2557 0, 2558 0, 2559 /*104*/0, 2560 MatRealPart_MPIBAIJ, 2561 MatImaginaryPart_MPIBAIJ, 2562 0, 2563 0, 2564 /*109*/0, 2565 0, 2566 0, 2567 0, 2568 MatMissingDiagonal_MPIBAIJ, 2569 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2570 0, 2571 MatGetGhosts_MPIBAIJ, 2572 0, 2573 0, 2574 /*119*/0, 2575 0, 2576 0, 2577 0, 2578 MatGetMultiProcBlock_MPIBAIJ, 2579 /*124*/0, 2580 MatGetColumnNorms_MPIBAIJ, 2581 MatInvertBlockDiagonal_MPIBAIJ, 2582 0, 2583 0, 2584 /*129*/ 0, 2585 0, 2586 0, 2587 0, 2588 0, 2589 /*134*/ 0, 2590 0, 2591 0, 2592 0, 2593 0, 2594 /*139*/ MatSetBlockSizes_Default, 2595 0, 2596 0, 2597 MatFDColoringSetUp_MPIXAIJ, 2598 0, 2599 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 2600 }; 2601 2602 2603 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2604 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2605 2606 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2607 { 2608 PetscInt m,rstart,cstart,cend; 2609 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2610 const PetscInt *JJ =0; 2611 PetscScalar *values=0; 2612 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2613 PetscErrorCode ierr; 2614 PetscBool nooffprocentries; 2615 2616 PetscFunctionBegin; 2617 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2618 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2619 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2620 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2621 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2622 m = B->rmap->n/bs; 2623 rstart = B->rmap->rstart/bs; 2624 cstart = B->cmap->rstart/bs; 2625 cend = B->cmap->rend/bs; 2626 2627 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2628 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2629 for (i=0; i<m; i++) { 2630 nz = ii[i+1] - ii[i]; 2631 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2632 nz_max = PetscMax(nz_max,nz); 2633 dlen = 0; 2634 olen = 0; 2635 JJ = jj + ii[i]; 2636 for (j=0; j<nz; j++) { 2637 if (*JJ < cstart || *JJ >= cend) olen++; 2638 else dlen++; 2639 JJ++; 2640 } 2641 d_nnz[i] = dlen; 2642 o_nnz[i] = olen; 2643 } 2644 ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2645 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2646 2647 values = (PetscScalar*)V; 2648 if (!values) { 2649 ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2650 } 2651 for (i=0; i<m; i++) { 2652 PetscInt row = i + rstart; 2653 PetscInt ncols = ii[i+1] - ii[i]; 2654 const PetscInt *icols = jj + ii[i]; 2655 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2656 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2657 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2658 } else { /* block ordering does not match so we can only insert one block at a time. */ 2659 PetscInt j; 2660 for (j=0; j<ncols; j++) { 2661 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2662 ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2663 } 2664 } 2665 } 2666 2667 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2668 nooffprocentries = B->nooffprocentries; 2669 B->nooffprocentries = PETSC_TRUE; 2670 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2671 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2672 B->nooffprocentries = nooffprocentries; 2673 2674 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2675 PetscFunctionReturn(0); 2676 } 2677 2678 /*@C 2679 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2680 2681 Collective 2682 2683 Input Parameters: 2684 + B - the matrix 2685 . bs - the block size 2686 . i - the indices into j for the start of each local row (starts with zero) 2687 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2688 - v - optional values in the matrix 2689 2690 Level: advanced 2691 2692 Notes: 2693 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2694 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2695 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2696 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2697 block column and the second index is over columns within a block. 2698 2699 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2700 2701 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2702 @*/ 2703 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2704 { 2705 PetscErrorCode ierr; 2706 2707 PetscFunctionBegin; 2708 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2709 PetscValidType(B,1); 2710 PetscValidLogicalCollectiveInt(B,bs,2); 2711 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2712 PetscFunctionReturn(0); 2713 } 2714 2715 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2716 { 2717 Mat_MPIBAIJ *b; 2718 PetscErrorCode ierr; 2719 PetscInt i; 2720 PetscMPIInt size; 2721 2722 PetscFunctionBegin; 2723 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2724 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2725 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2726 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2727 2728 if (d_nnz) { 2729 for (i=0; i<B->rmap->n/bs; i++) { 2730 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2731 } 2732 } 2733 if (o_nnz) { 2734 for (i=0; i<B->rmap->n/bs; i++) { 2735 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2736 } 2737 } 2738 2739 b = (Mat_MPIBAIJ*)B->data; 2740 b->bs2 = bs*bs; 2741 b->mbs = B->rmap->n/bs; 2742 b->nbs = B->cmap->n/bs; 2743 b->Mbs = B->rmap->N/bs; 2744 b->Nbs = B->cmap->N/bs; 2745 2746 for (i=0; i<=b->size; i++) { 2747 b->rangebs[i] = B->rmap->range[i]/bs; 2748 } 2749 b->rstartbs = B->rmap->rstart/bs; 2750 b->rendbs = B->rmap->rend/bs; 2751 b->cstartbs = B->cmap->rstart/bs; 2752 b->cendbs = B->cmap->rend/bs; 2753 2754 #if defined(PETSC_USE_CTABLE) 2755 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2756 #else 2757 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2758 #endif 2759 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2760 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2761 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2762 2763 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2764 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2765 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2766 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2767 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2768 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2769 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2770 2771 if (!B->preallocated) { 2772 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2773 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2774 ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 2775 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2776 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2777 } 2778 2779 ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2780 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2781 B->preallocated = PETSC_TRUE; 2782 B->was_assembled = PETSC_FALSE; 2783 B->assembled = PETSC_FALSE; 2784 PetscFunctionReturn(0); 2785 } 2786 2787 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2788 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2789 2790 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2791 { 2792 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2793 PetscErrorCode ierr; 2794 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2795 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2796 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2797 2798 PetscFunctionBegin; 2799 ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 2800 ii[0] = 0; 2801 for (i=0; i<M; i++) { 2802 if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2803 if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 2804 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2805 /* remove one from count of matrix has diagonal */ 2806 for (j=id[i]; j<id[i+1]; j++) { 2807 if (jd[j] == i) {ii[i+1]--;break;} 2808 } 2809 } 2810 ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 2811 cnt = 0; 2812 for (i=0; i<M; i++) { 2813 for (j=io[i]; j<io[i+1]; j++) { 2814 if (garray[jo[j]] > rstart) break; 2815 jj[cnt++] = garray[jo[j]]; 2816 } 2817 for (k=id[i]; k<id[i+1]; k++) { 2818 if (jd[k] != i) { 2819 jj[cnt++] = rstart + jd[k]; 2820 } 2821 } 2822 for (; j<io[i+1]; j++) { 2823 jj[cnt++] = garray[jo[j]]; 2824 } 2825 } 2826 ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2831 2832 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2833 2834 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2835 { 2836 PetscErrorCode ierr; 2837 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2838 Mat B; 2839 Mat_MPIAIJ *b; 2840 2841 PetscFunctionBegin; 2842 if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2843 2844 if (reuse == MAT_REUSE_MATRIX) { 2845 B = *newmat; 2846 } else { 2847 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2848 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2849 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2850 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2851 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 2852 ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 2853 } 2854 b = (Mat_MPIAIJ*) B->data; 2855 2856 if (reuse == MAT_REUSE_MATRIX) { 2857 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 2858 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 2859 } else { 2860 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 2861 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2862 ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 2863 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 2864 ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 2865 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2866 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2867 } 2868 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2869 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2870 2871 if (reuse == MAT_INPLACE_MATRIX) { 2872 ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 2873 } else { 2874 *newmat = B; 2875 } 2876 PetscFunctionReturn(0); 2877 } 2878 2879 /*MC 2880 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2881 2882 Options Database Keys: 2883 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2884 . -mat_block_size <bs> - set the blocksize used to store the matrix 2885 - -mat_use_hash_table <fact> 2886 2887 Level: beginner 2888 2889 Notes: 2890 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 2891 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2892 2893 .seealso: MatCreateBAIJ 2894 M*/ 2895 2896 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2897 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 2898 2899 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2900 { 2901 Mat_MPIBAIJ *b; 2902 PetscErrorCode ierr; 2903 PetscBool flg = PETSC_FALSE; 2904 2905 PetscFunctionBegin; 2906 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 2907 B->data = (void*)b; 2908 2909 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2910 B->assembled = PETSC_FALSE; 2911 2912 B->insertmode = NOT_SET_VALUES; 2913 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 2914 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 2915 2916 /* build local table of row and column ownerships */ 2917 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 2918 2919 /* build cache for off array entries formed */ 2920 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 2921 2922 b->donotstash = PETSC_FALSE; 2923 b->colmap = NULL; 2924 b->garray = NULL; 2925 b->roworiented = PETSC_TRUE; 2926 2927 /* stuff used in block assembly */ 2928 b->barray = 0; 2929 2930 /* stuff used for matrix vector multiply */ 2931 b->lvec = 0; 2932 b->Mvctx = 0; 2933 2934 /* stuff for MatGetRow() */ 2935 b->rowindices = 0; 2936 b->rowvalues = 0; 2937 b->getrowactive = PETSC_FALSE; 2938 2939 /* hash table stuff */ 2940 b->ht = 0; 2941 b->hd = 0; 2942 b->ht_size = 0; 2943 b->ht_flag = PETSC_FALSE; 2944 b->ht_fact = 0; 2945 b->ht_total_ct = 0; 2946 b->ht_insert_ct = 0; 2947 2948 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2949 b->ijonly = PETSC_FALSE; 2950 2951 2952 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2953 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2954 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 2955 #if defined(PETSC_HAVE_HYPRE) 2956 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 2957 #endif 2958 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2959 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2960 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2961 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2962 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2963 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2964 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 2965 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 2966 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 2967 2968 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2969 ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr); 2970 if (flg) { 2971 PetscReal fact = 1.39; 2972 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 2973 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 2974 if (fact <= 1.0) fact = 1.39; 2975 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2976 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 2977 } 2978 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2979 PetscFunctionReturn(0); 2980 } 2981 2982 /*MC 2983 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2984 2985 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2986 and MATMPIBAIJ otherwise. 2987 2988 Options Database Keys: 2989 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2990 2991 Level: beginner 2992 2993 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 2994 M*/ 2995 2996 /*@C 2997 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2998 (block compressed row). For good matrix assembly performance 2999 the user should preallocate the matrix storage by setting the parameters 3000 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3001 performance can be increased by more than a factor of 50. 3002 3003 Collective on Mat 3004 3005 Input Parameters: 3006 + B - the matrix 3007 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3008 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3009 . d_nz - number of block nonzeros per block row in diagonal portion of local 3010 submatrix (same for all local rows) 3011 . d_nnz - array containing the number of block nonzeros in the various block rows 3012 of the in diagonal portion of the local (possibly different for each block 3013 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 3014 set it even if it is zero. 3015 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3016 submatrix (same for all local rows). 3017 - o_nnz - array containing the number of nonzeros in the various block rows of the 3018 off-diagonal portion of the local submatrix (possibly different for 3019 each block row) or NULL. 3020 3021 If the *_nnz parameter is given then the *_nz parameter is ignored 3022 3023 Options Database Keys: 3024 + -mat_block_size - size of the blocks to use 3025 - -mat_use_hash_table <fact> 3026 3027 Notes: 3028 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3029 than it must be used on all processors that share the object for that argument. 3030 3031 Storage Information: 3032 For a square global matrix we define each processor's diagonal portion 3033 to be its local rows and the corresponding columns (a square submatrix); 3034 each processor's off-diagonal portion encompasses the remainder of the 3035 local matrix (a rectangular submatrix). 3036 3037 The user can specify preallocated storage for the diagonal part of 3038 the local submatrix with either d_nz or d_nnz (not both). Set 3039 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3040 memory allocation. Likewise, specify preallocated storage for the 3041 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3042 3043 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3044 the figure below we depict these three local rows and all columns (0-11). 3045 3046 .vb 3047 0 1 2 3 4 5 6 7 8 9 10 11 3048 -------------------------- 3049 row 3 |o o o d d d o o o o o o 3050 row 4 |o o o d d d o o o o o o 3051 row 5 |o o o d d d o o o o o o 3052 -------------------------- 3053 .ve 3054 3055 Thus, any entries in the d locations are stored in the d (diagonal) 3056 submatrix, and any entries in the o locations are stored in the 3057 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3058 stored simply in the MATSEQBAIJ format for compressed row storage. 3059 3060 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3061 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3062 In general, for PDE problems in which most nonzeros are near the diagonal, 3063 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3064 or you will get TERRIBLE performance; see the users' manual chapter on 3065 matrices. 3066 3067 You can call MatGetInfo() to get information on how effective the preallocation was; 3068 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3069 You can also run with the option -info and look for messages with the string 3070 malloc in them to see if additional memory allocation was needed. 3071 3072 Level: intermediate 3073 3074 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3075 @*/ 3076 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3077 { 3078 PetscErrorCode ierr; 3079 3080 PetscFunctionBegin; 3081 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3082 PetscValidType(B,1); 3083 PetscValidLogicalCollectiveInt(B,bs,2); 3084 ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3085 PetscFunctionReturn(0); 3086 } 3087 3088 /*@C 3089 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3090 (block compressed row). For good matrix assembly performance 3091 the user should preallocate the matrix storage by setting the parameters 3092 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3093 performance can be increased by more than a factor of 50. 3094 3095 Collective 3096 3097 Input Parameters: 3098 + comm - MPI communicator 3099 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3100 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3101 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3102 This value should be the same as the local size used in creating the 3103 y vector for the matrix-vector product y = Ax. 3104 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3105 This value should be the same as the local size used in creating the 3106 x vector for the matrix-vector product y = Ax. 3107 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3108 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3109 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3110 submatrix (same for all local rows) 3111 . d_nnz - array containing the number of nonzero blocks in the various block rows 3112 of the in diagonal portion of the local (possibly different for each block 3113 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3114 and set it even if it is zero. 3115 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3116 submatrix (same for all local rows). 3117 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3118 off-diagonal portion of the local submatrix (possibly different for 3119 each block row) or NULL. 3120 3121 Output Parameter: 3122 . A - the matrix 3123 3124 Options Database Keys: 3125 + -mat_block_size - size of the blocks to use 3126 - -mat_use_hash_table <fact> 3127 3128 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3129 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3130 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3131 3132 Notes: 3133 If the *_nnz parameter is given then the *_nz parameter is ignored 3134 3135 A nonzero block is any block that as 1 or more nonzeros in it 3136 3137 The user MUST specify either the local or global matrix dimensions 3138 (possibly both). 3139 3140 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3141 than it must be used on all processors that share the object for that argument. 3142 3143 Storage Information: 3144 For a square global matrix we define each processor's diagonal portion 3145 to be its local rows and the corresponding columns (a square submatrix); 3146 each processor's off-diagonal portion encompasses the remainder of the 3147 local matrix (a rectangular submatrix). 3148 3149 The user can specify preallocated storage for the diagonal part of 3150 the local submatrix with either d_nz or d_nnz (not both). Set 3151 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3152 memory allocation. Likewise, specify preallocated storage for the 3153 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3154 3155 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3156 the figure below we depict these three local rows and all columns (0-11). 3157 3158 .vb 3159 0 1 2 3 4 5 6 7 8 9 10 11 3160 -------------------------- 3161 row 3 |o o o d d d o o o o o o 3162 row 4 |o o o d d d o o o o o o 3163 row 5 |o o o d d d o o o o o o 3164 -------------------------- 3165 .ve 3166 3167 Thus, any entries in the d locations are stored in the d (diagonal) 3168 submatrix, and any entries in the o locations are stored in the 3169 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3170 stored simply in the MATSEQBAIJ format for compressed row storage. 3171 3172 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3173 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3174 In general, for PDE problems in which most nonzeros are near the diagonal, 3175 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3176 or you will get TERRIBLE performance; see the users' manual chapter on 3177 matrices. 3178 3179 Level: intermediate 3180 3181 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3182 @*/ 3183 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3184 { 3185 PetscErrorCode ierr; 3186 PetscMPIInt size; 3187 3188 PetscFunctionBegin; 3189 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3190 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3191 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3192 if (size > 1) { 3193 ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3194 ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3195 } else { 3196 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3197 ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 3198 } 3199 PetscFunctionReturn(0); 3200 } 3201 3202 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3203 { 3204 Mat mat; 3205 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3206 PetscErrorCode ierr; 3207 PetscInt len=0; 3208 3209 PetscFunctionBegin; 3210 *newmat = 0; 3211 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3212 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3213 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3214 3215 mat->factortype = matin->factortype; 3216 mat->preallocated = PETSC_TRUE; 3217 mat->assembled = PETSC_TRUE; 3218 mat->insertmode = NOT_SET_VALUES; 3219 3220 a = (Mat_MPIBAIJ*)mat->data; 3221 mat->rmap->bs = matin->rmap->bs; 3222 a->bs2 = oldmat->bs2; 3223 a->mbs = oldmat->mbs; 3224 a->nbs = oldmat->nbs; 3225 a->Mbs = oldmat->Mbs; 3226 a->Nbs = oldmat->Nbs; 3227 3228 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3229 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3230 3231 a->size = oldmat->size; 3232 a->rank = oldmat->rank; 3233 a->donotstash = oldmat->donotstash; 3234 a->roworiented = oldmat->roworiented; 3235 a->rowindices = 0; 3236 a->rowvalues = 0; 3237 a->getrowactive = PETSC_FALSE; 3238 a->barray = 0; 3239 a->rstartbs = oldmat->rstartbs; 3240 a->rendbs = oldmat->rendbs; 3241 a->cstartbs = oldmat->cstartbs; 3242 a->cendbs = oldmat->cendbs; 3243 3244 /* hash table stuff */ 3245 a->ht = 0; 3246 a->hd = 0; 3247 a->ht_size = 0; 3248 a->ht_flag = oldmat->ht_flag; 3249 a->ht_fact = oldmat->ht_fact; 3250 a->ht_total_ct = 0; 3251 a->ht_insert_ct = 0; 3252 3253 ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr); 3254 if (oldmat->colmap) { 3255 #if defined(PETSC_USE_CTABLE) 3256 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3257 #else 3258 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 3259 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3260 ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 3261 #endif 3262 } else a->colmap = 0; 3263 3264 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3265 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 3266 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3267 ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 3268 } else a->garray = 0; 3269 3270 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 3271 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3272 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3273 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3274 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3275 3276 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3277 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3278 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3279 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3280 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3281 *newmat = mat; 3282 PetscFunctionReturn(0); 3283 } 3284 3285 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3286 { 3287 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3288 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3289 PetscScalar *matvals; 3290 PetscErrorCode ierr; 3291 3292 PetscFunctionBegin; 3293 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3294 3295 /* read in matrix header */ 3296 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3297 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3298 M = header[1]; N = header[2]; nz = header[3]; 3299 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3300 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3301 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3302 3303 /* set block sizes from the viewer's .info file */ 3304 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3305 /* set global sizes if not set already */ 3306 if (mat->rmap->N < 0) mat->rmap->N = M; 3307 if (mat->cmap->N < 0) mat->cmap->N = N; 3308 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3309 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3310 3311 /* check if the matrix sizes are correct */ 3312 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3313 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3314 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3315 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3316 ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL); 3317 ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce); 3318 mbs = m/bs; nbs = n/bs; 3319 3320 /* read in row lengths and build row indices */ 3321 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3322 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3323 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3324 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3325 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3326 3327 /* read in column indices and matrix values */ 3328 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3329 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3330 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3331 3332 { /* preallocate matrix storage */ 3333 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3334 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3335 PetscBool done; 3336 PetscInt *d_nnz,*o_nnz; 3337 3338 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3339 ierr = PetscHSetICreate(&ht);CHKERRQ(ierr); 3340 ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr); 3341 for (i=0; i<mbs; i++) { 3342 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3343 ierr = PetscHSetIClear(ht);CHKERRQ(ierr); 3344 for (k=0; k<bs; k++) 3345 for (j=rowidxs[bs*i+k]; j<rowidxs[bs*i+k+1]; j++) 3346 if (colidxs[j] >= cs && colidxs[j] < ce) { 3347 if (!PetscBTLookupSet(bt,(colidxs[j]-cs)/bs)) d_nnz[i]++; 3348 } else { 3349 ierr = PetscHSetIQueryAdd(ht,colidxs[j]/bs,&done);CHKERRQ(ierr); 3350 if (done) o_nnz[i]++; 3351 } 3352 } 3353 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3354 ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr); 3355 ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3356 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3357 } 3358 3359 /* store matrix values */ 3360 for (i=0; i<m; i++) { 3361 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3362 ierr = MatSetValues_MPIBAIJ(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3363 } 3364 3365 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3366 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3367 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3368 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3369 PetscFunctionReturn(0); 3370 } 3371 3372 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3373 { 3374 PetscErrorCode ierr; 3375 PetscBool isbinary; 3376 3377 PetscFunctionBegin; 3378 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3379 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3380 ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3381 PetscFunctionReturn(0); 3382 } 3383 3384 /*@ 3385 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3386 3387 Input Parameters: 3388 + mat - the matrix 3389 - fact - factor 3390 3391 Not Collective, each process can use a different factor 3392 3393 Level: advanced 3394 3395 Notes: 3396 This can also be set by the command line option: -mat_use_hash_table <fact> 3397 3398 .seealso: MatSetOption() 3399 @*/ 3400 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3401 { 3402 PetscErrorCode ierr; 3403 3404 PetscFunctionBegin; 3405 ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 3406 PetscFunctionReturn(0); 3407 } 3408 3409 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3410 { 3411 Mat_MPIBAIJ *baij; 3412 3413 PetscFunctionBegin; 3414 baij = (Mat_MPIBAIJ*)mat->data; 3415 baij->ht_fact = fact; 3416 PetscFunctionReturn(0); 3417 } 3418 3419 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3420 { 3421 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3422 PetscBool flg; 3423 PetscErrorCode ierr; 3424 3425 PetscFunctionBegin; 3426 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr); 3427 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3428 if (Ad) *Ad = a->A; 3429 if (Ao) *Ao = a->B; 3430 if (colmap) *colmap = a->garray; 3431 PetscFunctionReturn(0); 3432 } 3433 3434 /* 3435 Special version for direct calls from Fortran (to eliminate two function call overheads 3436 */ 3437 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3438 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3439 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3440 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3441 #endif 3442 3443 /*@C 3444 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3445 3446 Collective on Mat 3447 3448 Input Parameters: 3449 + mat - the matrix 3450 . min - number of input rows 3451 . im - input rows 3452 . nin - number of input columns 3453 . in - input columns 3454 . v - numerical values input 3455 - addvin - INSERT_VALUES or ADD_VALUES 3456 3457 Notes: 3458 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3459 3460 Level: advanced 3461 3462 .seealso: MatSetValuesBlocked() 3463 @*/ 3464 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3465 { 3466 /* convert input arguments to C version */ 3467 Mat mat = *matin; 3468 PetscInt m = *min, n = *nin; 3469 InsertMode addv = *addvin; 3470 3471 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3472 const MatScalar *value; 3473 MatScalar *barray = baij->barray; 3474 PetscBool roworiented = baij->roworiented; 3475 PetscErrorCode ierr; 3476 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3477 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3478 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3479 3480 PetscFunctionBegin; 3481 /* tasks normally handled by MatSetValuesBlocked() */ 3482 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3483 #if defined(PETSC_USE_DEBUG) 3484 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3485 if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3486 #endif 3487 if (mat->assembled) { 3488 mat->was_assembled = PETSC_TRUE; 3489 mat->assembled = PETSC_FALSE; 3490 } 3491 ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3492 3493 3494 if (!barray) { 3495 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 3496 baij->barray = barray; 3497 } 3498 3499 if (roworiented) stepval = (n-1)*bs; 3500 else stepval = (m-1)*bs; 3501 3502 for (i=0; i<m; i++) { 3503 if (im[i] < 0) continue; 3504 #if defined(PETSC_USE_DEBUG) 3505 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 3506 #endif 3507 if (im[i] >= rstart && im[i] < rend) { 3508 row = im[i] - rstart; 3509 for (j=0; j<n; j++) { 3510 /* If NumCol = 1 then a copy is not required */ 3511 if ((roworiented) && (n == 1)) { 3512 barray = (MatScalar*)v + i*bs2; 3513 } else if ((!roworiented) && (m == 1)) { 3514 barray = (MatScalar*)v + j*bs2; 3515 } else { /* Here a copy is required */ 3516 if (roworiented) { 3517 value = v + i*(stepval+bs)*bs + j*bs; 3518 } else { 3519 value = v + j*(stepval+bs)*bs + i*bs; 3520 } 3521 for (ii=0; ii<bs; ii++,value+=stepval) { 3522 for (jj=0; jj<bs; jj++) { 3523 *barray++ = *value++; 3524 } 3525 } 3526 barray -=bs2; 3527 } 3528 3529 if (in[j] >= cstart && in[j] < cend) { 3530 col = in[j] - cstart; 3531 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3532 } else if (in[j] < 0) continue; 3533 #if defined(PETSC_USE_DEBUG) 3534 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 3535 #endif 3536 else { 3537 if (mat->was_assembled) { 3538 if (!baij->colmap) { 3539 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 3540 } 3541 3542 #if defined(PETSC_USE_DEBUG) 3543 #if defined(PETSC_USE_CTABLE) 3544 { PetscInt data; 3545 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3546 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3547 } 3548 #else 3549 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3550 #endif 3551 #endif 3552 #if defined(PETSC_USE_CTABLE) 3553 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 3554 col = (col - 1)/bs; 3555 #else 3556 col = (baij->colmap[in[j]] - 1)/bs; 3557 #endif 3558 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3559 ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 3560 col = in[j]; 3561 } 3562 } else col = in[j]; 3563 ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 3564 } 3565 } 3566 } else { 3567 if (!baij->donotstash) { 3568 if (roworiented) { 3569 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3570 } else { 3571 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 3572 } 3573 } 3574 } 3575 } 3576 3577 /* task normally handled by MatSetValuesBlocked() */ 3578 ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 3579 PetscFunctionReturn(0); 3580 } 3581 3582 /*@ 3583 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3584 CSR format the local rows. 3585 3586 Collective 3587 3588 Input Parameters: 3589 + comm - MPI communicator 3590 . bs - the block size, only a block size of 1 is supported 3591 . m - number of local rows (Cannot be PETSC_DECIDE) 3592 . n - This value should be the same as the local size used in creating the 3593 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3594 calculated if N is given) For square matrices n is almost always m. 3595 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3596 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3597 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3598 . j - column indices 3599 - a - matrix values 3600 3601 Output Parameter: 3602 . mat - the matrix 3603 3604 Level: intermediate 3605 3606 Notes: 3607 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3608 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3609 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3610 3611 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3612 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3613 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3614 with column-major ordering within blocks. 3615 3616 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3617 3618 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3619 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3620 @*/ 3621 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3622 { 3623 PetscErrorCode ierr; 3624 3625 PetscFunctionBegin; 3626 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3627 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3628 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3629 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3630 ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 3631 ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr); 3632 ierr = MatSetUp(*mat);CHKERRQ(ierr); 3633 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3634 ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3635 ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3636 PetscFunctionReturn(0); 3637 } 3638 3639 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3640 { 3641 PetscErrorCode ierr; 3642 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3643 PetscInt *indx; 3644 PetscScalar *values; 3645 3646 PetscFunctionBegin; 3647 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3648 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3649 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3650 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3651 PetscInt *bindx,rmax=a->rmax,j; 3652 PetscMPIInt rank,size; 3653 3654 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3655 mbs = m/bs; Nbs = N/cbs; 3656 if (n == PETSC_DECIDE) { 3657 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 3658 } 3659 nbs = n/cbs; 3660 3661 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3662 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 3663 3664 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3665 ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr); 3666 if (rank == size-1) { 3667 /* Check sum(nbs) = Nbs */ 3668 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 3669 } 3670 3671 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3672 for (i=0; i<mbs; i++) { 3673 ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3674 nnz = nnz/bs; 3675 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3676 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3677 ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3678 } 3679 ierr = PetscFree(bindx);CHKERRQ(ierr); 3680 3681 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3682 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3683 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3684 ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr); 3685 ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3686 ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3687 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3688 } 3689 3690 /* numeric phase */ 3691 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3692 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3693 3694 for (i=0; i<m; i++) { 3695 ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3696 Ii = i + rstart; 3697 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3698 ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3699 } 3700 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3701 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3702 PetscFunctionReturn(0); 3703 } 3704