1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14 PetscInt i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs; 15 PetscScalar *va,*vv; 16 Vec vB,vA; 17 const PetscScalar *vb; 18 19 PetscFunctionBegin; 20 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 21 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 22 23 PetscCall(VecGetArrayWrite(vA,&va)); 24 if (idx) { 25 for (i=0; i<m; i++) { 26 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 27 } 28 } 29 30 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 31 PetscCall(PetscMalloc1(m,&idxb)); 32 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 33 34 PetscCall(VecGetArrayWrite(v,&vv)); 35 PetscCall(VecGetArrayRead(vB,&vb)); 36 for (i=0; i<m; i++) { 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 38 vv[i] = vb[i]; 39 if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 40 } else { 41 vv[i] = va[i]; 42 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs)) 43 idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 44 } 45 } 46 PetscCall(VecRestoreArrayWrite(vA,&vv)); 47 PetscCall(VecRestoreArrayWrite(vA,&va)); 48 PetscCall(VecRestoreArrayRead(vB,&vb)); 49 PetscCall(PetscFree(idxb)); 50 PetscCall(VecDestroy(&vA)); 51 PetscCall(VecDestroy(&vB)); 52 PetscFunctionReturn(0); 53 } 54 55 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 56 { 57 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 58 59 PetscFunctionBegin; 60 PetscCall(MatStoreValues(aij->A)); 61 PetscCall(MatStoreValues(aij->B)); 62 PetscFunctionReturn(0); 63 } 64 65 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 66 { 67 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 68 69 PetscFunctionBegin; 70 PetscCall(MatRetrieveValues(aij->A)); 71 PetscCall(MatRetrieveValues(aij->B)); 72 PetscFunctionReturn(0); 73 } 74 75 /* 76 Local utility routine that creates a mapping from the global column 77 number to the local number in the off-diagonal part of the local 78 storage of the matrix. This is done in a non scalable way since the 79 length of colmap equals the global matrix length. 80 */ 81 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 82 { 83 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 84 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 85 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 86 87 PetscFunctionBegin; 88 #if defined(PETSC_USE_CTABLE) 89 PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap)); 90 for (i=0; i<nbs; i++) { 91 PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES)); 92 } 93 #else 94 PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap)); 95 PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt))); 96 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 102 { \ 103 brow = row/bs; \ 104 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 105 rmax = aimax[brow]; nrow = ailen[brow]; \ 106 bcol = col/bs; \ 107 ridx = row % bs; cidx = col % bs; \ 108 low = 0; high = nrow; \ 109 while (high-low > 3) { \ 110 t = (low+high)/2; \ 111 if (rp[t] > bcol) high = t; \ 112 else low = t; \ 113 } \ 114 for (_i=low; _i<high; _i++) { \ 115 if (rp[_i] > bcol) break; \ 116 if (rp[_i] == bcol) { \ 117 bap = ap + bs2*_i + bs*cidx + ridx; \ 118 if (addv == ADD_VALUES) *bap += value; \ 119 else *bap = value; \ 120 goto a_noinsert; \ 121 } \ 122 } \ 123 if (a->nonew == 1) goto a_noinsert; \ 124 PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 125 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 126 N = nrow++ - 1; \ 127 /* shift up all the later entries in this row */ \ 128 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 129 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 130 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 131 rp[_i] = bcol; \ 132 ap[bs2*_i + bs*cidx + ridx] = value; \ 133 a_noinsert:; \ 134 ailen[brow] = nrow; \ 135 } 136 137 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 138 { \ 139 brow = row/bs; \ 140 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 141 rmax = bimax[brow]; nrow = bilen[brow]; \ 142 bcol = col/bs; \ 143 ridx = row % bs; cidx = col % bs; \ 144 low = 0; high = nrow; \ 145 while (high-low > 3) { \ 146 t = (low+high)/2; \ 147 if (rp[t] > bcol) high = t; \ 148 else low = t; \ 149 } \ 150 for (_i=low; _i<high; _i++) { \ 151 if (rp[_i] > bcol) break; \ 152 if (rp[_i] == bcol) { \ 153 bap = ap + bs2*_i + bs*cidx + ridx; \ 154 if (addv == ADD_VALUES) *bap += value; \ 155 else *bap = value; \ 156 goto b_noinsert; \ 157 } \ 158 } \ 159 if (b->nonew == 1) goto b_noinsert; \ 160 PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 161 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 162 N = nrow++ - 1; \ 163 /* shift up all the later entries in this row */ \ 164 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 165 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\ 166 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 167 rp[_i] = bcol; \ 168 ap[bs2*_i + bs*cidx + ridx] = value; \ 169 b_noinsert:; \ 170 bilen[brow] = nrow; \ 171 } 172 173 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 174 { 175 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 176 MatScalar value; 177 PetscBool roworiented = baij->roworiented; 178 PetscInt i,j,row,col; 179 PetscInt rstart_orig=mat->rmap->rstart; 180 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 181 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 182 183 /* Some Variables required in the macro */ 184 Mat A = baij->A; 185 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 186 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 187 MatScalar *aa =a->a; 188 189 Mat B = baij->B; 190 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 191 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 192 MatScalar *ba =b->a; 193 194 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 195 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 196 MatScalar *ap,*bap; 197 198 PetscFunctionBegin; 199 for (i=0; i<m; i++) { 200 if (im[i] < 0) continue; 201 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 202 if (im[i] >= rstart_orig && im[i] < rend_orig) { 203 row = im[i] - rstart_orig; 204 for (j=0; j<n; j++) { 205 if (in[j] >= cstart_orig && in[j] < cend_orig) { 206 col = in[j] - cstart_orig; 207 if (roworiented) value = v[i*n+j]; 208 else value = v[i+j*m]; 209 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 210 } else if (in[j] < 0) { 211 continue; 212 } else { 213 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 214 if (mat->was_assembled) { 215 if (!baij->colmap) { 216 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 217 } 218 #if defined(PETSC_USE_CTABLE) 219 PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col)); 220 col = col - 1; 221 #else 222 col = baij->colmap[in[j]/bs] - 1; 223 #endif 224 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 225 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 226 col = in[j]; 227 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 228 B = baij->B; 229 b = (Mat_SeqBAIJ*)(B)->data; 230 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 231 ba =b->a; 232 } else { 233 PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 234 col += in[j]%bs; 235 } 236 } else col = in[j]; 237 if (roworiented) value = v[i*n+j]; 238 else value = v[i+j*m]; 239 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 240 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 241 } 242 } 243 } else { 244 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 245 if (!baij->donotstash) { 246 mat->assembled = PETSC_FALSE; 247 if (roworiented) { 248 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 249 } else { 250 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 251 } 252 } 253 } 254 } 255 PetscFunctionReturn(0); 256 } 257 258 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 259 { 260 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 261 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 262 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 263 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 264 PetscBool roworiented=a->roworiented; 265 const PetscScalar *value = v; 266 MatScalar *ap,*aa = a->a,*bap; 267 268 PetscFunctionBegin; 269 rp = aj + ai[row]; 270 ap = aa + bs2*ai[row]; 271 rmax = imax[row]; 272 nrow = ailen[row]; 273 value = v; 274 low = 0; 275 high = nrow; 276 while (high-low > 7) { 277 t = (low+high)/2; 278 if (rp[t] > col) high = t; 279 else low = t; 280 } 281 for (i=low; i<high; i++) { 282 if (rp[i] > col) break; 283 if (rp[i] == col) { 284 bap = ap + bs2*i; 285 if (roworiented) { 286 if (is == ADD_VALUES) { 287 for (ii=0; ii<bs; ii++) { 288 for (jj=ii; jj<bs2; jj+=bs) { 289 bap[jj] += *value++; 290 } 291 } 292 } else { 293 for (ii=0; ii<bs; ii++) { 294 for (jj=ii; jj<bs2; jj+=bs) { 295 bap[jj] = *value++; 296 } 297 } 298 } 299 } else { 300 if (is == ADD_VALUES) { 301 for (ii=0; ii<bs; ii++,value+=bs) { 302 for (jj=0; jj<bs; jj++) { 303 bap[jj] += value[jj]; 304 } 305 bap += bs; 306 } 307 } else { 308 for (ii=0; ii<bs; ii++,value+=bs) { 309 for (jj=0; jj<bs; jj++) { 310 bap[jj] = value[jj]; 311 } 312 bap += bs; 313 } 314 } 315 } 316 goto noinsert2; 317 } 318 } 319 if (nonew == 1) goto noinsert2; 320 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 321 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 322 N = nrow++ - 1; high++; 323 /* shift up all the later entries in this row */ 324 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 325 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 326 rp[i] = col; 327 bap = ap + bs2*i; 328 if (roworiented) { 329 for (ii=0; ii<bs; ii++) { 330 for (jj=ii; jj<bs2; jj+=bs) { 331 bap[jj] = *value++; 332 } 333 } 334 } else { 335 for (ii=0; ii<bs; ii++) { 336 for (jj=0; jj<bs; jj++) { 337 *bap++ = *value++; 338 } 339 } 340 } 341 noinsert2:; 342 ailen[row] = nrow; 343 PetscFunctionReturn(0); 344 } 345 346 /* 347 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 348 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 349 */ 350 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 351 { 352 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 353 const PetscScalar *value; 354 MatScalar *barray = baij->barray; 355 PetscBool roworiented = baij->roworiented; 356 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 357 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 358 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 359 360 PetscFunctionBegin; 361 if (!barray) { 362 PetscCall(PetscMalloc1(bs2,&barray)); 363 baij->barray = barray; 364 } 365 366 if (roworiented) stepval = (n-1)*bs; 367 else stepval = (m-1)*bs; 368 369 for (i=0; i<m; i++) { 370 if (im[i] < 0) continue; 371 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 372 if (im[i] >= rstart && im[i] < rend) { 373 row = im[i] - rstart; 374 for (j=0; j<n; j++) { 375 /* If NumCol = 1 then a copy is not required */ 376 if ((roworiented) && (n == 1)) { 377 barray = (MatScalar*)v + i*bs2; 378 } else if ((!roworiented) && (m == 1)) { 379 barray = (MatScalar*)v + j*bs2; 380 } else { /* Here a copy is required */ 381 if (roworiented) { 382 value = v + (i*(stepval+bs) + j)*bs; 383 } else { 384 value = v + (j*(stepval+bs) + i)*bs; 385 } 386 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 387 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 388 barray += bs; 389 } 390 barray -= bs2; 391 } 392 393 if (in[j] >= cstart && in[j] < cend) { 394 col = in[j] - cstart; 395 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 396 } else if (in[j] < 0) { 397 continue; 398 } else { 399 PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 400 if (mat->was_assembled) { 401 if (!baij->colmap) { 402 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 403 } 404 405 #if defined(PETSC_USE_DEBUG) 406 #if defined(PETSC_USE_CTABLE) 407 { PetscInt data; 408 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 409 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 410 } 411 #else 412 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 413 #endif 414 #endif 415 #if defined(PETSC_USE_CTABLE) 416 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 417 col = (col - 1)/bs; 418 #else 419 col = (baij->colmap[in[j]] - 1)/bs; 420 #endif 421 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 422 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 423 col = in[j]; 424 } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]); 425 } else col = in[j]; 426 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 427 } 428 } 429 } else { 430 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 431 if (!baij->donotstash) { 432 if (roworiented) { 433 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 434 } else { 435 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 436 } 437 } 438 } 439 } 440 PetscFunctionReturn(0); 441 } 442 443 #define HASH_KEY 0.6180339887 444 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 445 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 446 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 447 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 448 { 449 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 450 PetscBool roworiented = baij->roworiented; 451 PetscInt i,j,row,col; 452 PetscInt rstart_orig=mat->rmap->rstart; 453 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 454 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 455 PetscReal tmp; 456 MatScalar **HD = baij->hd,value; 457 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 458 459 PetscFunctionBegin; 460 for (i=0; i<m; i++) { 461 if (PetscDefined(USE_DEBUG)) { 462 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 463 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 464 } 465 row = im[i]; 466 if (row >= rstart_orig && row < rend_orig) { 467 for (j=0; j<n; j++) { 468 col = in[j]; 469 if (roworiented) value = v[i*n+j]; 470 else value = v[i+j*m]; 471 /* Look up PetscInto the Hash Table */ 472 key = (row/bs)*Nbs+(col/bs)+1; 473 h1 = HASH(size,key,tmp); 474 475 idx = h1; 476 if (PetscDefined(USE_DEBUG)) { 477 insert_ct++; 478 total_ct++; 479 if (HT[idx] != key) { 480 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 481 if (idx == size) { 482 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 483 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 484 } 485 } 486 } else if (HT[idx] != key) { 487 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 488 if (idx == size) { 489 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 490 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 491 } 492 } 493 /* A HASH table entry is found, so insert the values at the correct address */ 494 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 495 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 496 } 497 } else if (!baij->donotstash) { 498 if (roworiented) { 499 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 500 } else { 501 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 502 } 503 } 504 } 505 if (PetscDefined(USE_DEBUG)) { 506 baij->ht_total_ct += total_ct; 507 baij->ht_insert_ct += insert_ct; 508 } 509 PetscFunctionReturn(0); 510 } 511 512 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 513 { 514 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 515 PetscBool roworiented = baij->roworiented; 516 PetscInt i,j,ii,jj,row,col; 517 PetscInt rstart=baij->rstartbs; 518 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 519 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 520 PetscReal tmp; 521 MatScalar **HD = baij->hd,*baij_a; 522 const PetscScalar *v_t,*value; 523 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 524 525 PetscFunctionBegin; 526 if (roworiented) stepval = (n-1)*bs; 527 else stepval = (m-1)*bs; 528 529 for (i=0; i<m; i++) { 530 if (PetscDefined(USE_DEBUG)) { 531 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]); 532 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 533 } 534 row = im[i]; 535 v_t = v + i*nbs2; 536 if (row >= rstart && row < rend) { 537 for (j=0; j<n; j++) { 538 col = in[j]; 539 540 /* Look up into the Hash Table */ 541 key = row*Nbs+col+1; 542 h1 = HASH(size,key,tmp); 543 544 idx = h1; 545 if (PetscDefined(USE_DEBUG)) { 546 total_ct++; 547 insert_ct++; 548 if (HT[idx] != key) { 549 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 550 if (idx == size) { 551 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 552 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 553 } 554 } 555 } else if (HT[idx] != key) { 556 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 557 if (idx == size) { 558 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 559 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 560 } 561 } 562 baij_a = HD[idx]; 563 if (roworiented) { 564 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 565 /* value = v + (i*(stepval+bs)+j)*bs; */ 566 value = v_t; 567 v_t += bs; 568 if (addv == ADD_VALUES) { 569 for (ii=0; ii<bs; ii++,value+=stepval) { 570 for (jj=ii; jj<bs2; jj+=bs) { 571 baij_a[jj] += *value++; 572 } 573 } 574 } else { 575 for (ii=0; ii<bs; ii++,value+=stepval) { 576 for (jj=ii; jj<bs2; jj+=bs) { 577 baij_a[jj] = *value++; 578 } 579 } 580 } 581 } else { 582 value = v + j*(stepval+bs)*bs + i*bs; 583 if (addv == ADD_VALUES) { 584 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 585 for (jj=0; jj<bs; jj++) { 586 baij_a[jj] += *value++; 587 } 588 } 589 } else { 590 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 591 for (jj=0; jj<bs; jj++) { 592 baij_a[jj] = *value++; 593 } 594 } 595 } 596 } 597 } 598 } else { 599 if (!baij->donotstash) { 600 if (roworiented) { 601 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 602 } else { 603 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 604 } 605 } 606 } 607 } 608 if (PetscDefined(USE_DEBUG)) { 609 baij->ht_total_ct += total_ct; 610 baij->ht_insert_ct += insert_ct; 611 } 612 PetscFunctionReturn(0); 613 } 614 615 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 616 { 617 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 618 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 619 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 620 621 PetscFunctionBegin; 622 for (i=0; i<m; i++) { 623 if (idxm[i] < 0) continue; /* negative row */ 624 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for (j=0; j<n; j++) { 628 if (idxn[j] < 0) continue; /* negative column */ 629 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 630 if (idxn[j] >= bscstart && idxn[j] < bscend) { 631 col = idxn[j] - bscstart; 632 PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j)); 633 } else { 634 if (!baij->colmap) { 635 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 636 } 637 #if defined(PETSC_USE_CTABLE) 638 PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data)); 639 data--; 640 #else 641 data = baij->colmap[idxn[j]/bs]-1; 642 #endif 643 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 644 else { 645 col = data + idxn[j]%bs; 646 PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j)); 647 } 648 } 649 } 650 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 651 } 652 PetscFunctionReturn(0); 653 } 654 655 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 656 { 657 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 658 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 659 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 660 PetscReal sum = 0.0; 661 MatScalar *v; 662 663 PetscFunctionBegin; 664 if (baij->size == 1) { 665 PetscCall(MatNorm(baij->A,type,nrm)); 666 } else { 667 if (type == NORM_FROBENIUS) { 668 v = amat->a; 669 nz = amat->nz*bs2; 670 for (i=0; i<nz; i++) { 671 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 672 } 673 v = bmat->a; 674 nz = bmat->nz*bs2; 675 for (i=0; i<nz; i++) { 676 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 677 } 678 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 679 *nrm = PetscSqrtReal(*nrm); 680 } else if (type == NORM_1) { /* max column sum */ 681 PetscReal *tmp,*tmp2; 682 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 683 PetscCall(PetscCalloc1(mat->cmap->N,&tmp)); 684 PetscCall(PetscMalloc1(mat->cmap->N,&tmp2)); 685 v = amat->a; jj = amat->j; 686 for (i=0; i<amat->nz; i++) { 687 for (j=0; j<bs; j++) { 688 col = bs*(cstart + *jj) + j; /* column index */ 689 for (row=0; row<bs; row++) { 690 tmp[col] += PetscAbsScalar(*v); v++; 691 } 692 } 693 jj++; 694 } 695 v = bmat->a; jj = bmat->j; 696 for (i=0; i<bmat->nz; i++) { 697 for (j=0; j<bs; j++) { 698 col = bs*garray[*jj] + j; 699 for (row=0; row<bs; row++) { 700 tmp[col] += PetscAbsScalar(*v); v++; 701 } 702 } 703 jj++; 704 } 705 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 706 *nrm = 0.0; 707 for (j=0; j<mat->cmap->N; j++) { 708 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 709 } 710 PetscCall(PetscFree(tmp)); 711 PetscCall(PetscFree(tmp2)); 712 } else if (type == NORM_INFINITY) { /* max row sum */ 713 PetscReal *sums; 714 PetscCall(PetscMalloc1(bs,&sums)); 715 sum = 0.0; 716 for (j=0; j<amat->mbs; j++) { 717 for (row=0; row<bs; row++) sums[row] = 0.0; 718 v = amat->a + bs2*amat->i[j]; 719 nz = amat->i[j+1]-amat->i[j]; 720 for (i=0; i<nz; i++) { 721 for (col=0; col<bs; col++) { 722 for (row=0; row<bs; row++) { 723 sums[row] += PetscAbsScalar(*v); v++; 724 } 725 } 726 } 727 v = bmat->a + bs2*bmat->i[j]; 728 nz = bmat->i[j+1]-bmat->i[j]; 729 for (i=0; i<nz; i++) { 730 for (col=0; col<bs; col++) { 731 for (row=0; row<bs; row++) { 732 sums[row] += PetscAbsScalar(*v); v++; 733 } 734 } 735 } 736 for (row=0; row<bs; row++) { 737 if (sums[row] > sum) sum = sums[row]; 738 } 739 } 740 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 741 PetscCall(PetscFree(sums)); 742 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 743 } 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 Creates the hash table, and sets the table 749 This table is created only once. 750 If new entried need to be added to the matrix 751 then the hash table has to be destroyed and 752 recreated. 753 */ 754 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 755 { 756 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 757 Mat A = baij->A,B=baij->B; 758 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 759 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 760 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 761 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 762 PetscInt *HT,key; 763 MatScalar **HD; 764 PetscReal tmp; 765 #if defined(PETSC_USE_INFO) 766 PetscInt ct=0,max=0; 767 #endif 768 769 PetscFunctionBegin; 770 if (baij->ht) PetscFunctionReturn(0); 771 772 baij->ht_size = (PetscInt)(factor*nz); 773 ht_size = baij->ht_size; 774 775 /* Allocate Memory for Hash Table */ 776 PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht)); 777 HD = baij->hd; 778 HT = baij->ht; 779 780 /* Loop Over A */ 781 for (i=0; i<a->mbs; i++) { 782 for (j=ai[i]; j<ai[i+1]; j++) { 783 row = i+rstart; 784 col = aj[j]+cstart; 785 786 key = row*Nbs + col + 1; 787 h1 = HASH(ht_size,key,tmp); 788 for (k=0; k<ht_size; k++) { 789 if (!HT[(h1+k)%ht_size]) { 790 HT[(h1+k)%ht_size] = key; 791 HD[(h1+k)%ht_size] = a->a + j*bs2; 792 break; 793 #if defined(PETSC_USE_INFO) 794 } else { 795 ct++; 796 #endif 797 } 798 } 799 #if defined(PETSC_USE_INFO) 800 if (k> max) max = k; 801 #endif 802 } 803 } 804 /* Loop Over B */ 805 for (i=0; i<b->mbs; i++) { 806 for (j=bi[i]; j<bi[i+1]; j++) { 807 row = i+rstart; 808 col = garray[bj[j]]; 809 key = row*Nbs + col + 1; 810 h1 = HASH(ht_size,key,tmp); 811 for (k=0; k<ht_size; k++) { 812 if (!HT[(h1+k)%ht_size]) { 813 HT[(h1+k)%ht_size] = key; 814 HD[(h1+k)%ht_size] = b->a + j*bs2; 815 break; 816 #if defined(PETSC_USE_INFO) 817 } else { 818 ct++; 819 #endif 820 } 821 } 822 #if defined(PETSC_USE_INFO) 823 if (k> max) max = k; 824 #endif 825 } 826 } 827 828 /* Print Summary */ 829 #if defined(PETSC_USE_INFO) 830 for (i=0,j=0; i<ht_size; i++) { 831 if (HT[i]) j++; 832 } 833 PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max)); 834 #endif 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 839 { 840 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 841 PetscInt nstash,reallocs; 842 843 PetscFunctionBegin; 844 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 845 846 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 847 PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs)); 848 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 849 PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 850 PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs)); 851 PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 856 { 857 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 858 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 859 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 860 PetscInt *row,*col; 861 PetscBool r1,r2,r3,other_disassembled; 862 MatScalar *val; 863 PetscMPIInt n; 864 865 PetscFunctionBegin; 866 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 867 if (!baij->donotstash && !mat->nooffprocentries) { 868 while (1) { 869 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 870 if (!flg) break; 871 872 for (i=0; i<n;) { 873 /* Now identify the consecutive vals belonging to the same row */ 874 for (j=i,rstart=row[j]; j<n; j++) { 875 if (row[j] != rstart) break; 876 } 877 if (j < n) ncols = j-i; 878 else ncols = n-i; 879 /* Now assemble all these values with a single function call */ 880 PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 881 i = j; 882 } 883 } 884 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 885 /* Now process the block-stash. Since the values are stashed column-oriented, 886 set the roworiented flag to column oriented, and after MatSetValues() 887 restore the original flags */ 888 r1 = baij->roworiented; 889 r2 = a->roworiented; 890 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 891 892 baij->roworiented = PETSC_FALSE; 893 a->roworiented = PETSC_FALSE; 894 895 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 896 while (1) { 897 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg)); 898 if (!flg) break; 899 900 for (i=0; i<n;) { 901 /* Now identify the consecutive vals belonging to the same row */ 902 for (j=i,rstart=row[j]; j<n; j++) { 903 if (row[j] != rstart) break; 904 } 905 if (j < n) ncols = j-i; 906 else ncols = n-i; 907 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode)); 908 i = j; 909 } 910 } 911 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 912 913 baij->roworiented = r1; 914 a->roworiented = r2; 915 916 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 917 } 918 919 PetscCall(MatAssemblyBegin(baij->A,mode)); 920 PetscCall(MatAssemblyEnd(baij->A,mode)); 921 922 /* determine if any processor has disassembled, if so we must 923 also disassemble ourselves, in order that we may reassemble. */ 924 /* 925 if nonzero structure of submatrix B cannot change then we know that 926 no processor disassembled thus we can skip this stuff 927 */ 928 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 929 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 930 if (mat->was_assembled && !other_disassembled) { 931 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 932 } 933 } 934 935 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 936 PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 937 } 938 PetscCall(MatAssemblyBegin(baij->B,mode)); 939 PetscCall(MatAssemblyEnd(baij->B,mode)); 940 941 #if defined(PETSC_USE_INFO) 942 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 943 PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct)); 944 945 baij->ht_total_ct = 0; 946 baij->ht_insert_ct = 0; 947 } 948 #endif 949 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 950 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact)); 951 952 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 953 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 954 } 955 956 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 957 958 baij->rowvalues = NULL; 959 960 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 961 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 962 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 963 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 964 } 965 PetscFunctionReturn(0); 966 } 967 968 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 969 #include <petscdraw.h> 970 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 971 { 972 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 973 PetscMPIInt rank = baij->rank; 974 PetscInt bs = mat->rmap->bs; 975 PetscBool iascii,isdraw; 976 PetscViewer sviewer; 977 PetscViewerFormat format; 978 979 PetscFunctionBegin; 980 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 981 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 982 if (iascii) { 983 PetscCall(PetscViewerGetFormat(viewer,&format)); 984 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 985 MatInfo info; 986 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 987 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 988 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 989 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", 990 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory)); 991 PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info)); 992 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 993 PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info)); 994 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 995 PetscCall(PetscViewerFlush(viewer)); 996 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 997 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 998 PetscCall(VecScatterView(baij->Mvctx,viewer)); 999 PetscFunctionReturn(0); 1000 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1001 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 1002 PetscFunctionReturn(0); 1003 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1004 PetscFunctionReturn(0); 1005 } 1006 } 1007 1008 if (isdraw) { 1009 PetscDraw draw; 1010 PetscBool isnull; 1011 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1012 PetscCall(PetscDrawIsNull(draw,&isnull)); 1013 if (isnull) PetscFunctionReturn(0); 1014 } 1015 1016 { 1017 /* assemble the entire matrix onto first processor. */ 1018 Mat A; 1019 Mat_SeqBAIJ *Aloc; 1020 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1021 MatScalar *a; 1022 const char *matname; 1023 1024 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1025 /* Perhaps this should be the type of mat? */ 1026 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 1027 if (rank == 0) { 1028 PetscCall(MatSetSizes(A,M,N,M,N)); 1029 } else { 1030 PetscCall(MatSetSizes(A,0,0,M,N)); 1031 } 1032 PetscCall(MatSetType(A,MATMPIBAIJ)); 1033 PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL)); 1034 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 1035 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 1036 1037 /* copy over the A part */ 1038 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1039 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1040 PetscCall(PetscMalloc1(bs,&rvals)); 1041 1042 for (i=0; i<mbs; i++) { 1043 rvals[0] = bs*(baij->rstartbs + i); 1044 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1045 for (j=ai[i]; j<ai[i+1]; j++) { 1046 col = (baij->cstartbs+aj[j])*bs; 1047 for (k=0; k<bs; k++) { 1048 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1049 col++; a += bs; 1050 } 1051 } 1052 } 1053 /* copy over the B part */ 1054 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1055 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1056 for (i=0; i<mbs; i++) { 1057 rvals[0] = bs*(baij->rstartbs + i); 1058 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1059 for (j=ai[i]; j<ai[i+1]; j++) { 1060 col = baij->garray[aj[j]]*bs; 1061 for (k=0; k<bs; k++) { 1062 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1063 col++; a += bs; 1064 } 1065 } 1066 } 1067 PetscCall(PetscFree(rvals)); 1068 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1069 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1070 /* 1071 Everyone has to call to draw the matrix since the graphics waits are 1072 synchronized across all processors that share the PetscDraw object 1073 */ 1074 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1075 PetscCall(PetscObjectGetName((PetscObject)mat,&matname)); 1076 if (rank == 0) { 1077 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname)); 1078 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer)); 1079 } 1080 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1081 PetscCall(PetscViewerFlush(viewer)); 1082 PetscCall(MatDestroy(&A)); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1088 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1089 { 1090 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1091 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1092 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1093 const PetscInt *garray = aij->garray; 1094 PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1095 PetscInt *rowlens,*colidxs; 1096 PetscScalar *matvals; 1097 1098 PetscFunctionBegin; 1099 PetscCall(PetscViewerSetUp(viewer)); 1100 1101 M = mat->rmap->N; 1102 N = mat->cmap->N; 1103 m = mat->rmap->n; 1104 rs = mat->rmap->rstart; 1105 cs = mat->cmap->rstart; 1106 bs = mat->rmap->bs; 1107 nz = bs*bs*(A->nz + B->nz); 1108 1109 /* write matrix header */ 1110 header[0] = MAT_FILE_CLASSID; 1111 header[1] = M; header[2] = N; header[3] = nz; 1112 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1113 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1114 1115 /* fill in and store row lengths */ 1116 PetscCall(PetscMalloc1(m,&rowlens)); 1117 for (cnt=0, i=0; i<A->mbs; i++) 1118 for (j=0; j<bs; j++) 1119 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1120 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1121 PetscCall(PetscFree(rowlens)); 1122 1123 /* fill in and store column indices */ 1124 PetscCall(PetscMalloc1(nz,&colidxs)); 1125 for (cnt=0, i=0; i<A->mbs; i++) { 1126 for (k=0; k<bs; k++) { 1127 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1128 if (garray[B->j[jb]] > cs/bs) break; 1129 for (l=0; l<bs; l++) 1130 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1131 } 1132 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1133 for (l=0; l<bs; l++) 1134 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1135 for (; jb<B->i[i+1]; jb++) 1136 for (l=0; l<bs; l++) 1137 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1138 } 1139 } 1140 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1141 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT)); 1142 PetscCall(PetscFree(colidxs)); 1143 1144 /* fill in and store nonzero values */ 1145 PetscCall(PetscMalloc1(nz,&matvals)); 1146 for (cnt=0, i=0; i<A->mbs; i++) { 1147 for (k=0; k<bs; k++) { 1148 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1149 if (garray[B->j[jb]] > cs/bs) break; 1150 for (l=0; l<bs; l++) 1151 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1152 } 1153 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1154 for (l=0; l<bs; l++) 1155 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1156 for (; jb<B->i[i+1]; jb++) 1157 for (l=0; l<bs; l++) 1158 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1159 } 1160 } 1161 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR)); 1162 PetscCall(PetscFree(matvals)); 1163 1164 /* write block size option to the viewer's .info file */ 1165 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscBool iascii,isdraw,issocket,isbinary; 1172 1173 PetscFunctionBegin; 1174 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1175 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1176 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1177 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1178 if (iascii || isdraw || issocket) { 1179 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer)); 1180 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat,viewer)); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1185 { 1186 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1187 1188 PetscFunctionBegin; 1189 #if defined(PETSC_USE_LOG) 1190 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1191 #endif 1192 PetscCall(MatStashDestroy_Private(&mat->stash)); 1193 PetscCall(MatStashDestroy_Private(&mat->bstash)); 1194 PetscCall(MatDestroy(&baij->A)); 1195 PetscCall(MatDestroy(&baij->B)); 1196 #if defined(PETSC_USE_CTABLE) 1197 PetscCall(PetscTableDestroy(&baij->colmap)); 1198 #else 1199 PetscCall(PetscFree(baij->colmap)); 1200 #endif 1201 PetscCall(PetscFree(baij->garray)); 1202 PetscCall(VecDestroy(&baij->lvec)); 1203 PetscCall(VecScatterDestroy(&baij->Mvctx)); 1204 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 1205 PetscCall(PetscFree(baij->barray)); 1206 PetscCall(PetscFree2(baij->hd,baij->ht)); 1207 PetscCall(PetscFree(baij->rangebs)); 1208 PetscCall(PetscFree(mat->data)); 1209 1210 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1211 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1212 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1213 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL)); 1214 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL)); 1215 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1216 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL)); 1217 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL)); 1218 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiadj_C",NULL)); 1219 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiaij_C",NULL)); 1220 #if defined(PETSC_HAVE_HYPRE) 1221 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL)); 1222 #endif 1223 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL)); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1228 { 1229 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1230 PetscInt nt; 1231 1232 PetscFunctionBegin; 1233 PetscCall(VecGetLocalSize(xx,&nt)); 1234 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1235 PetscCall(VecGetLocalSize(yy,&nt)); 1236 PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1237 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1238 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 1239 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1240 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1245 { 1246 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1250 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 1251 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1252 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1257 { 1258 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1259 1260 PetscFunctionBegin; 1261 /* do nondiagonal part */ 1262 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1263 /* do local part */ 1264 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1265 /* add partial results together */ 1266 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1267 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1272 { 1273 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1274 1275 PetscFunctionBegin; 1276 /* do nondiagonal part */ 1277 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1278 /* do local part */ 1279 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1280 /* add partial results together */ 1281 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1282 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 /* 1287 This only works correctly for square matrices where the subblock A->A is the 1288 diagonal block 1289 */ 1290 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1291 { 1292 PetscFunctionBegin; 1293 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1294 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v)); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1299 { 1300 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1301 1302 PetscFunctionBegin; 1303 PetscCall(MatScale(a->A,aa)); 1304 PetscCall(MatScale(a->B,aa)); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1309 { 1310 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1311 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1312 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1313 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1314 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1315 1316 PetscFunctionBegin; 1317 PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1318 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1319 mat->getrowactive = PETSC_TRUE; 1320 1321 if (!mat->rowvalues && (idx || v)) { 1322 /* 1323 allocate enough space to hold information from the longest row. 1324 */ 1325 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1326 PetscInt max = 1,mbs = mat->mbs,tmp; 1327 for (i=0; i<mbs; i++) { 1328 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1329 if (max < tmp) max = tmp; 1330 } 1331 PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices)); 1332 } 1333 lrow = row - brstart; 1334 1335 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1336 if (!v) {pvA = NULL; pvB = NULL;} 1337 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1338 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1339 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1340 nztot = nzA + nzB; 1341 1342 cmap = mat->garray; 1343 if (v || idx) { 1344 if (nztot) { 1345 /* Sort by increasing column numbers, assuming A and B already sorted */ 1346 PetscInt imark = -1; 1347 if (v) { 1348 *v = v_p = mat->rowvalues; 1349 for (i=0; i<nzB; i++) { 1350 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1351 else break; 1352 } 1353 imark = i; 1354 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1355 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1356 } 1357 if (idx) { 1358 *idx = idx_p = mat->rowindices; 1359 if (imark > -1) { 1360 for (i=0; i<imark; i++) { 1361 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1362 } 1363 } else { 1364 for (i=0; i<nzB; i++) { 1365 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1366 else break; 1367 } 1368 imark = i; 1369 } 1370 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1371 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1372 } 1373 } else { 1374 if (idx) *idx = NULL; 1375 if (v) *v = NULL; 1376 } 1377 } 1378 *nz = nztot; 1379 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1380 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1385 { 1386 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1387 1388 PetscFunctionBegin; 1389 PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1390 baij->getrowactive = PETSC_FALSE; 1391 PetscFunctionReturn(0); 1392 } 1393 1394 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1395 { 1396 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1397 1398 PetscFunctionBegin; 1399 PetscCall(MatZeroEntries(l->A)); 1400 PetscCall(MatZeroEntries(l->B)); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1405 { 1406 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1407 Mat A = a->A,B = a->B; 1408 PetscLogDouble isend[5],irecv[5]; 1409 1410 PetscFunctionBegin; 1411 info->block_size = (PetscReal)matin->rmap->bs; 1412 1413 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1414 1415 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1416 isend[3] = info->memory; isend[4] = info->mallocs; 1417 1418 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1419 1420 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1421 isend[3] += info->memory; isend[4] += info->mallocs; 1422 1423 if (flag == MAT_LOCAL) { 1424 info->nz_used = isend[0]; 1425 info->nz_allocated = isend[1]; 1426 info->nz_unneeded = isend[2]; 1427 info->memory = isend[3]; 1428 info->mallocs = isend[4]; 1429 } else if (flag == MAT_GLOBAL_MAX) { 1430 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1431 1432 info->nz_used = irecv[0]; 1433 info->nz_allocated = irecv[1]; 1434 info->nz_unneeded = irecv[2]; 1435 info->memory = irecv[3]; 1436 info->mallocs = irecv[4]; 1437 } else if (flag == MAT_GLOBAL_SUM) { 1438 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1439 1440 info->nz_used = irecv[0]; 1441 info->nz_allocated = irecv[1]; 1442 info->nz_unneeded = irecv[2]; 1443 info->memory = irecv[3]; 1444 info->mallocs = irecv[4]; 1445 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1446 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1447 info->fill_ratio_needed = 0; 1448 info->factor_mallocs = 0; 1449 PetscFunctionReturn(0); 1450 } 1451 1452 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1453 { 1454 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1455 1456 PetscFunctionBegin; 1457 switch (op) { 1458 case MAT_NEW_NONZERO_LOCATIONS: 1459 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1460 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1461 case MAT_KEEP_NONZERO_PATTERN: 1462 case MAT_NEW_NONZERO_LOCATION_ERR: 1463 MatCheckPreallocated(A,1); 1464 PetscCall(MatSetOption(a->A,op,flg)); 1465 PetscCall(MatSetOption(a->B,op,flg)); 1466 break; 1467 case MAT_ROW_ORIENTED: 1468 MatCheckPreallocated(A,1); 1469 a->roworiented = flg; 1470 1471 PetscCall(MatSetOption(a->A,op,flg)); 1472 PetscCall(MatSetOption(a->B,op,flg)); 1473 break; 1474 case MAT_FORCE_DIAGONAL_ENTRIES: 1475 case MAT_SORTED_FULL: 1476 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1477 break; 1478 case MAT_IGNORE_OFF_PROC_ENTRIES: 1479 a->donotstash = flg; 1480 break; 1481 case MAT_USE_HASH_TABLE: 1482 a->ht_flag = flg; 1483 a->ht_fact = 1.39; 1484 break; 1485 case MAT_SYMMETRIC: 1486 case MAT_STRUCTURALLY_SYMMETRIC: 1487 case MAT_HERMITIAN: 1488 case MAT_SUBMAT_SINGLEIS: 1489 case MAT_SYMMETRY_ETERNAL: 1490 MatCheckPreallocated(A,1); 1491 PetscCall(MatSetOption(a->A,op,flg)); 1492 break; 1493 default: 1494 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1495 } 1496 PetscFunctionReturn(0); 1497 } 1498 1499 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1500 { 1501 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1502 Mat_SeqBAIJ *Aloc; 1503 Mat B; 1504 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1505 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1506 MatScalar *a; 1507 1508 PetscFunctionBegin; 1509 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1510 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1511 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1512 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1513 /* Do not know preallocation information, but must set block size */ 1514 PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL)); 1515 } else { 1516 B = *matout; 1517 } 1518 1519 /* copy over the A part */ 1520 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1521 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1522 PetscCall(PetscMalloc1(bs,&rvals)); 1523 1524 for (i=0; i<mbs; i++) { 1525 rvals[0] = bs*(baij->rstartbs + i); 1526 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1527 for (j=ai[i]; j<ai[i+1]; j++) { 1528 col = (baij->cstartbs+aj[j])*bs; 1529 for (k=0; k<bs; k++) { 1530 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1531 1532 col++; a += bs; 1533 } 1534 } 1535 } 1536 /* copy over the B part */ 1537 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1538 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1539 for (i=0; i<mbs; i++) { 1540 rvals[0] = bs*(baij->rstartbs + i); 1541 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1542 for (j=ai[i]; j<ai[i+1]; j++) { 1543 col = baij->garray[aj[j]]*bs; 1544 for (k=0; k<bs; k++) { 1545 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1546 col++; 1547 a += bs; 1548 } 1549 } 1550 } 1551 PetscCall(PetscFree(rvals)); 1552 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1553 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1554 1555 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1556 else { 1557 PetscCall(MatHeaderMerge(A,&B)); 1558 } 1559 PetscFunctionReturn(0); 1560 } 1561 1562 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1563 { 1564 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1565 Mat a = baij->A,b = baij->B; 1566 PetscInt s1,s2,s3; 1567 1568 PetscFunctionBegin; 1569 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1570 if (rr) { 1571 PetscCall(VecGetLocalSize(rr,&s1)); 1572 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1573 /* Overlap communication with computation. */ 1574 PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1575 } 1576 if (ll) { 1577 PetscCall(VecGetLocalSize(ll,&s1)); 1578 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1579 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1580 } 1581 /* scale the diagonal block */ 1582 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 1583 1584 if (rr) { 1585 /* Do a scatter end and then right scale the off-diagonal block */ 1586 PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1587 PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec)); 1588 } 1589 PetscFunctionReturn(0); 1590 } 1591 1592 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1593 { 1594 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1595 PetscInt *lrows; 1596 PetscInt r, len; 1597 PetscBool cong; 1598 1599 PetscFunctionBegin; 1600 /* get locally owned rows */ 1601 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 1602 /* fix right hand side if needed */ 1603 if (x && b) { 1604 const PetscScalar *xx; 1605 PetscScalar *bb; 1606 1607 PetscCall(VecGetArrayRead(x,&xx)); 1608 PetscCall(VecGetArray(b,&bb)); 1609 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1610 PetscCall(VecRestoreArrayRead(x,&xx)); 1611 PetscCall(VecRestoreArray(b,&bb)); 1612 } 1613 1614 /* actually zap the local rows */ 1615 /* 1616 Zero the required rows. If the "diagonal block" of the matrix 1617 is square and the user wishes to set the diagonal we use separate 1618 code so that MatSetValues() is not called for each diagonal allocating 1619 new memory, thus calling lots of mallocs and slowing things down. 1620 1621 */ 1622 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1623 PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL)); 1624 PetscCall(MatHasCongruentLayouts(A,&cong)); 1625 if ((diag != 0.0) && cong) { 1626 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL)); 1627 } else if (diag != 0.0) { 1628 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1629 PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1630 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1631 for (r = 0; r < len; ++r) { 1632 const PetscInt row = lrows[r] + A->rmap->rstart; 1633 PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES)); 1634 } 1635 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1636 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1637 } else { 1638 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1639 } 1640 PetscCall(PetscFree(lrows)); 1641 1642 /* only change matrix nonzero state if pattern was allowed to be changed */ 1643 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1644 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1645 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1646 } 1647 PetscFunctionReturn(0); 1648 } 1649 1650 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1651 { 1652 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1653 PetscMPIInt n = A->rmap->n,p = 0; 1654 PetscInt i,j,k,r,len = 0,row,col,count; 1655 PetscInt *lrows,*owners = A->rmap->range; 1656 PetscSFNode *rrows; 1657 PetscSF sf; 1658 const PetscScalar *xx; 1659 PetscScalar *bb,*mask; 1660 Vec xmask,lmask; 1661 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1662 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1663 PetscScalar *aa; 1664 1665 PetscFunctionBegin; 1666 /* Create SF where leaves are input rows and roots are owned rows */ 1667 PetscCall(PetscMalloc1(n, &lrows)); 1668 for (r = 0; r < n; ++r) lrows[r] = -1; 1669 PetscCall(PetscMalloc1(N, &rrows)); 1670 for (r = 0; r < N; ++r) { 1671 const PetscInt idx = rows[r]; 1672 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 1673 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1674 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 1675 } 1676 rrows[r].rank = p; 1677 rrows[r].index = rows[r] - owners[p]; 1678 } 1679 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 1680 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1681 /* Collect flags for rows to be zeroed */ 1682 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1683 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1684 PetscCall(PetscSFDestroy(&sf)); 1685 /* Compress and put in row numbers */ 1686 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1687 /* zero diagonal part of matrix */ 1688 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 1689 /* handle off diagonal part of matrix */ 1690 PetscCall(MatCreateVecs(A,&xmask,NULL)); 1691 PetscCall(VecDuplicate(l->lvec,&lmask)); 1692 PetscCall(VecGetArray(xmask,&bb)); 1693 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1694 PetscCall(VecRestoreArray(xmask,&bb)); 1695 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1696 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1697 PetscCall(VecDestroy(&xmask)); 1698 if (x) { 1699 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1700 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1701 PetscCall(VecGetArrayRead(l->lvec,&xx)); 1702 PetscCall(VecGetArray(b,&bb)); 1703 } 1704 PetscCall(VecGetArray(lmask,&mask)); 1705 /* remove zeroed rows of off diagonal matrix */ 1706 for (i = 0; i < len; ++i) { 1707 row = lrows[i]; 1708 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1709 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1710 for (k = 0; k < count; ++k) { 1711 aa[0] = 0.0; 1712 aa += bs; 1713 } 1714 } 1715 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1716 for (i = 0; i < l->B->rmap->N; ++i) { 1717 row = i/bs; 1718 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1719 for (k = 0; k < bs; ++k) { 1720 col = bs*baij->j[j] + k; 1721 if (PetscAbsScalar(mask[col])) { 1722 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1723 if (x) bb[i] -= aa[0]*xx[col]; 1724 aa[0] = 0.0; 1725 } 1726 } 1727 } 1728 } 1729 if (x) { 1730 PetscCall(VecRestoreArray(b,&bb)); 1731 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 1732 } 1733 PetscCall(VecRestoreArray(lmask,&mask)); 1734 PetscCall(VecDestroy(&lmask)); 1735 PetscCall(PetscFree(lrows)); 1736 1737 /* only change matrix nonzero state if pattern was allowed to be changed */ 1738 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1739 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1740 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1741 } 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1746 { 1747 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1748 1749 PetscFunctionBegin; 1750 PetscCall(MatSetUnfactored(a->A)); 1751 PetscFunctionReturn(0); 1752 } 1753 1754 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1755 1756 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1757 { 1758 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1759 Mat a,b,c,d; 1760 PetscBool flg; 1761 1762 PetscFunctionBegin; 1763 a = matA->A; b = matA->B; 1764 c = matB->A; d = matB->B; 1765 1766 PetscCall(MatEqual(a,c,&flg)); 1767 if (flg) { 1768 PetscCall(MatEqual(b,d,&flg)); 1769 } 1770 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1771 PetscFunctionReturn(0); 1772 } 1773 1774 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1775 { 1776 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1777 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1778 1779 PetscFunctionBegin; 1780 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1781 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1782 PetscCall(MatCopy_Basic(A,B,str)); 1783 } else { 1784 PetscCall(MatCopy(a->A,b->A,str)); 1785 PetscCall(MatCopy(a->B,b->B,str)); 1786 } 1787 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1788 PetscFunctionReturn(0); 1789 } 1790 1791 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1792 { 1793 PetscFunctionBegin; 1794 PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 1795 PetscFunctionReturn(0); 1796 } 1797 1798 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1799 { 1800 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1801 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1802 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1803 1804 PetscFunctionBegin; 1805 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1810 { 1811 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1812 PetscBLASInt bnz,one=1; 1813 Mat_SeqBAIJ *x,*y; 1814 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1815 1816 PetscFunctionBegin; 1817 if (str == SAME_NONZERO_PATTERN) { 1818 PetscScalar alpha = a; 1819 x = (Mat_SeqBAIJ*)xx->A->data; 1820 y = (Mat_SeqBAIJ*)yy->A->data; 1821 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1822 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1823 x = (Mat_SeqBAIJ*)xx->B->data; 1824 y = (Mat_SeqBAIJ*)yy->B->data; 1825 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1826 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1827 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1828 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1829 PetscCall(MatAXPY_Basic(Y,a,X,str)); 1830 } else { 1831 Mat B; 1832 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1833 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 1834 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 1835 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 1836 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 1837 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 1838 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 1839 PetscCall(MatSetType(B,MATMPIBAIJ)); 1840 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d)); 1841 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 1842 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o)); 1843 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1844 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 1845 PetscCall(MatHeaderMerge(Y,&B)); 1846 PetscCall(PetscFree(nnz_d)); 1847 PetscCall(PetscFree(nnz_o)); 1848 } 1849 PetscFunctionReturn(0); 1850 } 1851 1852 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1853 { 1854 PetscFunctionBegin; 1855 if (PetscDefined(USE_COMPLEX)) { 1856 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1857 1858 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1859 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1865 { 1866 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1867 1868 PetscFunctionBegin; 1869 PetscCall(MatRealPart(a->A)); 1870 PetscCall(MatRealPart(a->B)); 1871 PetscFunctionReturn(0); 1872 } 1873 1874 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1875 { 1876 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1877 1878 PetscFunctionBegin; 1879 PetscCall(MatImaginaryPart(a->A)); 1880 PetscCall(MatImaginaryPart(a->B)); 1881 PetscFunctionReturn(0); 1882 } 1883 1884 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1885 { 1886 IS iscol_local; 1887 PetscInt csize; 1888 1889 PetscFunctionBegin; 1890 PetscCall(ISGetLocalSize(iscol,&csize)); 1891 if (call == MAT_REUSE_MATRIX) { 1892 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 1893 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1894 } else { 1895 PetscCall(ISAllGather(iscol,&iscol_local)); 1896 } 1897 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat)); 1898 if (call == MAT_INITIAL_MATRIX) { 1899 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 1900 PetscCall(ISDestroy(&iscol_local)); 1901 } 1902 PetscFunctionReturn(0); 1903 } 1904 1905 /* 1906 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1907 in local and then by concatenating the local matrices the end result. 1908 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1909 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1910 */ 1911 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1912 { 1913 PetscMPIInt rank,size; 1914 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1915 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1916 Mat M,Mreuse; 1917 MatScalar *vwork,*aa; 1918 MPI_Comm comm; 1919 IS isrow_new, iscol_new; 1920 Mat_SeqBAIJ *aij; 1921 1922 PetscFunctionBegin; 1923 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 1924 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 1925 PetscCallMPI(MPI_Comm_size(comm,&size)); 1926 /* The compression and expansion should be avoided. Doesn't point 1927 out errors, might change the indices, hence buggey */ 1928 PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new)); 1929 PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new)); 1930 1931 if (call == MAT_REUSE_MATRIX) { 1932 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 1933 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1934 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse)); 1935 } else { 1936 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse)); 1937 } 1938 PetscCall(ISDestroy(&isrow_new)); 1939 PetscCall(ISDestroy(&iscol_new)); 1940 /* 1941 m - number of local rows 1942 n - number of columns (same on all processors) 1943 rstart - first row in new global matrix generated 1944 */ 1945 PetscCall(MatGetBlockSize(mat,&bs)); 1946 PetscCall(MatGetSize(Mreuse,&m,&n)); 1947 m = m/bs; 1948 n = n/bs; 1949 1950 if (call == MAT_INITIAL_MATRIX) { 1951 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1952 ii = aij->i; 1953 jj = aij->j; 1954 1955 /* 1956 Determine the number of non-zeros in the diagonal and off-diagonal 1957 portions of the matrix in order to do correct preallocation 1958 */ 1959 1960 /* first get start and end of "diagonal" columns */ 1961 if (csize == PETSC_DECIDE) { 1962 PetscCall(ISGetSize(isrow,&mglobal)); 1963 if (mglobal == n*bs) { /* square matrix */ 1964 nlocal = m; 1965 } else { 1966 nlocal = n/size + ((n % size) > rank); 1967 } 1968 } else { 1969 nlocal = csize/bs; 1970 } 1971 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 1972 rstart = rend - nlocal; 1973 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 1974 1975 /* next, compute all the lengths */ 1976 PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens)); 1977 for (i=0; i<m; i++) { 1978 jend = ii[i+1] - ii[i]; 1979 olen = 0; 1980 dlen = 0; 1981 for (j=0; j<jend; j++) { 1982 if (*jj < rstart || *jj >= rend) olen++; 1983 else dlen++; 1984 jj++; 1985 } 1986 olens[i] = olen; 1987 dlens[i] = dlen; 1988 } 1989 PetscCall(MatCreate(comm,&M)); 1990 PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n)); 1991 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 1992 PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1993 PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1994 PetscCall(PetscFree2(dlens,olens)); 1995 } else { 1996 PetscInt ml,nl; 1997 1998 M = *newmat; 1999 PetscCall(MatGetLocalSize(M,&ml,&nl)); 2000 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2001 PetscCall(MatZeroEntries(M)); 2002 /* 2003 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2004 rather than the slower MatSetValues(). 2005 */ 2006 M->was_assembled = PETSC_TRUE; 2007 M->assembled = PETSC_FALSE; 2008 } 2009 PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE)); 2010 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 2011 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2012 ii = aij->i; 2013 jj = aij->j; 2014 aa = aij->a; 2015 for (i=0; i<m; i++) { 2016 row = rstart/bs + i; 2017 nz = ii[i+1] - ii[i]; 2018 cwork = jj; jj += nz; 2019 vwork = aa; aa += nz*bs*bs; 2020 PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 2021 } 2022 2023 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 2024 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 2025 *newmat = M; 2026 2027 /* save submatrix used in processor for next request */ 2028 if (call == MAT_INITIAL_MATRIX) { 2029 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 2030 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2031 } 2032 PetscFunctionReturn(0); 2033 } 2034 2035 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2036 { 2037 MPI_Comm comm,pcomm; 2038 PetscInt clocal_size,nrows; 2039 const PetscInt *rows; 2040 PetscMPIInt size; 2041 IS crowp,lcolp; 2042 2043 PetscFunctionBegin; 2044 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2045 /* make a collective version of 'rowp' */ 2046 PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm)); 2047 if (pcomm==comm) { 2048 crowp = rowp; 2049 } else { 2050 PetscCall(ISGetSize(rowp,&nrows)); 2051 PetscCall(ISGetIndices(rowp,&rows)); 2052 PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp)); 2053 PetscCall(ISRestoreIndices(rowp,&rows)); 2054 } 2055 PetscCall(ISSetPermutation(crowp)); 2056 /* make a local version of 'colp' */ 2057 PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm)); 2058 PetscCallMPI(MPI_Comm_size(pcomm,&size)); 2059 if (size==1) { 2060 lcolp = colp; 2061 } else { 2062 PetscCall(ISAllGather(colp,&lcolp)); 2063 } 2064 PetscCall(ISSetPermutation(lcolp)); 2065 /* now we just get the submatrix */ 2066 PetscCall(MatGetLocalSize(A,NULL,&clocal_size)); 2067 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B)); 2068 /* clean up */ 2069 if (pcomm!=comm) { 2070 PetscCall(ISDestroy(&crowp)); 2071 } 2072 if (size>1) { 2073 PetscCall(ISDestroy(&lcolp)); 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2079 { 2080 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2081 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2082 2083 PetscFunctionBegin; 2084 if (nghosts) *nghosts = B->nbs; 2085 if (ghosts) *ghosts = baij->garray; 2086 PetscFunctionReturn(0); 2087 } 2088 2089 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2090 { 2091 Mat B; 2092 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2093 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2094 Mat_SeqAIJ *b; 2095 PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL; 2096 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2097 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2098 2099 PetscFunctionBegin; 2100 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2101 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2102 2103 /* ---------------------------------------------------------------- 2104 Tell every processor the number of nonzeros per row 2105 */ 2106 PetscCall(PetscMalloc1(A->rmap->N/bs,&lens)); 2107 for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2108 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2109 } 2110 PetscCall(PetscMalloc1(2*size,&recvcounts)); 2111 displs = recvcounts + size; 2112 for (i=0; i<size; i++) { 2113 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2114 displs[i] = A->rmap->range[i]/bs; 2115 } 2116 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2117 /* --------------------------------------------------------------- 2118 Create the sequential matrix of the same type as the local block diagonal 2119 */ 2120 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 2121 PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE)); 2122 PetscCall(MatSetType(B,MATSEQAIJ)); 2123 PetscCall(MatSeqAIJSetPreallocation(B,0,lens)); 2124 b = (Mat_SeqAIJ*)B->data; 2125 2126 /*-------------------------------------------------------------------- 2127 Copy my part of matrix column indices over 2128 */ 2129 sendcount = ad->nz + bd->nz; 2130 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2131 a_jsendbuf = ad->j; 2132 b_jsendbuf = bd->j; 2133 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2134 cnt = 0; 2135 for (i=0; i<n; i++) { 2136 2137 /* put in lower diagonal portion */ 2138 m = bd->i[i+1] - bd->i[i]; 2139 while (m > 0) { 2140 /* is it above diagonal (in bd (compressed) numbering) */ 2141 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2142 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2143 m--; 2144 } 2145 2146 /* put in diagonal portion */ 2147 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2148 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2149 } 2150 2151 /* put in upper diagonal portion */ 2152 while (m-- > 0) { 2153 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2154 } 2155 } 2156 PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt); 2157 2158 /*-------------------------------------------------------------------- 2159 Gather all column indices to all processors 2160 */ 2161 for (i=0; i<size; i++) { 2162 recvcounts[i] = 0; 2163 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2164 recvcounts[i] += lens[j]; 2165 } 2166 } 2167 displs[0] = 0; 2168 for (i=1; i<size; i++) { 2169 displs[i] = displs[i-1] + recvcounts[i-1]; 2170 } 2171 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2172 /*-------------------------------------------------------------------- 2173 Assemble the matrix into useable form (note numerical values not yet set) 2174 */ 2175 /* set the b->ilen (length of each row) values */ 2176 PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs)); 2177 /* set the b->i indices */ 2178 b->i[0] = 0; 2179 for (i=1; i<=A->rmap->N/bs; i++) { 2180 b->i[i] = b->i[i-1] + lens[i-1]; 2181 } 2182 PetscCall(PetscFree(lens)); 2183 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2184 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2185 PetscCall(PetscFree(recvcounts)); 2186 2187 if (A->symmetric) PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 2188 else if (A->hermitian) PetscCall(MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE)); 2189 else if (A->structurally_symmetric) PetscCall(MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE)); 2190 *newmat = B; 2191 PetscFunctionReturn(0); 2192 } 2193 2194 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2195 { 2196 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2197 Vec bb1 = NULL; 2198 2199 PetscFunctionBegin; 2200 if (flag == SOR_APPLY_UPPER) { 2201 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2206 PetscCall(VecDuplicate(bb,&bb1)); 2207 } 2208 2209 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2210 if (flag & SOR_ZERO_INITIAL_GUESS) { 2211 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2212 its--; 2213 } 2214 2215 while (its--) { 2216 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2217 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2218 2219 /* update rhs: bb1 = bb - B*x */ 2220 PetscCall(VecScale(mat->lvec,-1.0)); 2221 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2222 2223 /* local sweep */ 2224 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 2225 } 2226 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2227 if (flag & SOR_ZERO_INITIAL_GUESS) { 2228 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2229 its--; 2230 } 2231 while (its--) { 2232 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2233 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2234 2235 /* update rhs: bb1 = bb - B*x */ 2236 PetscCall(VecScale(mat->lvec,-1.0)); 2237 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2238 2239 /* local sweep */ 2240 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 2241 } 2242 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2243 if (flag & SOR_ZERO_INITIAL_GUESS) { 2244 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2245 its--; 2246 } 2247 while (its--) { 2248 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2249 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2250 2251 /* update rhs: bb1 = bb - B*x */ 2252 PetscCall(VecScale(mat->lvec,-1.0)); 2253 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2254 2255 /* local sweep */ 2256 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 2257 } 2258 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2259 2260 PetscCall(VecDestroy(&bb1)); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions) 2265 { 2266 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2267 PetscInt m,N,i,*garray = aij->garray; 2268 PetscInt ib,jb,bs = A->rmap->bs; 2269 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2270 MatScalar *a_val = a_aij->a; 2271 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2272 MatScalar *b_val = b_aij->a; 2273 PetscReal *work; 2274 2275 PetscFunctionBegin; 2276 PetscCall(MatGetSize(A,&m,&N)); 2277 PetscCall(PetscCalloc1(N,&work)); 2278 if (type == NORM_2) { 2279 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2280 for (jb=0; jb<bs; jb++) { 2281 for (ib=0; ib<bs; ib++) { 2282 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2283 a_val++; 2284 } 2285 } 2286 } 2287 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2288 for (jb=0; jb<bs; jb++) { 2289 for (ib=0; ib<bs; ib++) { 2290 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2291 b_val++; 2292 } 2293 } 2294 } 2295 } else if (type == NORM_1) { 2296 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2297 for (jb=0; jb<bs; jb++) { 2298 for (ib=0; ib<bs; ib++) { 2299 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2300 a_val++; 2301 } 2302 } 2303 } 2304 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2305 for (jb=0; jb<bs; jb++) { 2306 for (ib=0; ib<bs; ib++) { 2307 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2308 b_val++; 2309 } 2310 } 2311 } 2312 } else if (type == NORM_INFINITY) { 2313 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2314 for (jb=0; jb<bs; jb++) { 2315 for (ib=0; ib<bs; ib++) { 2316 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2317 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2318 a_val++; 2319 } 2320 } 2321 } 2322 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2323 for (jb=0; jb<bs; jb++) { 2324 for (ib=0; ib<bs; ib++) { 2325 int col = garray[b_aij->j[i]] * bs + jb; 2326 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2327 b_val++; 2328 } 2329 } 2330 } 2331 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2332 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2333 for (jb=0; jb<bs; jb++) { 2334 for (ib=0; ib<bs; ib++) { 2335 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2336 a_val++; 2337 } 2338 } 2339 } 2340 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2341 for (jb=0; jb<bs; jb++) { 2342 for (ib=0; ib<bs; ib++) { 2343 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2344 b_val++; 2345 } 2346 } 2347 } 2348 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2349 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2350 for (jb=0; jb<bs; jb++) { 2351 for (ib=0; ib<bs; ib++) { 2352 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2353 a_val++; 2354 } 2355 } 2356 } 2357 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2358 for (jb=0; jb<bs; jb++) { 2359 for (ib=0; ib<bs; ib++) { 2360 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2361 b_val++; 2362 } 2363 } 2364 } 2365 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 2366 if (type == NORM_INFINITY) { 2367 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 2368 } else { 2369 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 2370 } 2371 PetscCall(PetscFree(work)); 2372 if (type == NORM_2) { 2373 for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2374 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2375 for (i=0; i<N; i++) reductions[i] /= m; 2376 } 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2381 { 2382 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2383 2384 PetscFunctionBegin; 2385 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2386 A->factorerrortype = a->A->factorerrortype; 2387 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2388 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2389 PetscFunctionReturn(0); 2390 } 2391 2392 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2393 { 2394 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2395 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2396 2397 PetscFunctionBegin; 2398 if (!Y->preallocated) { 2399 PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL)); 2400 } else if (!aij->nz) { 2401 PetscInt nonew = aij->nonew; 2402 PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL)); 2403 aij->nonew = nonew; 2404 } 2405 PetscCall(MatShift_Basic(Y,a)); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2410 { 2411 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2412 2413 PetscFunctionBegin; 2414 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2415 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2416 if (d) { 2417 PetscInt rstart; 2418 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2419 *d += rstart/A->rmap->bs; 2420 2421 } 2422 PetscFunctionReturn(0); 2423 } 2424 2425 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2426 { 2427 PetscFunctionBegin; 2428 *a = ((Mat_MPIBAIJ*)A->data)->A; 2429 PetscFunctionReturn(0); 2430 } 2431 2432 /* -------------------------------------------------------------------*/ 2433 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2434 MatGetRow_MPIBAIJ, 2435 MatRestoreRow_MPIBAIJ, 2436 MatMult_MPIBAIJ, 2437 /* 4*/ MatMultAdd_MPIBAIJ, 2438 MatMultTranspose_MPIBAIJ, 2439 MatMultTransposeAdd_MPIBAIJ, 2440 NULL, 2441 NULL, 2442 NULL, 2443 /*10*/ NULL, 2444 NULL, 2445 NULL, 2446 MatSOR_MPIBAIJ, 2447 MatTranspose_MPIBAIJ, 2448 /*15*/ MatGetInfo_MPIBAIJ, 2449 MatEqual_MPIBAIJ, 2450 MatGetDiagonal_MPIBAIJ, 2451 MatDiagonalScale_MPIBAIJ, 2452 MatNorm_MPIBAIJ, 2453 /*20*/ MatAssemblyBegin_MPIBAIJ, 2454 MatAssemblyEnd_MPIBAIJ, 2455 MatSetOption_MPIBAIJ, 2456 MatZeroEntries_MPIBAIJ, 2457 /*24*/ MatZeroRows_MPIBAIJ, 2458 NULL, 2459 NULL, 2460 NULL, 2461 NULL, 2462 /*29*/ MatSetUp_MPIBAIJ, 2463 NULL, 2464 NULL, 2465 MatGetDiagonalBlock_MPIBAIJ, 2466 NULL, 2467 /*34*/ MatDuplicate_MPIBAIJ, 2468 NULL, 2469 NULL, 2470 NULL, 2471 NULL, 2472 /*39*/ MatAXPY_MPIBAIJ, 2473 MatCreateSubMatrices_MPIBAIJ, 2474 MatIncreaseOverlap_MPIBAIJ, 2475 MatGetValues_MPIBAIJ, 2476 MatCopy_MPIBAIJ, 2477 /*44*/ NULL, 2478 MatScale_MPIBAIJ, 2479 MatShift_MPIBAIJ, 2480 NULL, 2481 MatZeroRowsColumns_MPIBAIJ, 2482 /*49*/ NULL, 2483 NULL, 2484 NULL, 2485 NULL, 2486 NULL, 2487 /*54*/ MatFDColoringCreate_MPIXAIJ, 2488 NULL, 2489 MatSetUnfactored_MPIBAIJ, 2490 MatPermute_MPIBAIJ, 2491 MatSetValuesBlocked_MPIBAIJ, 2492 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2493 MatDestroy_MPIBAIJ, 2494 MatView_MPIBAIJ, 2495 NULL, 2496 NULL, 2497 /*64*/ NULL, 2498 NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2503 NULL, 2504 NULL, 2505 NULL, 2506 NULL, 2507 /*74*/ NULL, 2508 MatFDColoringApply_BAIJ, 2509 NULL, 2510 NULL, 2511 NULL, 2512 /*79*/ NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 MatLoad_MPIBAIJ, 2517 /*84*/ NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 /*89*/ NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 /*94*/ NULL, 2528 NULL, 2529 NULL, 2530 NULL, 2531 NULL, 2532 /*99*/ NULL, 2533 NULL, 2534 NULL, 2535 MatConjugate_MPIBAIJ, 2536 NULL, 2537 /*104*/NULL, 2538 MatRealPart_MPIBAIJ, 2539 MatImaginaryPart_MPIBAIJ, 2540 NULL, 2541 NULL, 2542 /*109*/NULL, 2543 NULL, 2544 NULL, 2545 NULL, 2546 MatMissingDiagonal_MPIBAIJ, 2547 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2548 NULL, 2549 MatGetGhosts_MPIBAIJ, 2550 NULL, 2551 NULL, 2552 /*119*/NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 MatGetMultiProcBlock_MPIBAIJ, 2557 /*124*/NULL, 2558 MatGetColumnReductions_MPIBAIJ, 2559 MatInvertBlockDiagonal_MPIBAIJ, 2560 NULL, 2561 NULL, 2562 /*129*/ NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 /*134*/ NULL, 2568 NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 /*139*/ MatSetBlockSizes_Default, 2573 NULL, 2574 NULL, 2575 MatFDColoringSetUp_MPIXAIJ, 2576 NULL, 2577 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2578 NULL, 2579 NULL, 2580 NULL, 2581 NULL, 2582 NULL 2583 }; 2584 2585 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2586 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2587 2588 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2589 { 2590 PetscInt m,rstart,cstart,cend; 2591 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2592 const PetscInt *JJ =NULL; 2593 PetscScalar *values=NULL; 2594 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2595 PetscBool nooffprocentries; 2596 2597 PetscFunctionBegin; 2598 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 2599 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 2600 PetscCall(PetscLayoutSetUp(B->rmap)); 2601 PetscCall(PetscLayoutSetUp(B->cmap)); 2602 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2603 m = B->rmap->n/bs; 2604 rstart = B->rmap->rstart/bs; 2605 cstart = B->cmap->rstart/bs; 2606 cend = B->cmap->rend/bs; 2607 2608 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2609 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2610 for (i=0; i<m; i++) { 2611 nz = ii[i+1] - ii[i]; 2612 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2613 nz_max = PetscMax(nz_max,nz); 2614 dlen = 0; 2615 olen = 0; 2616 JJ = jj + ii[i]; 2617 for (j=0; j<nz; j++) { 2618 if (*JJ < cstart || *JJ >= cend) olen++; 2619 else dlen++; 2620 JJ++; 2621 } 2622 d_nnz[i] = dlen; 2623 o_nnz[i] = olen; 2624 } 2625 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz)); 2626 PetscCall(PetscFree2(d_nnz,o_nnz)); 2627 2628 values = (PetscScalar*)V; 2629 if (!values) { 2630 PetscCall(PetscCalloc1(bs*bs*nz_max,&values)); 2631 } 2632 for (i=0; i<m; i++) { 2633 PetscInt row = i + rstart; 2634 PetscInt ncols = ii[i+1] - ii[i]; 2635 const PetscInt *icols = jj + ii[i]; 2636 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2637 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2638 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES)); 2639 } else { /* block ordering does not match so we can only insert one block at a time. */ 2640 PetscInt j; 2641 for (j=0; j<ncols; j++) { 2642 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2643 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES)); 2644 } 2645 } 2646 } 2647 2648 if (!V) PetscCall(PetscFree(values)); 2649 nooffprocentries = B->nooffprocentries; 2650 B->nooffprocentries = PETSC_TRUE; 2651 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2652 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2653 B->nooffprocentries = nooffprocentries; 2654 2655 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2656 PetscFunctionReturn(0); 2657 } 2658 2659 /*@C 2660 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2661 2662 Collective 2663 2664 Input Parameters: 2665 + B - the matrix 2666 . bs - the block size 2667 . i - the indices into j for the start of each local row (starts with zero) 2668 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2669 - v - optional values in the matrix 2670 2671 Level: advanced 2672 2673 Notes: 2674 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2675 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2676 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2677 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2678 block column and the second index is over columns within a block. 2679 2680 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2681 2682 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2683 @*/ 2684 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2685 { 2686 PetscFunctionBegin; 2687 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2688 PetscValidType(B,1); 2689 PetscValidLogicalCollectiveInt(B,bs,2); 2690 PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2695 { 2696 Mat_MPIBAIJ *b; 2697 PetscInt i; 2698 PetscMPIInt size; 2699 2700 PetscFunctionBegin; 2701 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2702 PetscCall(PetscLayoutSetUp(B->rmap)); 2703 PetscCall(PetscLayoutSetUp(B->cmap)); 2704 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2705 2706 if (d_nnz) { 2707 for (i=0; i<B->rmap->n/bs; i++) { 2708 PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]); 2709 } 2710 } 2711 if (o_nnz) { 2712 for (i=0; i<B->rmap->n/bs; i++) { 2713 PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]); 2714 } 2715 } 2716 2717 b = (Mat_MPIBAIJ*)B->data; 2718 b->bs2 = bs*bs; 2719 b->mbs = B->rmap->n/bs; 2720 b->nbs = B->cmap->n/bs; 2721 b->Mbs = B->rmap->N/bs; 2722 b->Nbs = B->cmap->N/bs; 2723 2724 for (i=0; i<=b->size; i++) { 2725 b->rangebs[i] = B->rmap->range[i]/bs; 2726 } 2727 b->rstartbs = B->rmap->rstart/bs; 2728 b->rendbs = B->rmap->rend/bs; 2729 b->cstartbs = B->cmap->rstart/bs; 2730 b->cendbs = B->cmap->rend/bs; 2731 2732 #if defined(PETSC_USE_CTABLE) 2733 PetscCall(PetscTableDestroy(&b->colmap)); 2734 #else 2735 PetscCall(PetscFree(b->colmap)); 2736 #endif 2737 PetscCall(PetscFree(b->garray)); 2738 PetscCall(VecDestroy(&b->lvec)); 2739 PetscCall(VecScatterDestroy(&b->Mvctx)); 2740 2741 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2742 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2743 PetscCall(MatDestroy(&b->B)); 2744 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2745 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 2746 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 2747 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2748 2749 if (!B->preallocated) { 2750 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2751 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2752 PetscCall(MatSetType(b->A,MATSEQBAIJ)); 2753 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2754 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 2755 } 2756 2757 PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 2758 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 2759 B->preallocated = PETSC_TRUE; 2760 B->was_assembled = PETSC_FALSE; 2761 B->assembled = PETSC_FALSE; 2762 PetscFunctionReturn(0); 2763 } 2764 2765 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2766 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2767 2768 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2769 { 2770 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2771 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2772 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2773 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2774 2775 PetscFunctionBegin; 2776 PetscCall(PetscMalloc1(M+1,&ii)); 2777 ii[0] = 0; 2778 for (i=0; i<M; i++) { 2779 PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]); 2780 PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]); 2781 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2782 /* remove one from count of matrix has diagonal */ 2783 for (j=id[i]; j<id[i+1]; j++) { 2784 if (jd[j] == i) {ii[i+1]--;break;} 2785 } 2786 } 2787 PetscCall(PetscMalloc1(ii[M],&jj)); 2788 cnt = 0; 2789 for (i=0; i<M; i++) { 2790 for (j=io[i]; j<io[i+1]; j++) { 2791 if (garray[jo[j]] > rstart) break; 2792 jj[cnt++] = garray[jo[j]]; 2793 } 2794 for (k=id[i]; k<id[i+1]; k++) { 2795 if (jd[k] != i) { 2796 jj[cnt++] = rstart + jd[k]; 2797 } 2798 } 2799 for (; j<io[i+1]; j++) { 2800 jj[cnt++] = garray[jo[j]]; 2801 } 2802 } 2803 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj)); 2804 PetscFunctionReturn(0); 2805 } 2806 2807 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2808 2809 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2810 2811 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 2812 { 2813 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2814 Mat_MPIAIJ *b; 2815 Mat B; 2816 2817 PetscFunctionBegin; 2818 PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2819 2820 if (reuse == MAT_REUSE_MATRIX) { 2821 B = *newmat; 2822 } else { 2823 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 2824 PetscCall(MatSetType(B,MATMPIAIJ)); 2825 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2826 PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs)); 2827 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 2828 PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL)); 2829 } 2830 b = (Mat_MPIAIJ*) B->data; 2831 2832 if (reuse == MAT_REUSE_MATRIX) { 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2834 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2835 } else { 2836 PetscCall(MatDestroy(&b->A)); 2837 PetscCall(MatDestroy(&b->B)); 2838 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2839 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2840 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2841 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 2842 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 2843 } 2844 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2845 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2846 2847 if (reuse == MAT_INPLACE_MATRIX) { 2848 PetscCall(MatHeaderReplace(A,&B)); 2849 } else { 2850 *newmat = B; 2851 } 2852 PetscFunctionReturn(0); 2853 } 2854 2855 /*MC 2856 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2857 2858 Options Database Keys: 2859 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2860 . -mat_block_size <bs> - set the blocksize used to store the matrix 2861 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2862 - -mat_use_hash_table <fact> - set hash table factor 2863 2864 Level: beginner 2865 2866 Notes: 2867 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 2868 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2869 2870 .seealso: `MatCreateBAIJ` 2871 M*/ 2872 2873 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2874 2875 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2876 { 2877 Mat_MPIBAIJ *b; 2878 PetscBool flg = PETSC_FALSE; 2879 2880 PetscFunctionBegin; 2881 PetscCall(PetscNewLog(B,&b)); 2882 B->data = (void*)b; 2883 2884 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2885 B->assembled = PETSC_FALSE; 2886 2887 B->insertmode = NOT_SET_VALUES; 2888 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2889 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2890 2891 /* build local table of row and column ownerships */ 2892 PetscCall(PetscMalloc1(b->size+1,&b->rangebs)); 2893 2894 /* build cache for off array entries formed */ 2895 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2896 2897 b->donotstash = PETSC_FALSE; 2898 b->colmap = NULL; 2899 b->garray = NULL; 2900 b->roworiented = PETSC_TRUE; 2901 2902 /* stuff used in block assembly */ 2903 b->barray = NULL; 2904 2905 /* stuff used for matrix vector multiply */ 2906 b->lvec = NULL; 2907 b->Mvctx = NULL; 2908 2909 /* stuff for MatGetRow() */ 2910 b->rowindices = NULL; 2911 b->rowvalues = NULL; 2912 b->getrowactive = PETSC_FALSE; 2913 2914 /* hash table stuff */ 2915 b->ht = NULL; 2916 b->hd = NULL; 2917 b->ht_size = 0; 2918 b->ht_flag = PETSC_FALSE; 2919 b->ht_fact = 0; 2920 b->ht_total_ct = 0; 2921 b->ht_insert_ct = 0; 2922 2923 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2924 b->ijonly = PETSC_FALSE; 2925 2926 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj)); 2927 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ)); 2928 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ)); 2929 #if defined(PETSC_HAVE_HYPRE) 2930 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE)); 2931 #endif 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ)); 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS)); 2939 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ)); 2940 2941 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat"); 2942 PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg)); 2943 if (flg) { 2944 PetscReal fact = 1.39; 2945 PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE)); 2946 PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL)); 2947 if (fact <= 1.0) fact = 1.39; 2948 PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2949 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2950 } 2951 PetscOptionsEnd(); 2952 PetscFunctionReturn(0); 2953 } 2954 2955 /*MC 2956 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2957 2958 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2959 and MATMPIBAIJ otherwise. 2960 2961 Options Database Keys: 2962 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2963 2964 Level: beginner 2965 2966 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2967 M*/ 2968 2969 /*@C 2970 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2971 (block compressed row). For good matrix assembly performance 2972 the user should preallocate the matrix storage by setting the parameters 2973 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2974 performance can be increased by more than a factor of 50. 2975 2976 Collective on Mat 2977 2978 Input Parameters: 2979 + B - the matrix 2980 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2981 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2982 . d_nz - number of block nonzeros per block row in diagonal portion of local 2983 submatrix (same for all local rows) 2984 . d_nnz - array containing the number of block nonzeros in the various block rows 2985 of the in diagonal portion of the local (possibly different for each block 2986 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 2987 set it even if it is zero. 2988 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2989 submatrix (same for all local rows). 2990 - o_nnz - array containing the number of nonzeros in the various block rows of the 2991 off-diagonal portion of the local submatrix (possibly different for 2992 each block row) or NULL. 2993 2994 If the *_nnz parameter is given then the *_nz parameter is ignored 2995 2996 Options Database Keys: 2997 + -mat_block_size - size of the blocks to use 2998 - -mat_use_hash_table <fact> - set hash table factor 2999 3000 Notes: 3001 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3002 than it must be used on all processors that share the object for that argument. 3003 3004 Storage Information: 3005 For a square global matrix we define each processor's diagonal portion 3006 to be its local rows and the corresponding columns (a square submatrix); 3007 each processor's off-diagonal portion encompasses the remainder of the 3008 local matrix (a rectangular submatrix). 3009 3010 The user can specify preallocated storage for the diagonal part of 3011 the local submatrix with either d_nz or d_nnz (not both). Set 3012 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3013 memory allocation. Likewise, specify preallocated storage for the 3014 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3015 3016 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3017 the figure below we depict these three local rows and all columns (0-11). 3018 3019 .vb 3020 0 1 2 3 4 5 6 7 8 9 10 11 3021 -------------------------- 3022 row 3 |o o o d d d o o o o o o 3023 row 4 |o o o d d d o o o o o o 3024 row 5 |o o o d d d o o o o o o 3025 -------------------------- 3026 .ve 3027 3028 Thus, any entries in the d locations are stored in the d (diagonal) 3029 submatrix, and any entries in the o locations are stored in the 3030 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3031 stored simply in the MATSEQBAIJ format for compressed row storage. 3032 3033 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3034 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3035 In general, for PDE problems in which most nonzeros are near the diagonal, 3036 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3037 or you will get TERRIBLE performance; see the users' manual chapter on 3038 matrices. 3039 3040 You can call MatGetInfo() to get information on how effective the preallocation was; 3041 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3042 You can also run with the option -info and look for messages with the string 3043 malloc in them to see if additional memory allocation was needed. 3044 3045 Level: intermediate 3046 3047 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3048 @*/ 3049 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3050 { 3051 PetscFunctionBegin; 3052 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3053 PetscValidType(B,1); 3054 PetscValidLogicalCollectiveInt(B,bs,2); 3055 PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 3056 PetscFunctionReturn(0); 3057 } 3058 3059 /*@C 3060 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3061 (block compressed row). For good matrix assembly performance 3062 the user should preallocate the matrix storage by setting the parameters 3063 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3064 performance can be increased by more than a factor of 50. 3065 3066 Collective 3067 3068 Input Parameters: 3069 + comm - MPI communicator 3070 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3071 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3072 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3073 This value should be the same as the local size used in creating the 3074 y vector for the matrix-vector product y = Ax. 3075 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3076 This value should be the same as the local size used in creating the 3077 x vector for the matrix-vector product y = Ax. 3078 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3079 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3080 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3081 submatrix (same for all local rows) 3082 . d_nnz - array containing the number of nonzero blocks in the various block rows 3083 of the in diagonal portion of the local (possibly different for each block 3084 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3085 and set it even if it is zero. 3086 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3087 submatrix (same for all local rows). 3088 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3089 off-diagonal portion of the local submatrix (possibly different for 3090 each block row) or NULL. 3091 3092 Output Parameter: 3093 . A - the matrix 3094 3095 Options Database Keys: 3096 + -mat_block_size - size of the blocks to use 3097 - -mat_use_hash_table <fact> - set hash table factor 3098 3099 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3100 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3101 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3102 3103 Notes: 3104 If the *_nnz parameter is given then the *_nz parameter is ignored 3105 3106 A nonzero block is any block that as 1 or more nonzeros in it 3107 3108 The user MUST specify either the local or global matrix dimensions 3109 (possibly both). 3110 3111 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3112 than it must be used on all processors that share the object for that argument. 3113 3114 Storage Information: 3115 For a square global matrix we define each processor's diagonal portion 3116 to be its local rows and the corresponding columns (a square submatrix); 3117 each processor's off-diagonal portion encompasses the remainder of the 3118 local matrix (a rectangular submatrix). 3119 3120 The user can specify preallocated storage for the diagonal part of 3121 the local submatrix with either d_nz or d_nnz (not both). Set 3122 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3123 memory allocation. Likewise, specify preallocated storage for the 3124 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3125 3126 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3127 the figure below we depict these three local rows and all columns (0-11). 3128 3129 .vb 3130 0 1 2 3 4 5 6 7 8 9 10 11 3131 -------------------------- 3132 row 3 |o o o d d d o o o o o o 3133 row 4 |o o o d d d o o o o o o 3134 row 5 |o o o d d d o o o o o o 3135 -------------------------- 3136 .ve 3137 3138 Thus, any entries in the d locations are stored in the d (diagonal) 3139 submatrix, and any entries in the o locations are stored in the 3140 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3141 stored simply in the MATSEQBAIJ format for compressed row storage. 3142 3143 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3144 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3145 In general, for PDE problems in which most nonzeros are near the diagonal, 3146 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3147 or you will get TERRIBLE performance; see the users' manual chapter on 3148 matrices. 3149 3150 Level: intermediate 3151 3152 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3153 @*/ 3154 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3155 { 3156 PetscMPIInt size; 3157 3158 PetscFunctionBegin; 3159 PetscCall(MatCreate(comm,A)); 3160 PetscCall(MatSetSizes(*A,m,n,M,N)); 3161 PetscCallMPI(MPI_Comm_size(comm,&size)); 3162 if (size > 1) { 3163 PetscCall(MatSetType(*A,MATMPIBAIJ)); 3164 PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 3165 } else { 3166 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3167 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 3168 } 3169 PetscFunctionReturn(0); 3170 } 3171 3172 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3173 { 3174 Mat mat; 3175 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3176 PetscInt len=0; 3177 3178 PetscFunctionBegin; 3179 *newmat = NULL; 3180 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 3181 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 3182 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 3183 3184 mat->factortype = matin->factortype; 3185 mat->preallocated = PETSC_TRUE; 3186 mat->assembled = PETSC_TRUE; 3187 mat->insertmode = NOT_SET_VALUES; 3188 3189 a = (Mat_MPIBAIJ*)mat->data; 3190 mat->rmap->bs = matin->rmap->bs; 3191 a->bs2 = oldmat->bs2; 3192 a->mbs = oldmat->mbs; 3193 a->nbs = oldmat->nbs; 3194 a->Mbs = oldmat->Mbs; 3195 a->Nbs = oldmat->Nbs; 3196 3197 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 3198 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 3199 3200 a->size = oldmat->size; 3201 a->rank = oldmat->rank; 3202 a->donotstash = oldmat->donotstash; 3203 a->roworiented = oldmat->roworiented; 3204 a->rowindices = NULL; 3205 a->rowvalues = NULL; 3206 a->getrowactive = PETSC_FALSE; 3207 a->barray = NULL; 3208 a->rstartbs = oldmat->rstartbs; 3209 a->rendbs = oldmat->rendbs; 3210 a->cstartbs = oldmat->cstartbs; 3211 a->cendbs = oldmat->cendbs; 3212 3213 /* hash table stuff */ 3214 a->ht = NULL; 3215 a->hd = NULL; 3216 a->ht_size = 0; 3217 a->ht_flag = oldmat->ht_flag; 3218 a->ht_fact = oldmat->ht_fact; 3219 a->ht_total_ct = 0; 3220 a->ht_insert_ct = 0; 3221 3222 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1)); 3223 if (oldmat->colmap) { 3224 #if defined(PETSC_USE_CTABLE) 3225 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 3226 #else 3227 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 3228 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 3229 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 3230 #endif 3231 } else a->colmap = NULL; 3232 3233 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3234 PetscCall(PetscMalloc1(len,&a->garray)); 3235 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 3236 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 3237 } else a->garray = NULL; 3238 3239 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 3240 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 3241 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 3242 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 3243 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 3244 3245 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 3246 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 3247 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 3248 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 3249 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 3250 *newmat = mat; 3251 PetscFunctionReturn(0); 3252 } 3253 3254 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3255 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3256 { 3257 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3258 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3259 PetscScalar *matvals; 3260 3261 PetscFunctionBegin; 3262 PetscCall(PetscViewerSetUp(viewer)); 3263 3264 /* read in matrix header */ 3265 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3266 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3267 M = header[1]; N = header[2]; nz = header[3]; 3268 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3269 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3270 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3271 3272 /* set block sizes from the viewer's .info file */ 3273 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3274 /* set local sizes if not set already */ 3275 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3276 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3277 /* set global sizes if not set already */ 3278 if (mat->rmap->N < 0) mat->rmap->N = M; 3279 if (mat->cmap->N < 0) mat->cmap->N = N; 3280 PetscCall(PetscLayoutSetUp(mat->rmap)); 3281 PetscCall(PetscLayoutSetUp(mat->cmap)); 3282 3283 /* check if the matrix sizes are correct */ 3284 PetscCall(MatGetSize(mat,&rows,&cols)); 3285 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3286 PetscCall(MatGetBlockSize(mat,&bs)); 3287 PetscCall(MatGetLocalSize(mat,&m,&n)); 3288 PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL)); 3289 PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce)); 3290 mbs = m/bs; nbs = n/bs; 3291 3292 /* read in row lengths and build row indices */ 3293 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3294 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3295 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3296 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3297 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3298 3299 /* read in column indices and matrix values */ 3300 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3301 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3302 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3303 3304 { /* preallocate matrix storage */ 3305 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3306 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3307 PetscBool sbaij,done; 3308 PetscInt *d_nnz,*o_nnz; 3309 3310 PetscCall(PetscBTCreate(nbs,&bt)); 3311 PetscCall(PetscHSetICreate(&ht)); 3312 PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz)); 3313 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij)); 3314 for (i=0; i<mbs; i++) { 3315 PetscCall(PetscBTMemzero(nbs,bt)); 3316 PetscCall(PetscHSetIClear(ht)); 3317 for (k=0; k<bs; k++) { 3318 PetscInt row = bs*i + k; 3319 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3320 PetscInt col = colidxs[j]; 3321 if (!sbaij || col >= row) { 3322 if (col >= cs && col < ce) { 3323 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3324 } else { 3325 PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done)); 3326 if (done) o_nnz[i]++; 3327 } 3328 } 3329 } 3330 } 3331 } 3332 PetscCall(PetscBTDestroy(&bt)); 3333 PetscCall(PetscHSetIDestroy(&ht)); 3334 PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3335 PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3336 PetscCall(PetscFree2(d_nnz,o_nnz)); 3337 } 3338 3339 /* store matrix values */ 3340 for (i=0; i<m; i++) { 3341 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3342 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3343 } 3344 3345 PetscCall(PetscFree(rowidxs)); 3346 PetscCall(PetscFree2(colidxs,matvals)); 3347 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3348 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3349 PetscFunctionReturn(0); 3350 } 3351 3352 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3353 { 3354 PetscBool isbinary; 3355 3356 PetscFunctionBegin; 3357 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3358 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3359 PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer)); 3360 PetscFunctionReturn(0); 3361 } 3362 3363 /*@ 3364 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3365 3366 Input Parameters: 3367 + mat - the matrix 3368 - fact - factor 3369 3370 Not Collective, each process can use a different factor 3371 3372 Level: advanced 3373 3374 Notes: 3375 This can also be set by the command line option: -mat_use_hash_table <fact> 3376 3377 .seealso: `MatSetOption()` 3378 @*/ 3379 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3380 { 3381 PetscFunctionBegin; 3382 PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact)); 3383 PetscFunctionReturn(0); 3384 } 3385 3386 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3387 { 3388 Mat_MPIBAIJ *baij; 3389 3390 PetscFunctionBegin; 3391 baij = (Mat_MPIBAIJ*)mat->data; 3392 baij->ht_fact = fact; 3393 PetscFunctionReturn(0); 3394 } 3395 3396 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3397 { 3398 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3399 PetscBool flg; 3400 3401 PetscFunctionBegin; 3402 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg)); 3403 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3404 if (Ad) *Ad = a->A; 3405 if (Ao) *Ao = a->B; 3406 if (colmap) *colmap = a->garray; 3407 PetscFunctionReturn(0); 3408 } 3409 3410 /* 3411 Special version for direct calls from Fortran (to eliminate two function call overheads 3412 */ 3413 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3414 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3415 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3416 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3417 #endif 3418 3419 /*@C 3420 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3421 3422 Collective on Mat 3423 3424 Input Parameters: 3425 + mat - the matrix 3426 . min - number of input rows 3427 . im - input rows 3428 . nin - number of input columns 3429 . in - input columns 3430 . v - numerical values input 3431 - addvin - INSERT_VALUES or ADD_VALUES 3432 3433 Notes: 3434 This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 3435 3436 Level: advanced 3437 3438 .seealso: `MatSetValuesBlocked()` 3439 @*/ 3440 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3441 { 3442 /* convert input arguments to C version */ 3443 Mat mat = *matin; 3444 PetscInt m = *min, n = *nin; 3445 InsertMode addv = *addvin; 3446 3447 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3448 const MatScalar *value; 3449 MatScalar *barray = baij->barray; 3450 PetscBool roworiented = baij->roworiented; 3451 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3452 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3453 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3454 3455 PetscFunctionBegin; 3456 /* tasks normally handled by MatSetValuesBlocked() */ 3457 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3458 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3459 PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3460 if (mat->assembled) { 3461 mat->was_assembled = PETSC_TRUE; 3462 mat->assembled = PETSC_FALSE; 3463 } 3464 PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0)); 3465 3466 if (!barray) { 3467 PetscCall(PetscMalloc1(bs2,&barray)); 3468 baij->barray = barray; 3469 } 3470 3471 if (roworiented) stepval = (n-1)*bs; 3472 else stepval = (m-1)*bs; 3473 3474 for (i=0; i<m; i++) { 3475 if (im[i] < 0) continue; 3476 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 3477 if (im[i] >= rstart && im[i] < rend) { 3478 row = im[i] - rstart; 3479 for (j=0; j<n; j++) { 3480 /* If NumCol = 1 then a copy is not required */ 3481 if ((roworiented) && (n == 1)) { 3482 barray = (MatScalar*)v + i*bs2; 3483 } else if ((!roworiented) && (m == 1)) { 3484 barray = (MatScalar*)v + j*bs2; 3485 } else { /* Here a copy is required */ 3486 if (roworiented) { 3487 value = v + i*(stepval+bs)*bs + j*bs; 3488 } else { 3489 value = v + j*(stepval+bs)*bs + i*bs; 3490 } 3491 for (ii=0; ii<bs; ii++,value+=stepval) { 3492 for (jj=0; jj<bs; jj++) { 3493 *barray++ = *value++; 3494 } 3495 } 3496 barray -=bs2; 3497 } 3498 3499 if (in[j] >= cstart && in[j] < cend) { 3500 col = in[j] - cstart; 3501 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 3502 } else if (in[j] < 0) { 3503 continue; 3504 } else { 3505 PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 3506 if (mat->was_assembled) { 3507 if (!baij->colmap) { 3508 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3509 } 3510 3511 #if defined(PETSC_USE_DEBUG) 3512 #if defined(PETSC_USE_CTABLE) 3513 { PetscInt data; 3514 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 3515 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3516 } 3517 #else 3518 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3519 #endif 3520 #endif 3521 #if defined(PETSC_USE_CTABLE) 3522 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 3523 col = (col - 1)/bs; 3524 #else 3525 col = (baij->colmap[in[j]] - 1)/bs; 3526 #endif 3527 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3528 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3529 col = in[j]; 3530 } 3531 } else col = in[j]; 3532 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 3533 } 3534 } 3535 } else { 3536 if (!baij->donotstash) { 3537 if (roworiented) { 3538 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3539 } else { 3540 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3541 } 3542 } 3543 } 3544 } 3545 3546 /* task normally handled by MatSetValuesBlocked() */ 3547 PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0)); 3548 PetscFunctionReturn(0); 3549 } 3550 3551 /*@ 3552 MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3553 CSR format the local rows. 3554 3555 Collective 3556 3557 Input Parameters: 3558 + comm - MPI communicator 3559 . bs - the block size, only a block size of 1 is supported 3560 . m - number of local rows (Cannot be PETSC_DECIDE) 3561 . n - This value should be the same as the local size used in creating the 3562 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3563 calculated if N is given) For square matrices n is almost always m. 3564 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3565 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3566 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3567 . j - column indices 3568 - a - matrix values 3569 3570 Output Parameter: 3571 . mat - the matrix 3572 3573 Level: intermediate 3574 3575 Notes: 3576 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3577 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3578 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3579 3580 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3581 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3582 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3583 with column-major ordering within blocks. 3584 3585 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3586 3587 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3588 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3589 @*/ 3590 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3591 { 3592 PetscFunctionBegin; 3593 PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3594 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3595 PetscCall(MatCreate(comm,mat)); 3596 PetscCall(MatSetSizes(*mat,m,n,M,N)); 3597 PetscCall(MatSetType(*mat,MATMPIBAIJ)); 3598 PetscCall(MatSetBlockSize(*mat,bs)); 3599 PetscCall(MatSetUp(*mat)); 3600 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 3601 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a)); 3602 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 3603 PetscFunctionReturn(0); 3604 } 3605 3606 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3607 { 3608 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3609 PetscInt *indx; 3610 PetscScalar *values; 3611 3612 PetscFunctionBegin; 3613 PetscCall(MatGetSize(inmat,&m,&N)); 3614 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3615 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3616 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3617 PetscInt *bindx,rmax=a->rmax,j; 3618 PetscMPIInt rank,size; 3619 3620 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3621 mbs = m/bs; Nbs = N/cbs; 3622 if (n == PETSC_DECIDE) { 3623 PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N)); 3624 } 3625 nbs = n/cbs; 3626 3627 PetscCall(PetscMalloc1(rmax,&bindx)); 3628 MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */ 3629 3630 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3631 PetscCallMPI(MPI_Comm_rank(comm,&size)); 3632 if (rank == size-1) { 3633 /* Check sum(nbs) = Nbs */ 3634 PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 3635 } 3636 3637 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3638 for (i=0; i<mbs; i++) { 3639 PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 3640 nnz = nnz/bs; 3641 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3642 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 3643 PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 3644 } 3645 PetscCall(PetscFree(bindx)); 3646 3647 PetscCall(MatCreate(comm,outmat)); 3648 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 3649 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 3650 PetscCall(MatSetType(*outmat,MATBAIJ)); 3651 PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz)); 3652 PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 3653 MatPreallocateEnd(dnz,onz); 3654 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3655 } 3656 3657 /* numeric phase */ 3658 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3659 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 3660 3661 for (i=0; i<m; i++) { 3662 PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3663 Ii = i + rstart; 3664 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 3665 PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3666 } 3667 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 3668 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 3669 PetscFunctionReturn(0); 3670 } 3671