#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
  PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
  PetscCall(MatGetRowMaxAbs(a->A,vA,idx));

  PetscCall(VecGetArrayWrite(vA,&va));
  if (idx) {
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
  PetscCall(PetscMalloc1(m,&idxb));
  PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));

  PetscCall(VecGetArrayWrite(v,&vv));
  PetscCall(VecGetArrayRead(vB,&vb));
  for (i=0; i<m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
        idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v,&vv));
  PetscCall(VecRestoreArrayWrite(vA,&va));
  PetscCall(VecRestoreArrayRead(vB,&vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
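   For example (illustrative, for the plain-array branch compiled without PETSC_USE_CTABLE):
   with bs = 2 and garray = {3,7} the loop below stores colmap[3] = 1 and colmap[7] = 3,
   i.e. colmap[g] = 1 + bs*(local block index of global block column g), with 0 meaning
   "g is not present in the off-diagonal part".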
80 */ 81 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 82 { 83 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 84 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 85 PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 86 87 PetscFunctionBegin; 88 #if defined(PETSC_USE_CTABLE) 89 PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap)); 90 for (i=0; i<nbs; i++) { 91 PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES)); 92 } 93 #else 94 PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap)); 95 PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt))); 96 for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 97 #endif 98 PetscFunctionReturn(0); 99 } 100 101 #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 102 { \ 103 brow = row/bs; \ 104 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 105 rmax = aimax[brow]; nrow = ailen[brow]; \ 106 bcol = col/bs; \ 107 ridx = row % bs; cidx = col % bs; \ 108 low = 0; high = nrow; \ 109 while (high-low > 3) { \ 110 t = (low+high)/2; \ 111 if (rp[t] > bcol) high = t; \ 112 else low = t; \ 113 } \ 114 for (_i=low; _i<high; _i++) { \ 115 if (rp[_i] > bcol) break; \ 116 if (rp[_i] == bcol) { \ 117 bap = ap + bs2*_i + bs*cidx + ridx; \ 118 if (addv == ADD_VALUES) *bap += value; \ 119 else *bap = value; \ 120 goto a_noinsert; \ 121 } \ 122 } \ 123 if (a->nonew == 1) goto a_noinsert; \ 124 PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 125 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 126 N = nrow++ - 1; \ 127 /* shift up all the later entries in this row */ \ 128 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 129 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \ 130 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 131 rp[_i] = bcol; \ 132 ap[bs2*_i + bs*cidx + ridx] = value; \ 133 a_noinsert:; \ 134 ailen[brow] = nrow; \ 135 } 136 137 #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 138 { \ 139 brow = row/bs; \ 140 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 141 rmax = bimax[brow]; nrow = bilen[brow]; \ 142 bcol = col/bs; \ 143 ridx = row % bs; cidx = col % bs; \ 144 low = 0; high = nrow; \ 145 while (high-low > 3) { \ 146 t = (low+high)/2; \ 147 if (rp[t] > bcol) high = t; \ 148 else low = t; \ 149 } \ 150 for (_i=low; _i<high; _i++) { \ 151 if (rp[_i] > bcol) break; \ 152 if (rp[_i] == bcol) { \ 153 bap = ap + bs2*_i + bs*cidx + ridx; \ 154 if (addv == ADD_VALUES) *bap += value; \ 155 else *bap = value; \ 156 goto b_noinsert; \ 157 } \ 158 } \ 159 if (b->nonew == 1) goto b_noinsert; \ 160 PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 161 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 162 N = nrow++ - 1; \ 163 /* shift up all the later entries in this row */ \ 164 PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1));\ 165 PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1)));\ 166 PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \ 167 rp[_i] = bcol; \ 168 ap[bs2*_i + bs*cidx + ridx] = value; \ 169 b_noinsert:; \ 170 bilen[brow] = nrow; \ 171 } 172 173 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar 
v[],InsertMode addv) 174 { 175 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 176 MatScalar value; 177 PetscBool roworiented = baij->roworiented; 178 PetscInt i,j,row,col; 179 PetscInt rstart_orig=mat->rmap->rstart; 180 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 181 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 182 183 /* Some Variables required in the macro */ 184 Mat A = baij->A; 185 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 186 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 187 MatScalar *aa =a->a; 188 189 Mat B = baij->B; 190 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 191 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 192 MatScalar *ba =b->a; 193 194 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 195 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 196 MatScalar *ap,*bap; 197 198 PetscFunctionBegin; 199 for (i=0; i<m; i++) { 200 if (im[i] < 0) continue; 201 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 202 if (im[i] >= rstart_orig && im[i] < rend_orig) { 203 row = im[i] - rstart_orig; 204 for (j=0; j<n; j++) { 205 if (in[j] >= cstart_orig && in[j] < cend_orig) { 206 col = in[j] - cstart_orig; 207 if (roworiented) value = v[i*n+j]; 208 else value = v[i+j*m]; 209 MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 210 } else if (in[j] < 0) { 211 continue; 212 } else { 213 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 214 if (mat->was_assembled) { 215 if (!baij->colmap) { 216 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 217 } 218 #if defined(PETSC_USE_CTABLE) 219 PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col)); 220 col = col - 1; 221 #else 222 col = baij->colmap[in[j]/bs] - 1; 223 #endif 224 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 225 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 226 col = in[j]; 227 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 228 B = baij->B; 229 b = (Mat_SeqBAIJ*)(B)->data; 230 bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 231 ba =b->a; 232 } else { 233 PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 234 col += in[j]%bs; 235 } 236 } else col = in[j]; 237 if (roworiented) value = v[i*n+j]; 238 else value = v[i+j*m]; 239 MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 240 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 241 } 242 } 243 } else { 244 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 245 if (!baij->donotstash) { 246 mat->assembled = PETSC_FALSE; 247 if (roworiented) { 248 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 249 } else { 250 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 251 } 252 } 253 } 254 } 255 PetscFunctionReturn(0); 256 } 257 258 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 259 { 260 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 261 PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 262 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 263 PetscInt *aj 
=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 264 PetscBool roworiented=a->roworiented; 265 const PetscScalar *value = v; 266 MatScalar *ap,*aa = a->a,*bap; 267 268 PetscFunctionBegin; 269 rp = aj + ai[row]; 270 ap = aa + bs2*ai[row]; 271 rmax = imax[row]; 272 nrow = ailen[row]; 273 value = v; 274 low = 0; 275 high = nrow; 276 while (high-low > 7) { 277 t = (low+high)/2; 278 if (rp[t] > col) high = t; 279 else low = t; 280 } 281 for (i=low; i<high; i++) { 282 if (rp[i] > col) break; 283 if (rp[i] == col) { 284 bap = ap + bs2*i; 285 if (roworiented) { 286 if (is == ADD_VALUES) { 287 for (ii=0; ii<bs; ii++) { 288 for (jj=ii; jj<bs2; jj+=bs) { 289 bap[jj] += *value++; 290 } 291 } 292 } else { 293 for (ii=0; ii<bs; ii++) { 294 for (jj=ii; jj<bs2; jj+=bs) { 295 bap[jj] = *value++; 296 } 297 } 298 } 299 } else { 300 if (is == ADD_VALUES) { 301 for (ii=0; ii<bs; ii++,value+=bs) { 302 for (jj=0; jj<bs; jj++) { 303 bap[jj] += value[jj]; 304 } 305 bap += bs; 306 } 307 } else { 308 for (ii=0; ii<bs; ii++,value+=bs) { 309 for (jj=0; jj<bs; jj++) { 310 bap[jj] = value[jj]; 311 } 312 bap += bs; 313 } 314 } 315 } 316 goto noinsert2; 317 } 318 } 319 if (nonew == 1) goto noinsert2; 320 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 321 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 322 N = nrow++ - 1; high++; 323 /* shift up all the later entries in this row */ 324 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 325 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 326 rp[i] = col; 327 bap = ap + bs2*i; 328 if (roworiented) { 329 for (ii=0; ii<bs; ii++) { 330 for (jj=ii; jj<bs2; jj+=bs) { 331 bap[jj] = *value++; 332 } 333 } 334 } else { 335 for (ii=0; ii<bs; ii++) { 336 for (jj=0; jj<bs; jj++) { 337 *bap++ = *value++; 338 } 339 } 340 } 341 noinsert2:; 342 ailen[row] = nrow; 343 PetscFunctionReturn(0); 344 } 345 346 /* 347 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 348 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 349 */ 350 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 351 { 352 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 353 const PetscScalar *value; 354 MatScalar *barray = baij->barray; 355 PetscBool roworiented = baij->roworiented; 356 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 357 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 358 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 359 360 PetscFunctionBegin; 361 if (!barray) { 362 PetscCall(PetscMalloc1(bs2,&barray)); 363 baij->barray = barray; 364 } 365 366 if (roworiented) stepval = (n-1)*bs; 367 else stepval = (m-1)*bs; 368 369 for (i=0; i<m; i++) { 370 if (im[i] < 0) continue; 371 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 372 if (im[i] >= rstart && im[i] < rend) { 373 row = im[i] - rstart; 374 for (j=0; j<n; j++) { 375 /* If NumCol = 1 then a copy is not required */ 376 if ((roworiented) && (n == 1)) { 377 barray = (MatScalar*)v + i*bs2; 378 } else if ((!roworiented) && (m == 1)) { 379 barray = (MatScalar*)v + j*bs2; 380 } else { /* Here a copy is required */ 381 if (roworiented) { 382 value = v + 
(i*(stepval+bs) + j)*bs; 383 } else { 384 value = v + (j*(stepval+bs) + i)*bs; 385 } 386 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 387 for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 388 barray += bs; 389 } 390 barray -= bs2; 391 } 392 393 if (in[j] >= cstart && in[j] < cend) { 394 col = in[j] - cstart; 395 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 396 } else if (in[j] < 0) { 397 continue; 398 } else { 399 PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 400 if (mat->was_assembled) { 401 if (!baij->colmap) { 402 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 403 } 404 405 #if defined(PETSC_USE_DEBUG) 406 #if defined(PETSC_USE_CTABLE) 407 { PetscInt data; 408 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 409 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 410 } 411 #else 412 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 413 #endif 414 #endif 415 #if defined(PETSC_USE_CTABLE) 416 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 417 col = (col - 1)/bs; 418 #else 419 col = (baij->colmap[in[j]] - 1)/bs; 420 #endif 421 if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 422 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 423 col = in[j]; 424 } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]); 425 } else col = in[j]; 426 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 427 } 428 } 429 } else { 430 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 431 if (!baij->donotstash) { 432 if (roworiented) { 433 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 434 } else { 435 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 436 } 437 } 438 } 439 } 440 PetscFunctionReturn(0); 441 } 442 443 #define HASH_KEY 0.6180339887 444 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 445 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 446 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 447 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 448 { 449 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 450 PetscBool roworiented = baij->roworiented; 451 PetscInt i,j,row,col; 452 PetscInt rstart_orig=mat->rmap->rstart; 453 PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 454 PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 455 PetscReal tmp; 456 MatScalar **HD = baij->hd,value; 457 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 458 459 PetscFunctionBegin; 460 for (i=0; i<m; i++) { 461 if (PetscDefined(USE_DEBUG)) { 462 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 463 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 464 } 465 row = im[i]; 466 if (row >= rstart_orig && row < rend_orig) { 467 for (j=0; j<n; j++) { 468 col = in[j]; 
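        /* Illustrative arithmetic for the multiplicative HASH macro defined above (an added
           explanatory note, not executable code): with size = 97 and key = 1000,
           key*HASH_KEY = 618.0339887, whose fractional part is 0.0339887, and
           97*0.0339887 = 3.29..., so h1 = 3.  Collisions are then resolved by the linear
           probing loops below, which wrap from the end of the table back to its start. */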
469 if (roworiented) value = v[i*n+j]; 470 else value = v[i+j*m]; 471 /* Look up PetscInto the Hash Table */ 472 key = (row/bs)*Nbs+(col/bs)+1; 473 h1 = HASH(size,key,tmp); 474 475 idx = h1; 476 if (PetscDefined(USE_DEBUG)) { 477 insert_ct++; 478 total_ct++; 479 if (HT[idx] != key) { 480 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 481 if (idx == size) { 482 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 483 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 484 } 485 } 486 } else if (HT[idx] != key) { 487 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 488 if (idx == size) { 489 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 490 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 491 } 492 } 493 /* A HASH table entry is found, so insert the values at the correct address */ 494 if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 495 else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 496 } 497 } else if (!baij->donotstash) { 498 if (roworiented) { 499 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE)); 500 } else { 501 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE)); 502 } 503 } 504 } 505 if (PetscDefined(USE_DEBUG)) { 506 baij->ht_total_ct += total_ct; 507 baij->ht_insert_ct += insert_ct; 508 } 509 PetscFunctionReturn(0); 510 } 511 512 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 513 { 514 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 515 PetscBool roworiented = baij->roworiented; 516 PetscInt i,j,ii,jj,row,col; 517 PetscInt rstart=baij->rstartbs; 518 PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 519 PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 520 PetscReal tmp; 521 MatScalar **HD = baij->hd,*baij_a; 522 const PetscScalar *v_t,*value; 523 PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 524 525 PetscFunctionBegin; 526 if (roworiented) stepval = (n-1)*bs; 527 else stepval = (m-1)*bs; 528 529 for (i=0; i<m; i++) { 530 if (PetscDefined(USE_DEBUG)) { 531 PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]); 532 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 533 } 534 row = im[i]; 535 v_t = v + i*nbs2; 536 if (row >= rstart && row < rend) { 537 for (j=0; j<n; j++) { 538 col = in[j]; 539 540 /* Look up into the Hash Table */ 541 key = row*Nbs+col+1; 542 h1 = HASH(size,key,tmp); 543 544 idx = h1; 545 if (PetscDefined(USE_DEBUG)) { 546 total_ct++; 547 insert_ct++; 548 if (HT[idx] != key) { 549 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 550 if (idx == size) { 551 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 552 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 553 } 554 } 555 } else if (HT[idx] != key) { 556 for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 557 if (idx == size) { 558 for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 559 PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no 
entry in the hash table", row, col); 560 } 561 } 562 baij_a = HD[idx]; 563 if (roworiented) { 564 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 565 /* value = v + (i*(stepval+bs)+j)*bs; */ 566 value = v_t; 567 v_t += bs; 568 if (addv == ADD_VALUES) { 569 for (ii=0; ii<bs; ii++,value+=stepval) { 570 for (jj=ii; jj<bs2; jj+=bs) { 571 baij_a[jj] += *value++; 572 } 573 } 574 } else { 575 for (ii=0; ii<bs; ii++,value+=stepval) { 576 for (jj=ii; jj<bs2; jj+=bs) { 577 baij_a[jj] = *value++; 578 } 579 } 580 } 581 } else { 582 value = v + j*(stepval+bs)*bs + i*bs; 583 if (addv == ADD_VALUES) { 584 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 585 for (jj=0; jj<bs; jj++) { 586 baij_a[jj] += *value++; 587 } 588 } 589 } else { 590 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 591 for (jj=0; jj<bs; jj++) { 592 baij_a[jj] = *value++; 593 } 594 } 595 } 596 } 597 } 598 } else { 599 if (!baij->donotstash) { 600 if (roworiented) { 601 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 602 } else { 603 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 604 } 605 } 606 } 607 } 608 if (PetscDefined(USE_DEBUG)) { 609 baij->ht_total_ct += total_ct; 610 baij->ht_insert_ct += insert_ct; 611 } 612 PetscFunctionReturn(0); 613 } 614 615 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 616 { 617 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 618 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 619 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 620 621 PetscFunctionBegin; 622 for (i=0; i<m; i++) { 623 if (idxm[i] < 0) continue; /* negative row */ 624 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 625 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 626 row = idxm[i] - bsrstart; 627 for (j=0; j<n; j++) { 628 if (idxn[j] < 0) continue; /* negative column */ 629 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 630 if (idxn[j] >= bscstart && idxn[j] < bscend) { 631 col = idxn[j] - bscstart; 632 PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j)); 633 } else { 634 if (!baij->colmap) { 635 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 636 } 637 #if defined(PETSC_USE_CTABLE) 638 PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data)); 639 data--; 640 #else 641 data = baij->colmap[idxn[j]/bs]-1; 642 #endif 643 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 644 else { 645 col = data + idxn[j]%bs; 646 PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j)); 647 } 648 } 649 } 650 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 651 } 652 PetscFunctionReturn(0); 653 } 654 655 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 656 { 657 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 658 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 659 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 660 PetscReal sum = 0.0; 661 MatScalar *v; 662 663 PetscFunctionBegin; 664 if (baij->size == 1) { 665 PetscCall(MatNorm(baij->A,type,nrm)); 666 } else { 667 if (type == NORM_FROBENIUS) { 668 v = amat->a; 669 nz = amat->nz*bs2; 670 for (i=0; i<nz; i++) { 671 sum += 
PetscRealPart(PetscConj(*v)*(*v)); v++; 672 } 673 v = bmat->a; 674 nz = bmat->nz*bs2; 675 for (i=0; i<nz; i++) { 676 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 677 } 678 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 679 *nrm = PetscSqrtReal(*nrm); 680 } else if (type == NORM_1) { /* max column sum */ 681 PetscReal *tmp,*tmp2; 682 PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 683 PetscCall(PetscCalloc1(mat->cmap->N,&tmp)); 684 PetscCall(PetscMalloc1(mat->cmap->N,&tmp2)); 685 v = amat->a; jj = amat->j; 686 for (i=0; i<amat->nz; i++) { 687 for (j=0; j<bs; j++) { 688 col = bs*(cstart + *jj) + j; /* column index */ 689 for (row=0; row<bs; row++) { 690 tmp[col] += PetscAbsScalar(*v); v++; 691 } 692 } 693 jj++; 694 } 695 v = bmat->a; jj = bmat->j; 696 for (i=0; i<bmat->nz; i++) { 697 for (j=0; j<bs; j++) { 698 col = bs*garray[*jj] + j; 699 for (row=0; row<bs; row++) { 700 tmp[col] += PetscAbsScalar(*v); v++; 701 } 702 } 703 jj++; 704 } 705 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 706 *nrm = 0.0; 707 for (j=0; j<mat->cmap->N; j++) { 708 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 709 } 710 PetscCall(PetscFree(tmp)); 711 PetscCall(PetscFree(tmp2)); 712 } else if (type == NORM_INFINITY) { /* max row sum */ 713 PetscReal *sums; 714 PetscCall(PetscMalloc1(bs,&sums)); 715 sum = 0.0; 716 for (j=0; j<amat->mbs; j++) { 717 for (row=0; row<bs; row++) sums[row] = 0.0; 718 v = amat->a + bs2*amat->i[j]; 719 nz = amat->i[j+1]-amat->i[j]; 720 for (i=0; i<nz; i++) { 721 for (col=0; col<bs; col++) { 722 for (row=0; row<bs; row++) { 723 sums[row] += PetscAbsScalar(*v); v++; 724 } 725 } 726 } 727 v = bmat->a + bs2*bmat->i[j]; 728 nz = bmat->i[j+1]-bmat->i[j]; 729 for (i=0; i<nz; i++) { 730 for (col=0; col<bs; col++) { 731 for (row=0; row<bs; row++) { 732 sums[row] += PetscAbsScalar(*v); v++; 733 } 734 } 735 } 736 for (row=0; row<bs; row++) { 737 if (sums[row] > sum) sum = sums[row]; 738 } 739 } 740 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 741 PetscCall(PetscFree(sums)); 742 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 743 } 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 Creates the hash table, and sets the table 749 This table is created only once. 750 If new entried need to be added to the matrix 751 then the hash table has to be destroyed and 752 recreated. 
753 */ 754 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 755 { 756 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 757 Mat A = baij->A,B=baij->B; 758 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 759 PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 760 PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 761 PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 762 PetscInt *HT,key; 763 MatScalar **HD; 764 PetscReal tmp; 765 #if defined(PETSC_USE_INFO) 766 PetscInt ct=0,max=0; 767 #endif 768 769 PetscFunctionBegin; 770 if (baij->ht) PetscFunctionReturn(0); 771 772 baij->ht_size = (PetscInt)(factor*nz); 773 ht_size = baij->ht_size; 774 775 /* Allocate Memory for Hash Table */ 776 PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht)); 777 HD = baij->hd; 778 HT = baij->ht; 779 780 /* Loop Over A */ 781 for (i=0; i<a->mbs; i++) { 782 for (j=ai[i]; j<ai[i+1]; j++) { 783 row = i+rstart; 784 col = aj[j]+cstart; 785 786 key = row*Nbs + col + 1; 787 h1 = HASH(ht_size,key,tmp); 788 for (k=0; k<ht_size; k++) { 789 if (!HT[(h1+k)%ht_size]) { 790 HT[(h1+k)%ht_size] = key; 791 HD[(h1+k)%ht_size] = a->a + j*bs2; 792 break; 793 #if defined(PETSC_USE_INFO) 794 } else { 795 ct++; 796 #endif 797 } 798 } 799 #if defined(PETSC_USE_INFO) 800 if (k> max) max = k; 801 #endif 802 } 803 } 804 /* Loop Over B */ 805 for (i=0; i<b->mbs; i++) { 806 for (j=bi[i]; j<bi[i+1]; j++) { 807 row = i+rstart; 808 col = garray[bj[j]]; 809 key = row*Nbs + col + 1; 810 h1 = HASH(ht_size,key,tmp); 811 for (k=0; k<ht_size; k++) { 812 if (!HT[(h1+k)%ht_size]) { 813 HT[(h1+k)%ht_size] = key; 814 HD[(h1+k)%ht_size] = b->a + j*bs2; 815 break; 816 #if defined(PETSC_USE_INFO) 817 } else { 818 ct++; 819 #endif 820 } 821 } 822 #if defined(PETSC_USE_INFO) 823 if (k> max) max = k; 824 #endif 825 } 826 } 827 828 /* Print Summary */ 829 #if defined(PETSC_USE_INFO) 830 for (i=0,j=0; i<ht_size; i++) { 831 if (HT[i]) j++; 832 } 833 PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? 
(double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max)); 834 #endif 835 PetscFunctionReturn(0); 836 } 837 838 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 839 { 840 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 841 PetscInt nstash,reallocs; 842 843 PetscFunctionBegin; 844 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 845 846 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 847 PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs)); 848 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 849 PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 850 PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs)); 851 PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 856 { 857 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 858 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 859 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 860 PetscInt *row,*col; 861 PetscBool r1,r2,r3,other_disassembled; 862 MatScalar *val; 863 PetscMPIInt n; 864 865 PetscFunctionBegin; 866 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 867 if (!baij->donotstash && !mat->nooffprocentries) { 868 while (1) { 869 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 870 if (!flg) break; 871 872 for (i=0; i<n;) { 873 /* Now identify the consecutive vals belonging to the same row */ 874 for (j=i,rstart=row[j]; j<n; j++) { 875 if (row[j] != rstart) break; 876 } 877 if (j < n) ncols = j-i; 878 else ncols = n-i; 879 /* Now assemble all these values with a single function call */ 880 PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 881 i = j; 882 } 883 } 884 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 885 /* Now process the block-stash. Since the values are stashed column-oriented, 886 set the roworiented flag to column oriented, and after MatSetValues() 887 restore the original flags */ 888 r1 = baij->roworiented; 889 r2 = a->roworiented; 890 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 891 892 baij->roworiented = PETSC_FALSE; 893 a->roworiented = PETSC_FALSE; 894 895 (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 896 while (1) { 897 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg)); 898 if (!flg) break; 899 900 for (i=0; i<n;) { 901 /* Now identify the consecutive vals belonging to the same row */ 902 for (j=i,rstart=row[j]; j<n; j++) { 903 if (row[j] != rstart) break; 904 } 905 if (j < n) ncols = j-i; 906 else ncols = n-i; 907 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode)); 908 i = j; 909 } 910 } 911 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 912 913 baij->roworiented = r1; 914 a->roworiented = r2; 915 916 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 917 } 918 919 PetscCall(MatAssemblyBegin(baij->A,mode)); 920 PetscCall(MatAssemblyEnd(baij->A,mode)); 921 922 /* determine if any processor has disassembled, if so we must 923 also disassemble ourselves, in order that we may reassemble. 
*/ 924 /* 925 if nonzero structure of submatrix B cannot change then we know that 926 no processor disassembled thus we can skip this stuff 927 */ 928 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 929 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 930 if (mat->was_assembled && !other_disassembled) { 931 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 932 } 933 } 934 935 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 936 PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 937 } 938 PetscCall(MatAssemblyBegin(baij->B,mode)); 939 PetscCall(MatAssemblyEnd(baij->B,mode)); 940 941 #if defined(PETSC_USE_INFO) 942 if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 943 PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct)); 944 945 baij->ht_total_ct = 0; 946 baij->ht_insert_ct = 0; 947 } 948 #endif 949 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 950 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact)); 951 952 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 953 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 954 } 955 956 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 957 958 baij->rowvalues = NULL; 959 960 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 961 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 962 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 963 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 964 } 965 PetscFunctionReturn(0); 966 } 967 968 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 969 #include <petscdraw.h> 970 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 971 { 972 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 973 PetscMPIInt rank = baij->rank; 974 PetscInt bs = mat->rmap->bs; 975 PetscBool iascii,isdraw; 976 PetscViewer sviewer; 977 PetscViewerFormat format; 978 979 PetscFunctionBegin; 980 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 981 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 982 if (iascii) { 983 PetscCall(PetscViewerGetFormat(viewer,&format)); 984 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 985 MatInfo info; 986 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 987 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 988 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 989 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", 990 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory)); 991 PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info)); 992 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 993 PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info)); 994 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 995 PetscCall(PetscViewerFlush(viewer)); 996 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 997 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 998 
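      /* A minimal usage sketch (disabled; added for illustration only): one way a caller can
         request the detailed output produced by this branch, equivalent to the run-time
         option -mat_view ::ascii_info_detail. */
#if 0
      PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL));
      PetscCall(MatView(mat,PETSC_VIEWER_STDOUT_WORLD));
      PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
#endif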
PetscCall(VecScatterView(baij->Mvctx,viewer)); 999 PetscFunctionReturn(0); 1000 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1001 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 1002 PetscFunctionReturn(0); 1003 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1004 PetscFunctionReturn(0); 1005 } 1006 } 1007 1008 if (isdraw) { 1009 PetscDraw draw; 1010 PetscBool isnull; 1011 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1012 PetscCall(PetscDrawIsNull(draw,&isnull)); 1013 if (isnull) PetscFunctionReturn(0); 1014 } 1015 1016 { 1017 /* assemble the entire matrix onto first processor. */ 1018 Mat A; 1019 Mat_SeqBAIJ *Aloc; 1020 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1021 MatScalar *a; 1022 const char *matname; 1023 1024 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1025 /* Perhaps this should be the type of mat? */ 1026 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 1027 if (rank == 0) { 1028 PetscCall(MatSetSizes(A,M,N,M,N)); 1029 } else { 1030 PetscCall(MatSetSizes(A,0,0,M,N)); 1031 } 1032 PetscCall(MatSetType(A,MATMPIBAIJ)); 1033 PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL)); 1034 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE)); 1035 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 1036 1037 /* copy over the A part */ 1038 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1039 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1040 PetscCall(PetscMalloc1(bs,&rvals)); 1041 1042 for (i=0; i<mbs; i++) { 1043 rvals[0] = bs*(baij->rstartbs + i); 1044 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1045 for (j=ai[i]; j<ai[i+1]; j++) { 1046 col = (baij->cstartbs+aj[j])*bs; 1047 for (k=0; k<bs; k++) { 1048 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1049 col++; a += bs; 1050 } 1051 } 1052 } 1053 /* copy over the B part */ 1054 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1055 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1056 for (i=0; i<mbs; i++) { 1057 rvals[0] = bs*(baij->rstartbs + i); 1058 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1059 for (j=ai[i]; j<ai[i+1]; j++) { 1060 col = baij->garray[aj[j]]*bs; 1061 for (k=0; k<bs; k++) { 1062 PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES)); 1063 col++; a += bs; 1064 } 1065 } 1066 } 1067 PetscCall(PetscFree(rvals)); 1068 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1069 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1070 /* 1071 Everyone has to call to draw the matrix since the graphics waits are 1072 synchronized across all processors that share the PetscDraw object 1073 */ 1074 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1075 PetscCall(PetscObjectGetName((PetscObject)mat,&matname)); 1076 if (rank == 0) { 1077 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname)); 1078 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer)); 1079 } 1080 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1081 PetscCall(PetscViewerFlush(viewer)); 1082 PetscCall(MatDestroy(&A)); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1088 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1089 { 1090 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1091 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1092 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1093 const PetscInt *garray = aij->garray; 1094 PetscInt 
header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1095 PetscInt *rowlens,*colidxs; 1096 PetscScalar *matvals; 1097 1098 PetscFunctionBegin; 1099 PetscCall(PetscViewerSetUp(viewer)); 1100 1101 M = mat->rmap->N; 1102 N = mat->cmap->N; 1103 m = mat->rmap->n; 1104 rs = mat->rmap->rstart; 1105 cs = mat->cmap->rstart; 1106 bs = mat->rmap->bs; 1107 nz = bs*bs*(A->nz + B->nz); 1108 1109 /* write matrix header */ 1110 header[0] = MAT_FILE_CLASSID; 1111 header[1] = M; header[2] = N; header[3] = nz; 1112 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1113 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1114 1115 /* fill in and store row lengths */ 1116 PetscCall(PetscMalloc1(m,&rowlens)); 1117 for (cnt=0, i=0; i<A->mbs; i++) 1118 for (j=0; j<bs; j++) 1119 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1120 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1121 PetscCall(PetscFree(rowlens)); 1122 1123 /* fill in and store column indices */ 1124 PetscCall(PetscMalloc1(nz,&colidxs)); 1125 for (cnt=0, i=0; i<A->mbs; i++) { 1126 for (k=0; k<bs; k++) { 1127 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1128 if (garray[B->j[jb]] > cs/bs) break; 1129 for (l=0; l<bs; l++) 1130 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1131 } 1132 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1133 for (l=0; l<bs; l++) 1134 colidxs[cnt++] = bs*A->j[ja] + l + cs; 1135 for (; jb<B->i[i+1]; jb++) 1136 for (l=0; l<bs; l++) 1137 colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1138 } 1139 } 1140 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1141 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT)); 1142 PetscCall(PetscFree(colidxs)); 1143 1144 /* fill in and store nonzero values */ 1145 PetscCall(PetscMalloc1(nz,&matvals)); 1146 for (cnt=0, i=0; i<A->mbs; i++) { 1147 for (k=0; k<bs; k++) { 1148 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1149 if (garray[B->j[jb]] > cs/bs) break; 1150 for (l=0; l<bs; l++) 1151 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1152 } 1153 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1154 for (l=0; l<bs; l++) 1155 matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1156 for (; jb<B->i[i+1]; jb++) 1157 for (l=0; l<bs; l++) 1158 matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1159 } 1160 } 1161 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR)); 1162 PetscCall(PetscFree(matvals)); 1163 1164 /* write block size option to the viewer's .info file */ 1165 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 1170 { 1171 PetscBool iascii,isdraw,issocket,isbinary; 1172 1173 PetscFunctionBegin; 1174 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1175 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1176 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1177 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1178 if (iascii || isdraw || issocket) { 1179 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer)); 1180 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat,viewer)); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 1185 { 1186 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1187 1188 PetscFunctionBegin; 1189 #if 
defined(PETSC_USE_LOG) 1190 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1191 #endif 1192 PetscCall(MatStashDestroy_Private(&mat->stash)); 1193 PetscCall(MatStashDestroy_Private(&mat->bstash)); 1194 PetscCall(MatDestroy(&baij->A)); 1195 PetscCall(MatDestroy(&baij->B)); 1196 #if defined(PETSC_USE_CTABLE) 1197 PetscCall(PetscTableDestroy(&baij->colmap)); 1198 #else 1199 PetscCall(PetscFree(baij->colmap)); 1200 #endif 1201 PetscCall(PetscFree(baij->garray)); 1202 PetscCall(VecDestroy(&baij->lvec)); 1203 PetscCall(VecScatterDestroy(&baij->Mvctx)); 1204 PetscCall(PetscFree2(baij->rowvalues,baij->rowindices)); 1205 PetscCall(PetscFree(baij->barray)); 1206 PetscCall(PetscFree2(baij->hd,baij->ht)); 1207 PetscCall(PetscFree(baij->rangebs)); 1208 PetscCall(PetscFree(mat->data)); 1209 1210 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1211 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1212 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1213 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL)); 1214 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL)); 1215 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1216 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL)); 1217 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL)); 1218 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiadj_C",NULL)); 1219 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiaij_C",NULL)); 1220 #if defined(PETSC_HAVE_HYPRE) 1221 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL)); 1222 #endif 1223 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL)); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1228 { 1229 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1230 PetscInt nt; 1231 1232 PetscFunctionBegin; 1233 PetscCall(VecGetLocalSize(xx,&nt)); 1234 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1235 PetscCall(VecGetLocalSize(yy,&nt)); 1236 PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1237 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1238 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 1239 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1240 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1245 { 1246 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1250 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 1251 PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1252 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1257 { 1258 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1259 1260 PetscFunctionBegin; 1261 /* do nondiagonal part */ 1262 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1263 /* do local part */ 1264 
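  /* In block terms the transpose product is yy = A_d^T*xx + (scatter-reverse of B_o^T*xx), where
     A_d = a->A is the diagonal block and B_o = a->B the off-diagonal block: B_o^T*xx was just
     placed in a->lvec above, the call below forms A_d^T*xx directly in yy, and the reverse
     scatter then adds the lvec contributions into the owning processes' entries of yy. */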
PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1265 /* add partial results together */ 1266 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1267 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1272 { 1273 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1274 1275 PetscFunctionBegin; 1276 /* do nondiagonal part */ 1277 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1278 /* do local part */ 1279 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1280 /* add partial results together */ 1281 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1282 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 /* 1287 This only works correctly for square matrices where the subblock A->A is the 1288 diagonal block 1289 */ 1290 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1291 { 1292 PetscFunctionBegin; 1293 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1294 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v)); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1299 { 1300 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1301 1302 PetscFunctionBegin; 1303 PetscCall(MatScale(a->A,aa)); 1304 PetscCall(MatScale(a->B,aa)); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1309 { 1310 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 1311 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1312 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1313 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1314 PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1315 1316 PetscFunctionBegin; 1317 PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1318 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1319 mat->getrowactive = PETSC_TRUE; 1320 1321 if (!mat->rowvalues && (idx || v)) { 1322 /* 1323 allocate enough space to hold information from the longest row. 
1324 */ 1325 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1326 PetscInt max = 1,mbs = mat->mbs,tmp; 1327 for (i=0; i<mbs; i++) { 1328 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1329 if (max < tmp) max = tmp; 1330 } 1331 PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices)); 1332 } 1333 lrow = row - brstart; 1334 1335 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1336 if (!v) {pvA = NULL; pvB = NULL;} 1337 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1338 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1339 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1340 nztot = nzA + nzB; 1341 1342 cmap = mat->garray; 1343 if (v || idx) { 1344 if (nztot) { 1345 /* Sort by increasing column numbers, assuming A and B already sorted */ 1346 PetscInt imark = -1; 1347 if (v) { 1348 *v = v_p = mat->rowvalues; 1349 for (i=0; i<nzB; i++) { 1350 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1351 else break; 1352 } 1353 imark = i; 1354 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1355 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1356 } 1357 if (idx) { 1358 *idx = idx_p = mat->rowindices; 1359 if (imark > -1) { 1360 for (i=0; i<imark; i++) { 1361 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1362 } 1363 } else { 1364 for (i=0; i<nzB; i++) { 1365 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1366 else break; 1367 } 1368 imark = i; 1369 } 1370 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1371 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1372 } 1373 } else { 1374 if (idx) *idx = NULL; 1375 if (v) *v = NULL; 1376 } 1377 } 1378 *nz = nztot; 1379 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1380 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1385 { 1386 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1387 1388 PetscFunctionBegin; 1389 PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1390 baij->getrowactive = PETSC_FALSE; 1391 PetscFunctionReturn(0); 1392 } 1393 1394 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1395 { 1396 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1397 1398 PetscFunctionBegin; 1399 PetscCall(MatZeroEntries(l->A)); 1400 PetscCall(MatZeroEntries(l->B)); 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1405 { 1406 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 1407 Mat A = a->A,B = a->B; 1408 PetscLogDouble isend[5],irecv[5]; 1409 1410 PetscFunctionBegin; 1411 info->block_size = (PetscReal)matin->rmap->bs; 1412 1413 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1414 1415 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1416 isend[3] = info->memory; isend[4] = info->mallocs; 1417 1418 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1419 1420 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1421 isend[3] += info->memory; isend[4] += info->mallocs; 1422 1423 if (flag == MAT_LOCAL) { 1424 info->nz_used = isend[0]; 1425 info->nz_allocated = isend[1]; 1426 info->nz_unneeded = isend[2]; 1427 info->memory = isend[3]; 1428 info->mallocs = isend[4]; 1429 } else if (flag == MAT_GLOBAL_MAX) { 1430 
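    /* isend[] holds the five local counters (nz_used, nz_allocated, nz_unneeded, memory, mallocs)
       accumulated above over the diagonal (A) and off-diagonal (B) blocks; MAT_GLOBAL_MAX reduces
       each counter with MPI_MAX over the communicator, while the MAT_GLOBAL_SUM branch below
       uses MPI_SUM. */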
PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1431 1432 info->nz_used = irecv[0]; 1433 info->nz_allocated = irecv[1]; 1434 info->nz_unneeded = irecv[2]; 1435 info->memory = irecv[3]; 1436 info->mallocs = irecv[4]; 1437 } else if (flag == MAT_GLOBAL_SUM) { 1438 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1439 1440 info->nz_used = irecv[0]; 1441 info->nz_allocated = irecv[1]; 1442 info->nz_unneeded = irecv[2]; 1443 info->memory = irecv[3]; 1444 info->mallocs = irecv[4]; 1445 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1446 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1447 info->fill_ratio_needed = 0; 1448 info->factor_mallocs = 0; 1449 PetscFunctionReturn(0); 1450 } 1451 1452 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 1453 { 1454 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1455 1456 PetscFunctionBegin; 1457 switch (op) { 1458 case MAT_NEW_NONZERO_LOCATIONS: 1459 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1460 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1461 case MAT_KEEP_NONZERO_PATTERN: 1462 case MAT_NEW_NONZERO_LOCATION_ERR: 1463 MatCheckPreallocated(A,1); 1464 PetscCall(MatSetOption(a->A,op,flg)); 1465 PetscCall(MatSetOption(a->B,op,flg)); 1466 break; 1467 case MAT_ROW_ORIENTED: 1468 MatCheckPreallocated(A,1); 1469 a->roworiented = flg; 1470 1471 PetscCall(MatSetOption(a->A,op,flg)); 1472 PetscCall(MatSetOption(a->B,op,flg)); 1473 break; 1474 case MAT_FORCE_DIAGONAL_ENTRIES: 1475 case MAT_SORTED_FULL: 1476 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1477 break; 1478 case MAT_IGNORE_OFF_PROC_ENTRIES: 1479 a->donotstash = flg; 1480 break; 1481 case MAT_USE_HASH_TABLE: 1482 a->ht_flag = flg; 1483 a->ht_fact = 1.39; 1484 break; 1485 case MAT_SYMMETRIC: 1486 case MAT_STRUCTURALLY_SYMMETRIC: 1487 case MAT_HERMITIAN: 1488 case MAT_SUBMAT_SINGLEIS: 1489 case MAT_SYMMETRY_ETERNAL: 1490 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1491 case MAT_SPD_ETERNAL: 1492 /* if the diagonal matrix is square it inherits some of the properties above */ 1493 break; 1494 default: 1495 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1496 } 1497 PetscFunctionReturn(0); 1498 } 1499 1500 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 1501 { 1502 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1503 Mat_SeqBAIJ *Aloc; 1504 Mat B; 1505 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1506 PetscInt bs=A->rmap->bs,mbs=baij->mbs; 1507 MatScalar *a; 1508 1509 PetscFunctionBegin; 1510 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*matout)); 1511 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1512 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1513 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1514 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1515 /* Do not know preallocation information, but must set block size */ 1516 PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL)); 1517 } else { 1518 B = *matout; 1519 } 1520 1521 /* copy over the A part */ 1522 Aloc = (Mat_SeqBAIJ*)baij->A->data; 1523 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1524 PetscCall(PetscMalloc1(bs,&rvals)); 1525 1526 for (i=0; i<mbs; i++) { 1527 rvals[0] = bs*(baij->rstartbs + i); 1528 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1529 for (j=ai[i]; 
j<ai[i+1]; j++) { 1530 col = (baij->cstartbs+aj[j])*bs; 1531 for (k=0; k<bs; k++) { 1532 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1533 1534 col++; a += bs; 1535 } 1536 } 1537 } 1538 /* copy over the B part */ 1539 Aloc = (Mat_SeqBAIJ*)baij->B->data; 1540 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1541 for (i=0; i<mbs; i++) { 1542 rvals[0] = bs*(baij->rstartbs + i); 1543 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 1544 for (j=ai[i]; j<ai[i+1]; j++) { 1545 col = baij->garray[aj[j]]*bs; 1546 for (k=0; k<bs; k++) { 1547 PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES)); 1548 col++; 1549 a += bs; 1550 } 1551 } 1552 } 1553 PetscCall(PetscFree(rvals)); 1554 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1555 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1556 1557 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1558 else { 1559 PetscCall(MatHeaderMerge(A,&B)); 1560 } 1561 PetscFunctionReturn(0); 1562 } 1563 1564 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 1565 { 1566 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1567 Mat a = baij->A,b = baij->B; 1568 PetscInt s1,s2,s3; 1569 1570 PetscFunctionBegin; 1571 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1572 if (rr) { 1573 PetscCall(VecGetLocalSize(rr,&s1)); 1574 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1575 /* Overlap communication with computation. */ 1576 PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1577 } 1578 if (ll) { 1579 PetscCall(VecGetLocalSize(ll,&s1)); 1580 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1581 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1582 } 1583 /* scale the diagonal block */ 1584 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 1585 1586 if (rr) { 1587 /* Do a scatter end and then right scale the off-diagonal block */ 1588 PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1589 PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec)); 1590 } 1591 PetscFunctionReturn(0); 1592 } 1593 1594 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1595 { 1596 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 1597 PetscInt *lrows; 1598 PetscInt r, len; 1599 PetscBool cong; 1600 1601 PetscFunctionBegin; 1602 /* get locally owned rows */ 1603 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 1604 /* fix right hand side if needed */ 1605 if (x && b) { 1606 const PetscScalar *xx; 1607 PetscScalar *bb; 1608 1609 PetscCall(VecGetArrayRead(x,&xx)); 1610 PetscCall(VecGetArray(b,&bb)); 1611 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 1612 PetscCall(VecRestoreArrayRead(x,&xx)); 1613 PetscCall(VecRestoreArray(b,&bb)); 1614 } 1615 1616 /* actually zap the local rows */ 1617 /* 1618 Zero the required rows. If the "diagonal block" of the matrix 1619 is square and the user wishes to set the diagonal we use separate 1620 code so that MatSetValues() is not called for each diagonal allocating 1621 new memory, thus calling lots of mallocs and slowing things down. 
1622 1623 */ 1624 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1625 PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL)); 1626 PetscCall(MatHasCongruentLayouts(A,&cong)); 1627 if ((diag != 0.0) && cong) { 1628 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL)); 1629 } else if (diag != 0.0) { 1630 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1631 PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1632 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1633 for (r = 0; r < len; ++r) { 1634 const PetscInt row = lrows[r] + A->rmap->rstart; 1635 PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES)); 1636 } 1637 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1638 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1639 } else { 1640 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1641 } 1642 PetscCall(PetscFree(lrows)); 1643 1644 /* only change matrix nonzero state if pattern was allowed to be changed */ 1645 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1646 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1647 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1648 } 1649 PetscFunctionReturn(0); 1650 } 1651 1652 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1653 { 1654 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1655 PetscMPIInt n = A->rmap->n,p = 0; 1656 PetscInt i,j,k,r,len = 0,row,col,count; 1657 PetscInt *lrows,*owners = A->rmap->range; 1658 PetscSFNode *rrows; 1659 PetscSF sf; 1660 const PetscScalar *xx; 1661 PetscScalar *bb,*mask; 1662 Vec xmask,lmask; 1663 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1664 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1665 PetscScalar *aa; 1666 1667 PetscFunctionBegin; 1668 /* Create SF where leaves are input rows and roots are owned rows */ 1669 PetscCall(PetscMalloc1(n, &lrows)); 1670 for (r = 0; r < n; ++r) lrows[r] = -1; 1671 PetscCall(PetscMalloc1(N, &rrows)); 1672 for (r = 0; r < N; ++r) { 1673 const PetscInt idx = rows[r]; 1674 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 1675 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1676 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 1677 } 1678 rrows[r].rank = p; 1679 rrows[r].index = rows[r] - owners[p]; 1680 } 1681 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 1682 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1683 /* Collect flags for rows to be zeroed */ 1684 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1685 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1686 PetscCall(PetscSFDestroy(&sf)); 1687 /* Compress and put in row numbers */ 1688 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1689 /* zero diagonal part of matrix */ 1690 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 1691 /* handle off diagonal part of matrix */ 1692 PetscCall(MatCreateVecs(A,&xmask,NULL)); 1693 PetscCall(VecDuplicate(l->lvec,&lmask)); 1694 PetscCall(VecGetArray(xmask,&bb)); 1695 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1696 
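/* xmask now holds a 1.0 in every locally owned row that is being zeroed; scattering it forward into lmask (a duplicate of lvec) tells this process which ghost columns of its off-diagonal block correspond to rows zeroed on other processes, so those columns can be zeroed below (and, when x is given, folded into the right-hand side b) */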
PetscCall(VecRestoreArray(xmask,&bb)); 1697 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1698 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 1699 PetscCall(VecDestroy(&xmask)); 1700 if (x) { 1701 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1702 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1703 PetscCall(VecGetArrayRead(l->lvec,&xx)); 1704 PetscCall(VecGetArray(b,&bb)); 1705 } 1706 PetscCall(VecGetArray(lmask,&mask)); 1707 /* remove zeroed rows of off diagonal matrix */ 1708 for (i = 0; i < len; ++i) { 1709 row = lrows[i]; 1710 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1711 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 1712 for (k = 0; k < count; ++k) { 1713 aa[0] = 0.0; 1714 aa += bs; 1715 } 1716 } 1717 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1718 for (i = 0; i < l->B->rmap->N; ++i) { 1719 row = i/bs; 1720 for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 1721 for (k = 0; k < bs; ++k) { 1722 col = bs*baij->j[j] + k; 1723 if (PetscAbsScalar(mask[col])) { 1724 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 1725 if (x) bb[i] -= aa[0]*xx[col]; 1726 aa[0] = 0.0; 1727 } 1728 } 1729 } 1730 } 1731 if (x) { 1732 PetscCall(VecRestoreArray(b,&bb)); 1733 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 1734 } 1735 PetscCall(VecRestoreArray(lmask,&mask)); 1736 PetscCall(VecDestroy(&lmask)); 1737 PetscCall(PetscFree(lrows)); 1738 1739 /* only change matrix nonzero state if pattern was allowed to be changed */ 1740 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1741 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1742 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1743 } 1744 PetscFunctionReturn(0); 1745 } 1746 1747 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1748 { 1749 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1750 1751 PetscFunctionBegin; 1752 PetscCall(MatSetUnfactored(a->A)); 1753 PetscFunctionReturn(0); 1754 } 1755 1756 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 1757 1758 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 1759 { 1760 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 1761 Mat a,b,c,d; 1762 PetscBool flg; 1763 1764 PetscFunctionBegin; 1765 a = matA->A; b = matA->B; 1766 c = matB->A; d = matB->B; 1767 1768 PetscCall(MatEqual(a,c,&flg)); 1769 if (flg) { 1770 PetscCall(MatEqual(b,d,&flg)); 1771 } 1772 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 1777 { 1778 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1779 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 1780 1781 PetscFunctionBegin; 1782 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
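The fast path below additionally requires the caller to have asserted an identical nonzero pattern; a typical call is simply

        MatCopy(X,Y,SAME_NONZERO_PATTERN);

     any other MatStructure value (or mismatched copy implementations) falls back to MatCopy_Basic().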
*/ 1783 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1784 PetscCall(MatCopy_Basic(A,B,str)); 1785 } else { 1786 PetscCall(MatCopy(a->A,b->A,str)); 1787 PetscCall(MatCopy(a->B,b->B,str)); 1788 } 1789 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1794 { 1795 PetscFunctionBegin; 1796 PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 1797 PetscFunctionReturn(0); 1798 } 1799 1800 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 1801 { 1802 PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 1803 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 1804 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 1805 1806 PetscFunctionBegin; 1807 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 1808 PetscFunctionReturn(0); 1809 } 1810 1811 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1812 { 1813 Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 1814 PetscBLASInt bnz,one=1; 1815 Mat_SeqBAIJ *x,*y; 1816 PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 1817 1818 PetscFunctionBegin; 1819 if (str == SAME_NONZERO_PATTERN) { 1820 PetscScalar alpha = a; 1821 x = (Mat_SeqBAIJ*)xx->A->data; 1822 y = (Mat_SeqBAIJ*)yy->A->data; 1823 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1824 PetscCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1825 x = (Mat_SeqBAIJ*)xx->B->data; 1826 y = (Mat_SeqBAIJ*)yy->B->data; 1827 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 1828 PetscCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1829 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1830 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1831 PetscCall(MatAXPY_Basic(Y,a,X,str)); 1832 } else { 1833 Mat B; 1834 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1835 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 1836 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 1837 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 1838 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 1839 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 1840 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 1841 PetscCall(MatSetType(B,MATMPIBAIJ)); 1842 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d)); 1843 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 1844 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o)); 1845 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
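Callers that know Y and X share a nonzero pattern should therefore pass SAME_NONZERO_PATTERN, e.g.

         MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);

       so that the BLAS axpy branch above is used instead of this general fallback.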
*/ 1846 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 1847 PetscCall(MatHeaderMerge(Y,&B)); 1848 PetscCall(PetscFree(nnz_d)); 1849 PetscCall(PetscFree(nnz_o)); 1850 } 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1855 { 1856 PetscFunctionBegin; 1857 if (PetscDefined(USE_COMPLEX)) { 1858 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1859 1860 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1861 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1867 { 1868 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1869 1870 PetscFunctionBegin; 1871 PetscCall(MatRealPart(a->A)); 1872 PetscCall(MatRealPart(a->B)); 1873 PetscFunctionReturn(0); 1874 } 1875 1876 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1877 { 1878 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1879 1880 PetscFunctionBegin; 1881 PetscCall(MatImaginaryPart(a->A)); 1882 PetscCall(MatImaginaryPart(a->B)); 1883 PetscFunctionReturn(0); 1884 } 1885 1886 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1887 { 1888 IS iscol_local; 1889 PetscInt csize; 1890 1891 PetscFunctionBegin; 1892 PetscCall(ISGetLocalSize(iscol,&csize)); 1893 if (call == MAT_REUSE_MATRIX) { 1894 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 1895 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1896 } else { 1897 PetscCall(ISAllGather(iscol,&iscol_local)); 1898 } 1899 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat)); 1900 if (call == MAT_INITIAL_MATRIX) { 1901 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 1902 PetscCall(ISDestroy(&iscol_local)); 1903 } 1904 PetscFunctionReturn(0); 1905 } 1906 1907 /* 1908 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1909 in local and then by concatenating the local matrices the end result. 1910 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1911 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1912 */ 1913 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 1914 { 1915 PetscMPIInt rank,size; 1916 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1917 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1918 Mat M,Mreuse; 1919 MatScalar *vwork,*aa; 1920 MPI_Comm comm; 1921 IS isrow_new, iscol_new; 1922 Mat_SeqBAIJ *aij; 1923 1924 PetscFunctionBegin; 1925 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 1926 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 1927 PetscCallMPI(MPI_Comm_size(comm,&size)); 1928 /* The compression and expansion should be avoided. 
This does not report 1929 errors and may change the indices, hence it is buggy */ 1930 PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new)); 1931 PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new)); 1932 1933 if (call == MAT_REUSE_MATRIX) { 1934 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 1935 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1936 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse)); 1937 } else { 1938 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse)); 1939 } 1940 PetscCall(ISDestroy(&isrow_new)); 1941 PetscCall(ISDestroy(&iscol_new)); 1942 /* 1943 m - number of local rows 1944 n - number of columns (same on all processors) 1945 rstart - first row in new global matrix generated 1946 */ 1947 PetscCall(MatGetBlockSize(mat,&bs)); 1948 PetscCall(MatGetSize(Mreuse,&m,&n)); 1949 m = m/bs; 1950 n = n/bs; 1951 1952 if (call == MAT_INITIAL_MATRIX) { 1953 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 1954 ii = aij->i; 1955 jj = aij->j; 1956 1957 /* 1958 Determine the number of non-zeros in the diagonal and off-diagonal 1959 portions of the matrix in order to do correct preallocation 1960 */ 1961 1962 /* first get start and end of "diagonal" columns */ 1963 if (csize == PETSC_DECIDE) { 1964 PetscCall(ISGetSize(isrow,&mglobal)); 1965 if (mglobal == n*bs) { /* square matrix */ 1966 nlocal = m; 1967 } else { 1968 nlocal = n/size + ((n % size) > rank); 1969 } 1970 } else { 1971 nlocal = csize/bs; 1972 } 1973 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 1974 rstart = rend - nlocal; 1975 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 1976 1977 /* next, compute all the lengths */ 1978 PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens)); 1979 for (i=0; i<m; i++) { 1980 jend = ii[i+1] - ii[i]; 1981 olen = 0; 1982 dlen = 0; 1983 for (j=0; j<jend; j++) { 1984 if (*jj < rstart || *jj >= rend) olen++; 1985 else dlen++; 1986 jj++; 1987 } 1988 olens[i] = olen; 1989 dlens[i] = dlen; 1990 } 1991 PetscCall(MatCreate(comm,&M)); 1992 PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n)); 1993 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 1994 PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1995 PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens)); 1996 PetscCall(PetscFree2(dlens,olens)); 1997 } else { 1998 PetscInt ml,nl; 1999 2000 M = *newmat; 2001 PetscCall(MatGetLocalSize(M,&ml,&nl)); 2002 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2003 PetscCall(MatZeroEntries(M)); 2004 /* 2005 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2006 rather than the slower MatSetValuesBlocked().
2007 */ 2008 M->was_assembled = PETSC_TRUE; 2009 M->assembled = PETSC_FALSE; 2010 } 2011 PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE)); 2012 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 2013 aij = (Mat_SeqBAIJ*)(Mreuse)->data; 2014 ii = aij->i; 2015 jj = aij->j; 2016 aa = aij->a; 2017 for (i=0; i<m; i++) { 2018 row = rstart/bs + i; 2019 nz = ii[i+1] - ii[i]; 2020 cwork = jj; jj += nz; 2021 vwork = aa; aa += nz*bs*bs; 2022 PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 2023 } 2024 2025 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 2026 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 2027 *newmat = M; 2028 2029 /* save submatrix used in processor for next request */ 2030 if (call == MAT_INITIAL_MATRIX) { 2031 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 2032 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2033 } 2034 PetscFunctionReturn(0); 2035 } 2036 2037 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 2038 { 2039 MPI_Comm comm,pcomm; 2040 PetscInt clocal_size,nrows; 2041 const PetscInt *rows; 2042 PetscMPIInt size; 2043 IS crowp,lcolp; 2044 2045 PetscFunctionBegin; 2046 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2047 /* make a collective version of 'rowp' */ 2048 PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm)); 2049 if (pcomm==comm) { 2050 crowp = rowp; 2051 } else { 2052 PetscCall(ISGetSize(rowp,&nrows)); 2053 PetscCall(ISGetIndices(rowp,&rows)); 2054 PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp)); 2055 PetscCall(ISRestoreIndices(rowp,&rows)); 2056 } 2057 PetscCall(ISSetPermutation(crowp)); 2058 /* make a local version of 'colp' */ 2059 PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm)); 2060 PetscCallMPI(MPI_Comm_size(pcomm,&size)); 2061 if (size==1) { 2062 lcolp = colp; 2063 } else { 2064 PetscCall(ISAllGather(colp,&lcolp)); 2065 } 2066 PetscCall(ISSetPermutation(lcolp)); 2067 /* now we just get the submatrix */ 2068 PetscCall(MatGetLocalSize(A,NULL,&clocal_size)); 2069 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B)); 2070 /* clean up */ 2071 if (pcomm!=comm) { 2072 PetscCall(ISDestroy(&crowp)); 2073 } 2074 if (size>1) { 2075 PetscCall(ISDestroy(&lcolp)); 2076 } 2077 PetscFunctionReturn(0); 2078 } 2079 2080 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 2081 { 2082 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 2083 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 2084 2085 PetscFunctionBegin; 2086 if (nghosts) *nghosts = B->nbs; 2087 if (ghosts) *ghosts = baij->garray; 2088 PetscFunctionReturn(0); 2089 } 2090 2091 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2092 { 2093 Mat B; 2094 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2095 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2096 Mat_SeqAIJ *b; 2097 PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL; 2098 PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2099 PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2100 2101 PetscFunctionBegin; 2102 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 2103 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 2104 2105 /* ---------------------------------------------------------------- 2106 Tell every processor the number of nonzeros per row 2107 */ 2108 PetscCall(PetscMalloc1(A->rmap->N/bs,&lens)); 2109 for (i=A->rmap->rstart/bs; 
i<A->rmap->rend/bs; i++) { 2110 lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2111 } 2112 PetscCall(PetscMalloc1(2*size,&recvcounts)); 2113 displs = recvcounts + size; 2114 for (i=0; i<size; i++) { 2115 recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2116 displs[i] = A->rmap->range[i]/bs; 2117 } 2118 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2119 /* --------------------------------------------------------------- 2120 Create the sequential matrix of the same type as the local block diagonal 2121 */ 2122 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 2123 PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE)); 2124 PetscCall(MatSetType(B,MATSEQAIJ)); 2125 PetscCall(MatSeqAIJSetPreallocation(B,0,lens)); 2126 b = (Mat_SeqAIJ*)B->data; 2127 2128 /*-------------------------------------------------------------------- 2129 Copy my part of matrix column indices over 2130 */ 2131 sendcount = ad->nz + bd->nz; 2132 jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2133 a_jsendbuf = ad->j; 2134 b_jsendbuf = bd->j; 2135 n = A->rmap->rend/bs - A->rmap->rstart/bs; 2136 cnt = 0; 2137 for (i=0; i<n; i++) { 2138 2139 /* put in lower diagonal portion */ 2140 m = bd->i[i+1] - bd->i[i]; 2141 while (m > 0) { 2142 /* is it above diagonal (in bd (compressed) numbering) */ 2143 if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2144 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2145 m--; 2146 } 2147 2148 /* put in diagonal portion */ 2149 for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2150 jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2151 } 2152 2153 /* put in upper diagonal portion */ 2154 while (m-- > 0) { 2155 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2156 } 2157 } 2158 PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt); 2159 2160 /*-------------------------------------------------------------------- 2161 Gather all column indices to all processors 2162 */ 2163 for (i=0; i<size; i++) { 2164 recvcounts[i] = 0; 2165 for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2166 recvcounts[i] += lens[j]; 2167 } 2168 } 2169 displs[0] = 0; 2170 for (i=1; i<size; i++) { 2171 displs[i] = displs[i-1] + recvcounts[i-1]; 2172 } 2173 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A))); 2174 /*-------------------------------------------------------------------- 2175 Assemble the matrix into useable form (note numerical values not yet set) 2176 */ 2177 /* set the b->ilen (length of each row) values */ 2178 PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs)); 2179 /* set the b->i indices */ 2180 b->i[0] = 0; 2181 for (i=1; i<=A->rmap->N/bs; i++) { 2182 b->i[i] = b->i[i-1] + lens[i-1]; 2183 } 2184 PetscCall(PetscFree(lens)); 2185 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2186 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2187 PetscCall(PetscFree(recvcounts)); 2188 2189 PetscCall(MatPropagateSymmetryOptions(A,B)); 2190 *newmat = B; 2191 PetscFunctionReturn(0); 2192 } 2193 2194 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2195 { 2196 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2197 Vec bb1 = NULL; 2198 2199 PetscFunctionBegin; 2200 if (flag == 
SOR_APPLY_UPPER) { 2201 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 2206 PetscCall(VecDuplicate(bb,&bb1)); 2207 } 2208 2209 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2210 if (flag & SOR_ZERO_INITIAL_GUESS) { 2211 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2212 its--; 2213 } 2214 2215 while (its--) { 2216 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2217 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2218 2219 /* update rhs: bb1 = bb - B*x */ 2220 PetscCall(VecScale(mat->lvec,-1.0)); 2221 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2222 2223 /* local sweep */ 2224 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 2225 } 2226 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2227 if (flag & SOR_ZERO_INITIAL_GUESS) { 2228 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2229 its--; 2230 } 2231 while (its--) { 2232 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2233 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2234 2235 /* update rhs: bb1 = bb - B*x */ 2236 PetscCall(VecScale(mat->lvec,-1.0)); 2237 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2238 2239 /* local sweep */ 2240 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 2241 } 2242 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2243 if (flag & SOR_ZERO_INITIAL_GUESS) { 2244 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 2245 its--; 2246 } 2247 while (its--) { 2248 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2249 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2250 2251 /* update rhs: bb1 = bb - B*x */ 2252 PetscCall(VecScale(mat->lvec,-1.0)); 2253 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 2254 2255 /* local sweep */ 2256 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 2257 } 2258 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2259 2260 PetscCall(VecDestroy(&bb1)); 2261 PetscFunctionReturn(0); 2262 } 2263 2264 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions) 2265 { 2266 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2267 PetscInt m,N,i,*garray = aij->garray; 2268 PetscInt ib,jb,bs = A->rmap->bs; 2269 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 2270 MatScalar *a_val = a_aij->a; 2271 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 2272 MatScalar *b_val = b_aij->a; 2273 PetscReal *work; 2274 2275 PetscFunctionBegin; 2276 PetscCall(MatGetSize(A,&m,&N)); 2277 PetscCall(PetscCalloc1(N,&work)); 2278 if (type == NORM_2) { 2279 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2280 for (jb=0; jb<bs; jb++) { 2281 for (ib=0; ib<bs; ib++) { 2282 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2283 a_val++; 2284 } 2285 } 2286 } 2287 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2288 for (jb=0; jb<bs; jb++) { 2289 for (ib=0; ib<bs; ib++) { 2290 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2291 b_val++; 2292 } 2293 } 2294 } 2295 } else if (type == NORM_1) { 2296 for 
(i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2297 for (jb=0; jb<bs; jb++) { 2298 for (ib=0; ib<bs; ib++) { 2299 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2300 a_val++; 2301 } 2302 } 2303 } 2304 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2305 for (jb=0; jb<bs; jb++) { 2306 for (ib=0; ib<bs; ib++) { 2307 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2308 b_val++; 2309 } 2310 } 2311 } 2312 } else if (type == NORM_INFINITY) { 2313 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2314 for (jb=0; jb<bs; jb++) { 2315 for (ib=0; ib<bs; ib++) { 2316 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2317 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2318 a_val++; 2319 } 2320 } 2321 } 2322 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2323 for (jb=0; jb<bs; jb++) { 2324 for (ib=0; ib<bs; ib++) { 2325 int col = garray[b_aij->j[i]] * bs + jb; 2326 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2327 b_val++; 2328 } 2329 } 2330 } 2331 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2332 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2333 for (jb=0; jb<bs; jb++) { 2334 for (ib=0; ib<bs; ib++) { 2335 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2336 a_val++; 2337 } 2338 } 2339 } 2340 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2341 for (jb=0; jb<bs; jb++) { 2342 for (ib=0; ib<bs; ib++) { 2343 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2344 b_val++; 2345 } 2346 } 2347 } 2348 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2349 for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2350 for (jb=0; jb<bs; jb++) { 2351 for (ib=0; ib<bs; ib++) { 2352 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2353 a_val++; 2354 } 2355 } 2356 } 2357 for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2358 for (jb=0; jb<bs; jb++) { 2359 for (ib=0; ib<bs; ib++) { 2360 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2361 b_val++; 2362 } 2363 } 2364 } 2365 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 2366 if (type == NORM_INFINITY) { 2367 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 2368 } else { 2369 PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 2370 } 2371 PetscCall(PetscFree(work)); 2372 if (type == NORM_2) { 2373 for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2374 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2375 for (i=0; i<N; i++) reductions[i] /= m; 2376 } 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2381 { 2382 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2383 2384 PetscFunctionBegin; 2385 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2386 A->factorerrortype = a->A->factorerrortype; 2387 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2388 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2389 PetscFunctionReturn(0); 2390 } 2391 2392 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 2393 { 2394 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 2395 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 2396 2397 PetscFunctionBegin; 2398 if (!Y->preallocated) { 2399 
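/* the matrix was never preallocated: give it a minimal preallocation (one diagonal block per block row, no off-diagonal blocks) so that MatShift_Basic() below has somewhere to put the diagonal entries */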
PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL)); 2400 } else if (!aij->nz) { 2401 PetscInt nonew = aij->nonew; 2402 PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL)); 2403 aij->nonew = nonew; 2404 } 2405 PetscCall(MatShift_Basic(Y,a)); 2406 PetscFunctionReturn(0); 2407 } 2408 2409 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 2410 { 2411 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2412 2413 PetscFunctionBegin; 2414 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2415 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2416 if (d) { 2417 PetscInt rstart; 2418 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2419 *d += rstart/A->rmap->bs; 2420 2421 } 2422 PetscFunctionReturn(0); 2423 } 2424 2425 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2426 { 2427 PetscFunctionBegin; 2428 *a = ((Mat_MPIBAIJ*)A->data)->A; 2429 PetscFunctionReturn(0); 2430 } 2431 2432 /* -------------------------------------------------------------------*/ 2433 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2434 MatGetRow_MPIBAIJ, 2435 MatRestoreRow_MPIBAIJ, 2436 MatMult_MPIBAIJ, 2437 /* 4*/ MatMultAdd_MPIBAIJ, 2438 MatMultTranspose_MPIBAIJ, 2439 MatMultTransposeAdd_MPIBAIJ, 2440 NULL, 2441 NULL, 2442 NULL, 2443 /*10*/ NULL, 2444 NULL, 2445 NULL, 2446 MatSOR_MPIBAIJ, 2447 MatTranspose_MPIBAIJ, 2448 /*15*/ MatGetInfo_MPIBAIJ, 2449 MatEqual_MPIBAIJ, 2450 MatGetDiagonal_MPIBAIJ, 2451 MatDiagonalScale_MPIBAIJ, 2452 MatNorm_MPIBAIJ, 2453 /*20*/ MatAssemblyBegin_MPIBAIJ, 2454 MatAssemblyEnd_MPIBAIJ, 2455 MatSetOption_MPIBAIJ, 2456 MatZeroEntries_MPIBAIJ, 2457 /*24*/ MatZeroRows_MPIBAIJ, 2458 NULL, 2459 NULL, 2460 NULL, 2461 NULL, 2462 /*29*/ MatSetUp_MPIBAIJ, 2463 NULL, 2464 NULL, 2465 MatGetDiagonalBlock_MPIBAIJ, 2466 NULL, 2467 /*34*/ MatDuplicate_MPIBAIJ, 2468 NULL, 2469 NULL, 2470 NULL, 2471 NULL, 2472 /*39*/ MatAXPY_MPIBAIJ, 2473 MatCreateSubMatrices_MPIBAIJ, 2474 MatIncreaseOverlap_MPIBAIJ, 2475 MatGetValues_MPIBAIJ, 2476 MatCopy_MPIBAIJ, 2477 /*44*/ NULL, 2478 MatScale_MPIBAIJ, 2479 MatShift_MPIBAIJ, 2480 NULL, 2481 MatZeroRowsColumns_MPIBAIJ, 2482 /*49*/ NULL, 2483 NULL, 2484 NULL, 2485 NULL, 2486 NULL, 2487 /*54*/ MatFDColoringCreate_MPIXAIJ, 2488 NULL, 2489 MatSetUnfactored_MPIBAIJ, 2490 MatPermute_MPIBAIJ, 2491 MatSetValuesBlocked_MPIBAIJ, 2492 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2493 MatDestroy_MPIBAIJ, 2494 MatView_MPIBAIJ, 2495 NULL, 2496 NULL, 2497 /*64*/ NULL, 2498 NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2503 NULL, 2504 NULL, 2505 NULL, 2506 NULL, 2507 /*74*/ NULL, 2508 MatFDColoringApply_BAIJ, 2509 NULL, 2510 NULL, 2511 NULL, 2512 /*79*/ NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 MatLoad_MPIBAIJ, 2517 /*84*/ NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 /*89*/ NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 /*94*/ NULL, 2528 NULL, 2529 NULL, 2530 NULL, 2531 NULL, 2532 /*99*/ NULL, 2533 NULL, 2534 NULL, 2535 MatConjugate_MPIBAIJ, 2536 NULL, 2537 /*104*/NULL, 2538 MatRealPart_MPIBAIJ, 2539 MatImaginaryPart_MPIBAIJ, 2540 NULL, 2541 NULL, 2542 /*109*/NULL, 2543 NULL, 2544 NULL, 2545 NULL, 2546 MatMissingDiagonal_MPIBAIJ, 2547 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2548 NULL, 2549 MatGetGhosts_MPIBAIJ, 2550 NULL, 2551 NULL, 2552 /*119*/NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 MatGetMultiProcBlock_MPIBAIJ, 2557 /*124*/NULL, 2558 MatGetColumnReductions_MPIBAIJ, 2559 MatInvertBlockDiagonal_MPIBAIJ, 2560 NULL, 2561 NULL, 2562 
/*129*/NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 /*134*/NULL, 2568 NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 /*139*/MatSetBlockSizes_Default, 2573 NULL, 2574 NULL, 2575 MatFDColoringSetUp_MPIXAIJ, 2576 NULL, 2577 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2578 NULL, 2579 NULL, 2580 NULL, 2581 NULL, 2582 NULL, 2583 /*150*/NULL 2584 }; 2585 2586 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2587 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2588 2589 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2590 { 2591 PetscInt m,rstart,cstart,cend; 2592 PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2593 const PetscInt *JJ =NULL; 2594 PetscScalar *values=NULL; 2595 PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2596 PetscBool nooffprocentries; 2597 2598 PetscFunctionBegin; 2599 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 2600 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 2601 PetscCall(PetscLayoutSetUp(B->rmap)); 2602 PetscCall(PetscLayoutSetUp(B->cmap)); 2603 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2604 m = B->rmap->n/bs; 2605 rstart = B->rmap->rstart/bs; 2606 cstart = B->cmap->rstart/bs; 2607 cend = B->cmap->rend/bs; 2608 2609 PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2610 PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz)); 2611 for (i=0; i<m; i++) { 2612 nz = ii[i+1] - ii[i]; 2613 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 2614 nz_max = PetscMax(nz_max,nz); 2615 dlen = 0; 2616 olen = 0; 2617 JJ = jj + ii[i]; 2618 for (j=0; j<nz; j++) { 2619 if (*JJ < cstart || *JJ >= cend) olen++; 2620 else dlen++; 2621 JJ++; 2622 } 2623 d_nnz[i] = dlen; 2624 o_nnz[i] = olen; 2625 } 2626 PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz)); 2627 PetscCall(PetscFree2(d_nnz,o_nnz)); 2628 2629 values = (PetscScalar*)V; 2630 if (!values) { 2631 PetscCall(PetscCalloc1(bs*bs*nz_max,&values)); 2632 } 2633 for (i=0; i<m; i++) { 2634 PetscInt row = i + rstart; 2635 PetscInt ncols = ii[i+1] - ii[i]; 2636 const PetscInt *icols = jj + ii[i]; 2637 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2638 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2639 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES)); 2640 } else { /* block ordering does not match so we can only insert one block at a time. */ 2641 PetscInt j; 2642 for (j=0; j<ncols; j++) { 2643 const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 2644 PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES)); 2645 } 2646 } 2647 } 2648 2649 if (!V) PetscCall(PetscFree(values)); 2650 nooffprocentries = B->nooffprocentries; 2651 B->nooffprocentries = PETSC_TRUE; 2652 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2653 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2654 B->nooffprocentries = nooffprocentries; 2655 2656 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2657 PetscFunctionReturn(0); 2658 } 2659 2660 /*@C 2661 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2662 2663 Collective 2664 2665 Input Parameters: 2666 + B - the matrix 2667 . bs - the block size 2668 . i - the indices into j for the start of each local row (starts with zero) 2669 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2670 - v - optional values in the matrix 2671 2672 Level: advanced 2673 2674 Notes: 2675 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 2676 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 2677 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2678 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2679 block column and the second index is over columns within a block. 2680 2681 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2682 2683 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2684 @*/ 2685 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2686 { 2687 PetscFunctionBegin; 2688 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2689 PetscValidType(B,1); 2690 PetscValidLogicalCollectiveInt(B,bs,2); 2691 PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 2692 PetscFunctionReturn(0); 2693 } 2694 2695 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2696 { 2697 Mat_MPIBAIJ *b; 2698 PetscInt i; 2699 PetscMPIInt size; 2700 2701 PetscFunctionBegin; 2702 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2703 PetscCall(PetscLayoutSetUp(B->rmap)); 2704 PetscCall(PetscLayoutSetUp(B->cmap)); 2705 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2706 2707 if (d_nnz) { 2708 for (i=0; i<B->rmap->n/bs; i++) { 2709 PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]); 2710 } 2711 } 2712 if (o_nnz) { 2713 for (i=0; i<B->rmap->n/bs; i++) { 2714 PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]); 2715 } 2716 } 2717 2718 b = (Mat_MPIBAIJ*)B->data; 2719 b->bs2 = bs*bs; 2720 b->mbs = B->rmap->n/bs; 2721 b->nbs = B->cmap->n/bs; 2722 b->Mbs = B->rmap->N/bs; 2723 b->Nbs = B->cmap->N/bs; 2724 2725 for (i=0; i<=b->size; i++) { 2726 
b->rangebs[i] = B->rmap->range[i]/bs; 2727 } 2728 b->rstartbs = B->rmap->rstart/bs; 2729 b->rendbs = B->rmap->rend/bs; 2730 b->cstartbs = B->cmap->rstart/bs; 2731 b->cendbs = B->cmap->rend/bs; 2732 2733 #if defined(PETSC_USE_CTABLE) 2734 PetscCall(PetscTableDestroy(&b->colmap)); 2735 #else 2736 PetscCall(PetscFree(b->colmap)); 2737 #endif 2738 PetscCall(PetscFree(b->garray)); 2739 PetscCall(VecDestroy(&b->lvec)); 2740 PetscCall(VecScatterDestroy(&b->Mvctx)); 2741 2742 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2743 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2744 PetscCall(MatDestroy(&b->B)); 2745 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2746 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 2747 PetscCall(MatSetType(b->B,MATSEQBAIJ)); 2748 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2749 2750 if (!B->preallocated) { 2751 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2752 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2753 PetscCall(MatSetType(b->A,MATSEQBAIJ)); 2754 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2755 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash)); 2756 } 2757 2758 PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz)); 2759 PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz)); 2760 B->preallocated = PETSC_TRUE; 2761 B->was_assembled = PETSC_FALSE; 2762 B->assembled = PETSC_FALSE; 2763 PetscFunctionReturn(0); 2764 } 2765 2766 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 2767 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 2768 2769 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 2770 { 2771 Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 2772 Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 2773 PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 2774 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2775 2776 PetscFunctionBegin; 2777 PetscCall(PetscMalloc1(M+1,&ii)); 2778 ii[0] = 0; 2779 for (i=0; i<M; i++) { 2780 PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]); 2781 PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]); 2782 ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 2783 /* remove one from count of matrix has diagonal */ 2784 for (j=id[i]; j<id[i+1]; j++) { 2785 if (jd[j] == i) {ii[i+1]--;break;} 2786 } 2787 } 2788 PetscCall(PetscMalloc1(ii[M],&jj)); 2789 cnt = 0; 2790 for (i=0; i<M; i++) { 2791 for (j=io[i]; j<io[i+1]; j++) { 2792 if (garray[jo[j]] > rstart) break; 2793 jj[cnt++] = garray[jo[j]]; 2794 } 2795 for (k=id[i]; k<id[i+1]; k++) { 2796 if (jd[k] != i) { 2797 jj[cnt++] = rstart + jd[k]; 2798 } 2799 } 2800 for (; j<io[i+1]; j++) { 2801 jj[cnt++] = garray[jo[j]]; 2802 } 2803 } 2804 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj)); 2805 PetscFunctionReturn(0); 2806 } 2807 2808 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2809 2810 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2811 2812 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType 
newtype,MatReuse reuse,Mat *newmat) 2813 { 2814 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2815 Mat_MPIAIJ *b; 2816 Mat B; 2817 2818 PetscFunctionBegin; 2819 PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 2820 2821 if (reuse == MAT_REUSE_MATRIX) { 2822 B = *newmat; 2823 } else { 2824 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 2825 PetscCall(MatSetType(B,MATMPIAIJ)); 2826 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2827 PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs)); 2828 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 2829 PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL)); 2830 } 2831 b = (Mat_MPIAIJ*) B->data; 2832 2833 if (reuse == MAT_REUSE_MATRIX) { 2834 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2835 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2836 } else { 2837 PetscCall(MatDestroy(&b->A)); 2838 PetscCall(MatDestroy(&b->B)); 2839 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2840 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2841 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2842 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 2843 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 2844 } 2845 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 2846 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 2847 2848 if (reuse == MAT_INPLACE_MATRIX) { 2849 PetscCall(MatHeaderReplace(A,&B)); 2850 } else { 2851 *newmat = B; 2852 } 2853 PetscFunctionReturn(0); 2854 } 2855 2856 /*MC 2857 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2858 2859 Options Database Keys: 2860 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 2861 . -mat_block_size <bs> - set the blocksize used to store the matrix 2862 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2863 - -mat_use_hash_table <fact> - set hash table factor 2864 2865 Level: beginner 2866 2867 Notes: 2868 MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no 2869 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 2870 2871 .seealso: `MatCreateBAIJ()` 2872 M*/ 2873 2874 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2875 2876 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2877 { 2878 Mat_MPIBAIJ *b; 2879 PetscBool flg = PETSC_FALSE; 2880 2881 PetscFunctionBegin; 2882 PetscCall(PetscNewLog(B,&b)); 2883 B->data = (void*)b; 2884 2885 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 2886 B->assembled = PETSC_FALSE; 2887 2888 B->insertmode = NOT_SET_VALUES; 2889 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 2890 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size)); 2891 2892 /* build local table of row and column ownerships */ 2893 PetscCall(PetscMalloc1(b->size+1,&b->rangebs)); 2894 2895 /* build cache for off array entries formed */ 2896 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 2897 2898 b->donotstash = PETSC_FALSE; 2899 b->colmap = NULL; 2900 b->garray = NULL; 2901 b->roworiented = PETSC_TRUE; 2902 2903 /* stuff used in block assembly */ 2904 b->barray = NULL; 2905 2906 /* stuff used for matrix vector multiply */ 2907 b->lvec = NULL; 2908 b->Mvctx = NULL; 2909 2910 /* stuff for MatGetRow() */ 2911 b->rowindices = NULL; 2912 b->rowvalues = NULL; 2913 b->getrowactive = PETSC_FALSE; 2914 2915 /* hash table stuff */ 2916 b->ht = NULL; 2917 b->hd = NULL; 2918 b->ht_size = 0; 2919 b->ht_flag = PETSC_FALSE; 2920 b->ht_fact = 0; 2921 b->ht_total_ct = 0; 2922 b->ht_insert_ct = 0; 2923 2924 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2925 b->ijonly = PETSC_FALSE; 2926 2927 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj)); 2928 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ)); 2929 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ)); 2930 #if defined(PETSC_HAVE_HYPRE) 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE)); 2932 #endif 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS)); 2940 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ)); 2941 2942 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat"); 2943 PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg)); 2944 if (flg) { 2945 PetscReal fact = 1.39; 2946 PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE)); 2947 PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table
factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL)); 2948 if (fact <= 1.0) fact = 1.39; 2949 PetscCall(MatMPIBAIJSetHashTableFactor(B,fact)); 2950 PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact)); 2951 } 2952 PetscOptionsEnd(); 2953 PetscFunctionReturn(0); 2954 } 2955 2956 /*MC 2957 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2958 2959 This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2960 and MATMPIBAIJ otherwise. 2961 2962 Options Database Keys: 2963 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2964 2965 Level: beginner 2966 2967 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2968 M*/ 2969 2970 /*@C 2971 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 2972 (block compressed row). For good matrix assembly performance 2973 the user should preallocate the matrix storage by setting the parameters 2974 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2975 performance can be increased by more than a factor of 50. 2976 2977 Collective on Mat 2978 2979 Input Parameters: 2980 + B - the matrix 2981 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2982 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2983 . d_nz - number of block nonzeros per block row in diagonal portion of local 2984 submatrix (same for all local rows) 2985 . d_nnz - array containing the number of block nonzeros in the various block rows 2986 in the diagonal portion of the local submatrix (possibly different for each block 2987 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 2988 set it even if it is zero. 2989 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2990 submatrix (same for all local rows). 2991 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2992 off-diagonal portion of the local submatrix (possibly different for 2993 each block row) or NULL. 2994 2995 If the *_nnz parameter is given then the *_nz parameter is ignored 2996 2997 Options Database Keys: 2998 + -mat_block_size - size of the blocks to use 2999 - -mat_use_hash_table <fact> - set hash table factor 3000 3001 Notes: 3002 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3003 then it must be used on all processors that share the object for that argument. 3004 3005 Storage Information: 3006 For a square global matrix we define each processor's diagonal portion 3007 to be its local rows and the corresponding columns (a square submatrix); 3008 each processor's off-diagonal portion encompasses the remainder of the 3009 local matrix (a rectangular submatrix). 3010 3011 The user can specify preallocated storage for the diagonal part of 3012 the local submatrix with either d_nz or d_nnz (not both). Set 3013 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3014 memory allocation. Likewise, specify preallocated storage for the 3015 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3016 3017 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.
In 3018 the figure below we depict these three local rows and all columns (0-11). 3019 3020 .vb 3021 0 1 2 3 4 5 6 7 8 9 10 11 3022 -------------------------- 3023 row 3 |o o o d d d o o o o o o 3024 row 4 |o o o d d d o o o o o o 3025 row 5 |o o o d d d o o o o o o 3026 -------------------------- 3027 .ve 3028 3029 Thus, any entries in the d locations are stored in the d (diagonal) 3030 submatrix, and any entries in the o locations are stored in the 3031 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3032 stored simply in the MATSEQBAIJ format for compressed row storage. 3033 3034 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3035 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3036 In general, for PDE problems in which most nonzeros are near the diagonal, 3037 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3038 or you will get TERRIBLE performance; see the users' manual chapter on 3039 matrices. 3040 3041 You can call MatGetInfo() to get information on how effective the preallocation was; 3042 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3043 You can also run with the option -info and look for messages with the string 3044 malloc in them to see if additional memory allocation was needed. 3045 3046 Level: intermediate 3047 3048 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3049 @*/ 3050 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3051 { 3052 PetscFunctionBegin; 3053 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3054 PetscValidType(B,1); 3055 PetscValidLogicalCollectiveInt(B,bs,2); 3056 PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz)); 3057 PetscFunctionReturn(0); 3058 } 3059 3060 /*@C 3061 MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 3062 (block compressed row). For good matrix assembly performance 3063 the user should preallocate the matrix storage by setting the parameters 3064 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3065 performance can be increased by more than a factor of 50. 3066 3067 Collective 3068 3069 Input Parameters: 3070 + comm - MPI communicator 3071 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3072 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3073 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3074 This value should be the same as the local size used in creating the 3075 y vector for the matrix-vector product y = Ax. 3076 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 3077 This value should be the same as the local size used in creating the 3078 x vector for the matrix-vector product y = Ax. 3079 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3080 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3081 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3082 submatrix (same for all local rows) 3083 . 
d_nnz - array containing the number of nonzero blocks in the various block rows 3084 in the diagonal portion of the local submatrix (possibly different for each block 3085 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3086 and set it even if it is zero. 3087 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3088 submatrix (same for all local rows). 3089 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3090 off-diagonal portion of the local submatrix (possibly different for 3091 each block row) or NULL. 3092 3093 Output Parameter: 3094 . A - the matrix 3095 3096 Options Database Keys: 3097 + -mat_block_size - size of the blocks to use 3098 - -mat_use_hash_table <fact> - set hash table factor 3099 3100 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3101 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3102 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3103 3104 Notes: 3105 If the *_nnz parameter is given then the *_nz parameter is ignored 3106 3107 A nonzero block is any block that has 1 or more nonzeros in it 3108 3109 The user MUST specify either the local or global matrix dimensions 3110 (possibly both). 3111 3112 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3113 then it must be used on all processors that share the object for that argument. 3114 3115 Storage Information: 3116 For a square global matrix we define each processor's diagonal portion 3117 to be its local rows and the corresponding columns (a square submatrix); 3118 each processor's off-diagonal portion encompasses the remainder of the 3119 local matrix (a rectangular submatrix). 3120 3121 The user can specify preallocated storage for the diagonal part of 3122 the local submatrix with either d_nz or d_nnz (not both). Set 3123 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3124 memory allocation. Likewise, specify preallocated storage for the 3125 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3126 3127 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3128 the figure below we depict these three local rows and all columns (0-11). 3129 3130 .vb 3131 0 1 2 3 4 5 6 7 8 9 10 11 3132 -------------------------- 3133 row 3 |o o o d d d o o o o o o 3134 row 4 |o o o d d d o o o o o o 3135 row 5 |o o o d d d o o o o o o 3136 -------------------------- 3137 .ve 3138 3139 Thus, any entries in the d locations are stored in the d (diagonal) 3140 submatrix, and any entries in the o locations are stored in the 3141 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3142 stored simply in the MATSEQBAIJ format for compressed row storage. 3143 3144 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3145 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3146 In general, for PDE problems in which most nonzeros are near the diagonal, 3147 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3148 or you will get TERRIBLE performance; see the users' manual chapter on 3149 matrices.
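   Example usage, a minimal sketch (the communicator, block size, global sizes, preallocation counts, and the single block inserted are illustrative only):
.vb
     Mat         A;
     PetscInt    row = 0,col = 0;
     PetscScalar vals[4] = {1.0,0.0,0.0,1.0};

     PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A));
     PetscCall(MatSetValuesBlocked(A,1,&row,1,&col,vals,INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatDestroy(&A));
.ve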
3150 3151 Level: intermediate 3152 3153 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3154 @*/ 3155 PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3156 { 3157 PetscMPIInt size; 3158 3159 PetscFunctionBegin; 3160 PetscCall(MatCreate(comm,A)); 3161 PetscCall(MatSetSizes(*A,m,n,M,N)); 3162 PetscCallMPI(MPI_Comm_size(comm,&size)); 3163 if (size > 1) { 3164 PetscCall(MatSetType(*A,MATMPIBAIJ)); 3165 PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz)); 3166 } else { 3167 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3168 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz)); 3169 } 3170 PetscFunctionReturn(0); 3171 } 3172 3173 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3174 { 3175 Mat mat; 3176 Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3177 PetscInt len=0; 3178 3179 PetscFunctionBegin; 3180 *newmat = NULL; 3181 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 3182 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 3183 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 3184 3185 mat->factortype = matin->factortype; 3186 mat->preallocated = PETSC_TRUE; 3187 mat->assembled = PETSC_TRUE; 3188 mat->insertmode = NOT_SET_VALUES; 3189 3190 a = (Mat_MPIBAIJ*)mat->data; 3191 mat->rmap->bs = matin->rmap->bs; 3192 a->bs2 = oldmat->bs2; 3193 a->mbs = oldmat->mbs; 3194 a->nbs = oldmat->nbs; 3195 a->Mbs = oldmat->Mbs; 3196 a->Nbs = oldmat->Nbs; 3197 3198 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 3199 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 3200 3201 a->size = oldmat->size; 3202 a->rank = oldmat->rank; 3203 a->donotstash = oldmat->donotstash; 3204 a->roworiented = oldmat->roworiented; 3205 a->rowindices = NULL; 3206 a->rowvalues = NULL; 3207 a->getrowactive = PETSC_FALSE; 3208 a->barray = NULL; 3209 a->rstartbs = oldmat->rstartbs; 3210 a->rendbs = oldmat->rendbs; 3211 a->cstartbs = oldmat->cstartbs; 3212 a->cendbs = oldmat->cendbs; 3213 3214 /* hash table stuff */ 3215 a->ht = NULL; 3216 a->hd = NULL; 3217 a->ht_size = 0; 3218 a->ht_flag = oldmat->ht_flag; 3219 a->ht_fact = oldmat->ht_fact; 3220 a->ht_total_ct = 0; 3221 a->ht_insert_ct = 0; 3222 3223 PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1)); 3224 if (oldmat->colmap) { 3225 #if defined(PETSC_USE_CTABLE) 3226 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 3227 #else 3228 PetscCall(PetscMalloc1(a->Nbs,&a->colmap)); 3229 PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt))); 3230 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs)); 3231 #endif 3232 } else a->colmap = NULL; 3233 3234 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3235 PetscCall(PetscMalloc1(len,&a->garray)); 3236 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 3237 PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 3238 } else a->garray = NULL; 3239 3240 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash)); 3241 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 3242 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 3243 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 3244 
PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 3245 3246 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 3247 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 3248 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 3249 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 3250 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 3251 *newmat = mat; 3252 PetscFunctionReturn(0); 3253 } 3254 3255 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3256 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3257 { 3258 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3259 PetscInt *rowidxs,*colidxs,rs,cs,ce; 3260 PetscScalar *matvals; 3261 3262 PetscFunctionBegin; 3263 PetscCall(PetscViewerSetUp(viewer)); 3264 3265 /* read in matrix header */ 3266 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3267 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3268 M = header[1]; N = header[2]; nz = header[3]; 3269 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3270 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3271 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3272 3273 /* set block sizes from the viewer's .info file */ 3274 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3275 /* set local sizes if not set already */ 3276 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3277 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3278 /* set global sizes if not set already */ 3279 if (mat->rmap->N < 0) mat->rmap->N = M; 3280 if (mat->cmap->N < 0) mat->cmap->N = N; 3281 PetscCall(PetscLayoutSetUp(mat->rmap)); 3282 PetscCall(PetscLayoutSetUp(mat->cmap)); 3283 3284 /* check if the matrix sizes are correct */ 3285 PetscCall(MatGetSize(mat,&rows,&cols)); 3286 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3287 PetscCall(MatGetBlockSize(mat,&bs)); 3288 PetscCall(MatGetLocalSize(mat,&m,&n)); 3289 PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL)); 3290 PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce)); 3291 mbs = m/bs; nbs = n/bs; 3292 3293 /* read in row lengths and build row indices */ 3294 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3295 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3296 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3297 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3298 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3299 3300 /* read in column indices and matrix values */ 3301 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3302 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3303 
PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3304 3305 { /* preallocate matrix storage */ 3306 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3307 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3308 PetscBool sbaij,done; 3309 PetscInt *d_nnz,*o_nnz; 3310 3311 PetscCall(PetscBTCreate(nbs,&bt)); 3312 PetscCall(PetscHSetICreate(&ht)); 3313 PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz)); 3314 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij)); 3315 for (i=0; i<mbs; i++) { 3316 PetscCall(PetscBTMemzero(nbs,bt)); 3317 PetscCall(PetscHSetIClear(ht)); 3318 for (k=0; k<bs; k++) { 3319 PetscInt row = bs*i + k; 3320 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3321 PetscInt col = colidxs[j]; 3322 if (!sbaij || col >= row) { 3323 if (col >= cs && col < ce) { 3324 if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3325 } else { 3326 PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done)); 3327 if (done) o_nnz[i]++; 3328 } 3329 } 3330 } 3331 } 3332 } 3333 PetscCall(PetscBTDestroy(&bt)); 3334 PetscCall(PetscHSetIDestroy(&ht)); 3335 PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3336 PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz)); 3337 PetscCall(PetscFree2(d_nnz,o_nnz)); 3338 } 3339 3340 /* store matrix values */ 3341 for (i=0; i<m; i++) { 3342 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3343 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3344 } 3345 3346 PetscCall(PetscFree(rowidxs)); 3347 PetscCall(PetscFree2(colidxs,matvals)); 3348 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3349 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3350 PetscFunctionReturn(0); 3351 } 3352 3353 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 3354 { 3355 PetscBool isbinary; 3356 3357 PetscFunctionBegin; 3358 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3359 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3360 PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer)); 3361 PetscFunctionReturn(0); 3362 } 3363 3364 /*@ 3365 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
3366 3367 Input Parameters: 3368 + mat - the matrix 3369 - fact - factor used to compute the size of the hash table 3370 3371 Not Collective, each process can use a different factor 3372 3373 Level: advanced 3374 3375 Notes: 3376 This can also be set by the command line option: -mat_use_hash_table <fact> 3377 3378 .seealso: `MatSetOption()` 3379 @*/ 3380 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3381 { 3382 PetscFunctionBegin; 3383 PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact)); 3384 PetscFunctionReturn(0); 3385 } 3386 3387 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 3388 { 3389 Mat_MPIBAIJ *baij; 3390 3391 PetscFunctionBegin; 3392 baij = (Mat_MPIBAIJ*)mat->data; 3393 baij->ht_fact = fact; 3394 PetscFunctionReturn(0); 3395 } 3396 3397 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3398 { 3399 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3400 PetscBool flg; 3401 3402 PetscFunctionBegin; 3403 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg)); 3404 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 3405 if (Ad) *Ad = a->A; 3406 if (Ao) *Ao = a->B; 3407 if (colmap) *colmap = a->garray; 3408 PetscFunctionReturn(0); 3409 } 3410 3411 /* 3412 Special version for direct calls from Fortran (to eliminate two function call overheads) 3413 */ 3414 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3415 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3416 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3417 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3418 #endif 3419 3420 /*@C 3421 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 3422 3423 Collective on Mat 3424 3425 Input Parameters: 3426 + mat - the matrix 3427 . min - number of input rows 3428 . im - input rows 3429 . nin - number of input columns 3430 . in - input columns 3431 . v - numerical values input 3432 - addvin - INSERT_VALUES or ADD_VALUES 3433 3434 Notes: 3435 This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.
3436 3437 Level: advanced 3438 3439 .seealso: `MatSetValuesBlocked()` 3440 @*/ 3441 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 3442 { 3443 /* convert input arguments to C version */ 3444 Mat mat = *matin; 3445 PetscInt m = *min, n = *nin; 3446 InsertMode addv = *addvin; 3447 3448 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 3449 const MatScalar *value; 3450 MatScalar *barray = baij->barray; 3451 PetscBool roworiented = baij->roworiented; 3452 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 3453 PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3454 PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 3455 3456 PetscFunctionBegin; 3457 /* tasks normally handled by MatSetValuesBlocked() */ 3458 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3459 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3460 PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3461 if (mat->assembled) { 3462 mat->was_assembled = PETSC_TRUE; 3463 mat->assembled = PETSC_FALSE; 3464 } 3465 PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0)); 3466 3467 if (!barray) { 3468 PetscCall(PetscMalloc1(bs2,&barray)); 3469 baij->barray = barray; 3470 } 3471 3472 if (roworiented) stepval = (n-1)*bs; 3473 else stepval = (m-1)*bs; 3474 3475 for (i=0; i<m; i++) { 3476 if (im[i] < 0) continue; 3477 PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 3478 if (im[i] >= rstart && im[i] < rend) { 3479 row = im[i] - rstart; 3480 for (j=0; j<n; j++) { 3481 /* If NumCol = 1 then a copy is not required */ 3482 if ((roworiented) && (n == 1)) { 3483 barray = (MatScalar*)v + i*bs2; 3484 } else if ((!roworiented) && (m == 1)) { 3485 barray = (MatScalar*)v + j*bs2; 3486 } else { /* Here a copy is required */ 3487 if (roworiented) { 3488 value = v + i*(stepval+bs)*bs + j*bs; 3489 } else { 3490 value = v + j*(stepval+bs)*bs + i*bs; 3491 } 3492 for (ii=0; ii<bs; ii++,value+=stepval) { 3493 for (jj=0; jj<bs; jj++) { 3494 *barray++ = *value++; 3495 } 3496 } 3497 barray -=bs2; 3498 } 3499 3500 if (in[j] >= cstart && in[j] < cend) { 3501 col = in[j] - cstart; 3502 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j])); 3503 } else if (in[j] < 0) { 3504 continue; 3505 } else { 3506 PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 3507 if (mat->was_assembled) { 3508 if (!baij->colmap) { 3509 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3510 } 3511 3512 #if defined(PETSC_USE_DEBUG) 3513 #if defined(PETSC_USE_CTABLE) 3514 { PetscInt data; 3515 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data)); 3516 PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3517 } 3518 #else 3519 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 3520 #endif 3521 #endif 3522 #if defined(PETSC_USE_CTABLE) 3523 PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col)); 3524 col = (col - 1)/bs; 3525 #else 3526 col = (baij->colmap[in[j]] - 1)/bs; 3527 #endif 3528 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3529 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3530 col = in[j]; 3531 } 3532 } else col = 
in[j]; 3533 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j])); 3534 } 3535 } 3536 } else { 3537 if (!baij->donotstash) { 3538 if (roworiented) { 3539 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3540 } else { 3541 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 3542 } 3543 } 3544 } 3545 } 3546 3547 /* task normally handled by MatSetValuesBlocked() */ 3548 PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0)); 3549 PetscFunctionReturn(0); 3550 } 3551 3552 /*@ 3553 MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in standard block 3554 CSR format. 3555 3556 Collective 3557 3558 Input Parameters: 3559 + comm - MPI communicator 3560 . bs - the block size, only a block size of 1 is supported 3561 . m - number of local rows (Cannot be PETSC_DECIDE) 3562 . n - This value should be the same as the local size used in creating the 3563 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 3564 calculated if N is given). For square matrices n is almost always m. 3565 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3566 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3567 . i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + number of block elements in block row row-1 of the matrix 3568 . j - column indices 3569 - a - matrix values 3570 3571 Output Parameter: 3572 . mat - the matrix 3573 3574 Level: intermediate 3575 3576 Notes: 3577 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3578 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3579 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3580 3581 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3582 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3583 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 3584 with column-major ordering within blocks. 3585 3586 The i and j indices are 0-based, and the entries of i index into the local j array.
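   Example Usage:
   A minimal sketch with bs = 1 (the only supported block size) and two local rows whose nonzeros lie in global columns {0, 2} and {1}; the communicator comm, the global column count, and the numerical values are illustrative only, and PETSc error checking is omitted.

.vb
   PetscInt    i[] = {0, 2, 3};
   PetscInt    j[] = {0, 2, 1};
   PetscScalar a[] = {2.0, -1.0, 3.0};
   Mat         A;

   MatCreateMPIBAIJWithArrays(comm, 1, 2, PETSC_DECIDE, PETSC_DETERMINE, 3, i, j, a, &A);
.ve

   Here i[] records that local row 0 owns entries j[0..1] and local row 1 owns entry j[2].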
3587 3588 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3589 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3590 @*/ 3591 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3592 { 3593 PetscFunctionBegin; 3594 PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3595 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3596 PetscCall(MatCreate(comm,mat)); 3597 PetscCall(MatSetSizes(*mat,m,n,M,N)); 3598 PetscCall(MatSetType(*mat,MATMPIBAIJ)); 3599 PetscCall(MatSetBlockSize(*mat,bs)); 3600 PetscCall(MatSetUp(*mat)); 3601 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE)); 3602 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a)); 3603 PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE)); 3604 PetscFunctionReturn(0); 3605 } 3606 3607 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3608 { 3609 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3610 PetscInt *indx; 3611 PetscScalar *values; 3612 3613 PetscFunctionBegin; 3614 PetscCall(MatGetSize(inmat,&m,&N)); 3615 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3616 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3617 PetscInt *dnz,*onz,mbs,Nbs,nbs; 3618 PetscInt *bindx,rmax=a->rmax,j; 3619 PetscMPIInt rank,size; 3620 3621 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3622 mbs = m/bs; Nbs = N/cbs; 3623 if (n == PETSC_DECIDE) { 3624 PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N)); 3625 } 3626 nbs = n/cbs; 3627 3628 PetscCall(PetscMalloc1(rmax,&bindx)); 3629 MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */ 3630 3631 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3632 PetscCallMPI(MPI_Comm_size(comm,&size)); 3633 if (rank == size-1) { 3634 /* Check sum(nbs) = Nbs */ 3635 PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 3636 } 3637 3638 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3639 for (i=0; i<mbs; i++) { 3640 PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */ 3641 nnz = nnz/bs; 3642 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3643 PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz)); 3644 PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); 3645 } 3646 PetscCall(PetscFree(bindx)); 3647 3648 PetscCall(MatCreate(comm,outmat)); 3649 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 3650 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 3651 PetscCall(MatSetType(*outmat,MATBAIJ)); 3652 PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz)); 3653 PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz)); 3654 MatPreallocateEnd(dnz,onz); 3655 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3656 } 3657 3658 /* numeric phase */ 3659 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 3660 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 3661 3662 for (i=0; i<m; i++) { 3663 PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3664 Ii = i + rstart; 3665 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 3666
PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values)); 3667 } 3668 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 3669 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 3670 PetscFunctionReturn(0); 3671 } 3672