#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
  PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
  PetscCall(MatGetRowMaxAbs(a->A,vA,idx));

  PetscCall(VecGetArrayWrite(vA,&va));
  if (idx) {
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB));
  PetscCall(PetscMalloc1(m,&idxb));
  PetscCall(MatGetRowMaxAbs(a->B,vB,idxb));

  PetscCall(VecGetArrayWrite(v,&vv));
  PetscCall(VecGetArrayRead(vB,&vb));
  for (i=0; i<m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
        idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v,&vv));   /* vv was obtained from v, so it must be restored on v, not vA */
  PetscCall(VecRestoreArrayWrite(vA,&va));
  PetscCall(VecRestoreArrayRead(vB,&vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
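/*
   For illustration only (values not from any actual run): with bs = 2 and
   garray = {3,7}, the array-based colmap would hold colmap[3] = 1 and
   colmap[7] = 3 (i*bs+1, shifted by one so that 0 can mean "not present").
   A global point column gcol belonging to the off-diagonal part is then
   located with

       col = colmap[gcol/bs] - 1 + gcol%bs;      (col < 0  =>  not present)

   which is the lookup performed in MatSetValues_MPIBAIJ() below.
*/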
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscInt    nbs   = B->nbs,i,bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap));
  for (i=0; i<nbs; i++) {
    PetscCall(PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES));
  }
#else
  PetscCall(PetscCalloc1(baij->Nbs+1,&baij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt)));
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
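/*
   Note on the two macros above: entry (ridx,cidx) of the bs x bs block at
   block position _i of a row is stored column-major at
   ap[bs2*_i + bs*cidx + ridx].  The while loop is a truncated binary
   search; once at most three candidate block columns remain, the linear
   scan that follows is cheaper than further halving.
*/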
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscBool   roworiented  = baij->roworiented;
  PetscInt    i,j,row,col;
  PetscInt    rstart_orig = mat->rmap->rstart;
  PetscInt    rend_orig   = mat->rmap->rend,cstart_orig = mat->cmap->rstart;
  PetscInt    cend_orig   = mat->cmap->rend,bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A      = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar   *aa    = a->a;

  Mat         B      = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt    *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt    low,high,t,ridx,cidx,bs2 = a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j]%bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
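/*
   Hypothetical caller of the routine above (for illustration only; any
   MatSetValues() on a MATMPIBAIJ matrix dispatches here).  Off-process
   rows are stashed and communicated later by MatAssemblyBegin/End():

       PetscInt    rows[2] = {0,1},cols[2] = {0,1};
       PetscScalar vals[4] = {1.0,2.0,3.0,4.0};    (row-oriented)
       PetscCall(MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES));
       PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
       PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
*/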
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax = a->imax,*ai = a->i,*ailen = a->ilen;
  PetscInt          *aj = a->j,nonew = a->nonew,bs2 = a->bs2,bs = A->rmap->bs;
  PetscBool         roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
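/*
   As in the point MatSetValues() macros, blocks are stored column-major
   within each bs x bs block, so the row-oriented branches above write with
   stride bs (jj = ii, ii+bs, ..., ii+bs2-bs) while the column-oriented
   branches copy bs contiguous entries at a time.
*/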
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscInt          i,j,ii,jj,row,col,rstart = baij->rstartbs;
  PetscInt          rend = baij->rendbs,cstart = baij->cstartbs,stepval;
  PetscInt          cend = baij->cendbs,bs = mat->rmap->bs,bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2,&barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) continue;
        else {
          PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
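/*
   HASH() is multiplicative hashing with the golden-ratio constant:
   h1 = floor(size * frac(key * 0.6180339887)).  For example (illustration
   only), size = 100 and key = 7 give key*HASH_KEY = 4.3262..., so
   h1 = (PetscInt)(100 * 0.3262...) = 32.  Collisions are resolved in the
   routines below by a linear probe that wraps around the table.
*/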
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ*)mat->data;
  PetscBool   roworiented  = baij->roworiented;
  PetscInt    i,j,row,col;
  PetscInt    rstart_orig = mat->rmap->rstart;
  PetscInt    rend_orig   = mat->rmap->rend,Nbs = baij->Nbs;
  PetscInt    h1,key,size = baij->ht_size,bs = mat->rmap->bs,*HT = baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
  PetscInt    total_ct = baij->ht_total_ct,insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            PetscCheck(idx != h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}
entry in the hash table", row, col); 556 } 557 } 558 baij_a = HD[idx]; 559 if (roworiented) { 560 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 561 /* value = v + (i*(stepval+bs)+j)*bs; */ 562 value = v_t; 563 v_t += bs; 564 if (addv == ADD_VALUES) { 565 for (ii=0; ii<bs; ii++,value+=stepval) { 566 for (jj=ii; jj<bs2; jj+=bs) { 567 baij_a[jj] += *value++; 568 } 569 } 570 } else { 571 for (ii=0; ii<bs; ii++,value+=stepval) { 572 for (jj=ii; jj<bs2; jj+=bs) { 573 baij_a[jj] = *value++; 574 } 575 } 576 } 577 } else { 578 value = v + j*(stepval+bs)*bs + i*bs; 579 if (addv == ADD_VALUES) { 580 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 581 for (jj=0; jj<bs; jj++) { 582 baij_a[jj] += *value++; 583 } 584 } 585 } else { 586 for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 587 for (jj=0; jj<bs; jj++) { 588 baij_a[jj] = *value++; 589 } 590 } 591 } 592 } 593 } 594 } else { 595 if (!baij->donotstash) { 596 if (roworiented) { 597 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 598 } else { 599 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i)); 600 } 601 } 602 } 603 } 604 if (PetscDefined(USE_DEBUG)) { 605 baij->ht_total_ct += total_ct; 606 baij->ht_insert_ct += insert_ct; 607 } 608 PetscFunctionReturn(0); 609 } 610 611 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 612 { 613 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 614 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 615 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 616 617 PetscFunctionBegin; 618 for (i=0; i<m; i++) { 619 if (idxm[i] < 0) continue; /* negative row */ 620 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 621 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 622 row = idxm[i] - bsrstart; 623 for (j=0; j<n; j++) { 624 if (idxn[j] < 0) continue; /* negative column */ 625 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 626 if (idxn[j] >= bscstart && idxn[j] < bscend) { 627 col = idxn[j] - bscstart; 628 PetscCall(MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j)); 629 } else { 630 if (!baij->colmap) { 631 PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 632 } 633 #if defined(PETSC_USE_CTABLE) 634 PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data)); 635 data--; 636 #else 637 data = baij->colmap[idxn[j]/bs]-1; 638 #endif 639 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 640 else { 641 col = data + idxn[j]%bs; 642 PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j)); 643 } 644 } 645 } 646 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 647 } 648 PetscFunctionReturn(0); 649 } 650 651 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 652 { 653 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 654 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 655 PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 656 PetscReal sum = 0.0; 657 MatScalar *v; 658 659 PetscFunctionBegin; 660 if (baij->size == 1) { 661 PetscCall(MatNorm(baij->A,type,nrm)); 662 } else { 663 if (type == NORM_FROBENIUS) { 664 v = amat->a; 665 nz = amat->nz*bs2; 666 for (i=0; i<nz; i++) { 667 sum += 
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscInt    i,j,bs2 = baij->bs2,bs = baij->A->rmap->bs,nz,row,col;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = baij->garray,cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N,&tmp2));
      v  = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs,&sums));
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
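/*
   The table is sized at factor*nz slots with factor > 1 (1.39 by default;
   see MAT_USE_HASH_TABLE in MatSetOption_MPIBAIJ()), so the load factor
   stays below one and the linear probes terminate.  Each occupied slot
   maps a key encoding a (block row, block column) pair to the address of
   that block's values in A or B.
*/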
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         A     = baij->A,B = baij->B;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
  PetscInt    i,j,k,nz = a->nz+b->nz,h1,*ai = a->i,*aj = a->j,*bi = b->i,*bj = b->j;
  PetscInt    ht_size,bs2 = baij->bs2,rstart = baij->rstartbs;
  PetscInt    cstart = baij->cstartbs,*garray = baij->garray,row,col,Nbs = baij->Nbs;
  PetscInt    *HT,key;
  MatScalar   **HD;
  PetscReal   tmp;
#if defined(PETSC_USE_INFO)
  PetscInt    ct = 0,max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? (double)0.0 : (double)(((PetscReal)(ct+j))/(double)j),max));
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscInt    nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
  PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)baij->A->data;
  PetscInt    i,j,rstart,ncols,flg,bs2 = baij->bs2;
  PetscInt    *row,*col;
  PetscBool   r1,r2,r3,other_disassembled;
  MatScalar   *val;
  PetscMPIInt n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A,mode));
  PetscCall(MatAssemblyEnd(baij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) {
      PetscCall(MatDisAssemble_MPIBAIJ(mat));
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  }
  PetscCall(MatAssemblyBegin(baij->B,mode));
  PetscCall(MatAssemblyEnd(baij->B,mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscMPIInt       rank  = baij->rank;
  PetscInt          bs    = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",
                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
      PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    PetscCall(MatSetType(A,MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
    PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    PetscCall(PetscMalloc1(bs,&rvals));

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
          col++; a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
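/*
   The layout written below is the standard PETSc binary Mat format: a four
   entry header (MAT_FILE_CLASSID, M, N, global nonzero count), then all row
   lengths, then all global column indices, then all values.  For each row
   the off-diagonal (B) entries that precede the diagonal block, the
   diagonal (A) entries, and the remaining B entries are interleaved so the
   column indices come out sorted.
*/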
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) {
    PetscCall(MatView_MPIBAIJ_Binary(mat,viewer));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd,baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiadj_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpiaij_C",NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL));
  PetscFunctionReturn(0);
}
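/*
   The multiply kernels below overlap communication with computation: the
   scatter of the needed off-process entries of xx into a->lvec is started,
   the product with the local diagonal block a->A is done while the
   messages are in flight, and after the scatter completes the off-diagonal
   block a->B adds its contribution B*lvec.
*/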
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  PetscInt    nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy,&nt));
  PetscCheck(nt == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));
  PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ*)A->data)->A,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}
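/*
   MatGetRow returns the row with globally sorted column indices by merging
   three already sorted pieces: the entries of B whose global block column
   lies before the diagonal block, then all of A (shifted by cstart), then
   the remaining entries of B.
*/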
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt    nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B,MAT_LOCAL,info));

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  PetscInt    M = A->rmap->N,N = A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt    bs = A->rmap->bs,mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  PetscCall(PetscMalloc1(bs,&rvals));

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES));
        col++; a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a     = baij->A,b = baij->B;
  PetscInt    s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
  }
  PetscFunctionReturn(0);
}
1618 1619 */ 1620 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1621 PetscCall(MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL)); 1622 PetscCall(MatHasCongruentLayouts(A,&cong)); 1623 if ((diag != 0.0) && cong) { 1624 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL)); 1625 } else if (diag != 0.0) { 1626 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1627 PetscCheck(!((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1628 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1629 for (r = 0; r < len; ++r) { 1630 const PetscInt row = lrows[r] + A->rmap->rstart; 1631 PetscCall(MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES)); 1632 } 1633 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 1634 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 1635 } else { 1636 PetscCall(MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL)); 1637 } 1638 PetscCall(PetscFree(lrows)); 1639 1640 /* only change matrix nonzero state if pattern was allowed to be changed */ 1641 if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1642 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1643 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 1644 } 1645 PetscFunctionReturn(0); 1646 } 1647 1648 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 1649 { 1650 Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1651 PetscMPIInt n = A->rmap->n,p = 0; 1652 PetscInt i,j,k,r,len = 0,row,col,count; 1653 PetscInt *lrows,*owners = A->rmap->range; 1654 PetscSFNode *rrows; 1655 PetscSF sf; 1656 const PetscScalar *xx; 1657 PetscScalar *bb,*mask; 1658 Vec xmask,lmask; 1659 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 1660 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1661 PetscScalar *aa; 1662 1663 PetscFunctionBegin; 1664 /* Create SF where leaves are input rows and roots are owned rows */ 1665 PetscCall(PetscMalloc1(n, &lrows)); 1666 for (r = 0; r < n; ++r) lrows[r] = -1; 1667 PetscCall(PetscMalloc1(N, &rrows)); 1668 for (r = 0; r < N; ++r) { 1669 const PetscInt idx = rows[r]; 1670 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 1671 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1672 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 1673 } 1674 rrows[r].rank = p; 1675 rrows[r].index = rows[r] - owners[p]; 1676 } 1677 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 1678 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1679 /* Collect flags for rows to be zeroed */ 1680 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1681 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 1682 PetscCall(PetscSFDestroy(&sf)); 1683 /* Compress and put in row numbers */ 1684 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1685 /* zero diagonal part of matrix */ 1686 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 1687 /* handle off diagonal part of matrix */ 1688 PetscCall(MatCreateVecs(A,&xmask,NULL)); 1689 PetscCall(VecDuplicate(l->lvec,&lmask)); 1690 PetscCall(VecGetArray(xmask,&bb)); 1691 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1692 
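  /* After the scatter, lmask has a nonzero entry for every ghost column of
     this process whose global column is being zeroed somewhere. The zeroed
     local rows of B are cleared first; then one sweep over all of B removes
     every entry whose column is masked and, when x is provided, moves its
     contribution a(i,col)*x(col) onto the right-hand side b. */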
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);

PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat         a,b,c,d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  PetscCall(MatEqual(a,c,&flg));
  if (flg) {
    PetscCall(MatEqual(b,d,&flg));
  }
  PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A,B,str));
  } else {
    PetscCall(MatCopy(a->A,b->A,str));
    PetscCall(MatCopy(a->B,b->B,str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt    bs = Y->rmap->bs,m = Y->rmap->N/bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
  PetscFunctionReturn(0);
}
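/*
   Y = Y + a*X. Three strategies, cheapest first:
   - SAME_NONZERO_PATTERN: the value arrays of the diagonal and off-diagonal
     blocks of X and Y have identical layouts, so one BLAS axpy per block
     suffices.
   - SUBSET_NONZERO_PATTERN: fall back to MatAXPY_Basic(), which adds the
     entries of X into the existing pattern of Y with MatSetValues().
   - otherwise: preallocate a new matrix for the union of the two patterns,
     accumulate into it, and replace Y's header with the result.
*/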
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIBAIJ  *xx = (Mat_MPIBAIJ*)X->data,*yy = (Mat_MPIBAIJ*)Y->data;
  PetscBLASInt bnz,one=1;
  Mat_SeqBAIJ  *x,*y;
  PetscInt     bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
    PetscCall(MatSetType(B,MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
    /* MatAXPY_BasicWithPreallocation() for a BAIJ matrix is much slower than for AIJ, even for bs=1! */
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol,&csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol,&iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*
  Not great since it makes two copies of the submatrix: first a SeqBAIJ on each
  process, then the final result obtained by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt rank,size;
  PetscInt    i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt    *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         M,Mreuse;
  MatScalar   *vwork,*aa;
  MPI_Comm    comm;
  IS          isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  /* The compression and expansion should be avoided: it does not flag errors
     and it may change the indices, hence it is bug-prone */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new));
  PetscCall(ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat,&bs));
  PetscCall(MatGetSize(Mreuse,&m,&n));
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m+1,&dlens,m+1,&olens));
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens));
    PetscCall(MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens));
    PetscCall(PetscFree2(dlens,olens));
  } else {
    PetscInt ml,nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz*bs*bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       clocal_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp,&pcomm));
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp,&nrows));
    PetscCall(ISGetIndices(rowp,&rows));
    PetscCall(ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp));
    PetscCall(ISRestoreIndices(rowp,&rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp,&pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm,&size));
  if (size==1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp,&lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A,NULL,&clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B));
  /* clean up */
  if (pcomm!=comm) {
    PetscCall(ISDestroy(&crowp));
  }
  if (size>1) {
    PetscCall(ISDestroy(&lcolp));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat         B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL;
  PetscInt    sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt    m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  PetscCall(PetscMalloc1(A->rmap->N/bs,&lens));
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  PetscCall(PetscMalloc1(2*size,&recvcounts));
  displs = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,lens));
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
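  /* Merge each block row's off-diagonal (bd) and diagonal (ad) column indices
     into a single globally sorted row: the bd columns that precede the
     diagonal block come first, then the ad columns shifted to global block
     numbering, then the remaining bd columns. */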
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  PetscCheck(cnt == sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A)));
  /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen,lens,A->rmap->N/bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  if (A->symmetric) {
    PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE));
  } else if (A->hermitian) {
    PetscCall(MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE));
  } else if (A->structurally_symmetric) {
    PetscCall(MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE));
  }
  *newmat = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  Vec         bb1 = NULL;

  PetscFunctionBegin;
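  /*
     Parallel SOR is realized as process-local (S)SOR sweeps combined with a
     block-Jacobi update across processes: each outer iteration gathers the
     ghost values of xx into lvec, forms bb1 = bb - B*lvec, and then runs the
     requested local sweep on the diagonal block A with right-hand side bb1.
  */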
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data;
  PetscInt    m,N,i,*garray = aij->garray;
  PetscInt    ib,jb,bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data;
  MatScalar   *a_val = a_aij->a;
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data;
  MatScalar   *b_val = b_aij->a;
  PetscReal   *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&N));
  PetscCall(PetscCalloc1(N,&work));
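  /* The work[] array, indexed by global column, accumulates the per-column
     reduction over both local blocks before a final MPI reduction. Values
     within each bs x bs block are stored column major, so jb runs over the
     block's columns and ib over the rows within a column; a_aij columns are
     shifted by cmap->rstart, b_aij columns are mapped through garray[]. */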
  if (type == NORM_2) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<N; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A,values));
  A->factorerrortype             = a->A->factorerrortype;
  A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(0);
}
PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    *d += rstart/A->rmap->bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                               /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                               /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                               /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                               /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL
};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);

PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    = NULL;
  PetscScalar    *values= NULL;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
  PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  values = (PetscScalar*)V;
  if (!values) {
    PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
  }
  for (i=0; i<m; i++) {
    PetscInt       row   = i + rstart;
    PetscInt       ncols = ii[i+1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
   The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs
   may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
   over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
   MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
   block column and the second index is over columns within a block.

   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
   and usually the numerical values as well
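
   Example:
   A minimal sketch (the arrays and sizes are illustrative only) in which each
   process supplies its local rows in block CSR form; for bs > 1 each entry of
   v holds a full bs*bs block, ordered as described in the Notes above

.vb
   PetscCall(MatCreate(comm,&B));
   PetscCall(MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
   PetscCall(MatSetType(B,MATMPIBAIJ));
   PetscCall(MatMPIBAIJSetPreallocationCSR(B,bs,i,j,v));
.ve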

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b;
  PetscInt    i;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      PetscCheck(d_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      PetscCheck(o_nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]);
    }
  }

  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetType(b->B,MATSEQBAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetType(b->A,MATSEQBAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
  }

  PetscCall(MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(M+1,&ii));
  ii[0] = 0;
  for (i=0; i<M; i++) {
    PetscCheck((id[i+1] - id[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]);
    PetscCheck((io[i+1] - io[i]) >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if the row contains a diagonal entry */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  PetscCall(PetscMalloc1(ii[M],&jj));
  cnt  = 0;
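  /* Fill jj one block row at a time, keeping global column order: first the
     off-diagonal columns that precede this process's block-column range, then
     the local (diagonal-block) columns with the diagonal entry itself dropped,
     then the remaining off-diagonal columns. */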
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj));
  PetscFunctionReturn(0);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  Mat_MPIAIJ  *b;
  Mat         B;

  PetscFunctionBegin;
  PetscCheck(A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetType(B,MATMPIAIJ));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
    PetscCall(MatMPIAIJSetPreallocation(B,0,NULL,0,NULL));
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
  }
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

   Level: beginner

   Notes:
   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: `MatCreateBAIJ`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);

PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool   flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(B,&b));
  B->data = (void*)b;

  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size+1,&b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix","Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
    PetscCall(PetscInfo(B,"Hash table factor used %5.2g\n",(double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
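
   Example:
   A minimal sketch (the nonzero counts 5 and 2 are illustrative only) creating
   a distributed matrix with square blocks of size bs and uniform preallocation

.vb
   Mat A;
   PetscCall(MatCreateBAIJ(comm,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A));
   /* set entries with MatSetValuesBlocked() or MatSetValues(), then assemble */
   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
.ve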

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    PetscCall(MatSetType(*A,MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
  } else {
    PetscCall(MatSetType(*A,MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscInt    len=0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));

  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  PetscInt    header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
  PetscInt    *rowidxs,*colidxs,rs,cs,ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
  PetscCall(MatGetBlockSize(mat,&bs));
  PetscCall(MatGetLocalSize(mat,&m,&n));
  PetscCall(PetscLayoutGetRange(mat->rmap,&rs,NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cs,&ce));
  mbs = m/bs; nbs = n/bs;

  /* read in row lengths and build row indices */
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    PetscCall(PetscBTCreate(nbs,&bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij));
    for (i=0; i<mbs; i++) {
      PetscCall(PetscBTMemzero(nbs,bt));
      PetscCall(PetscHSetIClear(ht));
      for (k=0; k<bs; k++) {
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht,col/bs,&done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* exactly one of the two calls below matches the matrix type; the other is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree2(d_nnz,o_nnz));
  }

  /* store matrix values */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat,viewer));
  PetscFunctionReturn(0);
}

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.

   Not Collective, each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.seealso: `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscFunctionBegin;
  PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  PetscBool   flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg));
  PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat    - the matrix
.  min    - number of input rows
.  im     - input rows
.  nin    - number of input columns
.  in     - input columns
.  v      - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes:
   This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code un-reuse.
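
   Since this routine only bypasses Fortran-interface overhead, a call to it has the same
   effect as the ordinary C call below (a sketch, written in terms of the dereferenced
   Fortran arguments):

.vb
   PetscCall(MatSetValuesBlocked(mat,m,im,n,in,v,addv));
.ve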

   Level: advanced

.seealso: `MatSetValuesBlocked()`
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  PetscCheck(!mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues,mat,0,0,0));

  if (!barray) {
    PetscCall(PetscMalloc1(bs2,&barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
        } else if (in[j] < 0) continue;
        else {
          PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
          if (mat->was_assembled) {
            if (!baij->colmap) {
              PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
            }

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
              PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
            col = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
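          /* col now holds the block column index for the off-diagonal part B in
             whatever numbering B currently uses: local when the matrix was
             previously assembled, global before first assembly or right after
             the disassembly triggered above */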
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues,mat,0,0,0));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
   in standard block CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block elements in block row row-1 of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0 based, and the i indices index into the local j array.
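
   For example (an illustrative sketch with bs = 1), if a process owns the two local rows

.vb
   1 0 0
   2 0 3
.ve

   then it would pass

.vb
   i = {0, 1, 3}
   j = {0, 0, 2}
   a = {1, 2, 3}
.ve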

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat,bs));
  PetscCall(MatSetUp(*mat));
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a));
  PetscCall(MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt    m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,mbs,Nbs,nbs;
    PetscInt    *bindx,rmax=a->rmax,j;
    PetscMPIInt rank,size;

    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
    }
    nbs = n/cbs;

    PetscCall(PetscMalloc1(rmax,&bindx));
    MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm,&rank));
    PetscCallMPI(MPI_Comm_size(comm,&size));
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    for (i=0; i<mbs; i++) {
      PetscCall(MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
      nnz = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
      PetscCall(MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL));
    }
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatSetType(*outmat,MATBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz));
    PetscCall(MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));

  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
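
/*
   Usage sketch (illustrative only; it assumes each rank already holds an assembled
   MATSEQBAIJ piece named seqpiece). This routine is normally reached through the
   type dispatch in MatCreateMPIMatConcatenateSeqMat():

     Mat parallel;
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqpiece,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parallel));
     ...
     PetscCall(MatDestroy(&parallel));
*/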