1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat); 8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat); 9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat); 10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt); 11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []); 13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); 18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec); 20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]); 21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec); 22 #if defined(PETSC_HAVE_ELEMENTAL) 23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 24 #endif 25 26 #undef __FUNCT__ 27 #define __FUNCT__ "MatStoreValues_MPISBAIJ" 28 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 29 { 30 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 31 PetscErrorCode ierr; 32 33 PetscFunctionBegin; 34 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 35 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 36 PetscFunctionReturn(0); 37 } 38 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ" 41 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 42 { 43 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data; 44 PetscErrorCode ierr; 45 46 PetscFunctionBegin; 47 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 48 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 53 { \ 54 \ 55 brow = row/bs; \ 56 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 57 rmax = aimax[brow]; nrow = ailen[brow]; \ 58 bcol = col/bs; \ 59 ridx = row % bs; cidx = col % bs; \ 60 low = 0; high = nrow; \ 61 while (high-low > 3) { \ 62 t = (low+high)/2; \ 63 if (rp[t] > bcol) high = t; \ 64 else low = t; \ 65 } \ 66 for (_i=low; _i<high; _i++) { \ 67 if (rp[_i] > bcol) break; \ 68 if (rp[_i] == bcol) { \ 69 bap = ap + bs2*_i + bs*cidx + ridx; \ 70 if (addv == ADD_VALUES) *bap += value; \ 71 else *bap = value; \ 72 goto a_noinsert; \ 73 } \ 74 } \ 75 if (a->nonew == 1) goto a_noinsert; \ 76 if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 77 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 78 N = nrow++ - 1; \ 79 /* shift up all the later entries in this row */ \ 80 for (ii=N; ii>=_i; ii--) { \ 81 rp[ii+1] = rp[ii]; \ 82 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 83 } \ 84 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 85 rp[_i] = bcol; \ 86 ap[bs2*_i + bs*cidx + ridx] = value; \ 87 A->nonzerostate++;\ 88 a_noinsert:; \ 89 ailen[brow] = nrow; \ 90 } 91 92 #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 93 { \ 94 brow = row/bs; \ 95 rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 96 rmax = bimax[brow]; nrow = bilen[brow]; \ 97 bcol = col/bs; \ 98 ridx = row % bs; cidx = col % bs; \ 99 low = 0; high = nrow; \ 100 while (high-low > 3) { \ 101 t = (low+high)/2; \ 102 if (rp[t] > bcol) high = t; \ 103 else low = t; \ 104 } \ 105 for (_i=low; _i<high; _i++) { \ 106 if (rp[_i] > bcol) break; \ 107 if (rp[_i] == bcol) { \ 108 bap = ap + bs2*_i + bs*cidx + ridx; \ 109 if (addv == ADD_VALUES) *bap += value; \ 110 else *bap = value; \ 111 goto b_noinsert; \ 112 } \ 113 } \ 114 if (b->nonew == 1) goto b_noinsert; \ 115 if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 116 MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 117 N = nrow++ - 1; \ 118 /* shift up all the later entries in this row */ \ 119 for (ii=N; ii>=_i; ii--) { \ 120 rp[ii+1] = rp[ii]; \ 121 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 122 } \ 123 if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 124 rp[_i] = bcol; \ 125 ap[bs2*_i + bs*cidx + ridx] = value; \ 126 B->nonzerostate++;\ 127 b_noinsert:; \ 128 bilen[brow] = nrow; \ 129 } 130 131 /* Only add/insert a(i,j) with i<=j (blocks). 132 Any a(i,j) with i>j input by user is ingored. 133 */ 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetValues_MPISBAIJ" 136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 137 { 138 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 139 MatScalar value; 140 PetscBool roworiented = baij->roworiented; 141 PetscErrorCode ierr; 142 PetscInt i,j,row,col; 143 PetscInt rstart_orig=mat->rmap->rstart; 144 PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 145 PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 146 147 /* Some Variables required in the macro */ 148 Mat A = baij->A; 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 150 PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 151 MatScalar *aa =a->a; 152 153 Mat B = baij->B; 154 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 155 PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 156 MatScalar *ba =b->a; 157 158 PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 159 PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 160 MatScalar *ap,*bap; 161 162 /* for stash */ 163 PetscInt n_loc, *in_loc = NULL; 164 MatScalar *v_loc = NULL; 165 166 PetscFunctionBegin; 167 if (!baij->donotstash) { 168 if (n > baij->n_loc) { 169 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 170 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 171 ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr); 172 ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr); 173 174 baij->n_loc = n; 175 } 176 in_loc = baij->in_loc; 177 v_loc = baij->v_loc; 178 } 179 180 for (i=0; i<m; i++) { 181 if (im[i] < 0) continue; 182 #if defined(PETSC_USE_DEBUG) 183 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 184 #endif 185 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 186 row = im[i] - rstart_orig; /* local row index */ 187 for (j=0; j<n; j++) { 188 if (im[i]/bs > in[j]/bs) { 189 if (a->ignore_ltriangular) { 190 continue; /* ignore lower triangular blocks */ 191 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 192 } 193 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 194 col = in[j] - cstart_orig; /* local col index */ 195 brow = row/bs; bcol = col/bs; 196 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 197 if (roworiented) value = v[i*n+j]; 198 else value = v[i+j*m]; 199 MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 200 /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 201 } else if (in[j] < 0) continue; 202 #if defined(PETSC_USE_DEBUG) 203 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 204 #endif 205 else { /* off-diag entry (B) */ 206 if (mat->was_assembled) { 207 if (!baij->colmap) { 208 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 209 } 210 #if defined(PETSC_USE_CTABLE) 211 ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 212 col = col - 1; 213 #else 214 col = baij->colmap[in[j]/bs] - 1; 215 #endif 216 if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 217 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 218 col = in[j]; 219 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 220 B = baij->B; 221 b = (Mat_SeqBAIJ*)(B)->data; 222 bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 223 ba = b->a; 224 } else col += in[j]%bs; 225 } else col = in[j]; 226 if (roworiented) value = v[i*n+j]; 227 else value = v[i+j*m]; 228 MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 229 /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 230 } 231 } 232 } else { /* off processor entry */ 233 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 234 if (!baij->donotstash) { 235 mat->assembled = PETSC_FALSE; 236 n_loc = 0; 237 for (j=0; j<n; j++) { 238 if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */ 239 in_loc[n_loc] = in[j]; 240 if (roworiented) { 241 v_loc[n_loc] = v[i*n+j]; 242 } else { 243 v_loc[n_loc] = v[j*m+i]; 244 } 245 n_loc++; 246 } 247 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr); 248 } 249 } 250 } 251 PetscFunctionReturn(0); 252 } 253 254 #undef __FUNCT__ 255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ" 256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv) 257 { 258 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 259 const MatScalar *value; 260 MatScalar *barray =baij->barray; 261 PetscBool roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular; 262 PetscErrorCode ierr; 263 PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 264 PetscInt rend=baij->rendbs,cstart=baij->rstartbs,stepval; 265 PetscInt cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2; 266 267 PetscFunctionBegin; 268 if (!barray) { 269 ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 270 baij->barray = barray; 271 } 272 273 if (roworiented) { 274 stepval = (n-1)*bs; 275 } else { 276 stepval = (m-1)*bs; 277 } 278 for (i=0; i<m; i++) { 279 if (im[i] < 0) continue; 280 #if defined(PETSC_USE_DEBUG) 281 if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 282 #endif 283 if (im[i] >= rstart && im[i] < rend) { 284 row = im[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (im[i] > in[j]) { 287 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 288 else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 289 } 290 /* If NumCol = 1 then a copy is not required */ 291 if ((roworiented) && (n == 1)) { 292 barray = (MatScalar*) v + i*bs2; 293 } else if ((!roworiented) && (m == 1)) { 294 barray = (MatScalar*) v + j*bs2; 295 } else { /* Here a copy is required */ 296 if (roworiented) { 297 value = v + i*(stepval+bs)*bs + j*bs; 298 } else { 299 value = v + j*(stepval+bs)*bs + i*bs; 300 } 301 for (ii=0; ii<bs; ii++,value+=stepval) { 302 for (jj=0; jj<bs; jj++) { 303 *barray++ = *value++; 304 } 305 } 306 barray -=bs2; 307 } 308 309 if (in[j] >= cstart && in[j] < cend) { 310 col = in[j] - cstart; 311 ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 312 } else if (in[j] < 0) continue; 313 #if defined(PETSC_USE_DEBUG) 314 else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 315 #endif 316 else { 317 if (mat->was_assembled) { 318 if (!baij->colmap) { 319 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 320 } 321 322 #if defined(PETSC_USE_DEBUG) 323 #if defined(PETSC_USE_CTABLE) 324 { PetscInt data; 325 ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 326 if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 327 } 328 #else 329 if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 330 #endif 331 #endif 332 #if defined(PETSC_USE_CTABLE) 333 ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 334 col = (col - 1)/bs; 335 #else 336 col = (baij->colmap[in[j]] - 1)/bs; 337 #endif 338 if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 339 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 340 col = in[j]; 341 } 342 } else col = in[j]; 343 ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 344 } 345 } 346 } else { 347 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 348 if (!baij->donotstash) { 349 if (roworiented) { 350 ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 351 } else { 352 ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 353 } 354 } 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatGetValues_MPISBAIJ" 362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 363 { 364 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 365 PetscErrorCode ierr; 366 PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 367 PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 368 369 PetscFunctionBegin; 370 for (i=0; i<m; i++) { 371 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */ 372 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 373 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 374 row = idxm[i] - bsrstart; 375 for (j=0; j<n; j++) { 376 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */ 377 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 378 if (idxn[j] >= bscstart && idxn[j] < bscend) { 379 col = idxn[j] - bscstart; 380 ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 381 } else { 382 if (!baij->colmap) { 383 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 384 } 385 #if defined(PETSC_USE_CTABLE) 386 ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 387 data--; 388 #else 389 data = baij->colmap[idxn[j]/bs]-1; 390 #endif 391 if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 392 else { 393 col = data + idxn[j]%bs; 394 ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 395 } 396 } 397 } 398 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 399 } 400 PetscFunctionReturn(0); 401 } 402 403 #undef __FUNCT__ 404 #define __FUNCT__ "MatNorm_MPISBAIJ" 405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 406 { 407 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 408 PetscErrorCode ierr; 409 PetscReal sum[2],*lnorm2; 410 411 PetscFunctionBegin; 412 if (baij->size == 1) { 413 ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 414 } else { 415 if (type == NORM_FROBENIUS) { 416 ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr); 417 ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 418 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 419 ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 420 *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 421 ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 422 *norm = PetscSqrtReal(sum[0] + 2*sum[1]); 423 ierr = PetscFree(lnorm2);CHKERRQ(ierr); 424 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 425 Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data; 426 Mat_SeqBAIJ *bmat=(Mat_SeqBAIJ*)baij->B->data; 427 PetscReal *rsum,*rsum2,vabs; 428 PetscInt *jj,*garray=baij->garray,rstart=baij->rstartbs,nz; 429 PetscInt brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs; 430 MatScalar *v; 431 432 ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr); 433 ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 434 /* Amat */ 435 v = amat->a; jj = amat->j; 436 for (brow=0; brow<mbs; brow++) { 437 grow = bs*(rstart + brow); 438 nz = amat->i[brow+1] - amat->i[brow]; 439 for (bcol=0; bcol<nz; bcol++) { 440 gcol = bs*(rstart + *jj); jj++; 441 for (col=0; col<bs; col++) { 442 for (row=0; row<bs; row++) { 443 vabs = PetscAbsScalar(*v); v++; 444 rsum[gcol+col] += vabs; 445 /* non-diagonal block */ 446 if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs; 447 } 448 } 449 } 450 } 451 /* Bmat */ 452 v = bmat->a; jj = bmat->j; 453 for (brow=0; brow<mbs; brow++) { 454 grow = bs*(rstart + brow); 455 nz = bmat->i[brow+1] - bmat->i[brow]; 456 for (bcol=0; bcol<nz; bcol++) { 457 gcol = bs*garray[*jj]; jj++; 458 for (col=0; col<bs; col++) { 459 for (row=0; row<bs; row++) { 460 vabs = PetscAbsScalar(*v); v++; 461 rsum[gcol+col] += vabs; 462 rsum[grow+row] += vabs; 463 } 464 } 465 } 466 } 467 ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 468 *norm = 0.0; 469 for (col=0; col<mat->cmap->N; col++) { 470 if (rsum2[col] > *norm) *norm = rsum2[col]; 471 } 472 ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr); 473 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet"); 474 } 475 PetscFunctionReturn(0); 476 } 477 478 #undef __FUNCT__ 479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ" 480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 481 { 482 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 483 PetscErrorCode ierr; 484 PetscInt nstash,reallocs; 485 InsertMode addv; 486 487 PetscFunctionBegin; 488 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 489 490 /* make sure all processors are either in INSERTMODE or ADDMODE */ 491 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 492 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 493 mat->insertmode = addv; /* in case this processor had no cache */ 494 495 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 496 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 497 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 498 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 499 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 500 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ" 506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 507 { 508 Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 509 Mat_SeqSBAIJ *a =(Mat_SeqSBAIJ*)baij->A->data; 510 PetscErrorCode ierr; 511 PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 512 PetscInt *row,*col; 513 PetscBool other_disassembled; 514 PetscMPIInt n; 515 PetscBool r1,r2,r3; 516 MatScalar *val; 517 InsertMode addv = mat->insertmode; 518 519 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 520 PetscFunctionBegin; 521 if (!baij->donotstash && !mat->nooffprocentries) { 522 while (1) { 523 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 524 if (!flg) break; 525 526 for (i=0; i<n;) { 527 /* Now identify the consecutive vals belonging to the same row */ 528 for (j=i,rstart=row[j]; j<n; j++) { 529 if (row[j] != rstart) break; 530 } 531 if (j < n) ncols = j-i; 532 else ncols = n-i; 533 /* Now assemble all these values with a single function call */ 534 ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 535 i = j; 536 } 537 } 538 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 539 /* Now process the block-stash. Since the values are stashed column-oriented, 540 set the roworiented flag to column oriented, and after MatSetValues() 541 restore the original flags */ 542 r1 = baij->roworiented; 543 r2 = a->roworiented; 544 r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 545 546 baij->roworiented = PETSC_FALSE; 547 a->roworiented = PETSC_FALSE; 548 549 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 550 while (1) { 551 ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 552 if (!flg) break; 553 554 for (i=0; i<n;) { 555 /* Now identify the consecutive vals belonging to the same row */ 556 for (j=i,rstart=row[j]; j<n; j++) { 557 if (row[j] != rstart) break; 558 } 559 if (j < n) ncols = j-i; 560 else ncols = n-i; 561 ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 562 i = j; 563 } 564 } 565 ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 566 567 baij->roworiented = r1; 568 a->roworiented = r2; 569 570 ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */ 571 } 572 573 ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 574 ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 575 576 /* determine if any processor has disassembled, if so we must 577 also disassemble ourselfs, in order that we may reassemble. */ 578 /* 579 if nonzero structure of submatrix B cannot change then we know that 580 no processor disassembled thus we can skip this stuff 581 */ 582 if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 583 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 584 if (mat->was_assembled && !other_disassembled) { 585 ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 586 } 587 } 588 589 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 590 ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */ 591 } 592 ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 593 ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 594 595 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 596 597 baij->rowvalues = 0; 598 599 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 600 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 601 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 602 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer); 608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 609 #include <petscdraw.h> 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket" 612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 613 { 614 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 615 PetscErrorCode ierr; 616 PetscInt bs = mat->rmap->bs; 617 PetscMPIInt rank = baij->rank; 618 PetscBool iascii,isdraw; 619 PetscViewer sviewer; 620 PetscViewerFormat format; 621 622 PetscFunctionBegin; 623 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 624 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 625 if (iascii) { 626 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 627 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 628 MatInfo info; 629 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 630 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 631 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 632 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 633 ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 634 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 635 ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 636 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 637 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 638 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 639 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 640 ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 641 PetscFunctionReturn(0); 642 } else if (format == PETSC_VIEWER_ASCII_INFO) { 643 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 646 PetscFunctionReturn(0); 647 } 648 } 649 650 if (isdraw) { 651 PetscDraw draw; 652 PetscBool isnull; 653 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 654 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 655 } 656 657 { 658 /* assemble the entire matrix onto first processor. */ 659 Mat A; 660 Mat_SeqSBAIJ *Aloc; 661 Mat_SeqBAIJ *Bloc; 662 PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 663 MatScalar *a; 664 const char *matname; 665 666 /* Should this be the same type as mat? */ 667 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 668 if (!rank) { 669 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 670 } else { 671 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 672 } 673 ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr); 674 ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 675 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 676 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 677 678 /* copy over the A part */ 679 Aloc = (Mat_SeqSBAIJ*)baij->A->data; 680 ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 681 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 682 683 for (i=0; i<mbs; i++) { 684 rvals[0] = bs*(baij->rstartbs + i); 685 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 686 for (j=ai[i]; j<ai[i+1]; j++) { 687 col = (baij->cstartbs+aj[j])*bs; 688 for (k=0; k<bs; k++) { 689 ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 690 col++; 691 a += bs; 692 } 693 } 694 } 695 /* copy over the B part */ 696 Bloc = (Mat_SeqBAIJ*)baij->B->data; 697 ai = Bloc->i; aj = Bloc->j; a = Bloc->a; 698 for (i=0; i<mbs; i++) { 699 700 rvals[0] = bs*(baij->rstartbs + i); 701 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 702 for (j=ai[i]; j<ai[i+1]; j++) { 703 col = baij->garray[aj[j]]*bs; 704 for (k=0; k<bs; k++) { 705 ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 706 col++; 707 a += bs; 708 } 709 } 710 } 711 ierr = PetscFree(rvals);CHKERRQ(ierr); 712 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 713 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 714 /* 715 Everyone has to call to draw the matrix since the graphics waits are 716 synchronized across all processors that share the PetscDraw object 717 */ 718 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 719 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 720 if (!rank) { 721 ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 722 ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 723 } 724 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 725 ierr = MatDestroy(&A);CHKERRQ(ierr); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatView_MPISBAIJ" 732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer) 733 { 734 PetscErrorCode ierr; 735 PetscBool iascii,isdraw,issocket,isbinary; 736 737 PetscFunctionBegin; 738 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 739 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 740 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 741 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 742 if (iascii || isdraw || issocket || isbinary) { 743 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 744 } 745 PetscFunctionReturn(0); 746 } 747 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatDestroy_MPISBAIJ" 750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 751 { 752 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 753 PetscErrorCode ierr; 754 755 PetscFunctionBegin; 756 #if defined(PETSC_USE_LOG) 757 PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 758 #endif 759 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 760 ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 761 ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 762 ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 763 #if defined(PETSC_USE_CTABLE) 764 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 765 #else 766 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 767 #endif 768 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 769 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 770 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 771 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 772 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 773 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 774 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 775 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 776 ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr); 777 ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 778 ierr = PetscFree(baij->barray);CHKERRQ(ierr); 779 ierr = PetscFree(baij->hd);CHKERRQ(ierr); 780 ierr = VecDestroy(&baij->diag);CHKERRQ(ierr); 781 ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr); 782 ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr); 783 #if defined(PETSC_USE_REAL_MAT_SINGLE) 784 ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr); 785 #endif 786 ierr = PetscFree(baij->in_loc);CHKERRQ(ierr); 787 ierr = PetscFree(baij->v_loc);CHKERRQ(ierr); 788 ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 789 ierr = PetscFree(mat->data);CHKERRQ(ierr); 790 791 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 792 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 793 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 794 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 795 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 796 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr); 797 #if defined(PETSC_HAVE_ELEMENTAL) 798 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr); 799 #endif 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian" 805 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy) 806 { 807 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 808 PetscErrorCode ierr; 809 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 810 PetscScalar *x,*from; 811 812 PetscFunctionBegin; 813 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 814 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 815 816 /* diagonal part */ 817 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 818 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 819 820 /* subdiagonal part */ 821 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 822 823 /* copy x into the vec slvec0 */ 824 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 825 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 826 827 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 828 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 829 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 830 831 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 833 /* supperdiagonal part */ 834 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatMult_MPISBAIJ" 840 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 841 { 842 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 843 PetscErrorCode ierr; 844 PetscInt nt,mbs=a->mbs,bs=A->rmap->bs; 845 PetscScalar *from; 846 const PetscScalar *x; 847 848 PetscFunctionBegin; 849 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 850 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 851 852 /* diagonal part */ 853 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr); 854 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr); 855 856 /* subdiagonal part */ 857 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 858 859 /* copy x into the vec slvec0 */ 860 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 861 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 862 863 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 864 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 865 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 866 867 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 /* supperdiagonal part */ 870 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr); 871 PetscFunctionReturn(0); 872 } 873 874 #undef __FUNCT__ 875 #define __FUNCT__ "MatMult_MPISBAIJ_2comm" 876 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy) 877 { 878 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 879 PetscErrorCode ierr; 880 PetscInt nt; 881 882 PetscFunctionBegin; 883 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 884 if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 885 886 ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 887 if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 888 889 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 /* do diagonal part */ 891 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 892 /* do supperdiagonal part */ 893 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 895 /* do subdiagonal part */ 896 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 897 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 898 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatMultAdd_MPISBAIJ" 904 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 905 { 906 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 907 PetscErrorCode ierr; 908 PetscInt mbs=a->mbs,bs=A->rmap->bs; 909 PetscScalar *from,zero=0.0; 910 const PetscScalar *x; 911 912 PetscFunctionBegin; 913 /* 914 PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n"); 915 PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT); 916 */ 917 /* diagonal part */ 918 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr); 919 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr); 920 921 /* subdiagonal part */ 922 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr); 923 924 /* copy x into the vec slvec0 */ 925 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr); 926 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 927 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 928 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr); 929 930 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 932 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 933 934 /* supperdiagonal part */ 935 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr); 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm" 941 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz) 942 { 943 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 944 PetscErrorCode ierr; 945 946 PetscFunctionBegin; 947 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 /* do diagonal part */ 949 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 950 /* do supperdiagonal part */ 951 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 953 954 /* do subdiagonal part */ 955 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 956 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 957 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 /* 962 This only works correctly for square matrices where the subblock A->A is the 963 diagonal block 964 */ 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ" 967 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 968 { 969 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 970 PetscErrorCode ierr; 971 972 PetscFunctionBegin; 973 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 974 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatScale_MPISBAIJ" 980 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa) 981 { 982 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 987 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatGetRow_MPISBAIJ" 993 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 994 { 995 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 996 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 997 PetscErrorCode ierr; 998 PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 999 PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1000 PetscInt *cmap,*idx_p,cstart = mat->rstartbs; 1001 1002 PetscFunctionBegin; 1003 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1004 mat->getrowactive = PETSC_TRUE; 1005 1006 if (!mat->rowvalues && (idx || v)) { 1007 /* 1008 allocate enough space to hold information from the longest row. 1009 */ 1010 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1011 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1012 PetscInt max = 1,mbs = mat->mbs,tmp; 1013 for (i=0; i<mbs; i++) { 1014 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1015 if (max < tmp) max = tmp; 1016 } 1017 ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1018 } 1019 1020 if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1021 lrow = row - brstart; /* local row index */ 1022 1023 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1024 if (!v) {pvA = 0; pvB = 0;} 1025 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1026 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1027 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1028 nztot = nzA + nzB; 1029 1030 cmap = mat->garray; 1031 if (v || idx) { 1032 if (nztot) { 1033 /* Sort by increasing column numbers, assuming A and B already sorted */ 1034 PetscInt imark = -1; 1035 if (v) { 1036 *v = v_p = mat->rowvalues; 1037 for (i=0; i<nzB; i++) { 1038 if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1039 else break; 1040 } 1041 imark = i; 1042 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1043 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1044 } 1045 if (idx) { 1046 *idx = idx_p = mat->rowindices; 1047 if (imark > -1) { 1048 for (i=0; i<imark; i++) { 1049 idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1050 } 1051 } else { 1052 for (i=0; i<nzB; i++) { 1053 if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1054 else break; 1055 } 1056 imark = i; 1057 } 1058 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1059 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1060 } 1061 } else { 1062 if (idx) *idx = 0; 1063 if (v) *v = 0; 1064 } 1065 } 1066 *nz = nztot; 1067 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1068 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ" 1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1075 { 1076 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1077 1078 PetscFunctionBegin; 1079 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1080 baij->getrowactive = PETSC_FALSE; 1081 PetscFunctionReturn(0); 1082 } 1083 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ" 1086 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1087 { 1088 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1089 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1090 1091 PetscFunctionBegin; 1092 aA->getrow_utriangular = PETSC_TRUE; 1093 PetscFunctionReturn(0); 1094 } 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ" 1097 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1098 { 1099 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1100 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1101 1102 PetscFunctionBegin; 1103 aA->getrow_utriangular = PETSC_FALSE; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatRealPart_MPISBAIJ" 1109 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1110 { 1111 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1112 PetscErrorCode ierr; 1113 1114 PetscFunctionBegin; 1115 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1116 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ" 1122 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1123 { 1124 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1125 PetscErrorCode ierr; 1126 1127 PetscFunctionBegin; 1128 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1129 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ() 1134 Input: isrow - distributed(parallel), 1135 iscol_local - locally owned (seq) 1136 */ 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "ISEqual_private" 1139 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg) 1140 { 1141 PetscErrorCode ierr; 1142 PetscInt sz1,sz2,*a1,*a2,i,j,k,nmatch; 1143 const PetscInt *ptr1,*ptr2; 1144 1145 PetscFunctionBegin; 1146 ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr); 1147 ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr); 1148 if (sz1 > sz2) *flg = PETSC_FALSE; 1149 1150 ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr); 1151 ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1152 1153 ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr); 1154 ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr); 1155 ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr); 1156 ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr); 1157 ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr); 1158 ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr); 1159 1160 nmatch=0; 1161 k = 0; 1162 for (i=0; i<sz1; i++){ 1163 for (j=k; j<sz2; j++){ 1164 if (a1[i] == a2[j]) { 1165 k = j; nmatch++; 1166 break; 1167 } 1168 } 1169 } 1170 ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr); 1171 ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr); 1172 ierr = PetscFree(a1);CHKERRQ(ierr); 1173 ierr = PetscFree(a2);CHKERRQ(ierr); 1174 if (nmatch < sz1) *flg = PETSC_FALSE; 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ" 1180 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 1181 { 1182 PetscErrorCode ierr; 1183 IS iscol_local; 1184 PetscInt csize; 1185 PetscBool isequal; 1186 1187 PetscFunctionBegin; 1188 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1189 if (call == MAT_REUSE_MATRIX) { 1190 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1191 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1192 } else { 1193 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1194 ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr); 1195 if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow"); 1196 } 1197 1198 /* now call MatGetSubMatrix_MPIBAIJ() */ 1199 ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1200 if (call == MAT_INITIAL_MATRIX) { 1201 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 1202 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1203 } 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatZeroEntries_MPISBAIJ" 1209 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1210 { 1211 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1212 PetscErrorCode ierr; 1213 1214 PetscFunctionBegin; 1215 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1216 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatGetInfo_MPISBAIJ" 1222 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1223 { 1224 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1225 Mat A = a->A,B = a->B; 1226 PetscErrorCode ierr; 1227 PetscReal isend[5],irecv[5]; 1228 1229 PetscFunctionBegin; 1230 info->block_size = (PetscReal)matin->rmap->bs; 1231 1232 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1233 1234 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1235 isend[3] = info->memory; isend[4] = info->mallocs; 1236 1237 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1238 1239 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1240 isend[3] += info->memory; isend[4] += info->mallocs; 1241 if (flag == MAT_LOCAL) { 1242 info->nz_used = isend[0]; 1243 info->nz_allocated = isend[1]; 1244 info->nz_unneeded = isend[2]; 1245 info->memory = isend[3]; 1246 info->mallocs = isend[4]; 1247 } else if (flag == MAT_GLOBAL_MAX) { 1248 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1249 1250 info->nz_used = irecv[0]; 1251 info->nz_allocated = irecv[1]; 1252 info->nz_unneeded = irecv[2]; 1253 info->memory = irecv[3]; 1254 info->mallocs = irecv[4]; 1255 } else if (flag == MAT_GLOBAL_SUM) { 1256 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1257 1258 info->nz_used = irecv[0]; 1259 info->nz_allocated = irecv[1]; 1260 info->nz_unneeded = irecv[2]; 1261 info->memory = irecv[3]; 1262 info->mallocs = irecv[4]; 1263 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 1264 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1265 info->fill_ratio_needed = 0; 1266 info->factor_mallocs = 0; 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatSetOption_MPISBAIJ" 1272 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg) 1273 { 1274 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1275 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data; 1276 PetscErrorCode ierr; 1277 1278 PetscFunctionBegin; 1279 switch (op) { 1280 case MAT_NEW_NONZERO_LOCATIONS: 1281 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1282 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1283 case MAT_KEEP_NONZERO_PATTERN: 1284 case MAT_NEW_NONZERO_LOCATION_ERR: 1285 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1286 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1287 break; 1288 case MAT_ROW_ORIENTED: 1289 a->roworiented = flg; 1290 1291 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1292 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1293 break; 1294 case MAT_NEW_DIAGONALS: 1295 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1296 break; 1297 case MAT_IGNORE_OFF_PROC_ENTRIES: 1298 a->donotstash = flg; 1299 break; 1300 case MAT_USE_HASH_TABLE: 1301 a->ht_flag = flg; 1302 break; 1303 case MAT_HERMITIAN: 1304 if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first"); 1305 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1306 1307 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1308 break; 1309 case MAT_SPD: 1310 A->spd_set = PETSC_TRUE; 1311 A->spd = flg; 1312 if (flg) { 1313 A->symmetric = PETSC_TRUE; 1314 A->structurally_symmetric = PETSC_TRUE; 1315 A->symmetric_set = PETSC_TRUE; 1316 A->structurally_symmetric_set = PETSC_TRUE; 1317 } 1318 break; 1319 case MAT_SYMMETRIC: 1320 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1321 break; 1322 case MAT_STRUCTURALLY_SYMMETRIC: 1323 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1324 break; 1325 case MAT_SYMMETRY_ETERNAL: 1326 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric"); 1327 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1328 break; 1329 case MAT_IGNORE_LOWER_TRIANGULAR: 1330 aA->ignore_ltriangular = flg; 1331 break; 1332 case MAT_ERROR_LOWER_TRIANGULAR: 1333 aA->ignore_ltriangular = flg; 1334 break; 1335 case MAT_GETROW_UPPERTRIANGULAR: 1336 aA->getrow_utriangular = flg; 1337 break; 1338 default: 1339 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1340 } 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatTranspose_MPISBAIJ" 1346 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B) 1347 { 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 if (MAT_INITIAL_MATRIX || *B != A) { 1352 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ" 1359 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1360 { 1361 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1362 Mat a = baij->A, b=baij->B; 1363 PetscErrorCode ierr; 1364 PetscInt nv,m,n; 1365 PetscBool flg; 1366 1367 PetscFunctionBegin; 1368 if (ll != rr) { 1369 ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr); 1370 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n"); 1371 } 1372 if (!ll) PetscFunctionReturn(0); 1373 1374 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 1375 if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n); 1376 1377 ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr); 1378 if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size"); 1379 1380 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1381 1382 /* left diagonalscale the off-diagonal part */ 1383 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 1384 1385 /* scale the diagonal part */ 1386 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1387 1388 /* right diagonalscale the off-diagonal part */ 1389 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1390 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 1394 #undef __FUNCT__ 1395 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ" 1396 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1397 { 1398 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1399 PetscErrorCode ierr; 1400 1401 PetscFunctionBegin; 1402 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 1406 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*); 1407 1408 #undef __FUNCT__ 1409 #define __FUNCT__ "MatEqual_MPISBAIJ" 1410 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag) 1411 { 1412 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1413 Mat a,b,c,d; 1414 PetscBool flg; 1415 PetscErrorCode ierr; 1416 1417 PetscFunctionBegin; 1418 a = matA->A; b = matA->B; 1419 c = matB->A; d = matB->B; 1420 1421 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1422 if (flg) { 1423 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1424 } 1425 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatCopy_MPISBAIJ" 1431 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str) 1432 { 1433 PetscErrorCode ierr; 1434 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1435 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data; 1436 1437 PetscFunctionBegin; 1438 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1439 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1440 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1441 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1442 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1443 } else { 1444 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1445 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1446 } 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatSetUp_MPISBAIJ" 1452 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1453 { 1454 PetscErrorCode ierr; 1455 1456 PetscFunctionBegin; 1457 ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } 1460 1461 #undef __FUNCT__ 1462 #define __FUNCT__ "MatAXPY_MPISBAIJ" 1463 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1464 { 1465 PetscErrorCode ierr; 1466 Mat_MPISBAIJ *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data; 1467 PetscBLASInt bnz,one=1; 1468 Mat_SeqSBAIJ *xa,*ya; 1469 Mat_SeqBAIJ *xb,*yb; 1470 1471 PetscFunctionBegin; 1472 if (str == SAME_NONZERO_PATTERN) { 1473 PetscScalar alpha = a; 1474 xa = (Mat_SeqSBAIJ*)xx->A->data; 1475 ya = (Mat_SeqSBAIJ*)yy->A->data; 1476 ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr); 1477 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one)); 1478 xb = (Mat_SeqBAIJ*)xx->B->data; 1479 yb = (Mat_SeqBAIJ*)yy->B->data; 1480 ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr); 1481 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one)); 1482 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1483 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1484 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 1485 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1486 ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 1487 } else { 1488 Mat B; 1489 PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 1490 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 1491 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1492 ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr); 1493 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 1494 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 1495 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 1496 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 1497 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 1498 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 1499 ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr); 1500 ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 1501 ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 1502 ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 1503 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 1504 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 1505 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 1506 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 1507 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1508 ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr); 1509 } 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ" 1515 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[]) 1516 { 1517 PetscErrorCode ierr; 1518 PetscInt i; 1519 PetscBool flg; 1520 1521 PetscFunctionBegin; 1522 ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); 1523 for (i=0; i<n; i++) { 1524 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr); 1525 if (!flg) { /* *B[i] is non-symmetric, set flag */ 1526 ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr); 1527 } 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 /* -------------------------------------------------------------------*/ 1533 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1534 MatGetRow_MPISBAIJ, 1535 MatRestoreRow_MPISBAIJ, 1536 MatMult_MPISBAIJ, 1537 /* 4*/ MatMultAdd_MPISBAIJ, 1538 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1539 MatMultAdd_MPISBAIJ, 1540 0, 1541 0, 1542 0, 1543 /* 10*/ 0, 1544 0, 1545 0, 1546 MatSOR_MPISBAIJ, 1547 MatTranspose_MPISBAIJ, 1548 /* 15*/ MatGetInfo_MPISBAIJ, 1549 MatEqual_MPISBAIJ, 1550 MatGetDiagonal_MPISBAIJ, 1551 MatDiagonalScale_MPISBAIJ, 1552 MatNorm_MPISBAIJ, 1553 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1554 MatAssemblyEnd_MPISBAIJ, 1555 MatSetOption_MPISBAIJ, 1556 MatZeroEntries_MPISBAIJ, 1557 /* 24*/ 0, 1558 0, 1559 0, 1560 0, 1561 0, 1562 /* 29*/ MatSetUp_MPISBAIJ, 1563 0, 1564 0, 1565 0, 1566 0, 1567 /* 34*/ MatDuplicate_MPISBAIJ, 1568 0, 1569 0, 1570 0, 1571 0, 1572 /* 39*/ MatAXPY_MPISBAIJ, 1573 MatGetSubMatrices_MPISBAIJ, 1574 MatIncreaseOverlap_MPISBAIJ, 1575 MatGetValues_MPISBAIJ, 1576 MatCopy_MPISBAIJ, 1577 /* 44*/ 0, 1578 MatScale_MPISBAIJ, 1579 0, 1580 0, 1581 0, 1582 /* 49*/ 0, 1583 0, 1584 0, 1585 0, 1586 0, 1587 /* 54*/ 0, 1588 0, 1589 MatSetUnfactored_MPISBAIJ, 1590 0, 1591 MatSetValuesBlocked_MPISBAIJ, 1592 /* 59*/ MatGetSubMatrix_MPISBAIJ, 1593 0, 1594 0, 1595 0, 1596 0, 1597 /* 64*/ 0, 1598 0, 1599 0, 1600 0, 1601 0, 1602 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1603 0, 1604 0, 1605 0, 1606 0, 1607 /* 74*/ 0, 1608 0, 1609 0, 1610 0, 1611 0, 1612 /* 79*/ 0, 1613 0, 1614 0, 1615 0, 1616 MatLoad_MPISBAIJ, 1617 /* 84*/ 0, 1618 0, 1619 0, 1620 0, 1621 0, 1622 /* 89*/ 0, 1623 0, 1624 0, 1625 0, 1626 0, 1627 /* 94*/ 0, 1628 0, 1629 0, 1630 0, 1631 0, 1632 /* 99*/ 0, 1633 0, 1634 0, 1635 0, 1636 0, 1637 /*104*/ 0, 1638 MatRealPart_MPISBAIJ, 1639 MatImaginaryPart_MPISBAIJ, 1640 MatGetRowUpperTriangular_MPISBAIJ, 1641 MatRestoreRowUpperTriangular_MPISBAIJ, 1642 /*109*/ 0, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*114*/ 0, 1648 0, 1649 0, 1650 0, 1651 0, 1652 /*119*/ 0, 1653 0, 1654 0, 1655 0, 1656 0, 1657 /*124*/ 0, 1658 0, 1659 0, 1660 0, 1661 0, 1662 /*129*/ 0, 1663 0, 1664 0, 1665 0, 1666 0, 1667 /*134*/ 0, 1668 0, 1669 0, 1670 0, 1671 0, 1672 /*139*/ 0, 1673 0, 1674 0, 1675 0, 1676 0, 1677 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ 1678 }; 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ" 1682 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a) 1683 { 1684 PetscFunctionBegin; 1685 *a = ((Mat_MPISBAIJ*)A->data)->A; 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ" 1691 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 1692 { 1693 Mat_MPISBAIJ *b; 1694 PetscErrorCode ierr; 1695 PetscInt i,mbs,Mbs; 1696 1697 PetscFunctionBegin; 1698 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 1699 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1700 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1701 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1702 1703 b = (Mat_MPISBAIJ*)B->data; 1704 mbs = B->rmap->n/bs; 1705 Mbs = B->rmap->N/bs; 1706 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs); 1707 1708 B->rmap->bs = bs; 1709 b->bs2 = bs*bs; 1710 b->mbs = mbs; 1711 b->Mbs = Mbs; 1712 b->nbs = B->cmap->n/bs; 1713 b->Nbs = B->cmap->N/bs; 1714 1715 for (i=0; i<=b->size; i++) { 1716 b->rangebs[i] = B->rmap->range[i]/bs; 1717 } 1718 b->rstartbs = B->rmap->rstart/bs; 1719 b->rendbs = B->rmap->rend/bs; 1720 1721 b->cstartbs = B->cmap->rstart/bs; 1722 b->cendbs = B->cmap->rend/bs; 1723 1724 if (!B->preallocated) { 1725 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1726 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 1727 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr); 1728 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 1729 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1730 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 1731 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 1732 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 1733 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 1734 } 1735 1736 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 1737 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 1738 1739 B->preallocated = PETSC_TRUE; 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ" 1745 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 1746 { 1747 PetscInt m,rstart,cstart,cend; 1748 PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 1749 const PetscInt *JJ =0; 1750 PetscScalar *values=0; 1751 PetscErrorCode ierr; 1752 1753 PetscFunctionBegin; 1754 if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 1755 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 1756 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 1757 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 1758 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 1759 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 1760 m = B->rmap->n/bs; 1761 rstart = B->rmap->rstart/bs; 1762 cstart = B->cmap->rstart/bs; 1763 cend = B->cmap->rend/bs; 1764 1765 if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 1766 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 1767 for (i=0; i<m; i++) { 1768 nz = ii[i+1] - ii[i]; 1769 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 1770 nz_max = PetscMax(nz_max,nz); 1771 JJ = jj + ii[i]; 1772 for (j=0; j<nz; j++) { 1773 if (*JJ >= cstart) break; 1774 JJ++; 1775 } 1776 d = 0; 1777 for (; j<nz; j++) { 1778 if (*JJ++ >= cend) break; 1779 d++; 1780 } 1781 d_nnz[i] = d; 1782 o_nnz[i] = nz - d; 1783 } 1784 ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1785 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 1786 1787 values = (PetscScalar*)V; 1788 if (!values) { 1789 ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 1790 ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 1791 } 1792 for (i=0; i<m; i++) { 1793 PetscInt row = i + rstart; 1794 PetscInt ncols = ii[i+1] - ii[i]; 1795 const PetscInt *icols = jj + ii[i]; 1796 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 1797 ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 1798 } 1799 1800 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 1801 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1802 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1803 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 /*MC 1808 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1809 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1810 the matrix is stored. 1811 1812 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 1813 can call MatSetOption(Mat, MAT_HERMITIAN); 1814 1815 Options Database Keys: 1816 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions() 1817 1818 Level: beginner 1819 1820 .seealso: MatCreateMPISBAIJ 1821 M*/ 1822 1823 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*); 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatCreate_MPISBAIJ" 1827 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 1828 { 1829 Mat_MPISBAIJ *b; 1830 PetscErrorCode ierr; 1831 PetscBool flg = PETSC_FALSE; 1832 1833 PetscFunctionBegin; 1834 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 1835 B->data = (void*)b; 1836 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1837 1838 B->ops->destroy = MatDestroy_MPISBAIJ; 1839 B->ops->view = MatView_MPISBAIJ; 1840 B->assembled = PETSC_FALSE; 1841 B->insertmode = NOT_SET_VALUES; 1842 1843 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 1844 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 1845 1846 /* build local table of row and column ownerships */ 1847 ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr); 1848 1849 /* build cache for off array entries formed */ 1850 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 1851 1852 b->donotstash = PETSC_FALSE; 1853 b->colmap = NULL; 1854 b->garray = NULL; 1855 b->roworiented = PETSC_TRUE; 1856 1857 /* stuff used in block assembly */ 1858 b->barray = 0; 1859 1860 /* stuff used for matrix vector multiply */ 1861 b->lvec = 0; 1862 b->Mvctx = 0; 1863 b->slvec0 = 0; 1864 b->slvec0b = 0; 1865 b->slvec1 = 0; 1866 b->slvec1a = 0; 1867 b->slvec1b = 0; 1868 b->sMvctx = 0; 1869 1870 /* stuff for MatGetRow() */ 1871 b->rowindices = 0; 1872 b->rowvalues = 0; 1873 b->getrowactive = PETSC_FALSE; 1874 1875 /* hash table stuff */ 1876 b->ht = 0; 1877 b->hd = 0; 1878 b->ht_size = 0; 1879 b->ht_flag = PETSC_FALSE; 1880 b->ht_fact = 0; 1881 b->ht_total_ct = 0; 1882 b->ht_insert_ct = 0; 1883 1884 /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 1885 b->ijonly = PETSC_FALSE; 1886 1887 b->in_loc = 0; 1888 b->v_loc = 0; 1889 b->n_loc = 0; 1890 1891 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 1892 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 1893 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 1894 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr); 1895 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr); 1896 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr); 1897 #if defined(PETSC_HAVE_ELEMENTAL) 1898 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr); 1899 #endif 1900 1901 B->symmetric = PETSC_TRUE; 1902 B->structurally_symmetric = PETSC_TRUE; 1903 B->symmetric_set = PETSC_TRUE; 1904 B->structurally_symmetric_set = PETSC_TRUE; 1905 1906 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr); 1907 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr); 1908 ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 1909 if (flg) { 1910 PetscReal fact = 1.39; 1911 ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 1912 ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 1913 if (fact <= 1.0) fact = 1.39; 1914 ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 1915 ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 1916 } 1917 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 /*MC 1922 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 1923 1924 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator, 1925 and MATMPISBAIJ otherwise. 1926 1927 Options Database Keys: 1928 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 1929 1930 Level: beginner 1931 1932 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ 1933 M*/ 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatMPISBAIJSetPreallocation" 1937 /*@C 1938 MatMPISBAIJSetPreallocation - For good matrix assembly performance 1939 the user should preallocate the matrix storage by setting the parameters 1940 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1941 performance can be increased by more than a factor of 50. 1942 1943 Collective on Mat 1944 1945 Input Parameters: 1946 + B - the matrix 1947 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 1948 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 1949 . d_nz - number of block nonzeros per block row in diagonal portion of local 1950 submatrix (same for all local rows) 1951 . d_nnz - array containing the number of block nonzeros in the various block rows 1952 in the upper triangular and diagonal part of the in diagonal portion of the local 1953 (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room 1954 for the diagonal entry and set a value even if it is zero. 1955 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 1956 submatrix (same for all local rows). 1957 - o_nnz - array containing the number of nonzeros in the various block rows of the 1958 off-diagonal portion of the local submatrix that is right of the diagonal 1959 (possibly different for each block row) or NULL. 1960 1961 1962 Options Database Keys: 1963 . -mat_no_unroll - uses code that does not unroll the loops in the 1964 block calculations (much slower) 1965 . -mat_block_size - size of the blocks to use 1966 1967 Notes: 1968 1969 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 1970 than it must be used on all processors that share the object for that argument. 1971 1972 If the *_nnz parameter is given then the *_nz parameter is ignored 1973 1974 Storage Information: 1975 For a square global matrix we define each processor's diagonal portion 1976 to be its local rows and the corresponding columns (a square submatrix); 1977 each processor's off-diagonal portion encompasses the remainder of the 1978 local matrix (a rectangular submatrix). 1979 1980 The user can specify preallocated storage for the diagonal part of 1981 the local submatrix with either d_nz or d_nnz (not both). Set 1982 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 1983 memory allocation. Likewise, specify preallocated storage for the 1984 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1985 1986 You can call MatGetInfo() to get information on how effective the preallocation was; 1987 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1988 You can also run with the option -info and look for messages with the string 1989 malloc in them to see if additional memory allocation was needed. 1990 1991 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1992 the figure below we depict these three local rows and all columns (0-11). 1993 1994 .vb 1995 0 1 2 3 4 5 6 7 8 9 10 11 1996 -------------------------- 1997 row 3 |. . . d d d o o o o o o 1998 row 4 |. . . d d d o o o o o o 1999 row 5 |. . . d d d o o o o o o 2000 -------------------------- 2001 .ve 2002 2003 Thus, any entries in the d locations are stored in the d (diagonal) 2004 submatrix, and any entries in the o locations are stored in the 2005 o (off-diagonal) submatrix. Note that the d matrix is stored in 2006 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2007 2008 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2009 plus the diagonal part of the d matrix, 2010 and o_nz should indicate the number of block nonzeros per row in the o matrix 2011 2012 In general, for PDE problems in which most nonzeros are near the diagonal, 2013 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2014 or you will get TERRIBLE performance; see the users' manual chapter on 2015 matrices. 2016 2017 Level: intermediate 2018 2019 .keywords: matrix, block, aij, compressed row, sparse, parallel 2020 2021 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership() 2022 @*/ 2023 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2024 { 2025 PetscErrorCode ierr; 2026 2027 PetscFunctionBegin; 2028 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2029 PetscValidType(B,1); 2030 PetscValidLogicalCollectiveInt(B,bs,2); 2031 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatCreateSBAIJ" 2037 /*@C 2038 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2039 (block compressed row). For good matrix assembly performance 2040 the user should preallocate the matrix storage by setting the parameters 2041 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2042 performance can be increased by more than a factor of 50. 2043 2044 Collective on MPI_Comm 2045 2046 Input Parameters: 2047 + comm - MPI communicator 2048 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2049 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2050 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2051 This value should be the same as the local size used in creating the 2052 y vector for the matrix-vector product y = Ax. 2053 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2054 This value should be the same as the local size used in creating the 2055 x vector for the matrix-vector product y = Ax. 2056 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2057 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2058 . d_nz - number of block nonzeros per block row in diagonal portion of local 2059 submatrix (same for all local rows) 2060 . d_nnz - array containing the number of block nonzeros in the various block rows 2061 in the upper triangular portion of the in diagonal portion of the local 2062 (possibly different for each block block row) or NULL. 2063 If you plan to factor the matrix you must leave room for the diagonal entry and 2064 set its value even if it is zero. 2065 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2066 submatrix (same for all local rows). 2067 - o_nnz - array containing the number of nonzeros in the various block rows of the 2068 off-diagonal portion of the local submatrix (possibly different for 2069 each block row) or NULL. 2070 2071 Output Parameter: 2072 . A - the matrix 2073 2074 Options Database Keys: 2075 . -mat_no_unroll - uses code that does not unroll the loops in the 2076 block calculations (much slower) 2077 . -mat_block_size - size of the blocks to use 2078 . -mat_mpi - use the parallel matrix data structures even on one processor 2079 (defaults to using SeqBAIJ format on one processor) 2080 2081 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 2082 MatXXXXSetPreallocation() paradgm instead of this routine directly. 2083 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 2084 2085 Notes: 2086 The number of rows and columns must be divisible by blocksize. 2087 This matrix type does not support complex Hermitian operation. 2088 2089 The user MUST specify either the local or global matrix dimensions 2090 (possibly both). 2091 2092 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2093 than it must be used on all processors that share the object for that argument. 2094 2095 If the *_nnz parameter is given then the *_nz parameter is ignored 2096 2097 Storage Information: 2098 For a square global matrix we define each processor's diagonal portion 2099 to be its local rows and the corresponding columns (a square submatrix); 2100 each processor's off-diagonal portion encompasses the remainder of the 2101 local matrix (a rectangular submatrix). 2102 2103 The user can specify preallocated storage for the diagonal part of 2104 the local submatrix with either d_nz or d_nnz (not both). Set 2105 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 2106 memory allocation. Likewise, specify preallocated storage for the 2107 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2108 2109 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2110 the figure below we depict these three local rows and all columns (0-11). 2111 2112 .vb 2113 0 1 2 3 4 5 6 7 8 9 10 11 2114 -------------------------- 2115 row 3 |. . . d d d o o o o o o 2116 row 4 |. . . d d d o o o o o o 2117 row 5 |. . . d d d o o o o o o 2118 -------------------------- 2119 .ve 2120 2121 Thus, any entries in the d locations are stored in the d (diagonal) 2122 submatrix, and any entries in the o locations are stored in the 2123 o (off-diagonal) submatrix. Note that the d matrix is stored in 2124 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format. 2125 2126 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2127 plus the diagonal part of the d matrix, 2128 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2129 In general, for PDE problems in which most nonzeros are near the diagonal, 2130 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2131 or you will get TERRIBLE performance; see the users' manual chapter on 2132 matrices. 2133 2134 Level: intermediate 2135 2136 .keywords: matrix, block, aij, compressed row, sparse, parallel 2137 2138 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ() 2139 @*/ 2140 2141 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2142 { 2143 PetscErrorCode ierr; 2144 PetscMPIInt size; 2145 2146 PetscFunctionBegin; 2147 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2148 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2149 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2150 if (size > 1) { 2151 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr); 2152 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2153 } else { 2154 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 2155 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2156 } 2157 PetscFunctionReturn(0); 2158 } 2159 2160 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatDuplicate_MPISBAIJ" 2163 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2164 { 2165 Mat mat; 2166 Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2167 PetscErrorCode ierr; 2168 PetscInt len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs; 2169 PetscScalar *array; 2170 2171 PetscFunctionBegin; 2172 *newmat = 0; 2173 2174 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2175 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2176 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2177 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2178 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2179 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2180 2181 mat->factortype = matin->factortype; 2182 mat->preallocated = PETSC_TRUE; 2183 mat->assembled = PETSC_TRUE; 2184 mat->insertmode = NOT_SET_VALUES; 2185 2186 a = (Mat_MPISBAIJ*)mat->data; 2187 a->bs2 = oldmat->bs2; 2188 a->mbs = oldmat->mbs; 2189 a->nbs = oldmat->nbs; 2190 a->Mbs = oldmat->Mbs; 2191 a->Nbs = oldmat->Nbs; 2192 2193 2194 a->size = oldmat->size; 2195 a->rank = oldmat->rank; 2196 a->donotstash = oldmat->donotstash; 2197 a->roworiented = oldmat->roworiented; 2198 a->rowindices = 0; 2199 a->rowvalues = 0; 2200 a->getrowactive = PETSC_FALSE; 2201 a->barray = 0; 2202 a->rstartbs = oldmat->rstartbs; 2203 a->rendbs = oldmat->rendbs; 2204 a->cstartbs = oldmat->cstartbs; 2205 a->cendbs = oldmat->cendbs; 2206 2207 /* hash table stuff */ 2208 a->ht = 0; 2209 a->hd = 0; 2210 a->ht_size = 0; 2211 a->ht_flag = oldmat->ht_flag; 2212 a->ht_fact = oldmat->ht_fact; 2213 a->ht_total_ct = 0; 2214 a->ht_insert_ct = 0; 2215 2216 ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 2217 if (oldmat->colmap) { 2218 #if defined(PETSC_USE_CTABLE) 2219 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2220 #else 2221 ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 2222 ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2223 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 2224 #endif 2225 } else a->colmap = 0; 2226 2227 if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2228 ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 2229 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2230 ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 2231 } else a->garray = 0; 2232 2233 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 2234 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2235 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2236 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2237 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2238 2239 ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr); 2240 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2241 ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr); 2242 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2243 2244 ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr); 2245 ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr); 2246 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr); 2247 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr); 2248 ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr); 2249 ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr); 2250 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr); 2251 ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr); 2252 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr); 2253 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr); 2254 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr); 2255 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr); 2256 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr); 2257 2258 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2259 ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr); 2260 a->sMvctx = oldmat->sMvctx; 2261 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr); 2262 2263 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2264 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2265 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2266 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2267 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2268 *newmat = mat; 2269 PetscFunctionReturn(0); 2270 } 2271 2272 #undef __FUNCT__ 2273 #define __FUNCT__ "MatLoad_MPISBAIJ" 2274 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer) 2275 { 2276 PetscErrorCode ierr; 2277 PetscInt i,nz,j,rstart,rend; 2278 PetscScalar *vals,*buf; 2279 MPI_Comm comm; 2280 MPI_Status status; 2281 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs; 2282 PetscInt header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens; 2283 PetscInt *procsnz = 0,jj,*mycols,*ibuf; 2284 PetscInt bs = newmat->rmap->bs,Mbs,mbs,extra_rows; 2285 PetscInt *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2286 PetscInt dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols; 2287 int fd; 2288 2289 PetscFunctionBegin; 2290 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2291 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr); 2292 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2293 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2294 if (bs < 0) bs = 1; 2295 2296 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2297 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2298 if (!rank) { 2299 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2300 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2301 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2302 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ"); 2303 } 2304 2305 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 2306 2307 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2308 M = header[1]; 2309 N = header[2]; 2310 2311 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2312 if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 2313 if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 2314 2315 /* If global sizes are set, check if they are consistent with that given in the file */ 2316 if (sizesset) { 2317 ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 2318 } 2319 if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2320 if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2321 2322 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 2323 2324 /* 2325 This code adds extra rows to make sure the number of rows is 2326 divisible by the blocksize 2327 */ 2328 Mbs = M/bs; 2329 extra_rows = bs - M + bs*(Mbs); 2330 if (extra_rows == bs) extra_rows = 0; 2331 else Mbs++; 2332 if (extra_rows &&!rank) { 2333 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 2334 } 2335 2336 /* determine ownership of all rows */ 2337 if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 2338 mbs = Mbs/size + ((Mbs % size) > rank); 2339 m = mbs*bs; 2340 } else { /* User Set */ 2341 m = newmat->rmap->n; 2342 mbs = m/bs; 2343 } 2344 ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 2345 ierr = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr); 2346 ierr = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2347 rowners[0] = 0; 2348 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2349 for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2350 rstart = rowners[rank]; 2351 rend = rowners[rank+1]; 2352 2353 /* distribute row lengths to all processors */ 2354 ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr); 2355 if (!rank) { 2356 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 2357 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2358 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2359 ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 2360 for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2361 ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2362 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2363 } else { 2364 ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr); 2365 } 2366 2367 if (!rank) { /* procs[0] */ 2368 /* calculate the number of nonzeros on each processor */ 2369 ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 2370 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2371 for (i=0; i<size; i++) { 2372 for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2373 procsnz[i] += rowlengths[j]; 2374 } 2375 } 2376 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2377 2378 /* determine max buffer needed and allocate it */ 2379 maxnz = 0; 2380 for (i=0; i<size; i++) { 2381 maxnz = PetscMax(maxnz,procsnz[i]); 2382 } 2383 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2384 2385 /* read in my part of the matrix column indices */ 2386 nz = procsnz[0]; 2387 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2388 mycols = ibuf; 2389 if (size == 1) nz -= extra_rows; 2390 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2391 if (size == 1) { 2392 for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 2393 } 2394 2395 /* read in every ones (except the last) and ship off */ 2396 for (i=1; i<size-1; i++) { 2397 nz = procsnz[i]; 2398 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2399 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2400 } 2401 /* read in the stuff for the last proc */ 2402 if (size != 1) { 2403 nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2404 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2405 for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2406 ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 2407 } 2408 ierr = PetscFree(cols);CHKERRQ(ierr); 2409 } else { /* procs[i], i>0 */ 2410 /* determine buffer space needed for message */ 2411 nz = 0; 2412 for (i=0; i<m; i++) nz += locrowlens[i]; 2413 ierr = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr); 2414 mycols = ibuf; 2415 /* receive message of column indices*/ 2416 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2417 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2418 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2419 } 2420 2421 /* loop over local rows, determining number of off diagonal entries */ 2422 ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 2423 ierr = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 2424 ierr = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2425 ierr = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2426 ierr = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr); 2427 rowcount = 0; 2428 nzcount = 0; 2429 for (i=0; i<mbs; i++) { 2430 dcount = 0; 2431 odcount = 0; 2432 for (j=0; j<bs; j++) { 2433 kmax = locrowlens[rowcount]; 2434 for (k=0; k<kmax; k++) { 2435 tmp = mycols[nzcount++]/bs; /* block col. index */ 2436 if (!mask[tmp]) { 2437 mask[tmp] = 1; 2438 if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */ 2439 else masked1[dcount++] = tmp; /* entry in diag portion */ 2440 } 2441 } 2442 rowcount++; 2443 } 2444 2445 dlens[i] = dcount; /* d_nzz[i] */ 2446 odlens[i] = odcount; /* o_nzz[i] */ 2447 2448 /* zero out the mask elements we set */ 2449 for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2450 for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2451 } 2452 if (!sizesset) { 2453 ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 2454 } 2455 ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 2456 ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2457 2458 if (!rank) { 2459 ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr); 2460 /* read in my part of the matrix numerical values */ 2461 nz = procsnz[0]; 2462 vals = buf; 2463 mycols = ibuf; 2464 if (size == 1) nz -= extra_rows; 2465 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2466 if (size == 1) { 2467 for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 2468 } 2469 2470 /* insert into matrix */ 2471 jj = rstart*bs; 2472 for (i=0; i<m; i++) { 2473 ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2474 mycols += locrowlens[i]; 2475 vals += locrowlens[i]; 2476 jj++; 2477 } 2478 2479 /* read in other processors (except the last one) and ship out */ 2480 for (i=1; i<size-1; i++) { 2481 nz = procsnz[i]; 2482 vals = buf; 2483 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2484 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2485 } 2486 /* the last proc */ 2487 if (size != 1) { 2488 nz = procsnz[i] - extra_rows; 2489 vals = buf; 2490 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2491 for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2492 ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 2493 } 2494 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2495 2496 } else { 2497 /* receive numeric values */ 2498 ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr); 2499 2500 /* receive message of values*/ 2501 vals = buf; 2502 mycols = ibuf; 2503 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 2504 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2505 if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2506 2507 /* insert into matrix */ 2508 jj = rstart*bs; 2509 for (i=0; i<m; i++) { 2510 ierr = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2511 mycols += locrowlens[i]; 2512 vals += locrowlens[i]; 2513 jj++; 2514 } 2515 } 2516 2517 ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2518 ierr = PetscFree(buf);CHKERRQ(ierr); 2519 ierr = PetscFree(ibuf);CHKERRQ(ierr); 2520 ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 2521 ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 2522 ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 2523 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2524 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2525 PetscFunctionReturn(0); 2526 } 2527 2528 #undef __FUNCT__ 2529 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor" 2530 /*XXXXX@ 2531 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2532 2533 Input Parameters: 2534 . mat - the matrix 2535 . fact - factor 2536 2537 Not Collective on Mat, each process can have a different hash factor 2538 2539 Level: advanced 2540 2541 Notes: 2542 This can also be set by the command line option: -mat_use_hash_table fact 2543 2544 .keywords: matrix, hashtable, factor, HT 2545 2546 .seealso: MatSetOption() 2547 @XXXXX*/ 2548 2549 2550 #undef __FUNCT__ 2551 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ" 2552 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[]) 2553 { 2554 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 2555 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(a->B)->data; 2556 PetscReal atmp; 2557 PetscReal *work,*svalues,*rvalues; 2558 PetscErrorCode ierr; 2559 PetscInt i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol; 2560 PetscMPIInt rank,size; 2561 PetscInt *rowners_bs,dest,count,source; 2562 PetscScalar *va; 2563 MatScalar *ba; 2564 MPI_Status stat; 2565 2566 PetscFunctionBegin; 2567 if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov"); 2568 ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr); 2569 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2570 2571 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2572 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2573 2574 bs = A->rmap->bs; 2575 mbs = a->mbs; 2576 Mbs = a->Mbs; 2577 ba = b->a; 2578 bi = b->i; 2579 bj = b->j; 2580 2581 /* find ownerships */ 2582 rowners_bs = A->rmap->range; 2583 2584 /* each proc creates an array to be distributed */ 2585 ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr); 2586 ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr); 2587 2588 /* row_max for B */ 2589 if (rank != size-1) { 2590 for (i=0; i<mbs; i++) { 2591 ncols = bi[1] - bi[0]; bi++; 2592 brow = bs*i; 2593 for (j=0; j<ncols; j++) { 2594 bcol = bs*(*bj); 2595 for (kcol=0; kcol<bs; kcol++) { 2596 col = bcol + kcol; /* local col index */ 2597 col += rowners_bs[rank+1]; /* global col index */ 2598 for (krow=0; krow<bs; krow++) { 2599 atmp = PetscAbsScalar(*ba); ba++; 2600 row = brow + krow; /* local row index */ 2601 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2602 if (work[col] < atmp) work[col] = atmp; 2603 } 2604 } 2605 bj++; 2606 } 2607 } 2608 2609 /* send values to its owners */ 2610 for (dest=rank+1; dest<size; dest++) { 2611 svalues = work + rowners_bs[dest]; 2612 count = rowners_bs[dest+1]-rowners_bs[dest]; 2613 ierr = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2614 } 2615 } 2616 2617 /* receive values */ 2618 if (rank) { 2619 rvalues = work; 2620 count = rowners_bs[rank+1]-rowners_bs[rank]; 2621 for (source=0; source<rank; source++) { 2622 ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr); 2623 /* process values */ 2624 for (i=0; i<count; i++) { 2625 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2626 } 2627 } 2628 } 2629 2630 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2631 ierr = PetscFree(work);CHKERRQ(ierr); 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #undef __FUNCT__ 2636 #define __FUNCT__ "MatSOR_MPISBAIJ" 2637 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2638 { 2639 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2640 PetscErrorCode ierr; 2641 PetscInt mbs=mat->mbs,bs=matin->rmap->bs; 2642 PetscScalar *x,*ptr,*from; 2643 Vec bb1; 2644 const PetscScalar *b; 2645 2646 PetscFunctionBegin; 2647 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2648 if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2649 2650 if (flag == SOR_APPLY_UPPER) { 2651 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2652 PetscFunctionReturn(0); 2653 } 2654 2655 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2656 if (flag & SOR_ZERO_INITIAL_GUESS) { 2657 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2658 its--; 2659 } 2660 2661 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2662 while (its--) { 2663 2664 /* lower triangular part: slvec0b = - B^T*xx */ 2665 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2666 2667 /* copy xx into slvec0a */ 2668 ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2669 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2670 ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2671 ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr); 2672 2673 ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr); 2674 2675 /* copy bb into slvec1a */ 2676 ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2677 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2678 ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2679 ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr); 2680 2681 /* set slvec1b = 0 */ 2682 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2683 2684 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2685 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2686 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2687 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2688 2689 /* upper triangular part: bb1 = bb1 - B*x */ 2690 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr); 2691 2692 /* local diagonal sweep */ 2693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2694 } 2695 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2696 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2697 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2698 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2699 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2700 } else if (flag & SOR_EISENSTAT) { 2701 Vec xx1; 2702 PetscBool hasop; 2703 const PetscScalar *diag; 2704 PetscScalar *sl,scale = (omega - 2.0)/omega; 2705 PetscInt i,n; 2706 2707 if (!mat->xx1) { 2708 ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr); 2709 ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr); 2710 } 2711 xx1 = mat->xx1; 2712 bb1 = mat->bb1; 2713 2714 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 2715 2716 if (!mat->diag) { 2717 /* this is wrong for same matrix with new nonzero values */ 2718 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 2719 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 2720 } 2721 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 2722 2723 if (hasop) { 2724 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 2725 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2726 } else { 2727 /* 2728 These two lines are replaced by code that may be a bit faster for a good compiler 2729 ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr); 2730 ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr); 2731 */ 2732 ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2733 ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2734 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 2735 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2736 ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr); 2737 if (omega == 1.0) { 2738 for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i]; 2739 ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr); 2740 } else { 2741 for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i]; 2742 ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr); 2743 } 2744 ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr); 2745 ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr); 2746 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 2747 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2748 } 2749 2750 /* multiply off-diagonal portion of matrix */ 2751 ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr); 2752 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr); 2753 ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr); 2754 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2755 ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr); 2756 ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr); 2757 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2758 ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2759 ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2760 ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr); 2761 2762 /* local sweep */ 2763 ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 2764 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 2765 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2766 PetscFunctionReturn(0); 2767 } 2768 2769 #undef __FUNCT__ 2770 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm" 2771 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2772 { 2773 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 2774 PetscErrorCode ierr; 2775 Vec lvec1,bb1; 2776 2777 PetscFunctionBegin; 2778 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 2779 if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2780 2781 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2782 if (flag & SOR_ZERO_INITIAL_GUESS) { 2783 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 2784 its--; 2785 } 2786 2787 ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr); 2788 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 2789 while (its--) { 2790 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2791 2792 /* lower diagonal part: bb1 = bb - B^T*xx */ 2793 ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr); 2794 ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr); 2795 2796 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2797 ierr = VecCopy(bb,bb1);CHKERRQ(ierr); 2798 ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2799 2800 /* upper diagonal part: bb1 = bb1 - B*x */ 2801 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2802 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 2803 2804 ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2805 2806 /* diagonal sweep */ 2807 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr); 2808 } 2809 ierr = VecDestroy(&lvec1);CHKERRQ(ierr); 2810 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2811 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format"); 2812 PetscFunctionReturn(0); 2813 } 2814 2815 #undef __FUNCT__ 2816 #define __FUNCT__ "MatCreateMPISBAIJWithArrays" 2817 /*@ 2818 MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard 2819 CSR format the local rows. 2820 2821 Collective on MPI_Comm 2822 2823 Input Parameters: 2824 + comm - MPI communicator 2825 . bs - the block size, only a block size of 1 is supported 2826 . m - number of local rows (Cannot be PETSC_DECIDE) 2827 . n - This value should be the same as the local size used in creating the 2828 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2829 calculated if N is given) For square matrices n is almost always m. 2830 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2831 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2832 . i - row indices 2833 . j - column indices 2834 - a - matrix values 2835 2836 Output Parameter: 2837 . mat - the matrix 2838 2839 Level: intermediate 2840 2841 Notes: 2842 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2843 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2844 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2845 2846 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2847 2848 .keywords: matrix, aij, compressed row, sparse, parallel 2849 2850 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2851 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 2852 @*/ 2853 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2854 { 2855 PetscErrorCode ierr; 2856 2857 2858 PetscFunctionBegin; 2859 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2860 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2861 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2862 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 2863 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 2864 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 2865 PetscFunctionReturn(0); 2866 } 2867 2868 2869 #undef __FUNCT__ 2870 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR" 2871 /*@C 2872 MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2873 (the default parallel PETSc format). 2874 2875 Collective on MPI_Comm 2876 2877 Input Parameters: 2878 + B - the matrix 2879 . bs - the block size 2880 . i - the indices into j for the start of each local row (starts with zero) 2881 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2882 - v - optional values in the matrix 2883 2884 Level: developer 2885 2886 .keywords: matrix, aij, compressed row, sparse, parallel 2887 2888 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ 2889 @*/ 2890 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2891 { 2892 PetscErrorCode ierr; 2893 2894 PetscFunctionBegin; 2895 ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2896 PetscFunctionReturn(0); 2897 } 2898 2899 #undef __FUNCT__ 2900 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ" 2901 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2902 { 2903 PetscErrorCode ierr; 2904 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 2905 PetscInt *indx; 2906 PetscScalar *values; 2907 2908 PetscFunctionBegin; 2909 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2910 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2911 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data; 2912 PetscInt *dnz,*onz,sum,bs,cbs,mbs,Nbs; 2913 PetscInt *bindx,rmax=a->rmax,j; 2914 2915 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2916 mbs = m/bs; Nbs = N/cbs; 2917 if (n == PETSC_DECIDE) { 2918 ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 2919 } 2920 /* Check sum(n) = Nbs */ 2921 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2922 if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 2923 2924 ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2925 rstart -= mbs; 2926 2927 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 2928 ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 2929 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2930 for (i=0; i<mbs; i++) { 2931 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 2932 nnz = nnz/bs; 2933 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 2934 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 2935 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 2936 } 2937 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2938 ierr = PetscFree(bindx);CHKERRQ(ierr); 2939 2940 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2941 ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2942 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 2943 ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr); 2944 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2945 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2946 } 2947 2948 /* numeric phase */ 2949 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 2950 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 2951 2952 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr); 2953 for (i=0; i<m; i++) { 2954 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2955 Ii = i + rstart; 2956 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2957 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2958 } 2959 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr); 2960 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2961 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2962 PetscFunctionReturn(0); 2963 } 2964